Message ID: 561D38F7.809@arm.com
State: New
Ping.

BR,
Andre

On 13/10/15 18:01, Andre Vieira wrote:
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
> (https://git.linaro.org/toolchain/cortex-strings.git), which was
> contributed by ARM under Free BSD license.
>
> The new aeabi_idiv routine is used to replace the one in
> libgcc/config/arm/lib1funcs.S. This replacement happens within the
> Thumb1 wrapper. The new routine is under LGPLv3 license.
>
> The main advantage of this version is that it can improve the
> performance of the aeabi_idiv function for Thumb1. This solution will
> also increase the code size. So it will only be used if
> __OPTIMIZE_SIZE__ is not defined.
>
> Make check passed for armv6-m.
>
> libgcc/ChangeLog:
> 2015-08-10  Hale Wang  <hale.wang@arm.com>
>             Andre Vieira  <andre.simoesdiasvieira@arm.com>
>
>   * config/arm/lib1funcs.S: Add new wrapper.
Ping.

On 27/10/15 17:03, Andre Vieira wrote:
> Ping.

[...]
Ping for GCC-7, patch applies cleanly, passed make check for cortex-m0.

Might be worth mentioning that this patch has been used in three
releases of the GNU ARM embedded toolchain, using GCC versions 4.9 and
5, and no issues have been reported so far.

On 25/01/16 17:15, Andre Vieira (lists) wrote:
> Ping.

[...]
Ping.

On 19/05/16 11:19, Andre Vieira (lists) wrote:
> Ping for GCC-7, patch applies cleanly, passed make check for cortex-m0.

[...]
Ping.

On 08/06/16 15:35, Andre Vieira (lists) wrote:
> Ping.

[...]
On 13/10/15 18:01, Andre Vieira wrote:
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
> (https://git.linaro.org/toolchain/cortex-strings.git), which was
> contributed by ARM under Free BSD license.
>
> The new aeabi_idiv routine is used to replace the one in
> libgcc/config/arm/lib1funcs.S. This replacement happens within the
> Thumb1 wrapper. The new routine is under LGPLv3 license.

This is not under LGPLv3. It is under GPLv3 with the runtime library
exception license; there's a difference. Assuming your licensing
expectation is ok, read on for more of a review.

> The main advantage of this version is that it can improve the
> performance of the aeabi_idiv function for Thumb1. This solution will
> also increase the code size. So it will only be used if
> __OPTIMIZE_SIZE__ is not defined.
>
> Make check passed for armv6-m.
>
> libgcc/ChangeLog:
> 2015-08-10  Hale Wang  <hale.wang@arm.com>
>             Andre Vieira  <andre.simoesdiasvieira@arm.com>
>
>   * config/arm/lib1funcs.S: Add new wrapper.
>
> 0001-integer-division.patch
>
> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
> From: Andre Simoes Dias Vieira <andsim01@arm.com>
> Date: Fri, 21 Aug 2015 14:23:28 +0100
> Subject: [PATCH] new wrapper idivmod
>
> ---
>  libgcc/config/arm/lib1funcs.S | 250 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 217 insertions(+), 33 deletions(-)
>
> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>  #ifdef __ARM_EABI__
>  .macro THUMB_LDIV0 name signed
>  #if defined(__ARM_ARCH_6M__)

[...]

> -	@ We know we are not on armv4t, so pop pc is safe.
> -	pop	{r0, r1, pc}
> -	.align	2
> -4:
> -	.word	__aeabi_idiv0 - 4b
> +	bl	SYM(__aeabi_idiv0)
> +	pop	{r1, pc}
> +

I'd still retain the comment about pop pc here because there's often a
misconception of merging armv4t and armv6m code.

[...]

> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
> +/* If performance is preferred, the following functions are provided.  */
> +

Comment above #if please and also check elsewhere in patch.

[rest of the quoted patch trimmed; the full patch is attached below]

Otherwise OK if no regressions and the following request passes.
Can you ensure that libgcc for one ARM-state and one Thumb-2-state
non-v6m configuration gives identical binaries with and without your
patch?

regards
Ramana
From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira <andsim01@arm.com>
Date: Fri, 21 Aug 2015 14:23:28 +0100
Subject: [PATCH] new wrapper idivmod

---
 libgcc/config/arm/lib1funcs.S | 250 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 217 insertions(+), 33 deletions(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #if defined(__ARM_ARCH_6M__)
-	.ifc	\signed, unsigned
-	cmp	r0, #0
-	beq	1f
-	mov	r0, #0
-	mvn	r0, r0		@ 0xffffffff
-1:
-	.else
-	cmp	r0, #0
-	beq	2f
-	blt	3f
+
+	push	{r0, lr}
 	mov	r0, #0
-	mvn	r0, r0
-	lsr	r0, r0, #1	@ 0x7fffffff
-	b	2f
-3:	mov	r0, #0x80
-	lsl	r0, r0, #24	@ 0x80000000
-2:
-	.endif
-	push	{r0, r1, r2}
-	ldr	r0, 4f
-	adr	r1, 4f
-	add	r0, r1
-	str	r0, [sp, #8]
-	@ We know we are not on armv4t, so pop pc is safe.
-	pop	{r0, r1, pc}
-	.align	2
-4:
-	.word	__aeabi_idiv0 - 4b
+	bl	SYM(__aeabi_idiv0)
+	pop	{r1, pc}
+
 #elif defined(__thumb2__)
 	.syntax unified
 	.ifc	\signed, unsigned
@@ -945,7 +923,170 @@ LSYM(Lover7):
 	add	dividend, work
   .endif
 LSYM(Lgot_result):
-.endm
+.endm
+
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+/* If performance is preferred, the following functions are provided.  */
+
+/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
+.macro BranchToDiv n, label
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	blo	\label
+.endm
+
+/* Body of div(n).  Shift the divisor in n bits and compare the divisor
+   and dividend.  Update the dividend as the substruction result.  */
+.macro DoDiv n
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	bcc	1f
+	lsl	curbit, divisor, \n
+	sub	dividend, dividend, curbit
+
+1:	adc	result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+	mov	result, #0
+	BranchToDiv #1, LSYM(Lthumb1_div1)
+	BranchToDiv #4, LSYM(Lthumb1_div4)
+	BranchToDiv #8, LSYM(Lthumb1_div8)
+	BranchToDiv #12, LSYM(Lthumb1_div12)
+	BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+	mov	result, #0xff
+	lsl	divisor, divisor, #8
+	rev	result, result
+	lsr	curbit, dividend, #16
+	cmp	curbit, divisor
+	blo	1f
+	asr	result, #8
+	lsl	divisor, divisor, #8
+	beq	LSYM(Ldivbyzero_waypoint)
+
+1:	lsr	curbit, dividend, #12
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div12)
+	b	LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+	lsr	divisor, divisor, #8
+LSYM(Lthumb1_div16):
+	Dodiv	#15
+	Dodiv	#14
+	Dodiv	#13
+	Dodiv	#12
+LSYM(Lthumb1_div12):
+	Dodiv	#11
+	Dodiv	#10
+	Dodiv	#9
+	Dodiv	#8
+	bcs	LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+	Dodiv	#7
+	Dodiv	#6
+	Dodiv	#5
+LSYM(Lthumb1_div5):
+	Dodiv	#4
+LSYM(Lthumb1_div4):
+	Dodiv	#3
+LSYM(Lthumb1_div3):
+	Dodiv	#2
+LSYM(Lthumb1_div2):
+	Dodiv	#1
+LSYM(Lthumb1_div1):
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	adc	result, result
+	cpy	dividend, result
+	RET
+
+LSYM(Ldivbyzero_waypoint):
+	b	LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar with
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+	lsr	result, divisor, #31
+	beq	1f
+	neg	divisor, divisor
+
+1:	asr	curbit, dividend, #32
+	bcc	2f
+	neg	dividend, dividend
+
+2:	eor	curbit, result
+	mov	result, #0
+	cpy	ip, curbit
+	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+	mov	result, #0xfc
+	lsl	divisor, divisor, #6
+	rev	result, result
+	lsr	curbit, dividend, #8
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	beq	LSYM(Ldivbyzero_negative)
+	asr	result, result, #6
+	b	LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_negative_loop):
+	lsr	divisor, divisor, #6
+LSYM(Lthumb1_div_negative8):
+	DoDiv	#7
+	DoDiv	#6
+	DoDiv	#5
+	DoDiv	#4
+LSYM(Lthumb1_div_negative4):
+	DoDiv	#3
+	DoDiv	#2
+	bcs	LSYM(Lthumb1_div_negative_loop)
+	DoDiv	#1
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	cpy	curbit, ip
+	adc	result, result
+	asr	curbit, curbit, #1
+	cpy	dividend, result
+	bcc	2f
+	neg	dividend, dividend
+	cmp	curbit, #0
+
+2:	bpl	3f
+	neg	divisor, divisor
+
+3:	RET
+
+LSYM(Ldivbyzero_negative):
+	cpy	curbit, ip
+	asr	curbit, curbit, #1
+	bcc	LSYM(Ldiv0)
+	neg	dividend, dividend
+.endm
+#endif /* ARM Thumb version.  */
+
 /* ------------------------------------------------------------------------ */
 /*		Start of the Real Functions				    */
 /* ------------------------------------------------------------------------ */
@@ -955,6 +1096,7 @@ LSYM(Lgot_result):
 
 	FUNC_START udivsi3
 	FUNC_ALIAS aeabi_uidiv udivsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
@@ -972,6 +1114,14 @@ LSYM(udivsi3_skip_div0_test):
 	pop	{ work }
 	RET
 
+#else
+	/* Implementation of aeabi_uidiv for ARMv6m.  This version is only
+	   used in ARMv6-M when we need an efficient implementation.  */
+LSYM(udivsi3_skip_div0_test):
+	THUMB1_Div_Positive
+
+#endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
 	ARM_FUNC_START udivsi3
@@ -1023,12 +1173,21 @@ LSYM(udivsi3_skip_div0_test):
 	FUNC_START aeabi_uidivmod
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
 	push	{r0, r1, lr}
 	bl	LSYM(udivsi3_skip_div0_test)
 	POP	{r1, r2, r3}
 	mul	r2, r0
 	sub	r1, r1, r2
 	bx	r3
+# else
+	/* Both the quotient and remainder are calculated simultaneously
+	   in THUMB1_Div_Positive.  There is no need to calculate the
+	   remainder again here.  */
+	b	LSYM(udivsi3_skip_div0_test)
+	RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 	ARM_FUNC_START aeabi_uidivmod
 	cmp	r1, #0
@@ -1084,7 +1243,7 @@ LSYM(Lover10):
 	RET
 
 #else  /* ARM version.  */
-
+
 	FUNC_START umodsi3
 
 	subs	r2, r1, #1		@ compare divisor with 1
@@ -1109,8 +1268,9 @@ LSYM(Lover10):
 
 #if defined(__prefer_thumb__)
 
-	FUNC_START divsi3
+	FUNC_START divsi3
 	FUNC_ALIAS aeabi_idiv divsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
@@ -1133,7 +1293,7 @@ LSYM(Lover11):
 	blo	LSYM(Lgot_result)
 
 	THUMB_DIV_MOD_BODY 0
-
+
 	mov	r0, result
 	mov	work, ip
 	cmp	work, #0
@@ -1142,6 +1302,21 @@ LSYM(Lover11):
 LSYM(Lover12):
 	pop	{ work }
 	RET
+#else
+	/* Implementation of aeabi_idiv for ARMv6m.  This version is only
+	   used in ARMv6-M when we need an efficient implementation.  */
+LSYM(divsi3_skip_div0_test):
+	cpy	curbit, dividend
+	orr	curbit, divisor
+	bmi	LSYM(Lthumb1_div_negative)
+
+LSYM(Lthumb1_div_positive):
+	THUMB1_Div_Positive
+
+LSYM(Lthumb1_div_negative):
+	THUMB1_Div_Negative
+
+#endif /* __OPTIMIZE_SIZE__ */
 
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
@@ -1154,8 +1329,8 @@ LSYM(Lover12):
 	RET
 
 #else /* ARM/Thumb-2 version.  */
-
-	ARM_FUNC_START divsi3
+
+	ARM_FUNC_START divsi3
 	ARM_FUNC_ALIAS aeabi_idiv divsi3
 
 	cmp	r1, #0
@@ -1209,12 +1384,21 @@ LSYM(divsi3_skip_div0_test):
 	FUNC_START aeabi_idivmod
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
 	push	{r0, r1, lr}
 	bl	LSYM(divsi3_skip_div0_test)
 	POP	{r1, r2, r3}
 	mul	r2, r0
 	sub	r1, r1, r2
 	bx	r3
+# else
+	/* Both the quotient and remainder are calculated simultaneously
+	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
+	   need to calculate the remainder again here.  */
+	b	LSYM(divsi3_skip_div0_test)
+	RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 	ARM_FUNC_START aeabi_idivmod
 	cmp	r1, #0
-- 
1.9.1