Message ID: 5783D2D8.2090707@arm.com
State: New
On 11/07/16 18:09, Andre Vieira (lists) wrote:
> On 06/07/16 11:52, Andre Vieira (lists) wrote:
>> On 01/07/16 14:40, Ramana Radhakrishnan wrote:
>>>
>>> On 13/10/15 18:01, Andre Vieira wrote:
>>>> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
>>>> (https://git.linaro.org/toolchain/cortex-strings.git), which was
>>>> contributed by ARM under Free BSD license.
>>>>
>>>> The new aeabi_idiv routine is used to replace the one in
>>>> libgcc/config/arm/lib1funcs.S. This replacement happens within the
>>>> Thumb1 wrapper. The new routine is under LGPLv3 license.
>>>
>>> This is not under LGPLv3. It is under GPLv3 with the runtime library
>>> exception license; there's a difference. Assuming your licensing
>>> expectation is OK ... read on for more of a review.
>>>
>>>> The main advantage of this version is that it can improve the
>>>> performance of the aeabi_idiv function for Thumb1. This solution
>>>> will also increase the code size, so it will only be used if
>>>> __OPTIMIZE_SIZE__ is not defined.
>>>>
>>>> Make check passed for armv6-m.
>>>>
>>>> libgcc/ChangeLog:
>>>> 2015-08-10  Hale Wang  <hale.wang@arm.com>
>>>>             Andre Vieira  <andre.simoesdiasvieira@arm.com>
>>>>
>>>>         * config/arm/lib1funcs.S: Add new wrapper.
>>>>
>>>> 0001-integer-division.patch
>>>>
>>>> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
>>>> From: Andre Simoes Dias Vieira <andsim01@arm.com>
>>>> Date: Fri, 21 Aug 2015 14:23:28 +0100
>>>> Subject: [PATCH] new wrapper idivmod
>>>>
>>>> ---
>>>>  libgcc/config/arm/lib1funcs.S | 250 ++++++++++++++++++++++++++++++++++++------
>>>>  1 file changed, 217 insertions(+), 33 deletions(-)
>>>>
>>>> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
>>>> index 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 100644
>>>> --- a/libgcc/config/arm/lib1funcs.S
>>>> +++ b/libgcc/config/arm/lib1funcs.S
>>>> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>>>>  #ifdef __ARM_EABI__
>>>>  .macro THUMB_LDIV0 name signed
>>>>  #if defined(__ARM_ARCH_6M__)
>>>> - .ifc \signed, unsigned
>>>> - cmp r0, #0
>>>> - beq 1f
>>>> - mov r0, #0
>>>> - mvn r0, r0 @ 0xffffffff
>>>> -1:
>>>> - .else
>>>> - cmp r0, #0
>>>> - beq 2f
>>>> - blt 3f
>>>> +
>>>> + push {r0, lr}
>>>>  mov r0, #0
>>>> - mvn r0, r0
>>>> - lsr r0, r0, #1 @ 0x7fffffff
>>>> - b 2f
>>>> -3: mov r0, #0x80
>>>> - lsl r0, r0, #24 @ 0x80000000
>>>> -2:
>>>> - .endif
>>>> - push {r0, r1, r2}
>>>> - ldr r0, 4f
>>>> - adr r1, 4f
>>>> - add r0, r1
>>>> - str r0, [sp, #8]
>>>> - @ We know we are not on armv4t, so pop pc is safe.
>>>> - pop {r0, r1, pc}
>>>> - .align 2
>>>> -4:
>>>> - .word __aeabi_idiv0 - 4b
>>>> + bl SYM(__aeabi_idiv0)
>>>> + pop {r1, pc}
>>>> +
>>>
>>> I'd still retain the comment about pop pc here because there's often
>>> a misconception of merging armv4t and armv6m code.
>>>
>>>>  #elif defined(__thumb2__)
>>>>  .syntax unified
>>>>  .ifc \signed, unsigned
>>>> @@ -945,7 +923,170 @@ LSYM(Lover7):
>>>>  add dividend, work
>>>>  .endif
>>>> LSYM(Lgot_result):
>>>> -.endm
>>>> +.endm
>>>> +
>>>> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
>>>> +/* If performance is preferred, the following functions are provided.  */
>>>> +
>>>
>>> Comment above #if please and also check elsewhere in patch.
>>>
>>>> +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
>>>> +.macro BranchToDiv n, label
>>>> + lsr curbit, dividend, \n
>>>> + cmp curbit, divisor
>>>> + blo \label
>>>> +.endm
>>>> +
>>>> +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
>>>> +   and dividend.  Update the dividend as the substruction result.  */
>>>> +.macro DoDiv n
>>>> + lsr curbit, dividend, \n
>>>> + cmp curbit, divisor
>>>> + bcc 1f
>>>> + lsl curbit, divisor, \n
>>>> + sub dividend, dividend, curbit
>>>> +
>>>> +1: adc result, result
>>>> +.endm
>>>> +
>>>> +/* The body of division with positive divisor.  Unless the divisor is very
>>>> +   big, shift it up in multiples of four bits, since this is the amount of
>>>> +   unwinding in the main division loop.  Continue shifting until the divisor
>>>> +   is larger than the dividend.  */
>>>> +.macro THUMB1_Div_Positive
>>>> + mov result, #0
>>>> + BranchToDiv #1, LSYM(Lthumb1_div1)
>>>> + BranchToDiv #4, LSYM(Lthumb1_div4)
>>>> + BranchToDiv #8, LSYM(Lthumb1_div8)
>>>> + BranchToDiv #12, LSYM(Lthumb1_div12)
>>>> + BranchToDiv #16, LSYM(Lthumb1_div16)
>>>> +LSYM(Lthumb1_div_large_positive):
>>>> + mov result, #0xff
>>>> + lsl divisor, divisor, #8
>>>> + rev result, result
>>>> + lsr curbit, dividend, #16
>>>> + cmp curbit, divisor
>>>> + blo 1f
>>>> + asr result, #8
>>>> + lsl divisor, divisor, #8
>>>> + beq LSYM(Ldivbyzero_waypoint)
>>>> +
>>>> +1: lsr curbit, dividend, #12
>>>> + cmp curbit, divisor
>>>> + blo LSYM(Lthumb1_div12)
>>>> + b LSYM(Lthumb1_div16)
>>>> +LSYM(Lthumb1_div_loop):
>>>> + lsr divisor, divisor, #8
>>>> +LSYM(Lthumb1_div16):
>>>> + Dodiv #15
>>>> + Dodiv #14
>>>> + Dodiv #13
>>>> + Dodiv #12
>>>> +LSYM(Lthumb1_div12):
>>>> + Dodiv #11
>>>> + Dodiv #10
>>>> + Dodiv #9
>>>> + Dodiv #8
>>>> + bcs LSYM(Lthumb1_div_loop)
>>>> +LSYM(Lthumb1_div8):
>>>> + Dodiv #7
>>>> + Dodiv #6
>>>> + Dodiv #5
>>>> +LSYM(Lthumb1_div5):
>>>> + Dodiv #4
>>>> +LSYM(Lthumb1_div4):
>>>> + Dodiv #3
>>>> +LSYM(Lthumb1_div3):
>>>> + Dodiv #2
>>>> +LSYM(Lthumb1_div2):
>>>> + Dodiv #1
>>>> +LSYM(Lthumb1_div1):
>>>> + sub divisor, dividend, divisor
>>>> + bcs 1f
>>>> + cpy divisor, dividend
>>>> +
>>>> +1: adc result, result
>>>> + cpy dividend, result
>>>> + RET
>>>> +
>>>> +LSYM(Ldivbyzero_waypoint):
>>>> + b LSYM(Ldiv0)
>>>> +.endm
>>>> +
>>>> +/* The body of division with negative divisor.  Similar with
>>>> +   THUMB1_Div_Positive except that the shift steps are in multiples
>>>> +   of six bits.  */
>>>> +.macro THUMB1_Div_Negative
>>>> + lsr result, divisor, #31
>>>> + beq 1f
>>>> + neg divisor, divisor
>>>> +
>>>> +1: asr curbit, dividend, #32
>>>> + bcc 2f
>>>> + neg dividend, dividend
>>>> +
>>>> +2: eor curbit, result
>>>> + mov result, #0
>>>> + cpy ip, curbit
>>>> + BranchToDiv #4, LSYM(Lthumb1_div_negative4)
>>>> + BranchToDiv #8, LSYM(Lthumb1_div_negative8)
>>>> +LSYM(Lthumb1_div_large):
>>>> + mov result, #0xfc
>>>> + lsl divisor, divisor, #6
>>>> + rev result, result
>>>> + lsr curbit, dividend, #8
>>>> + cmp curbit, divisor
>>>> + blo LSYM(Lthumb1_div_negative8)
>>>> +
>>>> + lsl divisor, divisor, #6
>>>> + asr result, result, #6
>>>> + cmp curbit, divisor
>>>> + blo LSYM(Lthumb1_div_negative8)
>>>> +
>>>> + lsl divisor, divisor, #6
>>>> + asr result, result, #6
>>>> + cmp curbit, divisor
>>>> + blo LSYM(Lthumb1_div_negative8)
>>>> +
>>>> + lsl divisor, divisor, #6
>>>> + beq LSYM(Ldivbyzero_negative)
>>>> + asr result, result, #6
>>>> + b LSYM(Lthumb1_div_negative8)
>>>> +LSYM(Lthumb1_div_negative_loop):
>>>> + lsr divisor, divisor, #6
>>>> +LSYM(Lthumb1_div_negative8):
>>>> + DoDiv #7
>>>> + DoDiv #6
>>>> + DoDiv #5
>>>> + DoDiv #4
>>>> +LSYM(Lthumb1_div_negative4):
>>>> + DoDiv #3
>>>> + DoDiv #2
>>>> + bcs LSYM(Lthumb1_div_negative_loop)
>>>> + DoDiv #1
>>>> + sub divisor, dividend, divisor
>>>> + bcs 1f
>>>> + cpy divisor, dividend
>>>> +
>>>> +1: cpy curbit, ip
>>>> + adc result, result
>>>> + asr curbit, curbit, #1
>>>> + cpy dividend, result
>>>> + bcc 2f
>>>> + neg dividend, dividend
>>>> + cmp curbit, #0
>>>> +
>>>> +2: bpl 3f
>>>> + neg divisor, divisor
>>>> +
>>>> +3: RET
>>>> +
>>>> +LSYM(Ldivbyzero_negative):
>>>> + cpy curbit, ip
>>>> + asr curbit, curbit, #1
>>>> + bcc LSYM(Ldiv0)
>>>> + neg dividend, dividend
>>>> +.endm
>>>> +#endif /* ARM Thumb version.  */
>>>> +
>>>>  /* ------------------------------------------------------------------------ */
>>>>  /* Start of the Real Functions */
>>>>  /* ------------------------------------------------------------------------ */
>>>> @@ -955,6 +1096,7 @@ LSYM(Lgot_result):
>>>>
>>>>  FUNC_START udivsi3
>>>>  FUNC_ALIAS aeabi_uidiv udivsi3
>>>> +#if defined(__OPTIMIZE_SIZE__)
>>>>
>>>>  cmp divisor, #0
>>>>  beq LSYM(Ldiv0)
>>>> @@ -972,6 +1114,14 @@ LSYM(udivsi3_skip_div0_test):
>>>>  pop { work }
>>>>  RET
>>>>
>>>> +#else
>>>> + /* Implementation of aeabi_uidiv for ARMv6m.  This version is only
>>>> +    used in ARMv6-M when we need an efficient implementation.  */
>>>> +LSYM(udivsi3_skip_div0_test):
>>>> + THUMB1_Div_Positive
>>>> +
>>>> +#endif /* __OPTIMIZE_SIZE__ */
>>>> +
>>>> #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>
>>>>  ARM_FUNC_START udivsi3
>>>> @@ -1023,12 +1173,21 @@ LSYM(udivsi3_skip_div0_test):
>>>>  FUNC_START aeabi_uidivmod
>>>>  cmp r1, #0
>>>>  beq LSYM(Ldiv0)
>>>> +# if defined(__OPTIMIZE_SIZE__)
>>>>  push {r0, r1, lr}
>>>>  bl LSYM(udivsi3_skip_div0_test)
>>>>  POP {r1, r2, r3}
>>>>  mul r2, r0
>>>>  sub r1, r1, r2
>>>>  bx r3
>>>> +# else
>>>> + /* Both the quotient and remainder are calculated simultaneously
>>>> +    in THUMB1_Div_Positive.  There is no need to calculate the
>>>> +    remainder again here.  */
>>>> + b LSYM(udivsi3_skip_div0_test)
>>>> + RET
>>>> +# endif /* __OPTIMIZE_SIZE__ */
>>>> +
>>>> #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>  ARM_FUNC_START aeabi_uidivmod
>>>>  cmp r1, #0
>>>> @@ -1084,7 +1243,7 @@ LSYM(Lover10):
>>>>  RET
>>>>
>>>> #else /* ARM version.  */
>>>> -
>>>> +
>>>>  FUNC_START umodsi3
>>>>
>>>>  subs r2, r1, #1 @ compare divisor with 1
>>>> @@ -1109,8 +1268,9 @@ LSYM(Lover10):
>>>>
>>>> #if defined(__prefer_thumb__)
>>>>
>>>> - FUNC_START divsi3
>>>> + FUNC_START divsi3
>>>>  FUNC_ALIAS aeabi_idiv divsi3
>>>> +#if defined(__OPTIMIZE_SIZE__)
>>>>
>>>>  cmp divisor, #0
>>>>  beq LSYM(Ldiv0)
>>>> @@ -1133,7 +1293,7 @@ LSYM(Lover11):
>>>>  blo LSYM(Lgot_result)
>>>>
>>>>  THUMB_DIV_MOD_BODY 0
>>>> -
>>>> +
>>>>  mov r0, result
>>>>  mov work, ip
>>>>  cmp work, #0
>>>> @@ -1142,6 +1302,21 @@ LSYM(Lover11):
>>>> LSYM(Lover12):
>>>>  pop { work }
>>>>  RET
>>>> +#else
>>>> + /* Implementation of aeabi_idiv for ARMv6m.  This version is only
>>>> +    used in ARMv6-M when we need an efficient implementation.  */
>>>> +LSYM(divsi3_skip_div0_test):
>>>> + cpy curbit, dividend
>>>> + orr curbit, divisor
>>>> + bmi LSYM(Lthumb1_div_negative)
>>>> +
>>>> +LSYM(Lthumb1_div_positive):
>>>> + THUMB1_Div_Positive
>>>> +
>>>> +LSYM(Lthumb1_div_negative):
>>>> + THUMB1_Div_Negative
>>>> +
>>>> +#endif /* __OPTIMIZE_SIZE__ */
>>>>
>>>> #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>
>>>> @@ -1154,8 +1329,8 @@ LSYM(Lover12):
>>>>  RET
>>>>
>>>> #else /* ARM/Thumb-2 version.  */
>>>> -
>>>> - ARM_FUNC_START divsi3
>>>> +
>>>> + ARM_FUNC_START divsi3
>>>>  ARM_FUNC_ALIAS aeabi_idiv divsi3
>>>>
>>>>  cmp r1, #0
>>>> @@ -1209,12 +1384,21 @@ LSYM(divsi3_skip_div0_test):
>>>>  FUNC_START aeabi_idivmod
>>>>  cmp r1, #0
>>>>  beq LSYM(Ldiv0)
>>>> +# if defined(__OPTIMIZE_SIZE__)
>>>>  push {r0, r1, lr}
>>>>  bl LSYM(divsi3_skip_div0_test)
>>>>  POP {r1, r2, r3}
>>>>  mul r2, r0
>>>>  sub r1, r1, r2
>>>>  bx r3
>>>> +# else
>>>> + /* Both the quotient and remainder are calculated simultaneously
>>>> +    in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
>>>> +    need to calculate the remainder again here.  */
>>>> + b LSYM(divsi3_skip_div0_test)
>>>> + RET
>>>> +# endif /* __OPTIMIZE_SIZE__ */
>>>> +
>>>> #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>  ARM_FUNC_START aeabi_idivmod
>>>>  cmp r1, #0
>>>> --
>>>> 1.9.1
>>>
>>> Otherwise OK if no regressions and the following request passes.
>>>
>>> Can you ensure that libgcc for one ARM-state and one Thumb-2-state
>>> non-v6m configuration gives identical binaries with and without your
>>> patch?
>>>
>>> regards
>>> Ramana
>>>
>> Hi Ramana,
>>
>> Thank you for the comments. Sorry about the license; that must have
>> been a mixup somewhere.
>>
>> I put back the 'pop pc is safe' assembly comment and I moved some
>> comments before the #if and #else as requested. I left some in place
>> because they did not apply to the whole block but simply to the first
>> assembly instruction after the #if/else.
>>
>> I checked that the assembly generated for libgcc was the same with and
>> without the patch for armv7-a in ARM mode and armv7-m in Thumb mode.
>>
>> Is this OK?
>>
>> Cheers,
>> Andre
>>
>> libgcc/ChangeLog:
>> 2016-07-06  Hale Wang  <hale.wang@arm.com>
>>             Andre Vieira  <andre.simoesdiasvieira@arm.com>
>>
>>         * config/arm/lib1funcs.S: Add new wrapper.
>>
> I had to rebase the patch due to the ARMv8-M patches. This implied
> changing a context line that changed due to the code for ARMv6-M being
> reused for ARMv8-M Baseline.
>
> I ran regression tests for both ARMv6-M and ARMv8-M Baseline and
> compared the generated libgcc for ARMv7-A in ARM mode and ARMv7-M in
> Thumb mode, observing no changes.
>
> Applying patch, as it was previously OK'ed.
>
> Cheers,
> Andre

Backported to embedded-6-branch at revision r238240.

Cheers,
Andre
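For orientation before the rebased patch itself: THUMB1_Div_Positive
implements classic shift-and-subtract (restoring) division. The real code
unrolls the loop four bits per iteration and first scales the divisor up so
that runs of leading zero quotient bits are skipped; the one-bit-per-step C
model below is a sketch for illustration only (the function name and the
remainder out-parameter are invented here and are not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* One-bit-per-step model of the shift-and-subtract division that
   THUMB1_Div_Positive unrolls four bits at a time.  The divisor must be
   non-zero; the real code diverts that case to __aeabi_idiv0.  */
static uint32_t
udiv_model (uint32_t dividend, uint32_t divisor, uint32_t *remainder)
{
  uint32_t result = 0;

  for (int n = 31; n >= 0; n--)
    {
      /* Mirrors the DoDiv macro: compare the dividend's top bits with
         the divisor and subtract the shifted divisor when it fits.  The
         assembly shifts the outcome bit into the quotient with
         "adc result, result" instead of an explicit OR.  */
      uint32_t curbit = dividend >> n;
      result <<= 1;
      if (curbit >= divisor)
        {
          dividend -= divisor << n;
          result |= 1;
        }
    }

  *remainder = dividend;   /* whatever is left of the dividend */
  return result;
}

int
main (void)
{
  uint32_t rem;
  uint32_t quot = udiv_model (100, 7, &rem);
  printf ("q=%u r=%u\n", quot, rem);   /* prints q=14 r=2 */
  return 0;
}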
From ad757144fad2d9608ed840153071bb5d470193ef Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira <andsim01@arm.com>
Date: Thu, 7 Jul 2016 10:49:19 +0100
Subject: [PATCH] integer division

---
 libgcc/config/arm/lib1funcs.S | 250 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 218 insertions(+), 32 deletions(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 96e206ee542126c5d68091087446afe9f01aa51f..ba52e7b762f5573445349a574a3878859a992f13 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -311,34 +311,13 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #ifdef NOT_ISA_TARGET_32BIT
- .ifc \signed, unsigned
- cmp r0, #0
- beq 1f
- mov r0, #0
- mvn r0, r0 @ 0xffffffff
-1:
- .else
- cmp r0, #0
- beq 2f
- blt 3f
+
+ push {r0, lr}
  mov r0, #0
- mvn r0, r0
- lsr r0, r0, #1 @ 0x7fffffff
- b 2f
-3: mov r0, #0x80
- lsl r0, r0, #24 @ 0x80000000
-2:
- .endif
- push {r0, r1, r2}
- ldr r0, 4f
- adr r1, 4f
- add r0, r1
- str r0, [sp, #8]
+ bl SYM(__aeabi_idiv0)
  @ We know we are not on armv4t, so pop pc is safe.
- pop {r0, r1, pc}
- .align 2
-4:
- .word __aeabi_idiv0 - 4b
+ pop {r1, pc}
+
 #elif defined(__thumb2__)
  .syntax unified
  .ifc \signed, unsigned
@@ -950,7 +929,170 @@ LSYM(Lover7):
  add dividend, work
  .endif
 LSYM(Lgot_result):
-.endm
+.endm
+
+/* If performance is preferred, the following functions are provided.  */
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+
+/* Branch to div(n), and jump to label if curbit is lower than the divisor.  */
+.macro BranchToDiv n, label
+ lsr curbit, dividend, \n
+ cmp curbit, divisor
+ blo \label
+.endm
+
+/* Body of div(n).  Shift the divisor left by n bits and compare it with
+   the dividend.  Update the dividend with the subtraction result.  */
+.macro DoDiv n
+ lsr curbit, dividend, \n
+ cmp curbit, divisor
+ bcc 1f
+ lsl curbit, divisor, \n
+ sub dividend, dividend, curbit
+
+1: adc result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+ mov result, #0
+ BranchToDiv #1, LSYM(Lthumb1_div1)
+ BranchToDiv #4, LSYM(Lthumb1_div4)
+ BranchToDiv #8, LSYM(Lthumb1_div8)
+ BranchToDiv #12, LSYM(Lthumb1_div12)
+ BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+ mov result, #0xff
+ lsl divisor, divisor, #8
+ rev result, result
+ lsr curbit, dividend, #16
+ cmp curbit, divisor
+ blo 1f
+ asr result, #8
+ lsl divisor, divisor, #8
+ beq LSYM(Ldivbyzero_waypoint)
+
+1: lsr curbit, dividend, #12
+ cmp curbit, divisor
+ blo LSYM(Lthumb1_div12)
+ b LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+ lsr divisor, divisor, #8
+LSYM(Lthumb1_div16):
+ Dodiv #15
+ Dodiv #14
+ Dodiv #13
+ Dodiv #12
+LSYM(Lthumb1_div12):
+ Dodiv #11
+ Dodiv #10
+ Dodiv #9
+ Dodiv #8
+ bcs LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+ Dodiv #7
+ Dodiv #6
+ Dodiv #5
+LSYM(Lthumb1_div5):
+ Dodiv #4
+LSYM(Lthumb1_div4):
+ Dodiv #3
+LSYM(Lthumb1_div3):
+ Dodiv #2
+LSYM(Lthumb1_div2):
+ Dodiv #1
+LSYM(Lthumb1_div1):
+ sub divisor, dividend, divisor
+ bcs 1f
+ cpy divisor, dividend
+
+1: adc result, result
+ cpy dividend, result
+ RET
+
+LSYM(Ldivbyzero_waypoint):
+ b LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar to
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+ lsr result, divisor, #31
+ beq 1f
+ neg divisor, divisor
+
+1: asr curbit, dividend, #32
+ bcc 2f
+ neg dividend, dividend
+
+2: eor curbit, result
+ mov result, #0
+ cpy ip, curbit
+ BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+ BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+ mov result, #0xfc
+ lsl divisor, divisor, #6
+ rev result, result
+ lsr curbit, dividend, #8
+ cmp curbit, divisor
+ blo LSYM(Lthumb1_div_negative8)
+
+ lsl divisor, divisor, #6
+ asr result, result, #6
+ cmp curbit, divisor
+ blo LSYM(Lthumb1_div_negative8)
+
+ lsl divisor, divisor, #6
+ asr result, result, #6
+ cmp curbit, divisor
+ blo LSYM(Lthumb1_div_negative8)
+
+ lsl divisor, divisor, #6
+ beq LSYM(Ldivbyzero_negative)
+ asr result, result, #6
+ b LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_negative_loop):
+ lsr divisor, divisor, #6
+LSYM(Lthumb1_div_negative8):
+ DoDiv #7
+ DoDiv #6
+ DoDiv #5
+ DoDiv #4
+LSYM(Lthumb1_div_negative4):
+ DoDiv #3
+ DoDiv #2
+ bcs LSYM(Lthumb1_div_negative_loop)
+ DoDiv #1
+ sub divisor, dividend, divisor
+ bcs 1f
+ cpy divisor, dividend
+
+1: cpy curbit, ip
+ adc result, result
+ asr curbit, curbit, #1
+ cpy dividend, result
+ bcc 2f
+ neg dividend, dividend
+ cmp curbit, #0
+
+2: bpl 3f
+ neg divisor, divisor
+
+3: RET
+
+LSYM(Ldivbyzero_negative):
+ cpy curbit, ip
+ asr curbit, curbit, #1
+ bcc LSYM(Ldiv0)
+ neg dividend, dividend
+.endm
+#endif /* ARM Thumb version.  */
+
 /* ------------------------------------------------------------------------ */
 /* Start of the Real Functions */
 /* ------------------------------------------------------------------------ */
@@ -960,6 +1102,7 @@ LSYM(Lgot_result):
 
 FUNC_START udivsi3
 FUNC_ALIAS aeabi_uidiv udivsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 cmp divisor, #0
 beq LSYM(Ldiv0)
@@ -977,6 +1120,14 @@ LSYM(udivsi3_skip_div0_test):
 pop { work }
 RET
 
+/* Implementation of aeabi_uidiv for ARMv6-M.  This version is only
+   used in ARMv6-M when we need an efficient implementation.  */
+#else
+LSYM(udivsi3_skip_div0_test):
+ THUMB1_Div_Positive
+
+#endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
 ARM_FUNC_START udivsi3
@@ -1028,12 +1179,21 @@ LSYM(udivsi3_skip_div0_test):
 FUNC_START aeabi_uidivmod
 cmp r1, #0
 beq LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
 push {r0, r1, lr}
 bl LSYM(udivsi3_skip_div0_test)
 POP {r1, r2, r3}
 mul r2, r0
 sub r1, r1, r2
 bx r3
+# else
+ /* Both the quotient and remainder are calculated simultaneously
+    in THUMB1_Div_Positive.  There is no need to calculate the
+    remainder again here.  */
+ b LSYM(udivsi3_skip_div0_test)
+ RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 ARM_FUNC_START aeabi_uidivmod
 cmp r1, #0
@@ -1089,7 +1249,7 @@ LSYM(Lover10):
 RET
 
 #else /* ARM version.  */
-
+
 FUNC_START umodsi3
 
 subs r2, r1, #1 @ compare divisor with 1
@@ -1114,8 +1274,9 @@ LSYM(Lover10):
 
 #if defined(__prefer_thumb__)
 
- FUNC_START divsi3
+ FUNC_START divsi3
 FUNC_ALIAS aeabi_idiv divsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 cmp divisor, #0
 beq LSYM(Ldiv0)
@@ -1138,7 +1299,7 @@ LSYM(Lover11):
 blo LSYM(Lgot_result)
 
 THUMB_DIV_MOD_BODY 0
-
+
 mov r0, result
 mov work, ip
 cmp work, #0
@@ -1148,6 +1309,22 @@ LSYM(Lover12):
 pop { work }
 RET
+/* Implementation of aeabi_idiv for ARMv6-M.  This version is only
+   used in ARMv6-M when we need an efficient implementation.  */
+#else
+LSYM(divsi3_skip_div0_test):
+ cpy curbit, dividend
+ orr curbit, divisor
+ bmi LSYM(Lthumb1_div_negative)
+
+LSYM(Lthumb1_div_positive):
+ THUMB1_Div_Positive
+
+LSYM(Lthumb1_div_negative):
+ THUMB1_Div_Negative
+
+#endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
 ARM_FUNC_START divsi3
@@ -1159,8 +1336,8 @@ LSYM(Lover12):
 RET
 
 #else /* ARM/Thumb-2 version.  */
-
- ARM_FUNC_START divsi3
+
+ ARM_FUNC_START divsi3
 ARM_FUNC_ALIAS aeabi_idiv divsi3
 
 cmp r1, #0
@@ -1214,12 +1391,21 @@ LSYM(divsi3_skip_div0_test):
 FUNC_START aeabi_idivmod
 cmp r1, #0
 beq LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
 push {r0, r1, lr}
 bl LSYM(divsi3_skip_div0_test)
 POP {r1, r2, r3}
 mul r2, r0
 sub r1, r1, r2
 bx r3
+# else
+ /* Both the quotient and remainder are calculated simultaneously
+    in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
+    need to calculate the remainder again here.  */
+ b LSYM(divsi3_skip_div0_test)
+ RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 ARM_FUNC_START aeabi_idivmod
 cmp r1, #0
-- 
1.9.1
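A closing note on the "# else" paths in aeabi_uidivmod and aeabi_idivmod
above: the AEABI divmod helpers return the quotient in r0 and the remainder
in r1, and THUMB1_Div_Positive/THUMB1_Div_Negative already finish in exactly
that state (the quotient replaces the dividend in r0, and what is left of
the running dividend, the remainder, sits in r1). That is why the fast path
can simply branch into the division body instead of recomputing the
remainder with the mul/sub pair used on the size-optimised path. In C terms
the contract looks roughly like the sketch below; the type and function
names are invented for illustration and are not part of libgcc:

#include <stdint.h>

/* Models the two-register {quotient, remainder} return convention of
   __aeabi_uidivmod: quot comes back in r0, rem in r1.  */
typedef struct { uint32_t quot; uint32_t rem; } uidivmod_t;

static uidivmod_t
uidivmod_model (uint32_t n, uint32_t d)
{
  /* The size-optimised wrapper recomputes the remainder as
     n - (n / d) * d after the divide routine returns; the fast path
     gets both values for free from a single division pass.  */
  uidivmod_t r = { n / d, n % d };
  return r;
}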