diff mbox

[v3] Improve strtok(_r) performance

Message ID e4210655-f5d9-69ad-b1f7-248d87c48401@linaro.org
State New
Headers show

Commit Message

Adhemerval Zanella Netto Dec. 14, 2016, 7:43 p.m. UTC
On 14/12/2016 17:26, Adhemerval Zanella wrote:
> 
> 
> On 14/12/2016 16:25, Wilco Dijkstra wrote:
>> Hi,
>>
>> In powerpc64/power7/memchr.S this bit looks suspicious:
>>
>> L(done):
>> #ifdef __LITTLE_ENDIAN__
>>         addi    r0,r3,-1
>>         andc    r0,r0,r3
>>         popcntd r0,r0         /* Count trailing zeros.  */
>> #else
>>         cntlzd  r0,r3         /* Count leading zeros before the match.  */
>> #endif
>>         cmpld   r8,r7         /* Are we on the last dword?  */
>>         srdi    r0,r0,3       /* Convert leading/trailing zeros to bytes.  */
>>         add     r3,r8,r0
>>         cmpld   cr7,r0,r5     /* If on the last dword, check byte offset.  */
>>         bnelr
>>         blelr   cr7
>>         li      r3,0
>>         blr
>>
>> When the size is the maximum and the input pointer is at offset 2 or larger within an 8-byte aligned address, r7 == r8, and it will return NULL if it matches in the first 8 bytes since the match appears to be after the end pointer. When it matches later, r7 != r8 and it works.
> 
> I think this snippet is ok, the problem is at r7 calculation where it should
> handle overflow.  This simple test triggers the issue:
> 
> #define _GNU_SOURCE 1
> #include <string.h>
> #include <stdio.h>
> 
> void *
> my_rawmemchr (const void *s, int c)
> { 
>   if (c != '\0')
>     return memchr (s, c, (size_t)-1);
>   return (char *)s + strlen (s);
> }
> 
> int main ()
> {
>   // p=0x3fffb057fe00 | aling=10
>   int seek_char = 0x41;
>   size_t align = 10;
>   unsigned char input [32];
>   input[10] = 0x34;
>   input[11] = 0x78;
>   input[12] = 0x3d;
>   input[13] = 0x7b;
>   input[14] = 0xa1;
>   input[15] = seek_char;
> 
>   printf ("%p\n", my_rawmemchr (input+align, seek_char));
>   printf ("%p\n", rawmemchr (input+align, seek_char));
>   return 0;
> }
> 
> On POWER7 memchr.S:
> 
>  24 ENTRY (__memchr)
>  25         CALL_MCOUNT 3
>  26         dcbt    0,r3
>  27         clrrdi  r8,r3,3
>  28         insrdi  r4,r4,8,48
>  29         add     r7,r3,r5      /* Calculate the last acceptable address.  */
> 
> And using the example above:
> 
> (gdb) i r r3 r5
> r3             0x3fffffffeab2   70368744172210
> r5             0xffffffffffffffff       18446744073709551615
> (gdb) ni
> 30      in ../sysdeps/powerpc/powerpc64/power7/memchr.S
> (gdb) i r r7
> r7             0x3fffffffeab1   70368744172209
> 
> If we set r7 to maximum pointer value (0xf...f) memchr returns the expected result.
> 

This patch should fix test-rawmemchr by adding a saturated addition when
calculating the last double address to check.  I will prepare a patch
when the make check finished on the powerpc64le machine I have access.
diff mbox

Patch

diff --git a/sysdeps/powerpc/powerpc64/power7/memchr.S b/sysdeps/powerpc/powerpc64/power7/memchr.S
index 03f0d7c..34605a7 100644
--- a/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -26,7 +26,15 @@  ENTRY (__memchr)
        dcbt    0,r3
        clrrdi  r8,r3,3
        insrdi  r4,r4,8,48
-       add     r7,r3,r5      /* Calculate the last acceptable address.  */
+
+       /* Calculate the last acceptable address and also take care of possible
+          overflow by using a large size.  */
+       add     r7,r3,r5
+       subfc   r6,r3,r7
+       subfe   r9,r9,r9
+       extsw   r6,r9
+       or      r7,r7,r6
+
        insrdi  r4,r4,16,32
        cmpldi  r5,32
        li      r9, -1