| Backport performance improvements of memmem and strstr from glibc 2.30. |
| |
| Changes come from the following commits without modifications: |
| 680942b016 Improve performance of memmem |
| string/memmem.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------ |
| 1 file changed, 85 insertions(+), 42 deletions(-) |
| 5e0a7ecb66 Improve performance of strstr |
| string/str-two-way.h | 9 +++++--- |
| string/strstr.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
| 2 files changed, 123 insertions(+), 51 deletions(-) |
| |
| Additional commits were included in the patch to resolve the conflicts. |
| These additional commits were also applied without modifications. |
| |
| 34a5a1460e Break some lines before not after operators. |
| string/test-memmem.c | 8 ++++---- |
| 1 file changed, 4 insertions(+), 4 deletions(-) |
| 04277e02d7 Update copyright dates with scripts/update-copyrights. |
| string/memmem.c | 2 +- |
| string/str-two-way.h | 2 +- |
| string/strcasestr.c | 2 +- |
| string/strstr.c | 2 +- |
| string/test-memmem.c | 2 +- |
| string/test-strstr.c | 2 +- |
| 83a552b0bb Fix strstr bug with huge needles (bug 23637) |
| string/strcasestr.c | 5 +++-- |
| string/strstr.c | 5 +++-- |
| string/test-strstr.c | 30 ++++++++++++++++++++++++++++++ |
| 3 files changed, 36 insertions(+), 4 deletions(-) |
| c8dd67e7c9 Speedup first memmem match |
| string/memmem.c | 4 ++++ |
| 1 file changed, 4 insertions(+) |
| 284f42bc77 Simplify and speedup strstr/strcasestr first match |
| string/strcasestr.c | 37 ++++++++++++++----------------------- |
| string/strstr.c | 43 +++++++++++++++++++++---------------------- |
| 2 files changed, 35 insertions(+), 45 deletions(-) |
| 3ae725dfb6 Improve strstr performance |
| string/memmem.c | 1 + |
| string/str-two-way.h | 56 +++++++++++++++++++++++++++----------------------------- |
| string/strcasestr.c | 4 ++-- |
| string/strstr.c | 5 +++-- |
| string/test-strstr.c | 1 + |
| 5 files changed, 34 insertions(+), 33 deletions(-) |
| |
| diff --git a/string/memmem.c b/string/memmem.c |
| index c17e1cf6a6..83ee75e8c7 100644 |
| --- a/string/memmem.c |
| +++ b/string/memmem.c |
| @@ -1,4 +1,4 @@ |
| -/* Copyright (C) 1991-2018 Free Software Foundation, Inc. |
| +/* Copyright (C) 1991-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| @@ -15,67 +15,115 @@ |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| -/* This particular implementation was written by Eric Blake, 2008. */ |
| - |
| #ifndef _LIBC |
| # include <config.h> |
| #endif |
| |
| -/* Specification of memmem. */ |
| #include <string.h> |
| |
| #ifndef _LIBC |
| -# define __builtin_expect(expr, val) (expr) |
| # define __memmem memmem |
| #endif |
| |
| #define RETURN_TYPE void * |
| #define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l)) |
| +#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N)) |
| #include "str-two-way.h" |
| |
| #undef memmem |
| |
| -/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK |
| - if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in |
| - HAYSTACK. */ |
| +/* Hash character pairs so a small shift table can be used. All bits of |
| + p[0] are included, but not all bits from p[-1]. So if two equal hashes |
| + match on p[-1], p[0] matches too. Hash collisions are harmless and result |
| + in smaller shifts. */ |
| +#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) |
| + |
| +/* Fast memmem algorithm with guaranteed linear-time performance. |
| + Small needles up to size 2 use a dedicated linear search. Longer needles |
| + up to size 256 use a novel modified Horspool algorithm. It hashes pairs |
| + of characters to quickly skip past mismatches. The main search loop only |
| + exits if the last 2 characters match, avoiding unnecessary calls to memcmp |
| + and allowing for a larger skip if there is no match. A self-adapting |
| + filtering check is used to quickly detect mismatches in long needles. |
| + By limiting the needle length to 256, the shift table can be reduced to 8 |
| + bits per entry, lowering preprocessing overhead and minimizing cache effects. |
| + The limit also implies worst-case performance is linear. |
| + Needles larger than 256 characters use the linear-time Two-Way algorithm. */ |
| void * |
| -__memmem (const void *haystack_start, size_t haystack_len, |
| - const void *needle_start, size_t needle_len) |
| +__memmem (const void *haystack, size_t hs_len, |
| + const void *needle, size_t ne_len) |
| { |
| - /* Abstract memory is considered to be an array of 'unsigned char' values, |
| - not an array of 'char' values. See ISO C 99 section 6.2.6.1. */ |
| - const unsigned char *haystack = (const unsigned char *) haystack_start; |
| - const unsigned char *needle = (const unsigned char *) needle_start; |
| - |
| - if (needle_len == 0) |
| - /* The first occurrence of the empty string is deemed to occur at |
| - the beginning of the string. */ |
| - return (void *) haystack; |
| - |
| - /* Sanity check, otherwise the loop might search through the whole |
| - memory. */ |
| - if (__glibc_unlikely (haystack_len < needle_len)) |
| + const unsigned char *hs = (const unsigned char *) haystack; |
| + const unsigned char *ne = (const unsigned char *) needle; |
| + |
| + if (ne_len == 0) |
| + return (void *) hs; |
| + if (ne_len == 1) |
| + return (void *) memchr (hs, ne[0], hs_len); |
| + |
| + /* Ensure haystack length is >= needle length. */ |
| + if (hs_len < ne_len) |
| return NULL; |
| |
| - /* Use optimizations in memchr when possible, to reduce the search |
| - size of haystack using a linear algorithm with a smaller |
| - coefficient. However, avoid memchr for long needles, since we |
| - can often achieve sublinear performance. */ |
| - if (needle_len < LONG_NEEDLE_THRESHOLD) |
| + const unsigned char *end = hs + hs_len - ne_len; |
| + |
| + if (ne_len == 2) |
| + { |
| + uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1]; |
| + for (hs++; hs <= end && hw != nw; ) |
| + hw = hw << 16 | *++hs; |
| + return hw == nw ? (void *)hs - 1 : NULL; |
| + } |
| + |
| + /* Use Two-Way algorithm for very long needles. */ |
| + if (__builtin_expect (ne_len > 256, 0)) |
| + return two_way_long_needle (hs, hs_len, ne, ne_len); |
| + |
| + uint8_t shift[256]; |
| + size_t tmp, shift1; |
| + size_t m1 = ne_len - 1; |
| + size_t offset = 0; |
| + |
| + memset (shift, 0, sizeof (shift)); |
| + for (int i = 1; i < m1; i++) |
| + shift[hash2 (ne + i)] = i; |
| + /* Shift1 is the amount we can skip after matching the hash of the |
| + needle end but not the full needle. */ |
| + shift1 = m1 - shift[hash2 (ne + m1)]; |
| + shift[hash2 (ne + m1)] = m1; |
| + |
| + for ( ; hs <= end; ) |
| { |
| - haystack = memchr (haystack, *needle, haystack_len); |
| - if (!haystack || __builtin_expect (needle_len == 1, 0)) |
| - return (void *) haystack; |
| - haystack_len -= haystack - (const unsigned char *) haystack_start; |
| - if (haystack_len < needle_len) |
| - return NULL; |
| - return two_way_short_needle (haystack, haystack_len, needle, needle_len); |
| + /* Skip past character pairs not in the needle. */ |
| + do |
| + { |
| + hs += m1; |
| + tmp = shift[hash2 (hs)]; |
| + } |
| + while (tmp == 0 && hs <= end); |
| + |
| + /* If the match is not at the end of the needle, shift to the end |
| + and continue until we match the hash of the needle end. */ |
| + hs -= tmp; |
| + if (tmp < m1) |
| + continue; |
| + |
| + /* Hash of the last 2 characters matches. If the needle is long, |
| + try to quickly filter out mismatches. */ |
| + if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) |
| + { |
| + if (memcmp (hs, ne, m1) == 0) |
| + return (void *) hs; |
| + |
| + /* Adjust filter offset when it doesn't find the mismatch. */ |
| + offset = (offset >= 8 ? offset : m1) - 8; |
| + } |
| + |
| + /* Skip based on matching the hash of the needle end. */ |
| + hs += shift1; |
| } |
| - else |
| - return two_way_long_needle (haystack, haystack_len, needle, needle_len); |
| + return NULL; |
| } |
| libc_hidden_def (__memmem) |
| weak_alias (__memmem, memmem) |
| libc_hidden_weak (memmem) |
| - |
| -#undef LONG_NEEDLE_THRESHOLD |
| diff --git a/string/str-two-way.h b/string/str-two-way.h |
| index cd2605857d..f43c613f5a 100644 |
| --- a/string/str-two-way.h |
| +++ b/string/str-two-way.h |
| @@ -1,5 +1,5 @@ |
| /* Byte-wise substring search, using the Two-Way algorithm. |
| - Copyright (C) 2008-2018 Free Software Foundation, Inc. |
| + Copyright (C) 2008-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Written by Eric Blake <ebb9@byu.net>, 2008. |
| |
| @@ -221,7 +221,7 @@ critical_factorization (const unsigned char *needle, size_t needle_len, |
| most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. |
| If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * |
| HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ |
| -static RETURN_TYPE |
| +static inline RETURN_TYPE |
| two_way_short_needle (const unsigned char *haystack, size_t haystack_len, |
| const unsigned char *needle, size_t needle_len) |
| { |
| @@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, |
| } |
| else |
| { |
| - const unsigned char *phaystack = &haystack[suffix]; |
| + const unsigned char *phaystack; |
| /* The comparison always starts from needle[suffix], so cache it |
| and use an optimized first-character loop. */ |
| unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]); |
| |
| -#if CHECK_EOL |
| - /* We start matching from the SUFFIX'th element, so make sure we |
| - don't hit '\0' before that. */ |
| - if (haystack_len < suffix + 1 |
| - && !AVAILABLE (haystack, haystack_len, 0, suffix + 1)) |
| - return NULL; |
| -#endif |
| - |
| /* The two halves of needle are distinct; no extra memory is |
| required, and any mismatch results in a maximal shift. */ |
| period = MAX (suffix, needle_len - suffix) + 1; |
| j = 0; |
| - while (1 |
| -#if !CHECK_EOL |
| - && AVAILABLE (haystack, haystack_len, j, needle_len) |
| -#endif |
| - ) |
| + while (AVAILABLE (haystack, haystack_len, j, needle_len)) |
| { |
| unsigned char haystack_char; |
| const unsigned char *pneedle; |
| |
| - /* TODO: The first-character loop can be sped up by adapting |
| - longword-at-a-time implementation of memchr/strchr. */ |
| - if (needle_suffix |
| + phaystack = &haystack[suffix + j]; |
| + |
| +#ifdef FASTSEARCH |
| + if (*phaystack++ != needle_suffix) |
| + { |
| + phaystack = FASTSEARCH (phaystack, needle_suffix, |
| + haystack_len - needle_len - j); |
| + if (phaystack == NULL) |
| + goto ret0; |
| + j = phaystack - &haystack[suffix]; |
| + phaystack++; |
| + } |
| +#else |
| + while (needle_suffix |
| != (haystack_char = CANON_ELEMENT (*phaystack++))) |
| { |
| RET0_IF_0 (haystack_char); |
| -#if !CHECK_EOL |
| +# if !CHECK_EOL |
| ++j; |
| -#endif |
| - continue; |
| + if (!AVAILABLE (haystack, haystack_len, j, needle_len)) |
| + goto ret0; |
| +# endif |
| } |
| |
| -#if CHECK_EOL |
| +# if CHECK_EOL |
| /* Calculate J if it wasn't kept up-to-date in the first-character |
| loop. */ |
| j = phaystack - &haystack[suffix] - 1; |
| +# endif |
| #endif |
| - |
| /* Scan for matches in right half. */ |
| i = suffix + 1; |
| pneedle = &needle[i]; |
| @@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, |
| } |
| ++i; |
| } |
| +#if CHECK_EOL |
| + /* Update minimal length of haystack. */ |
| + if (phaystack > haystack + haystack_len) |
| + haystack_len = phaystack - haystack; |
| +#endif |
| if (needle_len <= i) |
| { |
| /* Scan for matches in left half. */ |
| @@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, |
| } |
| else |
| j += i - suffix + 1; |
| - |
| -#if CHECK_EOL |
| - if (!AVAILABLE (haystack, haystack_len, j, needle_len)) |
| - break; |
| -#endif |
| - |
| - phaystack = &haystack[suffix + j]; |
| } |
| } |
| ret0: __attribute__ ((unused)) |
| @@ -384,8 +382,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, |
| and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. |
| If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * |
| HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and |
| - sublinear performance is not possible. */ |
| -static RETURN_TYPE |
| + sublinear performance is not possible. |
| + |
| + Since this function is large and complex, block inlining to avoid |
| + slowing down the common case of small needles. */ |
| +__attribute__((noinline)) static RETURN_TYPE |
| two_way_long_needle (const unsigned char *haystack, size_t haystack_len, |
| const unsigned char *needle, size_t needle_len) |
| { |
| diff --git a/string/strcasestr.c b/string/strcasestr.c |
| index 90ba189790..a2aba000b1 100644 |
| --- a/string/strcasestr.c |
| +++ b/string/strcasestr.c |
| @@ -1,5 +1,5 @@ |
| /* Return the offset of one string within another. |
| - Copyright (C) 1994-2018 Free Software Foundation, Inc. |
| + Copyright (C) 1994-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| @@ -37,8 +37,9 @@ |
| /* Two-Way algorithm. */ |
| #define RETURN_TYPE char * |
| #define AVAILABLE(h, h_l, j, n_l) \ |
| - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ |
| - && ((h_l) = (j) + (n_l))) |
| + (((j) + (n_l) <= (h_l)) \ |
| + || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ |
| + (j) + (n_l) <= (h_l))) |
| #define CHECK_EOL (1) |
| #define RET0_IF_0(a) if (!a) goto ret0 |
| #define CANON_ELEMENT(c) TOLOWER (c) |
| @@ -58,31 +59,22 @@ |
| case-insensitive comparison. This function gives unspecified |
| results in multibyte locales. */ |
| char * |
| -STRCASESTR (const char *haystack_start, const char *needle_start) |
| +STRCASESTR (const char *haystack, const char *needle) |
| { |
| - const char *haystack = haystack_start; |
| - const char *needle = needle_start; |
| size_t needle_len; /* Length of NEEDLE. */ |
| size_t haystack_len; /* Known minimum length of HAYSTACK. */ |
| - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ |
| - |
| - /* Determine length of NEEDLE, and in the process, make sure |
| - HAYSTACK is at least as long (no point processing all of a long |
| - NEEDLE if HAYSTACK is too short). */ |
| - while (*haystack && *needle) |
| - { |
| - ok &= (TOLOWER ((unsigned char) *haystack) |
| - == TOLOWER ((unsigned char) *needle)); |
| - haystack++; |
| - needle++; |
| - } |
| - if (*needle) |
| + |
| + /* Handle empty NEEDLE special case. */ |
| + if (needle[0] == '\0') |
| + return (char *) haystack; |
| + |
| + /* Ensure HAYSTACK length is at least as long as NEEDLE length. |
| + Since a match may occur early on in a huge HAYSTACK, use strnlen |
| + and read ahead a few cachelines for improved performance. */ |
| + needle_len = strlen (needle); |
| + haystack_len = __strnlen (haystack, needle_len + 256); |
| + if (haystack_len < needle_len) |
| return NULL; |
| - if (ok) |
| - return (char *) haystack_start; |
| - needle_len = needle - needle_start; |
| - haystack = haystack_start + 1; |
| - haystack_len = needle_len - 1; |
| |
| /* Perform the search. Abstract memory is considered to be an array |
| of 'unsigned char' values, not an array of 'char' values. See |
| @@ -90,10 +82,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start) |
| if (needle_len < LONG_NEEDLE_THRESHOLD) |
| return two_way_short_needle ((const unsigned char *) haystack, |
| haystack_len, |
| - (const unsigned char *) needle_start, |
| + (const unsigned char *) needle, |
| needle_len); |
| return two_way_long_needle ((const unsigned char *) haystack, haystack_len, |
| - (const unsigned char *) needle_start, |
| + (const unsigned char *) needle, |
| needle_len); |
| } |
| |
| diff --git a/string/strstr.c b/string/strstr.c |
| index b3b5deb673..408cce8ed5 100644 |
| --- a/string/strstr.c |
| +++ b/string/strstr.c |
| @@ -1,5 +1,5 @@ |
| /* Return the offset of one string within another. |
| - Copyright (C) 1994-2018 Free Software Foundation, Inc. |
| + Copyright (C) 1994-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| @@ -16,27 +16,17 @@ |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| -/* This particular implementation was written by Eric Blake, 2008. */ |
| - |
| #ifndef _LIBC |
| # include <config.h> |
| #endif |
| |
| -/* Specification of strstr. */ |
| #include <string.h> |
| |
| -#include <stdbool.h> |
| - |
| -#ifndef _LIBC |
| -# define __builtin_expect(expr, val) (expr) |
| -#endif |
| - |
| #define RETURN_TYPE char * |
| #define AVAILABLE(h, h_l, j, n_l) \ |
| - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ |
| - && ((h_l) = (j) + (n_l))) |
| -#define CHECK_EOL (1) |
| -#define RET0_IF_0(a) if (!a) goto ret0 |
| + (((j) + (n_l) <= (h_l)) \ |
| + || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ |
| + (j) + (n_l) <= (h_l))) |
| #include "str-two-way.h" |
| |
| #undef strstr |
| @@ -45,48 +35,128 @@ |
| #define STRSTR strstr |
| #endif |
| |
| -/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK |
| - if NEEDLE is empty, otherwise NULL if NEEDLE is not found in |
| - HAYSTACK. */ |
| +static inline char * |
| +strstr2 (const unsigned char *hs, const unsigned char *ne) |
| +{ |
| + uint32_t h1 = (ne[0] << 16) | ne[1]; |
| + uint32_t h2 = 0; |
| + for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) |
| + h2 = (h2 << 16) | c; |
| + return h1 == h2 ? (char *)hs - 2 : NULL; |
| +} |
| + |
| +static inline char * |
| +strstr3 (const unsigned char *hs, const unsigned char *ne) |
| +{ |
| + uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8); |
| + uint32_t h2 = 0; |
| + for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) |
| + h2 = (h2 | c) << 8; |
| + return h1 == h2 ? (char *)hs - 3 : NULL; |
| +} |
| + |
| +/* Hash character pairs so a small shift table can be used. All bits of |
| + p[0] are included, but not all bits from p[-1]. So if two equal hashes |
| + match on p[-1], p[0] matches too. Hash collisions are harmless and result |
| + in smaller shifts. */ |
| +#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) |
| + |
| +/* Fast strstr algorithm with guaranteed linear-time performance. |
| + Small needles up to size 3 use a dedicated linear search. Longer needles |
| + up to size 256 use a novel modified Horspool algorithm. It hashes pairs |
| + of characters to quickly skip past mismatches. The main search loop only |
| + exits if the last 2 characters match, avoiding unnecessary calls to memcmp |
| + and allowing for a larger skip if there is no match. A self-adapting |
| + filtering check is used to quickly detect mismatches in long needles. |
| + By limiting the needle length to 256, the shift table can be reduced to 8 |
| + bits per entry, lowering preprocessing overhead and minimizing cache effects. |
| + The limit also implies worst-case performance is linear. |
| + Needles larger than 256 characters use the linear-time Two-Way algorithm. */ |
| char * |
| -STRSTR (const char *haystack_start, const char *needle_start) |
| +STRSTR (const char *haystack, const char *needle) |
| { |
| - const char *haystack = haystack_start; |
| - const char *needle = needle_start; |
| - size_t needle_len; /* Length of NEEDLE. */ |
| - size_t haystack_len; /* Known minimum length of HAYSTACK. */ |
| - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ |
| - |
| - /* Determine length of NEEDLE, and in the process, make sure |
| - HAYSTACK is at least as long (no point processing all of a long |
| - NEEDLE if HAYSTACK is too short). */ |
| - while (*haystack && *needle) |
| - ok &= *haystack++ == *needle++; |
| - if (*needle) |
| + const unsigned char *hs = (const unsigned char *) haystack; |
| + const unsigned char *ne = (const unsigned char *) needle; |
| + |
| + /* Handle short needle special cases first. */ |
| + if (ne[0] == '\0') |
| + return (char *)hs; |
| + hs = (const unsigned char *)strchr ((const char*)hs, ne[0]); |
| + if (hs == NULL || ne[1] == '\0') |
| + return (char*)hs; |
| + if (ne[2] == '\0') |
| + return strstr2 (hs, ne); |
| + if (ne[3] == '\0') |
| + return strstr3 (hs, ne); |
| + |
| + /* Ensure haystack length is at least as long as needle length. |
| + Since a match may occur early on in a huge haystack, use strnlen |
| + and read ahead a few cachelines for improved performance. */ |
| + size_t ne_len = strlen ((const char*)ne); |
| + size_t hs_len = __strnlen ((const char*)hs, ne_len | 512); |
| + if (hs_len < ne_len) |
| return NULL; |
| - if (ok) |
| - return (char *) haystack_start; |
| - |
| - /* Reduce the size of haystack using strchr, since it has a smaller |
| - linear coefficient than the Two-Way algorithm. */ |
| - needle_len = needle - needle_start; |
| - haystack = strchr (haystack_start + 1, *needle_start); |
| - if (!haystack || __builtin_expect (needle_len == 1, 0)) |
| - return (char *) haystack; |
| - needle -= needle_len; |
| - haystack_len = (haystack > haystack_start + needle_len ? 1 |
| - : needle_len + haystack_start - haystack); |
| - |
| - /* Perform the search. Abstract memory is considered to be an array |
| - of 'unsigned char' values, not an array of 'char' values. See |
| - ISO C 99 section 6.2.6.1. */ |
| - if (needle_len < LONG_NEEDLE_THRESHOLD) |
| - return two_way_short_needle ((const unsigned char *) haystack, |
| - haystack_len, |
| - (const unsigned char *) needle, needle_len); |
| - return two_way_long_needle ((const unsigned char *) haystack, haystack_len, |
| - (const unsigned char *) needle, needle_len); |
| + |
| + /* Check whether we have a match. This improves performance since we |
| + avoid initialization overheads. */ |
| + if (memcmp (hs, ne, ne_len) == 0) |
| + return (char *) hs; |
| + |
| + /* Use Two-Way algorithm for very long needles. */ |
| + if (__glibc_unlikely (ne_len > 256)) |
| + return two_way_long_needle (hs, hs_len, ne, ne_len); |
| + |
| + const unsigned char *end = hs + hs_len - ne_len; |
| + uint8_t shift[256]; |
| + size_t tmp, shift1; |
| + size_t m1 = ne_len - 1; |
| + size_t offset = 0; |
| + |
| + /* Initialize bad character shift hash table. */ |
| + memset (shift, 0, sizeof (shift)); |
| + for (int i = 1; i < m1; i++) |
| + shift[hash2 (ne + i)] = i; |
| + /* Shift1 is the amount we can skip after matching the hash of the |
| + needle end but not the full needle. */ |
| + shift1 = m1 - shift[hash2 (ne + m1)]; |
| + shift[hash2 (ne + m1)] = m1; |
| + |
| + while (1) |
| + { |
| + if (__glibc_unlikely (hs > end)) |
| + { |
| + end += __strnlen ((const char*)end + m1 + 1, 2048); |
| + if (hs > end) |
| + return NULL; |
| + } |
| + |
| + /* Skip past character pairs not in the needle. */ |
| + do |
| + { |
| + hs += m1; |
| + tmp = shift[hash2 (hs)]; |
| + } |
| + while (tmp == 0 && hs <= end); |
| + |
| + /* If the match is not at the end of the needle, shift to the end |
| + and continue until we match the hash of the needle end. */ |
| + hs -= tmp; |
| + if (tmp < m1) |
| + continue; |
| + |
| + /* Hash of the last 2 characters matches. If the needle is long, |
| + try to quickly filter out mismatches. */ |
| + if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) |
| + { |
| + if (memcmp (hs, ne, m1) == 0) |
| + return (void *) hs; |
| + |
| + /* Adjust filter offset when it doesn't find the mismatch. */ |
| + offset = (offset >= 8 ? offset : m1) - 8; |
| + } |
| + |
| + /* Skip based on matching the hash of the needle end. */ |
| + hs += shift1; |
| + } |
| } |
| libc_hidden_builtin_def (strstr) |
| - |
| -#undef LONG_NEEDLE_THRESHOLD |
| diff --git a/string/test-memmem.c b/string/test-memmem.c |
| index 51f58d1eda..2ac7f18c71 100644 |
| --- a/string/test-memmem.c |
| +++ b/string/test-memmem.c |
| @@ -1,5 +1,5 @@ |
| /* Test and measure memmem functions. |
| - Copyright (C) 2008-2018 Free Software Foundation, Inc. |
| + Copyright (C) 2008-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Written by Ulrich Drepper <drepper@redhat.com>, 2008. |
| |
| @@ -48,10 +48,10 @@ simple_memmem (const void *haystack, size_t haystack_len, const void *needle, |
| return NULL; |
| |
| for (begin = (const char *) haystack; begin <= last_possible; ++begin) |
| - if (begin[0] == ((const char *) needle)[0] && |
| - !memcmp ((const void *) &begin[1], |
| - (const void *) ((const char *) needle + 1), |
| - needle_len - 1)) |
| + if (begin[0] == ((const char *) needle)[0] |
| + && !memcmp ((const void *) &begin[1], |
| + (const void *) ((const char *) needle + 1), |
| + needle_len - 1)) |
| return (void *) begin; |
| |
| return NULL; |
| diff --git a/string/test-strstr.c b/string/test-strstr.c |
| index acf6ff8224..031aff5534 100644 |
| --- a/string/test-strstr.c |
| +++ b/string/test-strstr.c |
| @@ -1,5 +1,5 @@ |
| /* Test and measure strstr functions. |
| - Copyright (C) 2010-2018 Free Software Foundation, Inc. |
| + Copyright (C) 2010-2019 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Written by Ulrich Drepper <drepper@redhat.com>, 2010. |
| |
| @@ -24,6 +24,7 @@ |
| |
| #define STRSTR simple_strstr |
| #define libc_hidden_builtin_def(arg) /* nothing */ |
| +#define __strnlen strnlen |
| #include "strstr.c" |
| |
| |
| @@ -150,6 +151,32 @@ check2 (void) |
| } |
| } |
| |
| +#define N 1024 |
| + |
| +static void |
| +pr23637 (void) |
| +{ |
| + char *h = (char*) buf1; |
| + char *n = (char*) buf2; |
| + |
| + for (int i = 0; i < N; i++) |
| + { |
| + n[i] = 'x'; |
| + h[i] = ' '; |
| + h[i + N] = 'x'; |
| + } |
| + |
| + n[N] = '\0'; |
| + h[N * 2] = '\0'; |
| + |
| + /* Ensure we don't match at the first 'x'. */ |
| + h[0] = 'x'; |
| + |
| + char *exp_result = stupid_strstr (h, n); |
| + FOR_EACH_IMPL (impl, 0) |
| + check_result (impl, h, n, exp_result); |
| +} |
| + |
| static int |
| test_main (void) |
| { |
| @@ -157,6 +184,7 @@ test_main (void) |
| |
| check1 (); |
| check2 (); |
| + pr23637 (); |
| |
| printf ("%23s", ""); |
| FOR_EACH_IMPL (impl, 0) |
| @@ -201,6 +229,9 @@ test_main (void) |
| do_test (15, 9, hlen, klen, 1); |
| do_test (15, 15, hlen, klen, 0); |
| do_test (15, 15, hlen, klen, 1); |
| + |
| + do_test (15, 15, hlen + klen * 4, klen * 4, 0); |
| + do_test (15, 15, hlen + klen * 4, klen * 4, 1); |
| } |
| |
| do_test (0, 0, page_size - 1, 16, 0); |