blob: aafc0e0c0348ef63e53d7a4f4d845c04e62f22ee [file] [log] [blame]
Backport performance improvements of memmem and strstr from glibc 2.30.
Changes come from the following commits without modifications:
680942b016 Improve performance of memmem
string/memmem.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------
1 file changed, 85 insertions(+), 42 deletions(-)
5e0a7ecb66 Improve performance of strstr
string/str-two-way.h | 9 +++++---
string/strstr.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------
2 files changed, 123 insertions(+), 51 deletions(-)
The following additional commits were included in the patch to resolve conflicts.
No modifications were made to them.
34a5a1460e Break some lines before not after operators.
string/test-memmem.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
04277e02d7 Update copyright dates with scripts/update-copyrights.
string/memmem.c | 2 +-
string/str-two-way.h | 2 +-
string/strcasestr.c | 2 +-
string/strstr.c | 2 +-
string/test-memmem.c | 2 +-
string/test-strstr.c | 2 +-
83a552b0bb Fix strstr bug with huge needles (bug 23637)
string/strcasestr.c | 5 +++--
string/strstr.c | 5 +++--
string/test-strstr.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 36 insertions(+), 4 deletions(-)
c8dd67e7c9 Speedup first memmem match
string/memmem.c | 4 ++++
1 file changed, 4 insertions(+)
284f42bc77 Simplify and speedup strstr/strcasestr first match
string/strcasestr.c | 37 ++++++++++++++-----------------------
string/strstr.c | 43 +++++++++++++++++++++----------------------
2 files changed, 35 insertions(+), 45 deletions(-)
3ae725dfb6 Improve strstr performance
string/memmem.c | 1 +
string/str-two-way.h | 56 +++++++++++++++++++++++++++-----------------------------
string/strcasestr.c | 4 ++--
string/strstr.c | 5 +++--
string/test-strstr.c | 1 +
5 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/string/memmem.c b/string/memmem.c
index c17e1cf6a6..83ee75e8c7 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,67 +15,115 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* This particular implementation was written by Eric Blake, 2008. */
-
#ifndef _LIBC
# include <config.h>
#endif
-/* Specification of memmem. */
#include <string.h>
#ifndef _LIBC
-# define __builtin_expect(expr, val) (expr)
# define __memmem memmem
#endif
#define RETURN_TYPE void *
#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
+#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
#include "str-two-way.h"
#undef memmem
-/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
- if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in
- HAYSTACK. */
+/* Hash character pairs so a small shift table can be used. All bits of
+ p[0] are included, but not all bits from p[-1]. So if two equal hashes
+ match on p[-1], p[0] matches too. Hash collisions are harmless and result
+ in smaller shifts. */
+#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
+
+/* Fast memmem algorithm with guaranteed linear-time performance.
+ Small needles up to size 2 use a dedicated linear search. Longer needles
+ up to size 256 use a novel modified Horspool algorithm. It hashes pairs
+ of characters to quickly skip past mismatches. The main search loop only
+ exits if the last 2 characters match, avoiding unnecessary calls to memcmp
+ and allowing for a larger skip if there is no match. A self-adapting
+ filtering check is used to quickly detect mismatches in long needles.
+ By limiting the needle length to 256, the shift table can be reduced to 8
+ bits per entry, lowering preprocessing overhead and minimizing cache effects.
+ The limit also implies worst-case performance is linear.
+ Needles larger than 256 characters use the linear-time Two-Way algorithm. */
void *
-__memmem (const void *haystack_start, size_t haystack_len,
- const void *needle_start, size_t needle_len)
+__memmem (const void *haystack, size_t hs_len,
+ const void *needle, size_t ne_len)
{
- /* Abstract memory is considered to be an array of 'unsigned char' values,
- not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
- const unsigned char *haystack = (const unsigned char *) haystack_start;
- const unsigned char *needle = (const unsigned char *) needle_start;
-
- if (needle_len == 0)
- /* The first occurrence of the empty string is deemed to occur at
- the beginning of the string. */
- return (void *) haystack;
-
- /* Sanity check, otherwise the loop might search through the whole
- memory. */
- if (__glibc_unlikely (haystack_len < needle_len))
+ const unsigned char *hs = (const unsigned char *) haystack;
+ const unsigned char *ne = (const unsigned char *) needle;
+
+ if (ne_len == 0)
+ return (void *) hs;
+ if (ne_len == 1)
+ return (void *) memchr (hs, ne[0], hs_len);
+
+ /* Ensure haystack length is >= needle length. */
+ if (hs_len < ne_len)
return NULL;
- /* Use optimizations in memchr when possible, to reduce the search
- size of haystack using a linear algorithm with a smaller
- coefficient. However, avoid memchr for long needles, since we
- can often achieve sublinear performance. */
- if (needle_len < LONG_NEEDLE_THRESHOLD)
+ const unsigned char *end = hs + hs_len - ne_len;
+
+ if (ne_len == 2)
+ {
+ uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1];
+ for (hs++; hs <= end && hw != nw; )
+ hw = hw << 16 | *++hs;
+ return hw == nw ? (void *)hs - 1 : NULL;
+ }
+
+ /* Use Two-Way algorithm for very long needles. */
+ if (__builtin_expect (ne_len > 256, 0))
+ return two_way_long_needle (hs, hs_len, ne, ne_len);
+
+ uint8_t shift[256];
+ size_t tmp, shift1;
+ size_t m1 = ne_len - 1;
+ size_t offset = 0;
+
+ memset (shift, 0, sizeof (shift));
+ for (int i = 1; i < m1; i++)
+ shift[hash2 (ne + i)] = i;
+ /* Shift1 is the amount we can skip after matching the hash of the
+ needle end but not the full needle. */
+ shift1 = m1 - shift[hash2 (ne + m1)];
+ shift[hash2 (ne + m1)] = m1;
+
+ for ( ; hs <= end; )
{
- haystack = memchr (haystack, *needle, haystack_len);
- if (!haystack || __builtin_expect (needle_len == 1, 0))
- return (void *) haystack;
- haystack_len -= haystack - (const unsigned char *) haystack_start;
- if (haystack_len < needle_len)
- return NULL;
- return two_way_short_needle (haystack, haystack_len, needle, needle_len);
+ /* Skip past character pairs not in the needle. */
+ do
+ {
+ hs += m1;
+ tmp = shift[hash2 (hs)];
+ }
+ while (tmp == 0 && hs <= end);
+
+ /* If the match is not at the end of the needle, shift to the end
+ and continue until we match the hash of the needle end. */
+ hs -= tmp;
+ if (tmp < m1)
+ continue;
+
+ /* Hash of the last 2 characters matches. If the needle is long,
+ try to quickly filter out mismatches. */
+ if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
+ {
+ if (memcmp (hs, ne, m1) == 0)
+ return (void *) hs;
+
+ /* Adjust filter offset when it doesn't find the mismatch. */
+ offset = (offset >= 8 ? offset : m1) - 8;
+ }
+
+ /* Skip based on matching the hash of the needle end. */
+ hs += shift1;
}
- else
- return two_way_long_needle (haystack, haystack_len, needle, needle_len);
+ return NULL;
}
libc_hidden_def (__memmem)
weak_alias (__memmem, memmem)
libc_hidden_weak (memmem)
-
-#undef LONG_NEEDLE_THRESHOLD
diff --git a/string/str-two-way.h b/string/str-two-way.h
index cd2605857d..f43c613f5a 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -1,5 +1,5 @@
/* Byte-wise substring search, using the Two-Way algorithm.
- Copyright (C) 2008-2018 Free Software Foundation, Inc.
+ Copyright (C) 2008-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Eric Blake <ebb9@byu.net>, 2008.
@@ -221,7 +221,7 @@ critical_factorization (const unsigned char *needle, size_t needle_len,
most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.
If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */
-static RETURN_TYPE
+static inline RETURN_TYPE
two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
const unsigned char *needle, size_t needle_len)
{
@@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
{
- const unsigned char *phaystack = &haystack[suffix];
+ const unsigned char *phaystack;
/* The comparison always starts from needle[suffix], so cache it
and use an optimized first-character loop. */
unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
-#if CHECK_EOL
- /* We start matching from the SUFFIX'th element, so make sure we
- don't hit '\0' before that. */
- if (haystack_len < suffix + 1
- && !AVAILABLE (haystack, haystack_len, 0, suffix + 1))
- return NULL;
-#endif
-
/* The two halves of needle are distinct; no extra memory is
required, and any mismatch results in a maximal shift. */
period = MAX (suffix, needle_len - suffix) + 1;
j = 0;
- while (1
-#if !CHECK_EOL
- && AVAILABLE (haystack, haystack_len, j, needle_len)
-#endif
- )
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
unsigned char haystack_char;
const unsigned char *pneedle;
- /* TODO: The first-character loop can be sped up by adapting
- longword-at-a-time implementation of memchr/strchr. */
- if (needle_suffix
+ phaystack = &haystack[suffix + j];
+
+#ifdef FASTSEARCH
+ if (*phaystack++ != needle_suffix)
+ {
+ phaystack = FASTSEARCH (phaystack, needle_suffix,
+ haystack_len - needle_len - j);
+ if (phaystack == NULL)
+ goto ret0;
+ j = phaystack - &haystack[suffix];
+ phaystack++;
+ }
+#else
+ while (needle_suffix
!= (haystack_char = CANON_ELEMENT (*phaystack++)))
{
RET0_IF_0 (haystack_char);
-#if !CHECK_EOL
+# if !CHECK_EOL
++j;
-#endif
- continue;
+ if (!AVAILABLE (haystack, haystack_len, j, needle_len))
+ goto ret0;
+# endif
}
-#if CHECK_EOL
+# if CHECK_EOL
/* Calculate J if it wasn't kept up-to-date in the first-character
loop. */
j = phaystack - &haystack[suffix] - 1;
+# endif
#endif
-
/* Scan for matches in right half. */
i = suffix + 1;
pneedle = &needle[i];
@@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
++i;
}
+#if CHECK_EOL
+ /* Update minimal length of haystack. */
+ if (phaystack > haystack + haystack_len)
+ haystack_len = phaystack - haystack;
+#endif
if (needle_len <= i)
{
/* Scan for matches in left half. */
@@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
j += i - suffix + 1;
-
-#if CHECK_EOL
- if (!AVAILABLE (haystack, haystack_len, j, needle_len))
- break;
-#endif
-
- phaystack = &haystack[suffix + j];
}
}
ret0: __attribute__ ((unused))
@@ -384,8 +382,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible.
If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and
- sublinear performance is not possible. */
-static RETURN_TYPE
+ sublinear performance is not possible.
+
+ Since this function is large and complex, block inlining to avoid
+ slowing down the common case of small needles. */
+__attribute__((noinline)) static RETURN_TYPE
two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
const unsigned char *needle, size_t needle_len)
{
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 90ba189790..a2aba000b1 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -1,5 +1,5 @@
/* Return the offset of one string within another.
- Copyright (C) 1994-2018 Free Software Foundation, Inc.
+ Copyright (C) 1994-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -37,8 +37,9 @@
/* Two-Way algorithm. */
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
- (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
- && ((h_l) = (j) + (n_l)))
+ (((j) + (n_l) <= (h_l)) \
+ || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
+ (j) + (n_l) <= (h_l)))
#define CHECK_EOL (1)
#define RET0_IF_0(a) if (!a) goto ret0
#define CANON_ELEMENT(c) TOLOWER (c)
@@ -58,31 +59,22 @@
case-insensitive comparison. This function gives unspecified
results in multibyte locales. */
char *
-STRCASESTR (const char *haystack_start, const char *needle_start)
+STRCASESTR (const char *haystack, const char *needle)
{
- const char *haystack = haystack_start;
- const char *needle = needle_start;
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
- bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
-
- /* Determine length of NEEDLE, and in the process, make sure
- HAYSTACK is at least as long (no point processing all of a long
- NEEDLE if HAYSTACK is too short). */
- while (*haystack && *needle)
- {
- ok &= (TOLOWER ((unsigned char) *haystack)
- == TOLOWER ((unsigned char) *needle));
- haystack++;
- needle++;
- }
- if (*needle)
+
+ /* Handle empty NEEDLE special case. */
+ if (needle[0] == '\0')
+ return (char *) haystack;
+
+ /* Ensure HAYSTACK length is at least as long as NEEDLE length.
+ Since a match may occur early on in a huge HAYSTACK, use strnlen
+ and read ahead a few cachelines for improved performance. */
+ needle_len = strlen (needle);
+ haystack_len = __strnlen (haystack, needle_len + 256);
+ if (haystack_len < needle_len)
return NULL;
- if (ok)
- return (char *) haystack_start;
- needle_len = needle - needle_start;
- haystack = haystack_start + 1;
- haystack_len = needle_len - 1;
/* Perform the search. Abstract memory is considered to be an array
of 'unsigned char' values, not an array of 'char' values. See
@@ -90,10 +82,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start)
if (needle_len < LONG_NEEDLE_THRESHOLD)
return two_way_short_needle ((const unsigned char *) haystack,
haystack_len,
- (const unsigned char *) needle_start,
+ (const unsigned char *) needle,
needle_len);
return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
- (const unsigned char *) needle_start,
+ (const unsigned char *) needle,
needle_len);
}
diff --git a/string/strstr.c b/string/strstr.c
index b3b5deb673..408cce8ed5 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -1,5 +1,5 @@
/* Return the offset of one string within another.
- Copyright (C) 1994-2018 Free Software Foundation, Inc.
+ Copyright (C) 1994-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,27 +16,17 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* This particular implementation was written by Eric Blake, 2008. */
-
#ifndef _LIBC
# include <config.h>
#endif
-/* Specification of strstr. */
#include <string.h>
-#include <stdbool.h>
-
-#ifndef _LIBC
-# define __builtin_expect(expr, val) (expr)
-#endif
-
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
- (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
- && ((h_l) = (j) + (n_l)))
-#define CHECK_EOL (1)
-#define RET0_IF_0(a) if (!a) goto ret0
+ (((j) + (n_l) <= (h_l)) \
+ || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
+ (j) + (n_l) <= (h_l)))
#include "str-two-way.h"
#undef strstr
@@ -45,48 +35,128 @@
#define STRSTR strstr
#endif
-/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
- if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
- HAYSTACK. */
+static inline char *
+strstr2 (const unsigned char *hs, const unsigned char *ne)
+{
+ uint32_t h1 = (ne[0] << 16) | ne[1];
+ uint32_t h2 = 0;
+ for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
+ h2 = (h2 << 16) | c;
+ return h1 == h2 ? (char *)hs - 2 : NULL;
+}
+
+static inline char *
+strstr3 (const unsigned char *hs, const unsigned char *ne)
+{
+ uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
+ uint32_t h2 = 0;
+ for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs)
+ h2 = (h2 | c) << 8;
+ return h1 == h2 ? (char *)hs - 3 : NULL;
+}
+
+/* Hash character pairs so a small shift table can be used. All bits of
+ p[0] are included, but not all bits from p[-1]. So if two equal hashes
+ match on p[-1], p[0] matches too. Hash collisions are harmless and result
+ in smaller shifts. */
+#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
+
+/* Fast strstr algorithm with guaranteed linear-time performance.
+ Small needles up to size 3 use a dedicated linear search. Longer needles
+ up to size 256 use a novel modified Horspool algorithm. It hashes pairs
+ of characters to quickly skip past mismatches. The main search loop only
+ exits if the last 2 characters match, avoiding unnecessary calls to memcmp
+ and allowing for a larger skip if there is no match. A self-adapting
+ filtering check is used to quickly detect mismatches in long needles.
+ By limiting the needle length to 256, the shift table can be reduced to 8
+ bits per entry, lowering preprocessing overhead and minimizing cache effects.
+ The limit also implies worst-case performance is linear.
+ Needles larger than 256 characters use the linear-time Two-Way algorithm. */
char *
-STRSTR (const char *haystack_start, const char *needle_start)
+STRSTR (const char *haystack, const char *needle)
{
- const char *haystack = haystack_start;
- const char *needle = needle_start;
- size_t needle_len; /* Length of NEEDLE. */
- size_t haystack_len; /* Known minimum length of HAYSTACK. */
- bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
-
- /* Determine length of NEEDLE, and in the process, make sure
- HAYSTACK is at least as long (no point processing all of a long
- NEEDLE if HAYSTACK is too short). */
- while (*haystack && *needle)
- ok &= *haystack++ == *needle++;
- if (*needle)
+ const unsigned char *hs = (const unsigned char *) haystack;
+ const unsigned char *ne = (const unsigned char *) needle;
+
+ /* Handle short needle special cases first. */
+ if (ne[0] == '\0')
+ return (char *)hs;
+ hs = (const unsigned char *)strchr ((const char*)hs, ne[0]);
+ if (hs == NULL || ne[1] == '\0')
+ return (char*)hs;
+ if (ne[2] == '\0')
+ return strstr2 (hs, ne);
+ if (ne[3] == '\0')
+ return strstr3 (hs, ne);
+
+ /* Ensure haystack length is at least as long as needle length.
+ Since a match may occur early on in a huge haystack, use strnlen
+ and read ahead a few cachelines for improved performance. */
+ size_t ne_len = strlen ((const char*)ne);
+ size_t hs_len = __strnlen ((const char*)hs, ne_len | 512);
+ if (hs_len < ne_len)
return NULL;
- if (ok)
- return (char *) haystack_start;
-
- /* Reduce the size of haystack using strchr, since it has a smaller
- linear coefficient than the Two-Way algorithm. */
- needle_len = needle - needle_start;
- haystack = strchr (haystack_start + 1, *needle_start);
- if (!haystack || __builtin_expect (needle_len == 1, 0))
- return (char *) haystack;
- needle -= needle_len;
- haystack_len = (haystack > haystack_start + needle_len ? 1
- : needle_len + haystack_start - haystack);
-
- /* Perform the search. Abstract memory is considered to be an array
- of 'unsigned char' values, not an array of 'char' values. See
- ISO C 99 section 6.2.6.1. */
- if (needle_len < LONG_NEEDLE_THRESHOLD)
- return two_way_short_needle ((const unsigned char *) haystack,
- haystack_len,
- (const unsigned char *) needle, needle_len);
- return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
- (const unsigned char *) needle, needle_len);
+
+ /* Check whether we have a match. This improves performance since we
+ avoid initialization overheads. */
+ if (memcmp (hs, ne, ne_len) == 0)
+ return (char *) hs;
+
+ /* Use Two-Way algorithm for very long needles. */
+ if (__glibc_unlikely (ne_len > 256))
+ return two_way_long_needle (hs, hs_len, ne, ne_len);
+
+ const unsigned char *end = hs + hs_len - ne_len;
+ uint8_t shift[256];
+ size_t tmp, shift1;
+ size_t m1 = ne_len - 1;
+ size_t offset = 0;
+
+ /* Initialize bad character shift hash table. */
+ memset (shift, 0, sizeof (shift));
+ for (int i = 1; i < m1; i++)
+ shift[hash2 (ne + i)] = i;
+ /* Shift1 is the amount we can skip after matching the hash of the
+ needle end but not the full needle. */
+ shift1 = m1 - shift[hash2 (ne + m1)];
+ shift[hash2 (ne + m1)] = m1;
+
+ while (1)
+ {
+ if (__glibc_unlikely (hs > end))
+ {
+ end += __strnlen ((const char*)end + m1 + 1, 2048);
+ if (hs > end)
+ return NULL;
+ }
+
+ /* Skip past character pairs not in the needle. */
+ do
+ {
+ hs += m1;
+ tmp = shift[hash2 (hs)];
+ }
+ while (tmp == 0 && hs <= end);
+
+ /* If the match is not at the end of the needle, shift to the end
+ and continue until we match the hash of the needle end. */
+ hs -= tmp;
+ if (tmp < m1)
+ continue;
+
+ /* Hash of the last 2 characters matches. If the needle is long,
+ try to quickly filter out mismatches. */
+ if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
+ {
+ if (memcmp (hs, ne, m1) == 0)
+ return (void *) hs;
+
+ /* Adjust filter offset when it doesn't find the mismatch. */
+ offset = (offset >= 8 ? offset : m1) - 8;
+ }
+
+ /* Skip based on matching the hash of the needle end. */
+ hs += shift1;
+ }
}
libc_hidden_builtin_def (strstr)
-
-#undef LONG_NEEDLE_THRESHOLD
diff --git a/string/test-memmem.c b/string/test-memmem.c
index 51f58d1eda..2ac7f18c71 100644
--- a/string/test-memmem.c
+++ b/string/test-memmem.c
@@ -1,5 +1,5 @@
/* Test and measure memmem functions.
- Copyright (C) 2008-2018 Free Software Foundation, Inc.
+ Copyright (C) 2008-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Ulrich Drepper <drepper@redhat.com>, 2008.
@@ -48,10 +48,10 @@ simple_memmem (const void *haystack, size_t haystack_len, const void *needle,
return NULL;
for (begin = (const char *) haystack; begin <= last_possible; ++begin)
- if (begin[0] == ((const char *) needle)[0] &&
- !memcmp ((const void *) &begin[1],
- (const void *) ((const char *) needle + 1),
- needle_len - 1))
+ if (begin[0] == ((const char *) needle)[0]
+ && !memcmp ((const void *) &begin[1],
+ (const void *) ((const char *) needle + 1),
+ needle_len - 1))
return (void *) begin;
return NULL;
diff --git a/string/test-strstr.c b/string/test-strstr.c
index acf6ff8224..031aff5534 100644
--- a/string/test-strstr.c
+++ b/string/test-strstr.c
@@ -1,5 +1,5 @@
/* Test and measure strstr functions.
- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+ Copyright (C) 2010-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Ulrich Drepper <drepper@redhat.com>, 2010.
@@ -24,6 +24,7 @@
#define STRSTR simple_strstr
#define libc_hidden_builtin_def(arg) /* nothing */
+#define __strnlen strnlen
#include "strstr.c"
@@ -150,6 +151,32 @@ check2 (void)
}
}
+#define N 1024
+
+static void
+pr23637 (void)
+{
+ char *h = (char*) buf1;
+ char *n = (char*) buf2;
+
+ for (int i = 0; i < N; i++)
+ {
+ n[i] = 'x';
+ h[i] = ' ';
+ h[i + N] = 'x';
+ }
+
+ n[N] = '\0';
+ h[N * 2] = '\0';
+
+ /* Ensure we don't match at the first 'x'. */
+ h[0] = 'x';
+
+ char *exp_result = stupid_strstr (h, n);
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, h, n, exp_result);
+}
+
static int
test_main (void)
{
@@ -157,6 +184,7 @@ test_main (void)
check1 ();
check2 ();
+ pr23637 ();
printf ("%23s", "");
FOR_EACH_IMPL (impl, 0)
@@ -201,6 +229,9 @@ test_main (void)
do_test (15, 9, hlen, klen, 1);
do_test (15, 15, hlen, klen, 0);
do_test (15, 15, hlen, klen, 1);
+
+ do_test (15, 15, hlen + klen * 4, klen * 4, 0);
+ do_test (15, 15, hlen + klen * 4, klen * 4, 1);
}
do_test (0, 0, page_size - 1, 16, 0);