blob: d0efa3f1c54b0e25b10edd2c52fd7cc3b0d00c02 [file] [log] [blame]
--- regcomp.c 2006-01-08 12:59:27.000000000 -0800
+++ regcomp.c 2007-10-05 12:07:55.000000000 -0700
@@ -135,7 +135,8 @@
I32 extralen;
I32 seen_zerolen;
I32 seen_evals;
- I32 utf8;
+ I32 utf8; /* pattern is utf8 or not */
+ I32 orig_utf8; /* pattern was originally utf8 */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
@@ -161,6 +162,7 @@
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define RExC_seen_evals (pRExC_state->seen_evals)
#define RExC_utf8 (pRExC_state->utf8)
+#define RExC_orig_utf8 (pRExC_state->orig_utf8)
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
@@ -1749,15 +1751,17 @@
if (exp == NULL)
FAIL("NULL regexp argument");
- RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+ RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
- RExC_precomp = exp;
DEBUG_r({
if (!PL_colorset) reginitcolors();
PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
- (int)(xend - exp), RExC_precomp, PL_colors[1]);
+ (int)(xend - exp), exp, PL_colors[1]);
});
+
+redo_first_pass:
+ RExC_precomp = exp;
RExC_flags = pm->op_pmflags;
RExC_sawback = 0;
@@ -1783,6 +1787,17 @@
RExC_precomp = Nullch;
return(NULL);
}
+ if (RExC_utf8 && !RExC_orig_utf8) {
+ STRLEN len = xend-exp;
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+ xend = exp + len;
+ RExC_orig_utf8 = RExC_utf8;
+ SAVEFREEPV(exp);
+ goto redo_first_pass;
+ }
+