X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/ad3c9f2af814c84582fdd1649e49ec4f68572c5a..refs/heads/master:/regex/TRE/lib/tre-match-utils.h

diff --git a/regex/TRE/lib/tre-match-utils.h b/regex/TRE/lib/tre-match-utils.h
index 83258e8..5d4cd22 100644
--- a/regex/TRE/lib/tre-match-utils.h
+++ b/regex/TRE/lib/tre-match-utils.h
@@ -17,6 +17,7 @@
 /* Wide character and multibyte support. */
 
 #ifdef TRE_STR_USER
+#error TRE_STR_USER defined
 #define GET_NEXT_WCHAR()						      \
   do {									      \
     prev_c = next_c;							      \
@@ -82,30 +83,43 @@
       }									      \
   } while(/*CONSTCOND*/0)
 #else /* !TRE_STR_USER */
+/*
+ * Because all multibyte encodings are exclusively single-shift encodings,
+ * with the shift codes having the high bit set, we can optimize STR_MBS
+ * by calling tre_mbrtowc_l() only when a high-bit byte is detected, and
+ * doing a direct copy for ASCII characters.
+ */
 #define GET_NEXT_WCHAR()						      \
   do {									      \
     prev_c = next_c;							      \
-    if (type == STR_BYTE)						      \
+    switch (type)							      \
       {									      \
+      case STR_BYTE:							      \
 	pos++;								      \
 	if (len >= 0 && pos >= len)					      \
 	  next_c = '\0';						      \
 	else								      \
 	  next_c = (unsigned char)(*str_byte++);			      \
-      }									      \
-    else if (type == STR_WIDE)						      \
-      {									      \
+	break;								      \
+      case STR_WIDE:							      \
 	pos++;								      \
 	if (len >= 0 && pos >= len)					      \
 	  next_c = L'\0';						      \
 	else								      \
 	  next_c = *str_wide++;						      \
-      }									      \
-    else if (type == STR_MBS)						      \
-      {									      \
-        pos += pos_add_next;					      	      \
-	if (str_byte == NULL)						      \
-	  next_c = L'\0';						      \
+	break;								      \
+      case STR_MBS:							      \
+	pos += pos_add_next;					      	      \
+	if (__builtin_expect(len >= 0 && pos >= len, 0))		      \
+	  {								      \
+	    next_c = L'\0';						      \
+	    pos_add_next = 1;						      \
+	  }								      \
+	else if (__builtin_expect(!(*str_byte & 0x80), 1))		      \
+	  {								      \
+	    next_c = (unsigned char)(*str_byte++);			      \
+	    pos_add_next = 1;						      \
+	  }								      \
 	else								      \
 	  {								      \
 	    size_t w;							      \
@@ -114,30 +128,23 @@
 	      max = len - pos;						      \
 	    else							      \
 	      max = 32;							      \
-	    if (max <= 0)						      \
+	    w = tre_mbrtowc_l(&next_c, str_byte, (size_t)max, &mbstate,	      \
+			      tnfa->loc);				      \
+	    if (w == (size_t)-1 || w == (size_t)-2)			      \
+	      return REG_ILLSEQ;					      \
+	    if (w == 0 && len >= 0)					      \
 	      {								      \
-		next_c = L'\0';						      \
 		pos_add_next = 1;					      \
+		next_c = 0;						      \
+		str_byte++;						      \
 	      }								      \
 	    else							      \
 	      {								      \
-		w = tre_mbrtowc_l(&next_c, str_byte, (size_t)max, &mbstate,   \
-				  tnfa->loc);                                 \
-		if (w == (size_t)-1 || w == (size_t)-2)			      \
-		  return REG_ILLSEQ;					      \
-		if (w == 0 && len >= 0)					      \
-		  {							      \
-		    pos_add_next = 1;					      \
-		    next_c = 0;						      \
-		    str_byte++;						      \
-		  }							      \
-		else							      \
-		  {							      \
-		    pos_add_next = w;					      \
-		    str_byte += w;					      \
-		  }							      \
+		pos_add_next = w;					      \
+		str_byte += w;						      \
 	      }								      \
 	  }								      \
+	break;								      \
       }									      \
   } while(/*CONSTCOND*/0)
 #endif /* !TRE_STR_USER */
@@ -145,6 +152,7 @@
 #else /* !TRE_MULTIBYTE */
 
 /* Wide character support, no multibyte support. */
+#error TRE_MULTIBYTE undefined
 
 #ifdef TRE_STR_USER
 #define GET_NEXT_WCHAR()						      \
@@ -201,6 +209,7 @@
 #else /* !TRE_WCHAR */
 
 /* No wide character or multibyte support. */
+#error TRE_WCHAR undefined
 
 #ifdef TRE_STR_USER
 #define GET_NEXT_WCHAR()						      \
@@ -614,7 +623,9 @@ tre_bracket_match(tre_bracket_match_list_t * __restrict list, tre_cint_t wc,
 	  break;
     }
 error:
-  if (list->flags & TRE_BRACKET_MATCH_FLAG_NEGATE)
+  if (list->flags & TRE_BRACKET_MATCH_FLAG_NEGATE) {
+    if ((tnfa->cflags & REG_NEWLINE) && wc == '\n') return 0;
     match = !match;
+  }
   return match;
 }