Use start conditions to parse regular expressions.

author Jay Freeman (saurik) <saurik@saurik.com>

Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)

committer Jay Freeman (saurik) <saurik@saurik.com>

Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)
author Jay Freeman (saurik) <saurik@saurik.com>
Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)
committer Jay Freeman (saurik) <saurik@saurik.com>
Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)
diff --git a/Cycript.l.in b/Cycript.l.in

index 55502d4361ab9fc1b250fa0cb50ce9f6e7dc8ad7..114b19a301b36e7d1d17c03849c2794cb01be789 100644 (file)
--- a/Cycript.l.in
+++ b/Cycript.l.in
@@ -220,6 +220,9 @@ WhiteSpace [\x09\x0b\x0c\x20]|\xc2\xa0|\xef\xbb\xbf
  UnicodeEscape \\u({HexDigit}{4}|\{{HexDigit}+\})
  
  @include NotLineTerminator.l
+NoneTerminatorCharacter [^\r\n\x80-\xff]|{NotLineTerminator}
+RegExCharacter [^/[\\]{-}[\r\n\x80-\xff]|{NotLineTerminator}
+RegClsCharacter [^]\\]{-}[\r\n\x80-\xff]|{NotLineTerminator}
  CommentCharacter [^*/]{-}[\r\n\x80-\xff]|{NotLineTerminator}
  SingleCharacter [^'\\]{-}[\r\n\x80-\xff]|{NotLineTerminator}
  DoubleCharacter [^"\\]{-}[\r\n\x80-\xff]|{NotLineTerminator}
@@ -239,13 +242,8 @@ IdentifierPart {UnicodePart}|{UnicodeEscape}
  IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
  IdentifierScrap {IdentifierPart}*{IdentifierFail}?
  
-NonTerminator [^\n]
-BackslashSequence \\{NonTerminator}
-RegularExpressionFirstChar [^\n*\\/]|{BackslashSequence}
-RegularExpressionChar [^\n\\/]|{BackslashSequence}
-RegularExpressionFlags {UnicodePart}*
-RegularExpressionChars {RegularExpressionChar}*
-RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
+RegularExpressionBackslashSequence \\{NoneTerminatorCharacter}
+RegularExpressionClassChars ({RegClsCharacter}|{RegularExpressionBackslashSequence})*
  
  @begin E4X
  XMLNameStart [a-zA-Z_:]
@@ -253,6 +251,7 @@ XMLNamePart [a-zA-Z0-9.-_:]
  XMLName {XMLNameStart}{XMLNamePart}*
  @end
  
+%x RegularExpression
  %x MultiLine
  
  %x LegacySingleString
@@ -275,10 +274,25 @@ XMLName {XMLNameStart}{XMLNamePart}*
  %%
  
      /* RegEx {{{ */
-<RegExp,RegExpOrTemplateTail>{
-    \/{RegularExpressionBody}\/{RegularExpressionFlags} L I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant);
-    \/{RegularExpressionBody}\/{RegularExpressionFlags}{UnicodeFail} L E("invalid flags")
-    \/{RegularExpressionBody}?\\? L E("unterminated regex")
+<RegExp,RegExpOrTemplateTail>\/ L CYLexBufferStart(RegularExpression); CYLexBufferUnit('/');
+
+<RegularExpression>{
+    \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant);
+    \/{UnicodePart}*{UnicodeFail} R E("invalid flags")
+
+    {RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng);
+    {RegExCharacter}*{UnicodeFail} R E("invalid character");
+
+    {RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng);
+    \\{UnicodeFail}? R E("invalid escape")
+
+    "["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng);
+    "["{RegularExpressionClassChars}\\? R E("invalid class");
+    "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character");
+    "["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline");
+
+    (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline");
+    <<EOF>> R E("unterminated regex")
  }
      /* }}} */
      /* Comment {{{ */
diff --git a/Parser.hpp b/Parser.hpp

index 1ea1451a7c26a2a6da8b761587d1bfbafbe0e742..02b08ebee8b448a9b1c78ce2f19b7910ab6837be 100644 (file)
--- a/Parser.hpp
+++ b/Parser.hpp
@@ -860,9 +860,11 @@ struct CYRegEx :
      CYTrivial
  {
      const char *value_;
+    size_t size_;
  
-    CYRegEx(const char *value) :
-        value_(value)
+    CYRegEx(const char *value, size_t size) :
+        value_(value),
+        size_(size)
      {
      }
author	Jay Freeman (saurik) <saurik@saurik.com>
	Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)
committer	Jay Freeman (saurik) <saurik@saurik.com>
	Wed, 2 Dec 2015 07:32:39 +0000 (23:32 -0800)
Cycript.l.in		patch | blob | blame | history
Parser.hpp		patch | blob | blame | history