From c8a2a786eb3f1cda8780b348f27e2c8240621045 Mon Sep 17 00:00:00 2001 From: "Jay Freeman (saurik)" Date: Tue, 1 Dec 2015 23:32:39 -0800 Subject: [PATCH] Use start conditions to parse regular expressions. --- Cycript.l.in | 36 +++++++++++++++++++++++++----------- Parser.hpp | 6 ++++-- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Cycript.l.in b/Cycript.l.in index 55502d4..114b19a 100644 --- a/Cycript.l.in +++ b/Cycript.l.in @@ -220,6 +220,9 @@ WhiteSpace [\x09\x0b\x0c\x20]|\xc2\xa0|\xef\xbb\xbf UnicodeEscape \\u({HexDigit}{4}|\{{HexDigit}+\}) @include NotLineTerminator.l +NoneTerminatorCharacter [^\r\n\x80-\xff]|{NotLineTerminator} +RegExCharacter [^/[\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} +RegClsCharacter [^]\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} CommentCharacter [^*/]{-}[\r\n\x80-\xff]|{NotLineTerminator} SingleCharacter [^'\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} DoubleCharacter [^"\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} @@ -239,13 +242,8 @@ IdentifierPart {UnicodePart}|{UnicodeEscape} IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))? IdentifierScrap {IdentifierPart}*{IdentifierFail}? 
-NonTerminator [^\n] -BackslashSequence \\{NonTerminator} -RegularExpressionFirstChar [^\n*\\/]|{BackslashSequence} -RegularExpressionChar [^\n\\/]|{BackslashSequence} -RegularExpressionFlags {UnicodePart}* -RegularExpressionChars {RegularExpressionChar}* -RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars} +RegularExpressionBackslashSequence \\{NoneTerminatorCharacter} +RegularExpressionClassChars ({RegClsCharacter}|{RegularExpressionBackslashSequence})* @begin E4X XMLNameStart [a-zA-Z_:] @@ -253,6 +251,7 @@ XMLNamePart [a-zA-Z0-9.-_:] XMLName {XMLNameStart}{XMLNamePart}* @end +%x RegularExpression %x MultiLine %x LegacySingleString @@ -275,10 +274,25 @@ XMLName {XMLNameStart}{XMLNamePart}* %% /* RegEx {{{ */ -{ - \/{RegularExpressionBody}\/{RegularExpressionFlags} L I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant); - \/{RegularExpressionBody}\/{RegularExpressionFlags}{UnicodeFail} L E("invalid flags") - \/{RegularExpressionBody}?\\? L E("unterminated regex") +\/ L CYLexBufferStart(RegularExpression); CYLexBufferUnit('/'); + +<RegularExpression>{ + \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant); + \/{UnicodePart}*{UnicodeFail} R E("invalid flags") + + {RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng); + {RegExCharacter}*{UnicodeFail} R E("invalid character"); + + {RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng); + \\{UnicodeFail}? R E("invalid escape") + + "["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng); + "["{RegularExpressionClassChars}\\? 
R E("invalid class"); + "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character"); + "["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline"); + + (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline"); + <<EOF>> R E("unterminated regex") } /* }}} */ /* Comment {{{ */ diff --git a/Parser.hpp b/Parser.hpp index 1ea1451..02b08eb 100644 --- a/Parser.hpp +++ b/Parser.hpp @@ -860,9 +860,11 @@ struct CYRegEx : CYTrivial { const char *value_; + size_t size_; - CYRegEx(const char *value) : - value_(value) + CYRegEx(const char *value, size_t size) : + value_(value), + size_(size) { } -- 2.47.2