X-Git-Url: https://git.saurik.com/cycript.git/blobdiff_plain/5f6902c29e9bba2538e729694ec7c8ced38bf9ec..322286dd48cfe4c3b9eff66583931e89038e4ba4:/Scanner.lpp.in diff --git a/Scanner.lpp.in b/Scanner.lpp.in index e34f675..1fb16aa 100644 --- a/Scanner.lpp.in +++ b/Scanner.lpp.in @@ -41,6 +41,9 @@ typedef cy::parser::token tk; #include "Highlight.hpp" +#include "IdentifierStart.h" +#include "IdentifierContinue.h" + #define YY_EXTRA_TYPE CYDriver * #define F(value, highlight) do { \ @@ -140,7 +143,7 @@ static void U(Type_ &local, unsigned point) { } else _assert(false); } -static void U(char *&local, const char *text, yy_size_t &i) { +static unsigned U(char *&local, const char *text, yy_size_t &i) { unsigned point; char next(text[++i]); @@ -158,6 +161,7 @@ static void U(char *&local, const char *text, yy_size_t &i) { } U(local, point); + return point; } #define CYLexBufferPoint(point) do { \ @@ -217,6 +221,7 @@ U2 [\xc2-\xdf] U3 [\xe0-\xef] U4 [\xf0-\xf4] UN [\xc0-\xc1\xf5-\xff] +UE {U1}|{U2}|{U3}|{U4}|{UN} HexDigit [0-9a-fA-F] LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9] @@ -238,12 +243,12 @@ IdentifierMore [$_] UnicodeStart {IdentifierMore}|{UnicodeIDStart} UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue} -UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}|{UN}|{U0} -UnicodeScrap {UnicodePart}*{UnicodeFail}? +UnicodeScrap {U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}|{UN}|{U0} +UnicodeError ({U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}){UE}|{UN}|{U0} IdentifierStart {UnicodeStart}|{UnicodeEscape} IdentifierPart {UnicodePart}|{UnicodeEscape} -IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))? +IdentifierFail {UnicodeError}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))? IdentifierScrap {IdentifierPart}*{IdentifierFail}? RegularExpressionBackslashSequence \\{NoneTerminatorCharacter} @@ -278,26 +283,30 @@ XMLName {XMLNameStart}{XMLNamePart}* /* RegEx {{{ */ { \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant); - \/{UnicodePart}*{UnicodeFail} R E("invalid flags") + \/{UnicodePart}*{UnicodeError} R E("invalid character"); {RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {RegExCharacter}*{UnicodeFail} R E("invalid character"); {RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng); - \\{UnicodeFail}? R E("invalid escape") + \\ R E("invalid escape") + + (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline"); + (\\|{RegExCharacter}+)?{UnicodeScrap} R E("invalid character"); "["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng); "["{RegularExpressionClassChars}\\? R E("invalid class"); - "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character"); + "["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline"); + "["{RegularExpressionClassChars}\\?{UnicodeScrap} R E("invalid character"); - (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline"); <> R E("unterminated regex") } /* }}} */ /* Comment {{{ */ -#![^\n]* L M -\/\/[^\n]* L M + /* XXX: maybe fold LineTerminatorSequence into these definitions */ +#!{NoneTerminatorCharacter}* L M +\/\/{NoneTerminatorCharacter}* L M +(#!|\/\/){NoneTerminatorCharacter}*{UnicodeError} L E("invalid character"); \/\* L yy_push_state(MultiLine, yyscanner); @@ -305,7 +314,10 @@ XMLName {XMLNameStart}{XMLNamePart}* \**\*\/ R yy_pop_state(yyscanner); M N \**{LineTerminatorSequence} yylloc->end.Lines(); yyextra->last_ = true; \**{CommentCharacter}|\/ R - \**({UnicodeFail}|\*) R E("invalid comment"); + + \**{UnicodeScrap} R E("invalid character"); + \**\* R E("invalid comment"); + <> R E("invalid comment") } /* }}} */ @@ -425,7 +437,7 @@ XMLName {XMLNameStart}{XMLNamePart}* "@YES" L F(tk::At_YES_, hi::Constant); @end -@({UnicodeStart}{UnicodeScrap}|{UnicodeFail}) L E("invalid keyword") +@({UnicodeStart}{UnicodePart}*{UnicodeError}?|{UnicodeError}) L E("invalid keyword") /* }}} */ /* Highlight {{{ */ "undefined" L F(tk::_undefined_, hi::Operator); @@ -493,9 +505,10 @@ XMLName {XMLNameStart}{XMLNamePart}* "set" L /*III*/ F(tk::_set_, hi::Meta); "short" L /*FII*/ F(tk::_short_, hi::Type); "static" L /*FS?*/ F(tk::_static_, hi::Meta); -"super" L /*FFK*/ F(yyextra->super_.top() ? tk::_super__ : tk::_super_, hi::Constant); +"super" L /*FFK*/ F(tk::_super_, hi::Constant); "switch" L /*KKK*/ F(tk::_switch_, hi::Control); "synchronized" L /*FII*/ F(tk::_synchronized_, hi::Meta); +"target" L /*III*/ F(tk::_target_, hi::Identifier); "this" L /*KKK*/ F(tk::_this_, hi::Constant); "throw" L /*KKK*/ F(tk::_throw_, hi::Control); "throws" L /*FII*/ F(tk::_throws_, hi::Meta); @@ -542,8 +555,12 @@ XMLName {XMLNameStart}{XMLNamePart}* char next(yytext[i]); if (next != '\\') *local++ = next; - else - U(local, yytext, ++i); + else { + bool (*is)(unsigned) = (i == 0 ? &IsIdentifierStart : &IsIdentifierContinue); + unsigned point(U(local, yytext, ++i)); + if (!is(point)) + E("invalid character"); + } } *local = '\0'; @@ -569,16 +586,16 @@ XMLName {XMLNameStart}{XMLNamePart}* { \' R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); {SingleCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {SingleCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {SingleCharacter}*{LineTerminatorSequence} R E("invalid newline"); + {SingleCharacter}*{UnicodeScrap} R E("invalid character"); } \" L CYLexBufferStart(LegacyDoubleString); { \" R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); {DoubleCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {DoubleCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {DoubleCharacter}*{LineTerminatorSequence} R E("invalid newline"); + {DoubleCharacter}*{UnicodeScrap} R E("invalid character"); } /* }}} */ /* Template {{{ */ @@ -592,8 +609,8 @@ XMLName {XMLNameStart}{XMLNamePart}* "$" R CYLexBufferUnit('$'); {PlateCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {PlateCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {PlateCharacter}*{LineTerminatorSequence} R E("invalid newline"); + {PlateCharacter}*{UnicodeScrap} R E("invalid character"); } /* }}} */ /* Escapes {{{ */ @@ -628,20 +645,20 @@ XMLName {XMLNameStart}{XMLNamePart}* } \\{LineTerminatorSequence} yylloc->end.Lines(); - \\(.|{NotLineTerminator}) R CYLexBufferUnits(yytext + 1, yyleng - 1); + \\{NoneTerminatorCharacter} R CYLexBufferUnits(yytext + 1, yyleng - 1); + \\{UnicodeScrap} R E("invalid character"); - \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*)|{UnicodeFail})? R E("invalid escape"); + \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*))? R E("invalid escape"); <> R E("invalid string"); } /* }}} */ {LineTerminatorSequence} yylloc->step(); yylloc->end.Lines(); yyextra->last_ = true; N {WhiteSpace} L +{U1}|{UnicodeScrap} L E("invalid character"); <> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate(); -. L E("invalid character") - %% #undef yyextra