X-Git-Url: https://git.saurik.com/cycript.git/blobdiff_plain/c5b15840ab1bb70218506531f9a73fe2d112b007..1e8d80477a3e058a30c477955f1e0c56deb6e956:/Scanner.lpp.in?ds=inline diff --git a/Scanner.lpp.in b/Scanner.lpp.in index 5563b06..d9614df 100644 --- a/Scanner.lpp.in +++ b/Scanner.lpp.in @@ -1,5 +1,5 @@ -/* Cycript - Optimizing JavaScript Compiler/Runtime - * Copyright (C) 2009-2015 Jay Freeman (saurik) +/* Cycript - The Truly Universal Scripting Language + * Copyright (C) 2009-2016 Jay Freeman (saurik) */ /* GNU Affero General Public License, Version 3 {{{ */ @@ -41,12 +41,12 @@ typedef cy::parser::token tk; #include "Highlight.hpp" +#include "IdentifierStart.h" +#include "IdentifierContinue.h" + #define YY_EXTRA_TYPE CYDriver * #define F(value, highlight) do { \ - yyextra->newline_ = yyextra->last_; \ - yyextra->last_ = false; \ - yyextra->next_ = false; \ BEGIN(yyextra->template_.top() ? DivOrTemplateTail : Div); \ yylval->highlight_ = highlight; \ return value; \ @@ -64,11 +64,12 @@ typedef cy::parser::token tk; #define I(type, Type, value, highlight) do { \ yylval->semantic_.type ## _ = A CY ## Type; \ + yylval->semantic_.type ## _->location_ = *yylloc; \ F(value, highlight); \ } while (false) #define N \ - if (yyextra->last_ && yyextra->next_) { \ + if (yyextra->last_) { \ yyextra->last_ = false; \ F(tk::NewLine, hi::Nothing); \ } @@ -83,13 +84,13 @@ typedef cy::parser::token tk; nl = reinterpret_cast(memchr(nl + 1, '\n', left)); \ } while (nl != NULL); \ yylloc->step(); \ - yylloc->end.lines(lines); \ - yylloc->end.columns(left); \ + yylloc->end.Lines(lines); \ + yylloc->end.Columns(left); \ more \ } else L \ } -#define R yylloc->end.columns(yyleng); +#define R yylloc->end.Columns(yyleng); #define L yylloc->step(); R #define H(value, highlight) do { \ @@ -139,7 +140,7 @@ static void U(Type_ &local, unsigned point) { } else _assert(false); } -static void U(char *&local, const char *text, yy_size_t &i) { +static unsigned U(char *&local, const char *text, yy_size_t &i) { unsigned point; char next(text[++i]); @@ -157,6 +158,7 @@ static void U(char *&local, const char *text, yy_size_t &i) { } U(local, point); + return point; } #define CYLexBufferPoint(point) do { \ @@ -182,15 +184,9 @@ static void U(char *&local, const char *text, yy_size_t &i) { I(type, Type(P.strmemdup(yyextra->buffer_.data(), yyextra->buffer_.size()), yyextra->buffer_.size()), value, highlight); \ } while (false) -#define YY_INPUT(data, value, size) { \ - if (yyextra->data_.eof()) \ - value = YY_NULL; \ - else { \ - yyextra->data_.read(data, size); \ - size_t copy(yyextra->data_.gcount()); \ - value = copy == 0 ? YY_NULL : copy; \ - } \ -} +#define YY_INPUT(data, value, size) do { \ + value = yyextra->data_.sgetn(data, size) ?: YY_NULL; \ +} while (false) %} @@ -216,6 +212,7 @@ U2 [\xc2-\xdf] U3 [\xe0-\xef] U4 [\xf0-\xf4] UN [\xc0-\xc1\xf5-\xff] +UE {U1}|{U2}|{U3}|{U4}|{UN} HexDigit [0-9a-fA-F] LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9] @@ -237,12 +234,12 @@ IdentifierMore [$_] UnicodeStart {IdentifierMore}|{UnicodeIDStart} UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue} -UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}|{UN}|{U0} -UnicodeScrap {UnicodePart}*{UnicodeFail}? +UnicodeScrap {U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}|{UN}|{U0} +UnicodeError ({U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}){UE}|{UN}|{U0} IdentifierStart {UnicodeStart}|{UnicodeEscape} IdentifierPart {UnicodePart}|{UnicodeEscape} -IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))? +IdentifierFail {UnicodeError}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))? IdentifierScrap {IdentifierPart}*{IdentifierFail}? RegularExpressionBackslashSequence \\{NoneTerminatorCharacter} @@ -266,8 +263,6 @@ XMLName {XMLNameStart}{XMLNamePart}* %s Div %s DivOrTemplateTail -%s RegExp -%s RegExpOrTemplateTail @begin E4X %x XMLContent @@ -277,38 +272,43 @@ XMLName {XMLNameStart}{XMLNamePart}* %% /* RegEx {{{ */ -\/ L CYLexBufferStart(RegularExpression); CYLexBufferUnit('/'); - { - \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant); - \/{UnicodePart}*{UnicodeFail} R E("invalid flags") + \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral_, hi::Constant); + \/{UnicodePart}*{UnicodeError} R E("invalid character"); {RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {RegExCharacter}*{UnicodeFail} R E("invalid character"); {RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng); - \\{UnicodeFail}? R E("invalid escape") + \\ R E("invalid escape") + + (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline"); + (\\|{RegExCharacter}+)?{UnicodeScrap} R E("invalid character"); "["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng); "["{RegularExpressionClassChars}\\? R E("invalid class"); - "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character"); + "["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline"); + "["{RegularExpressionClassChars}\\?{UnicodeScrap} R E("invalid character"); - (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline"); <> R E("unterminated regex") } /* }}} */ /* Comment {{{ */ -#![^\n]* L M -\/\/[^\n]* L M + /* XXX: maybe fold LineTerminatorSequence into these definitions */ +#!{NoneTerminatorCharacter}* L M +\/\/{NoneTerminatorCharacter}* L M +(#!|\/\/){NoneTerminatorCharacter}*{UnicodeError} L E("invalid character"); \/\* L yy_push_state(MultiLine, yyscanner); { \**\*\/ R yy_pop_state(yyscanner); M N - \**{LineTerminatorSequence} yylloc->end.lines(); yyextra->last_ = true; + \**{LineTerminatorSequence} yylloc->end.Lines(); yyextra->last_ = true; \**{CommentCharacter}|\/ R - \**({UnicodeFail}|\*) R E("invalid comment"); + + \**{UnicodeScrap} R E("invalid character"); + \**\* R E("invalid comment"); + <> R E("invalid comment") } /* }}} */ @@ -340,13 +340,11 @@ XMLName {XMLNameStart}{XMLNamePart}* ".." L E("invalid operator") @begin E4X -"::" L F(tk::ColonColon, hi::Operator); ".." L F(tk::PeriodPeriod, hi::Operator); @end @begin E4X ObjectiveC "@" L F(tk::At, hi::Operator); -"#" L F(tk::Pound, hi::Operator); @end "&" L F(tk::Ampersand, hi::Operator); @@ -357,13 +355,13 @@ XMLName {XMLNameStart}{XMLNamePart}* "=" L F(tk::Equal, hi::Operator); "==" L F(tk::EqualEqual, hi::Operator); "===" L F(tk::EqualEqualEqual, hi::Operator); -"=>" L F(yyextra->newline_ ? tk::EqualRight_ : tk::EqualRight, hi::Operator); +"=>" L F(tk::EqualRight, hi::Operator); "!" L F(tk::Exclamation, hi::Operator); "!=" L F(tk::ExclamationEqual, hi::Operator); "!==" L F(tk::ExclamationEqualEqual, hi::Operator); "-" L F(tk::Hyphen, hi::Operator); "-=" L F(tk::HyphenEqual, hi::Operator); -"--" L F(yyextra->newline_ ? tk::HyphenHyphen_ : tk::HyphenHyphen, hi::Operator); +"--" L F(tk::HyphenHyphen, hi::Operator); "->" L F(tk::HyphenRight, hi::Operator); "<" L F(tk::Left, hi::Operator); "<=" L F(tk::LeftEqual, hi::Operator); @@ -377,7 +375,7 @@ XMLName {XMLNameStart}{XMLNamePart}* "||" L F(tk::PipePipe, hi::Operator); "+" L F(tk::Plus, hi::Operator); "+=" L F(tk::PlusEqual, hi::Operator); -"++" L F(yyextra->newline_ ? tk::PlusPlus_ : tk::PlusPlus, hi::Operator); +"++" L F(tk::PlusPlus, hi::Operator); ">" L F(tk::Right, hi::Operator); ">=" L F(tk::RightEqual, hi::Operator); ">>" L F(tk::RightRight, hi::Operator); @@ -388,19 +386,21 @@ XMLName {XMLNameStart}{XMLNamePart}* "*=" L F(tk::StarEqual, hi::Operator); "~" L F(tk::Tilde, hi::Operator); -"/" L F(tk::Slash, hi::Operator); -"/=" L F(tk::SlashEqual, hi::Operator); +"/" L F(tk::Slash, hi::Operator); +"/=" L F(tk::SlashEqual, hi::Operator); ":" L F(tk::Colon, hi::Structure); +"::" L F(tk::ColonColon, hi::Structure); "," L F(tk::Comma, hi::Structure); "?" L F(tk::Question, hi::Structure); ";" L F(tk::SemiColon, hi::Structure); +"#" L F(tk::Pound, hi::Operator); "(" L F(tk::OpenParen, hi::Structure); ")" L F(tk::CloseParen, hi::Structure); -"{" L yyextra->template_.push(false); F(yyextra->newline_ ? tk::OpenBrace_ : tk::OpenBrace, hi::Structure); -"}" L S(template_); F(tk::CloseBrace, hi::Structure); +"{" L yyextra->template_.push(false); F(tk::OpenBrace, hi::Structure); +
"}" L S(template_); F(tk::CloseBrace, hi::Structure); "[" L F(tk::OpenBracket, hi::Structure); "]" L F(tk::CloseBracket, hi::Structure); @@ -428,7 +428,7 @@ XMLName {XMLNameStart}{XMLNamePart}* "@YES" L F(tk::At_YES_, hi::Constant); @end -@({UnicodeStart}{UnicodeScrap}|{UnicodeFail}) L E("invalid keyword") +@({UnicodeStart}{UnicodePart}*{UnicodeError}?|{UnicodeError}) L E("invalid keyword") /* }}} */ /* Highlight {{{ */ "undefined" L F(tk::_undefined_, hi::Operator); @@ -444,6 +444,7 @@ XMLName {XMLNameStart}{XMLNamePart}* /* }}} */ /* Reserved {{{ */ "abstract" L /*FII*/ F(tk::_abstract_, hi::Meta); +"as" L /*III*/ F(tk::_as_, hi::Meta); "await" L /*II?*/ F(tk::_await_, hi::Meta); "boolean" L /*FII*/ F(tk::_boolean_, hi::Type); "break" L /*KKK*/ F(tk::_break_, hi::Control); @@ -464,6 +465,7 @@ XMLName {XMLNameStart}{XMLNamePart}* "enum" L /*FFF*/ F(tk::_enum_, hi::Meta); "export" L /*FFK*/ F(tk::_export_, hi::Meta); "extends" L /*FFK*/ F(tk::_extends_, hi::Meta); +"eval" L /*III*/ F(tk::_eval_, hi::Special); "false" L /*LLL*/ F(tk::_false_, hi::Constant); "final" L /*FII*/ F(tk::_final_, hi::Meta); "finally" L /*KKK*/ F(tk::_finally_, hi::Control); @@ -476,9 +478,11 @@ XMLName {XMLNameStart}{XMLNamePart}* "if" L /*KKK*/ F(tk::_if_, hi::Control); "implements" L /*FSS*/ F(tk::_implements_, hi::Meta); "import" L /*FFK*/ F(tk::_import_, hi::Meta); -"in" L /*KKK*/ F(yyextra->in_.top() ? tk::_in__ : tk::_in_, hi::Operator); +"in" L /*KKK*/ F(tk::_in_, hi::Operator); +"Infinity" L /*III*/ F(tk::_Infinity_, hi::Constant); "instanceof" L /*KKK*/ F(tk::_instanceof_, hi::Operator); "int" L /*FII*/ F(tk::_int_, hi::Type); +"__int128" L /*III*/ F(tk::___int128_, hi::Type); "interface" L /*FSS*/ F(tk::_interface_, hi::Meta); "let" L /*IS?*/ F(tk::_let_, hi::Meta); "long" L /*FII*/ F(tk::_long_, hi::Type); @@ -488,36 +492,41 @@ XMLName {XMLNameStart}{XMLNamePart}* "package" L /*FSS*/ F(tk::_package_, hi::Meta); "private" L /*FSS*/ F(tk::_private_, hi::Meta); "protected" L /*FSS*/ F(tk::_protected_, hi::Meta); +"__proto__" L /*III*/ F(tk::___proto___, hi::Special); "prototype" L /*III*/ F(tk::_prototype_, hi::Special); "public" L /*FSS*/ F(tk::_public_, hi::Meta); -"return" L /*KKK*/ F(yyextra->return_.top() ? tk::_return__ : tk::_return_, hi::Control); +"__restrict" L /*III*/ F(tk::___restrict_, hi::Meta); +"restrict" L /*III*/ F(tk::_restrict_, hi::Meta); +"return" L /*KKK*/ F(tk::_return_, hi::Control); "set" L /*III*/ F(tk::_set_, hi::Meta); "short" L /*FII*/ F(tk::_short_, hi::Type); "static" L /*FS?*/ F(tk::_static_, hi::Meta); -"super" L /*FFK*/ F(yyextra->super_.top() ? tk::_super__ : tk::_super_, hi::Constant); +"super" L /*FFK*/ F(tk::_super_, hi::Constant); "switch" L /*KKK*/ F(tk::_switch_, hi::Control); "synchronized" L /*FII*/ F(tk::_synchronized_, hi::Meta); +"target" L /*III*/ F(tk::_target_, hi::Identifier); "this" L /*KKK*/ F(tk::_this_, hi::Constant); "throw" L /*KKK*/ F(tk::_throw_, hi::Control); "throws" L /*FII*/ F(tk::_throws_, hi::Meta); "transient" L /*FII*/ F(tk::_transient_, hi::Meta); "true" L /*LLL*/ F(tk::_true_, hi::Constant); "try" L /*KKK*/ F(tk::_try_, hi::Control); +"typeid" L /*III*/ F(tk::_typeid_, hi::Operator); "typeof" L /*KKK*/ F(tk::_typeof_, hi::Operator); "var" L /*KKK*/ F(tk::_var_, hi::Meta); "void" L /*KKK*/ F(tk::_void_, hi::Operator); "volatile" L /*FII*/ F(tk::_volatile_, hi::Meta); "while" L /*KKK*/ F(tk::_while_, hi::Control); "with" L /*KKK*/ F(tk::_with_, hi::Control); -"yield" L /*IS?*/ F(yyextra->yield_.top() ? tk::_yield__ : tk::_yield_, hi::Control); +"yield" L /*IS?*/ F(tk::_yield_, hi::Control); -"auto" L F(tk::_auto_, hi::Meta); "each" L F(tk::_each_, hi::Control); "of" L F(tk::_of_, hi::Operator); @begin C "extern" L F(tk::_extern_, hi::Type); "signed" L F(tk::_signed_, hi::Type); +"struct" L F(tk::_struct_, hi::Meta); "typedef" L F(tk::_typedef_, hi::Meta); "unsigned" L F(tk::_unsigned_, hi::Type); @end @@ -543,8 +552,12 @@ XMLName {XMLNameStart}{XMLNamePart}* char next(yytext[i]); if (next != '\\') *local++ = next; - else - U(local, yytext, ++i); + else { + bool (*is)(unsigned) = (i == 0 ? &IsIdentifierStart : &IsIdentifierContinue); + unsigned point(U(local, yytext, ++i)); + if (!is(point)) + E("invalid character"); + } } *local = '\0'; @@ -570,21 +583,21 @@ XMLName {XMLNameStart}{XMLNamePart}* { \' R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); {SingleCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {SingleCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {SingleCharacter}*{LineTerminatorSequence} R E("invalid newline"); + {SingleCharacter}*{UnicodeScrap} R E("invalid character"); } \" L CYLexBufferStart(LegacyDoubleString); { \" R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); {DoubleCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {DoubleCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {DoubleCharacter}*{LineTerminatorSequence} R E("invalid newline"); + {DoubleCharacter}*{UnicodeScrap} R E("invalid character"); } /* }}} */ /* Template {{{ */ "`" L yyextra->tail_ = false; CYLexBufferStart(StrictAccentString); -"}" L yyextra->tail_ = true; S(template_); CYLexBufferStart(StrictAccentString); +"}" L yyextra->tail_ = true; S(template_); CYLexBufferStart(StrictAccentString); { "`" R CYLexBufferEnd(string, String, yyextra->tail_ ? tk::TemplateTail : tk::NoSubstitutionTemplate, hi::Constant); @@ -593,8 +606,10 @@ XMLName {XMLNameStart}{XMLNamePart}* "$" R CYLexBufferUnit('$'); {PlateCharacter}+ R CYLexBufferUnits(yytext, yyleng); - {PlateCharacter}*{UnicodeFail} R E("invalid character"); - {LineTerminatorSequence} R E("invalid newline"); + {PlateCharacter}*{UnicodeScrap} R E("invalid character"); + + {PlateCharacter}*{LineTerminatorSequence} yylloc->end.Lines(); CYLexBufferUnits(yytext, yyleng); + \\{LineTerminatorSequence} yylloc->end.Lines(); } /* }}} */ /* Escapes {{{ */ @@ -628,23 +643,27 @@ XMLName {XMLNameStart}{XMLNamePart}* CYLexBufferPoint(point); } - \\{LineTerminatorSequence} yylloc->end.lines(); - \\(.|{NotLineTerminator}) R CYLexBufferUnits(yytext + 1, yyleng - 1); + \\{LineTerminatorSequence} yylloc->end.Lines(); + \\{NoneTerminatorCharacter} R CYLexBufferUnits(yytext + 1, yyleng - 1); + \\{UnicodeScrap} R E("invalid character"); - \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*)|{UnicodeFail})? R E("invalid escape"); + \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*))? R E("invalid escape"); <> R E("invalid string"); } /* }}} */ -{LineTerminatorSequence} yylloc->step(); yylloc->end.lines(); yyextra->last_ = true; N +{LineTerminatorSequence} yylloc->step(); yylloc->end.Lines(); yyextra->last_ = true; N {WhiteSpace} L +{U1}|{UnicodeScrap} L E("invalid character"); <> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate(); -. L E("invalid character") - %% +#undef yyextra +#define yyextra this +#define yyscanner scanner_ + void CYDriver::ScannerInit() { cylex_init(&scanner_); cyset_extra(this, scanner_); @@ -654,13 +673,17 @@ void CYDriver::ScannerDestroy() { cylex_destroy(scanner_); } +void CYDriver::SetRegEx(bool equal) { + CYLexBufferStart(RegularExpression); + CYLexBufferUnit('/'); + if (equal) + CYLexBufferUnit('='); +} + void CYDriver::SetCondition(Condition condition) { struct yyguts_t *yyg(reinterpret_cast(scanner_)); switch (condition) { - case RegExpCondition: - BEGIN(template_.top() ? RegExpOrTemplateTail : RegExp); - break; @begin E4X case XMLContentCondition: BEGIN(XMLContent); @@ -676,9 +699,6 @@ void CYDriver::SetCondition(Condition condition) { void CYDriver::PushCondition(Condition condition) { switch (condition) { - case RegExpCondition: - yy_push_state(RegExp, scanner_); - break; @begin E4X case XMLContentCondition: yy_push_state(XMLContent, scanner_);