git.saurik.com Git - cycript.git/blobdiff - Scanner.lpp.in
Improve support for char values (from JS strings).
[cycript.git] / Scanner.lpp.in
index 139901104f1ed2dd029149b2312b5d20cee2402b..d9614df0b2c5c4e316d1cf123abfecb46e9dae0d 100644 (file)
@@ -1,5 +1,5 @@
-/* Cycript - Optimizing JavaScript Compiler/Runtime
- * Copyright (C) 2009-2015  Jay Freeman (saurik)
+/* Cycript - The Truly Universal Scripting Language
+ * Copyright (C) 2009-2016  Jay Freeman (saurik)
 */
 
 /* GNU Affero General Public License, Version 3 {{{ */
@@ -41,12 +41,12 @@ typedef cy::parser::token tk;
 
 #include "Highlight.hpp"
 
+#include "IdentifierStart.h"
+#include "IdentifierContinue.h"
+
 #define YY_EXTRA_TYPE CYDriver *
 
 #define F(value, highlight) do { \
-    yyextra->newline_ = yyextra->last_; \
-    yyextra->last_ = false; \
-    yyextra->next_ = false; \
     BEGIN(yyextra->template_.top() ? DivOrTemplateTail : Div); \
     yylval->highlight_ = highlight; \
     return value; \
@@ -64,11 +64,12 @@ typedef cy::parser::token tk;
 
 #define I(type, Type, value, highlight) do { \
     yylval->semantic_.type ## _ = A CY ## Type; \
+    yylval->semantic_.type ## _->location_ = *yylloc; \
     F(value, highlight); \
 } while (false)
 
 #define N \
-    if (yyextra->last_ && yyextra->next_) { \
+    if (yyextra->last_) { \
         yyextra->last_ = false; \
         F(tk::NewLine, hi::Nothing); \
     }
@@ -139,7 +140,7 @@ static void U(Type_ &local, unsigned point) {
     } else _assert(false);
 }
 
-static void U(char *&local, const char *text, yy_size_t &i) {
+static unsigned U(char *&local, const char *text, yy_size_t &i) {
     unsigned point;
 
     char next(text[++i]);
@@ -157,6 +158,7 @@ static void U(char *&local, const char *text, yy_size_t &i) {
     }
 
     U(local, point);
+    return point;
 }
 
 #define CYLexBufferPoint(point) do { \
@@ -182,15 +184,9 @@ static void U(char *&local, const char *text, yy_size_t &i) {
     I(type, Type(P.strmemdup(yyextra->buffer_.data(), yyextra->buffer_.size()), yyextra->buffer_.size()), value, highlight); \
 } while (false)
 
-#define YY_INPUT(data, value, size) { \
-    if (yyextra->data_.eof()) \
-        value = YY_NULL; \
-    else { \
-        yyextra->data_.read(data, size); \
-        size_t copy(yyextra->data_.gcount()); \
-        value = copy == 0 ? YY_NULL : copy; \
-    } \
-}
+#define YY_INPUT(data, value, size) do { \
+    value = yyextra->data_.sgetn(data, size) ?: YY_NULL; \
+} while (false)
 
 %}
 
@@ -216,6 +212,7 @@ U2 [\xc2-\xdf]
 U3 [\xe0-\xef]
 U4 [\xf0-\xf4]
 UN [\xc0-\xc1\xf5-\xff]
+UE {U1}|{U2}|{U3}|{U4}|{UN}
 
 HexDigit [0-9a-fA-F]
 LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9]
@@ -237,12 +234,12 @@ IdentifierMore [$_]
 
 UnicodeStart {IdentifierMore}|{UnicodeIDStart}
 UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue}
-UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}|{UN}|{U0}
-UnicodeScrap {UnicodePart}*{UnicodeFail}?
+UnicodeScrap {U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}|{UN}|{U0}
+UnicodeError ({U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}){UE}|{UN}|{U0}
 
 IdentifierStart {UnicodeStart}|{UnicodeEscape}
 IdentifierPart {UnicodePart}|{UnicodeEscape}
-IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
+IdentifierFail {UnicodeError}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
 IdentifierScrap {IdentifierPart}*{IdentifierFail}?
 
 RegularExpressionBackslashSequence \\{NoneTerminatorCharacter}
@@ -276,27 +273,31 @@ XMLName {XMLNameStart}{XMLNamePart}*
 
     /* RegEx {{{ */
 <RegularExpression>{
-    \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant);
-    \/{UnicodePart}*{UnicodeFail} R E("invalid flags")
+    \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral_, hi::Constant);
+    \/{UnicodePart}*{UnicodeError} R E("invalid character");
 
     {RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng);
-    {RegExCharacter}*{UnicodeFail} R E("invalid character");
 
     {RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng);
-    \\{UnicodeFail}? R E("invalid escape")
+    \\ R E("invalid escape")
+
+    (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline");
+    (\\|{RegExCharacter}+)?{UnicodeScrap} R E("invalid character");
 
     "["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng);
     "["{RegularExpressionClassChars}\\? R E("invalid class");
-    "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character");
+
     "["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline");
+    "["{RegularExpressionClassChars}\\?{UnicodeScrap} R E("invalid character");
 
-    (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline");
     <<EOF>> R E("unterminated regex")
 }
     /* }}} */
     /* Comment {{{ */
-#![^\n]* L M
-\/\/[^\n]* L M
+    /* XXX: maybe fold LineTerminatorSequence into these definitions */
+#!{NoneTerminatorCharacter}* L M
+\/\/{NoneTerminatorCharacter}* L M
+(#!|\/\/){NoneTerminatorCharacter}*{UnicodeError} L E("invalid character");
 
 \/\* L yy_push_state(MultiLine, yyscanner);
 
@@ -304,7 +305,10 @@ XMLName {XMLNameStart}{XMLNamePart}*
     \**\*\/ R yy_pop_state(yyscanner); M N
     \**{LineTerminatorSequence} yylloc->end.Lines(); yyextra->last_ = true;
     \**{CommentCharacter}|\/ R
-    \**({UnicodeFail}|\*) R E("invalid comment");
+
+    \**{UnicodeScrap} R E("invalid character");
+    \**\* R E("invalid comment");
+
     <<EOF>> R E("invalid comment")
 }
     /* }}} */
@@ -336,13 +340,11 @@ XMLName {XMLNameStart}{XMLNamePart}*
 ".."   L E("invalid operator")
 
 @begin E4X
-"::"   L F(tk::ColonColon, hi::Operator);
 ".."   L F(tk::PeriodPeriod, hi::Operator);
 @end
 
 @begin E4X ObjectiveC
 "@"    L F(tk::At, hi::Operator);
-"#"    L F(tk::Pound, hi::Operator);
 @end
 
 "&"    L F(tk::Ampersand, hi::Operator);
@@ -353,13 +355,13 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "="    L F(tk::Equal, hi::Operator);
 "=="   L F(tk::EqualEqual, hi::Operator);
 "==="  L F(tk::EqualEqualEqual, hi::Operator);
-"=>"   L F(yyextra->newline_ ? tk::EqualRight_ : tk::EqualRight, hi::Operator);
+"=>"   L F(tk::EqualRight, hi::Operator);
 "!"    L F(tk::Exclamation, hi::Operator);
 "!="   L F(tk::ExclamationEqual, hi::Operator);
 "!=="  L F(tk::ExclamationEqualEqual, hi::Operator);
 "-"    L F(tk::Hyphen, hi::Operator);
 "-="   L F(tk::HyphenEqual, hi::Operator);
-"--"   L F(yyextra->newline_ ? tk::HyphenHyphen_ : tk::HyphenHyphen, hi::Operator);
+"--"   L F(tk::HyphenHyphen, hi::Operator);
 "->"   L F(tk::HyphenRight, hi::Operator);
 "<"    L F(tk::Left, hi::Operator);
 "<="   L F(tk::LeftEqual, hi::Operator);
@@ -373,7 +375,7 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "||"   L F(tk::PipePipe, hi::Operator);
 "+"    L F(tk::Plus, hi::Operator);
 "+="   L F(tk::PlusEqual, hi::Operator);
-"++"   L F(yyextra->newline_ ? tk::PlusPlus_ : tk::PlusPlus, hi::Operator);
+"++"   L F(tk::PlusPlus, hi::Operator);
 ">"    L F(tk::Right, hi::Operator);
 ">="   L F(tk::RightEqual, hi::Operator);
 ">>"   L F(tk::RightRight, hi::Operator);
@@ -388,14 +390,16 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "/=" L F(tk::SlashEqual, hi::Operator);
 
 ":"    L F(tk::Colon, hi::Structure);
+"::"   L F(tk::ColonColon, hi::Structure);
 ","    L F(tk::Comma, hi::Structure);
 "?"    L F(tk::Question, hi::Structure);
 ";"    L F(tk::SemiColon, hi::Structure);
+"#"    L F(tk::Pound, hi::Operator);
 
 "("    L F(tk::OpenParen, hi::Structure);
 ")"    L F(tk::CloseParen, hi::Structure);
 
-"{"    L yyextra->template_.push(false); F(yyextra->newline_ ? tk::OpenBrace_ : tk::OpenBrace, hi::Structure);
+"{"    L yyextra->template_.push(false); F(tk::OpenBrace, hi::Structure);
 <Div>"}" L S(template_); F(tk::CloseBrace, hi::Structure);
 
 "["    L F(tk::OpenBracket, hi::Structure);
@@ -424,7 +428,7 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "@YES"            L F(tk::At_YES_, hi::Constant);
 @end
 
-@({UnicodeStart}{UnicodeScrap}|{UnicodeFail}) L E("invalid keyword")
+@({UnicodeStart}{UnicodePart}*{UnicodeError}?|{UnicodeError}) L E("invalid keyword")
     /* }}} */
     /* Highlight {{{ */
 "undefined"       L F(tk::_undefined_, hi::Operator);
@@ -440,6 +444,7 @@ XMLName {XMLNameStart}{XMLNamePart}*
     /* }}} */
     /* Reserved {{{ */
 "abstract"        L /*FII*/ F(tk::_abstract_, hi::Meta);
+"as"              L /*III*/ F(tk::_as_, hi::Meta);
 "await"           L /*II?*/ F(tk::_await_, hi::Meta);
 "boolean"         L /*FII*/ F(tk::_boolean_, hi::Type);
 "break"           L /*KKK*/ F(tk::_break_, hi::Control);
@@ -460,6 +465,7 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "enum"            L /*FFF*/ F(tk::_enum_, hi::Meta);
 "export"          L /*FFK*/ F(tk::_export_, hi::Meta);
 "extends"         L /*FFK*/ F(tk::_extends_, hi::Meta);
+"eval"            L /*III*/ F(tk::_eval_, hi::Special);
 "false"           L /*LLL*/ F(tk::_false_, hi::Constant);
 "final"           L /*FII*/ F(tk::_final_, hi::Meta);
 "finally"         L /*KKK*/ F(tk::_finally_, hi::Control);
@@ -472,9 +478,11 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "if"              L /*KKK*/ F(tk::_if_, hi::Control);
 "implements"      L /*FSS*/ F(tk::_implements_, hi::Meta);
 "import"          L /*FFK*/ F(tk::_import_, hi::Meta);
-"in"              L /*KKK*/ F(yyextra->in_.top() ? tk::_in__ : tk::_in_, hi::Operator);
+"in"              L /*KKK*/ F(tk::_in_, hi::Operator);
+"Infinity"        L /*III*/ F(tk::_Infinity_, hi::Constant);
 "instanceof"      L /*KKK*/ F(tk::_instanceof_, hi::Operator);
 "int"             L /*FII*/ F(tk::_int_, hi::Type);
+"__int128"        L /*III*/ F(tk::___int128_, hi::Type);
 "interface"       L /*FSS*/ F(tk::_interface_, hi::Meta);
 "let"             L /*IS?*/ F(tk::_let_, hi::Meta);
 "long"            L /*FII*/ F(tk::_long_, hi::Type);
@@ -484,36 +492,41 @@ XMLName {XMLNameStart}{XMLNamePart}*
 "package"         L /*FSS*/ F(tk::_package_, hi::Meta);
 "private"         L /*FSS*/ F(tk::_private_, hi::Meta);
 "protected"       L /*FSS*/ F(tk::_protected_, hi::Meta);
+"__proto__"       L /*III*/ F(tk::___proto___, hi::Special);
 "prototype"       L /*III*/ F(tk::_prototype_, hi::Special);
 "public"          L /*FSS*/ F(tk::_public_, hi::Meta);
-"return"          L /*KKK*/ F(yyextra->return_.top() ? tk::_return__ : tk::_return_, hi::Control);
+"__restrict"      L /*III*/ F(tk::___restrict_, hi::Meta);
+"restrict"        L /*III*/ F(tk::_restrict_, hi::Meta);
+"return"          L /*KKK*/ F(tk::_return_, hi::Control);
 "set"             L /*III*/ F(tk::_set_, hi::Meta);
 "short"           L /*FII*/ F(tk::_short_, hi::Type);
 "static"          L /*FS?*/ F(tk::_static_, hi::Meta);
-"super"           L /*FFK*/ F(yyextra->super_.top() ? tk::_super__ : tk::_super_, hi::Constant);
+"super"           L /*FFK*/ F(tk::_super_, hi::Constant);
 "switch"          L /*KKK*/ F(tk::_switch_, hi::Control);
 "synchronized"    L /*FII*/ F(tk::_synchronized_, hi::Meta);
+"target"          L /*III*/ F(tk::_target_, hi::Identifier);
 "this"            L /*KKK*/ F(tk::_this_, hi::Constant);
 "throw"           L /*KKK*/ F(tk::_throw_, hi::Control);
 "throws"          L /*FII*/ F(tk::_throws_, hi::Meta);
 "transient"       L /*FII*/ F(tk::_transient_, hi::Meta);
 "true"            L /*LLL*/ F(tk::_true_, hi::Constant);
 "try"             L /*KKK*/ F(tk::_try_, hi::Control);
+"typeid"          L /*III*/ F(tk::_typeid_, hi::Operator);
 "typeof"          L /*KKK*/ F(tk::_typeof_, hi::Operator);
 "var"             L /*KKK*/ F(tk::_var_, hi::Meta);
 "void"            L /*KKK*/ F(tk::_void_, hi::Operator);
 "volatile"        L /*FII*/ F(tk::_volatile_, hi::Meta);
 "while"           L /*KKK*/ F(tk::_while_, hi::Control);
 "with"            L /*KKK*/ F(tk::_with_, hi::Control);
-"yield"           L /*IS?*/ F(yyextra->yield_.top() ? tk::_yield__ : tk::_yield_, hi::Control);
+"yield"           L /*IS?*/ F(tk::_yield_, hi::Control);
 
-"auto"            L F(tk::_auto_, hi::Meta);
 "each"            L F(tk::_each_, hi::Control);
 "of"              L F(tk::_of_, hi::Operator);
 
 @begin C
 "extern"          L F(tk::_extern_, hi::Type);
 "signed"          L F(tk::_signed_, hi::Type);
+"struct"          L F(tk::_struct_, hi::Meta);
 "typedef"         L F(tk::_typedef_, hi::Meta);
 "unsigned"        L F(tk::_unsigned_, hi::Type);
 @end
@@ -539,8 +552,12 @@ XMLName {XMLNameStart}{XMLNamePart}*
         char next(yytext[i]);
         if (next != '\\')
             *local++ = next;
-        else
-            U(local, yytext, ++i);
+        else {
+            bool (*is)(unsigned) = (i == 0 ? &IsIdentifierStart : &IsIdentifierContinue);
+            unsigned point(U(local, yytext, ++i));
+            if (!is(point))
+                E("invalid character");
+        }
     }
 
     *local = '\0';
@@ -566,16 +583,16 @@ XMLName {XMLNameStart}{XMLNamePart}*
 <LegacySingleString,StrictSingleString>{
     \' R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant);
     {SingleCharacter}+ R CYLexBufferUnits(yytext, yyleng);
-    {SingleCharacter}*{UnicodeFail} R E("invalid character");
-    {LineTerminatorSequence} R E("invalid newline");
+    {SingleCharacter}*{LineTerminatorSequence} R E("invalid newline");
+    {SingleCharacter}*{UnicodeScrap} R E("invalid character");
 }
 
 \" L CYLexBufferStart(LegacyDoubleString);
 <LegacyDoubleString,StrictDoubleString>{
     \" R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant);
     {DoubleCharacter}+ R CYLexBufferUnits(yytext, yyleng);
-    {DoubleCharacter}*{UnicodeFail} R E("invalid character");
-    {LineTerminatorSequence} R E("invalid newline");
+    {DoubleCharacter}*{LineTerminatorSequence} R E("invalid newline");
+    {DoubleCharacter}*{UnicodeScrap} R E("invalid character");
 }
     /* }}} */
     /* Template {{{ */
@@ -589,8 +606,10 @@ XMLName {XMLNameStart}{XMLNamePart}*
     "$" R CYLexBufferUnit('$');
 
     {PlateCharacter}+ R CYLexBufferUnits(yytext, yyleng);
-    {PlateCharacter}*{UnicodeFail} R E("invalid character");
-    {LineTerminatorSequence} R E("invalid newline");
+    {PlateCharacter}*{UnicodeScrap} R E("invalid character");
+
+    {PlateCharacter}*{LineTerminatorSequence} yylloc->end.Lines(); CYLexBufferUnits(yytext, yyleng);
+    \\{LineTerminatorSequence} yylloc->end.Lines();
 }
     /* }}} */
     /* Escapes {{{ */
@@ -625,20 +644,20 @@ XMLName {XMLNameStart}{XMLNamePart}*
     }
 
     \\{LineTerminatorSequence} yylloc->end.Lines();
-    \\(.|{NotLineTerminator}) R CYLexBufferUnits(yytext + 1, yyleng - 1);
+    \\{NoneTerminatorCharacter} R CYLexBufferUnits(yytext + 1, yyleng - 1);
+    \\{UnicodeScrap} R E("invalid character");
 
-    \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*)|{UnicodeFail})? R E("invalid escape");
+    \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*))? R E("invalid escape");
     <<EOF>> R E("invalid string");
 }
     /* }}} */
 
 {LineTerminatorSequence} yylloc->step(); yylloc->end.Lines(); yyextra->last_ = true; N
 {WhiteSpace} L
+{U1}|{UnicodeScrap} L E("invalid character");
 
 <<EOF>> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate();
 
-. L E("invalid character")
-
 %%
 
 #undef yyextra