-/* Cycript - Optimizing JavaScript Compiler/Runtime
- * Copyright (C) 2009-2015 Jay Freeman (saurik)
+/* Cycript - The Truly Universal Scripting Language
+ * Copyright (C) 2009-2016 Jay Freeman (saurik)
*/
/* GNU Affero General Public License, Version 3 {{{ */
#include "Highlight.hpp"
+#include "IdentifierStart.h"
+#include "IdentifierContinue.h"
+
#define YY_EXTRA_TYPE CYDriver *
#define F(value, highlight) do { \
- yyextra->newline_ = yyextra->last_; \
- yyextra->last_ = false; \
- yyextra->next_ = false; \
BEGIN(yyextra->template_.top() ? DivOrTemplateTail : Div); \
yylval->highlight_ = highlight; \
return value; \
} while (false)
#define N \
- if (yyextra->last_ && yyextra->next_) { \
+ if (yyextra->last_) { \
yyextra->last_ = false; \
F(tk::NewLine, hi::Nothing); \
}
} else _assert(false);
}
-static void U(char *&local, const char *text, yy_size_t &i) {
+static unsigned U(char *&local, const char *text, yy_size_t &i) {
unsigned point;
char next(text[++i]);
}
U(local, point);
+ return point;
}
#define CYLexBufferPoint(point) do { \
I(type, Type(P.strmemdup(yyextra->buffer_.data(), yyextra->buffer_.size()), yyextra->buffer_.size()), value, highlight); \
} while (false)
-#define YY_INPUT(data, value, size) { \
- if (yyextra->data_.eof()) \
- value = YY_NULL; \
- else { \
- yyextra->data_.read(data, size); \
- size_t copy(yyextra->data_.gcount()); \
- value = copy == 0 ? YY_NULL : copy; \
- } \
-}
+/* Flex input hook: reads up to `size` bytes into `data` through
+ * yyextra->data_.sgetn() and stores the byte count in `value`; a read of
+ * zero bytes is reported as YY_NULL. Written with a standard conditional
+ * rather than the GNU-only `a ?: b` form so it builds under strict ISO C++. */
+#define YY_INPUT(data, value, size) do { \
+ size_t copy(yyextra->data_.sgetn(data, size)); \
+ value = copy == 0 ? YY_NULL : copy; \
+} while (false)
%}
U3 [\xe0-\xef]
U4 [\xf0-\xf4]
UN [\xc0-\xc1\xf5-\xff]
+UE {U1}|{U2}|{U3}|{U4}|{UN}
HexDigit [0-9a-fA-F]
LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9]
UnicodeStart {IdentifierMore}|{UnicodeIDStart}
UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue}
-UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}|{UN}|{U0}
-UnicodeScrap {UnicodePart}*{UnicodeFail}?
+UnicodeScrap {U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}|{UN}|{U0}
+UnicodeError ({U2}|{U3}{U0}{0,1}|{U4}{U0}{0,2}){UE}|{UN}|{U0}
IdentifierStart {UnicodeStart}|{UnicodeEscape}
IdentifierPart {UnicodePart}|{UnicodeEscape}
-IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
+IdentifierFail {UnicodeError}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
IdentifierScrap {IdentifierPart}*{IdentifierFail}?
RegularExpressionBackslashSequence \\{NoneTerminatorCharacter}
/* RegEx {{{ */
<RegularExpression>{
- \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral, hi::Constant);
- \/{UnicodePart}*{UnicodeFail} R E("invalid flags")
+ \/{UnicodePart}* R CYLexBufferUnits(yytext, yyleng); CYLexBufferEnd(literal, RegEx, tk::RegularExpressionLiteral_, hi::Constant);
+ \/{UnicodePart}*{UnicodeError} R E("invalid character");
{RegExCharacter}+ R CYLexBufferUnits(yytext, yyleng);
- {RegExCharacter}*{UnicodeFail} R E("invalid character");
{RegularExpressionBackslashSequence} R CYLexBufferUnits(yytext, yyleng);
- \\{UnicodeFail}? R E("invalid escape")
+ \\ R E("invalid escape")
+
+ (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline");
+ (\\|{RegExCharacter}+)?{UnicodeScrap} R E("invalid character");
"["{RegularExpressionClassChars}"]" R CYLexBufferUnits(yytext, yyleng);
"["{RegularExpressionClassChars}\\? R E("invalid class");
- "["{RegularExpressionClassChars}\\?{UnicodeFail} R E("invalid character");
+
"["{RegularExpressionClassChars}\\?{LineTerminatorSequence} R E("invalid newline");
+ "["{RegularExpressionClassChars}\\?{UnicodeScrap} R E("invalid character");
- (\\|{RegExCharacter}+)?{LineTerminatorSequence} R E("invalid newline");
<<EOF>> R E("unterminated regex")
}
/* }}} */
/* Comment {{{ */
-#![^\n]* L M
-\/\/[^\n]* L M
+ /* XXX: maybe fold LineTerminatorSequence into these definitions */
+#!{NoneTerminatorCharacter}* L M
+\/\/{NoneTerminatorCharacter}* L M
+(#!|\/\/){NoneTerminatorCharacter}*{UnicodeError} L E("invalid character");
\/\* L yy_push_state(MultiLine, yyscanner);
\**\*\/ R yy_pop_state(yyscanner); M N
\**{LineTerminatorSequence} yylloc->end.Lines(); yyextra->last_ = true;
\**{CommentCharacter}|\/ R
- \**({UnicodeFail}|\*) R E("invalid comment");
+
+ \**{UnicodeScrap} R E("invalid character");
+ \**\* R E("invalid comment");
+
<<EOF>> R E("invalid comment")
}
/* }}} */
".." L E("invalid operator")
@begin E4X
-"::" L F(tk::ColonColon, hi::Operator);
".." L F(tk::PeriodPeriod, hi::Operator);
@end
@begin E4X ObjectiveC
"@" L F(tk::At, hi::Operator);
-"#" L F(tk::Pound, hi::Operator);
@end
"&" L F(tk::Ampersand, hi::Operator);
"=" L F(tk::Equal, hi::Operator);
"==" L F(tk::EqualEqual, hi::Operator);
"===" L F(tk::EqualEqualEqual, hi::Operator);
-"=>" L F(yyextra->newline_ ? tk::EqualRight_ : tk::EqualRight, hi::Operator);
+"=>" L F(tk::EqualRight, hi::Operator);
"!" L F(tk::Exclamation, hi::Operator);
"!=" L F(tk::ExclamationEqual, hi::Operator);
"!==" L F(tk::ExclamationEqualEqual, hi::Operator);
"-" L F(tk::Hyphen, hi::Operator);
"-=" L F(tk::HyphenEqual, hi::Operator);
-"--" L F(yyextra->newline_ ? tk::HyphenHyphen_ : tk::HyphenHyphen, hi::Operator);
+"--" L F(tk::HyphenHyphen, hi::Operator);
"->" L F(tk::HyphenRight, hi::Operator);
"<" L F(tk::Left, hi::Operator);
"<=" L F(tk::LeftEqual, hi::Operator);
"||" L F(tk::PipePipe, hi::Operator);
"+" L F(tk::Plus, hi::Operator);
"+=" L F(tk::PlusEqual, hi::Operator);
-"++" L F(yyextra->newline_ ? tk::PlusPlus_ : tk::PlusPlus, hi::Operator);
+"++" L F(tk::PlusPlus, hi::Operator);
">" L F(tk::Right, hi::Operator);
">=" L F(tk::RightEqual, hi::Operator);
">>" L F(tk::RightRight, hi::Operator);
"/=" L F(tk::SlashEqual, hi::Operator);
":" L F(tk::Colon, hi::Structure);
+"::" L F(tk::ColonColon, hi::Structure);
"," L F(tk::Comma, hi::Structure);
"?" L F(tk::Question, hi::Structure);
";" L F(tk::SemiColon, hi::Structure);
+"#" L F(tk::Pound, hi::Operator);
"(" L F(tk::OpenParen, hi::Structure);
")" L F(tk::CloseParen, hi::Structure);
-"{" L yyextra->template_.push(false); F(yyextra->newline_ ? tk::OpenBrace_ : tk::OpenBrace, hi::Structure);
+"{" L yyextra->template_.push(false); F(tk::OpenBrace, hi::Structure);
<Div>"}" L S(template_); F(tk::CloseBrace, hi::Structure);
"[" L F(tk::OpenBracket, hi::Structure);
"@YES" L F(tk::At_YES_, hi::Constant);
@end
-@({UnicodeStart}{UnicodeScrap}|{UnicodeFail}) L E("invalid keyword")
+@({UnicodeStart}{UnicodePart}*{UnicodeError}?|{UnicodeError}) L E("invalid keyword")
/* }}} */
/* Highlight {{{ */
"undefined" L F(tk::_undefined_, hi::Operator);
/* }}} */
/* Reserved {{{ */
"abstract" L /*FII*/ F(tk::_abstract_, hi::Meta);
+"as" L /*III*/ F(tk::_as_, hi::Meta);
"await" L /*II?*/ F(tk::_await_, hi::Meta);
"boolean" L /*FII*/ F(tk::_boolean_, hi::Type);
"break" L /*KKK*/ F(tk::_break_, hi::Control);
"if" L /*KKK*/ F(tk::_if_, hi::Control);
"implements" L /*FSS*/ F(tk::_implements_, hi::Meta);
"import" L /*FFK*/ F(tk::_import_, hi::Meta);
-"in" L /*KKK*/ F(yyextra->in_.top() ? tk::_in__ : tk::_in_, hi::Operator);
+"in" L /*KKK*/ F(tk::_in_, hi::Operator);
+"Infinity" L /*III*/ F(tk::_Infinity_, hi::Constant);
"instanceof" L /*KKK*/ F(tk::_instanceof_, hi::Operator);
"int" L /*FII*/ F(tk::_int_, hi::Type);
+"__int128" L /*III*/ F(tk::___int128_, hi::Type);
"interface" L /*FSS*/ F(tk::_interface_, hi::Meta);
"let" L /*IS?*/ F(tk::_let_, hi::Meta);
"long" L /*FII*/ F(tk::_long_, hi::Type);
"package" L /*FSS*/ F(tk::_package_, hi::Meta);
"private" L /*FSS*/ F(tk::_private_, hi::Meta);
"protected" L /*FSS*/ F(tk::_protected_, hi::Meta);
+"__proto__" L /*III*/ F(tk::___proto___, hi::Special);
"prototype" L /*III*/ F(tk::_prototype_, hi::Special);
"public" L /*FSS*/ F(tk::_public_, hi::Meta);
-"return" L /*KKK*/ F(yyextra->return_.top() ? tk::_return__ : tk::_return_, hi::Control);
+"__restrict" L /*III*/ F(tk::___restrict_, hi::Meta);
+"restrict" L /*III*/ F(tk::_restrict_, hi::Meta);
+"return" L /*KKK*/ F(tk::_return_, hi::Control);
"set" L /*III*/ F(tk::_set_, hi::Meta);
"short" L /*FII*/ F(tk::_short_, hi::Type);
"static" L /*FS?*/ F(tk::_static_, hi::Meta);
-"super" L /*FFK*/ F(yyextra->super_.top() ? tk::_super__ : tk::_super_, hi::Constant);
+"super" L /*FFK*/ F(tk::_super_, hi::Constant);
"switch" L /*KKK*/ F(tk::_switch_, hi::Control);
"synchronized" L /*FII*/ F(tk::_synchronized_, hi::Meta);
+"target" L /*III*/ F(tk::_target_, hi::Identifier);
"this" L /*KKK*/ F(tk::_this_, hi::Constant);
"throw" L /*KKK*/ F(tk::_throw_, hi::Control);
"throws" L /*FII*/ F(tk::_throws_, hi::Meta);
"transient" L /*FII*/ F(tk::_transient_, hi::Meta);
"true" L /*LLL*/ F(tk::_true_, hi::Constant);
"try" L /*KKK*/ F(tk::_try_, hi::Control);
+"typeid" L /*III*/ F(tk::_typeid_, hi::Operator);
"typeof" L /*KKK*/ F(tk::_typeof_, hi::Operator);
"var" L /*KKK*/ F(tk::_var_, hi::Meta);
"void" L /*KKK*/ F(tk::_void_, hi::Operator);
"volatile" L /*FII*/ F(tk::_volatile_, hi::Meta);
"while" L /*KKK*/ F(tk::_while_, hi::Control);
"with" L /*KKK*/ F(tk::_with_, hi::Control);
-"yield" L /*IS?*/ F(yyextra->yield_.top() ? tk::_yield__ : tk::_yield_, hi::Control);
+"yield" L /*IS?*/ F(tk::_yield_, hi::Control);
-"auto" L F(tk::_auto_, hi::Meta);
"each" L F(tk::_each_, hi::Control);
"of" L F(tk::_of_, hi::Operator);
@begin C
"extern" L F(tk::_extern_, hi::Type);
"signed" L F(tk::_signed_, hi::Type);
+"struct" L F(tk::_struct_, hi::Meta);
"typedef" L F(tk::_typedef_, hi::Meta);
"unsigned" L F(tk::_unsigned_, hi::Type);
@end
char next(yytext[i]);
if (next != '\\')
*local++ = next;
- else
- U(local, yytext, ++i);
+ else {
+ bool (*is)(unsigned) = (i == 0 ? &IsIdentifierStart : &IsIdentifierContinue);
+ unsigned point(U(local, yytext, ++i));
+ if (!is(point))
+ E("invalid character");
+ }
}
*local = '\0';
<LegacySingleString,StrictSingleString>{
\' R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant);
{SingleCharacter}+ R CYLexBufferUnits(yytext, yyleng);
- {SingleCharacter}*{UnicodeFail} R E("invalid character");
- {LineTerminatorSequence} R E("invalid newline");
+ {SingleCharacter}*{LineTerminatorSequence} R E("invalid newline");
+ {SingleCharacter}*{UnicodeScrap} R E("invalid character");
}
\" L CYLexBufferStart(LegacyDoubleString);
<LegacyDoubleString,StrictDoubleString>{
\" R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant);
{DoubleCharacter}+ R CYLexBufferUnits(yytext, yyleng);
- {DoubleCharacter}*{UnicodeFail} R E("invalid character");
- {LineTerminatorSequence} R E("invalid newline");
+ {DoubleCharacter}*{LineTerminatorSequence} R E("invalid newline");
+ {DoubleCharacter}*{UnicodeScrap} R E("invalid character");
}
/* }}} */
/* Template {{{ */
"$" R CYLexBufferUnit('$');
{PlateCharacter}+ R CYLexBufferUnits(yytext, yyleng);
- {PlateCharacter}*{UnicodeFail} R E("invalid character");
- {LineTerminatorSequence} R E("invalid newline");
+ {PlateCharacter}*{LineTerminatorSequence} R E("invalid newline");
+ {PlateCharacter}*{UnicodeScrap} R E("invalid character");
}
/* }}} */
/* Escapes {{{ */
}
\\{LineTerminatorSequence} yylloc->end.Lines();
- \\(.|{NotLineTerminator}) R CYLexBufferUnits(yytext + 1, yyleng - 1);
+ \\{NoneTerminatorCharacter} R CYLexBufferUnits(yytext + 1, yyleng - 1);
+ \\{UnicodeScrap} R E("invalid character");
- \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*)|{UnicodeFail})? R E("invalid escape");
+ \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*))? R E("invalid escape");
<<EOF>> R E("invalid string");
}
/* }}} */
{LineTerminatorSequence} yylloc->step(); yylloc->end.Lines(); yyextra->last_ = true; N
{WhiteSpace} L
+{U1}|{UnicodeScrap} L E("invalid character");
<<EOF>> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate();
-. L E("invalid character")
-
%%
#undef yyextra