From b900e1a43acf792e7eaccff2418919d02382cd3c Mon Sep 17 00:00:00 2001 From: "Jay Freeman (saurik)" Date: Mon, 30 Nov 2015 06:16:22 -0800 Subject: [PATCH] Massive changes to lexer to get template literals. --- Cycript.l.in | 249 +++++++++++++++++++++++++------------------- Cycript.yy.in | 29 ++++-- Driver.cpp | 3 +- Driver.hpp | 7 +- Highlight.cpp | 2 +- Makefile.am | 3 +- Makefile.in | 3 +- NotLineTerminator.l | 1 + Output.cpp | 11 +- Parser.hpp | 53 +++++++--- Replace.cpp | 12 ++- backtrack.sh | 6 +- unicode.mk | 5 +- unicode.py | 7 ++ 14 files changed, 237 insertions(+), 154 deletions(-) create mode 100644 NotLineTerminator.l diff --git a/Cycript.l.in b/Cycript.l.in index 42c45e2..ec251b6 100644 --- a/Cycript.l.in +++ b/Cycript.l.in @@ -19,13 +19,12 @@ **/ /* }}} */ -/* XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf */ - %top{ #if defined(__clang__) #pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wdeprecated-register" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wunused-variable" #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" @@ -70,12 +69,13 @@ typedef cy::parser::token tk; #define C \ yyextra->newline_ = yyextra->last_; \ yyextra->last_ = false; \ - BEGIN(Div); + BEGIN(yyextra->template_.top() ? DivOrTemplateTail : Div); #define N \ - yyextra->last_ = true; \ - if (yyextra->no_.NewLine) \ - F(tk::NewLine, hi::Nothing); + if (yyextra->last_ && yyextra->no_.NewLine) { \ + yyextra->last_ = false; \ + F(tk::NewLine, hi::Nothing); \ + } #define V(more) { \ if (const char *nl = reinterpret_cast(memchr(yytext, '\n', yyleng))) { \ @@ -93,16 +93,16 @@ typedef cy::parser::token tk; } else L \ } -#define L { \ - yylloc->step(); \ - yylloc->end.columns(yyleng); \ -} +#define R yylloc->end.columns(yyleng); +#define L yylloc->step(); R -#define M { \ - if (yyextra->commented_) { \ - I(comment, Comment(Y), tk::Comment, hi::Comment); \ - } \ -} +#define H(value, highlight) do { \ + if (yyextra->highlight_) \ + F(value, highlight); \ +} while (false) + +#define M \ + H(tk::Comment, hi::Comment); #define E(message) { \ CYDriver::Error error; \ @@ -112,7 +112,7 @@ typedef cy::parser::token tk; yyterminate(); \ } -int H(char c) { +int X(char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') @@ -122,7 +122,8 @@ int H(char c) { return -1; } -static void U(char *&local, unsigned point) { +template +static void U(Type_ &local, unsigned point) { if (false) { } else if (point < 0x000080) { *local++ = point; @@ -147,7 +148,7 @@ static void U(char *&local, const char *text, yy_size_t &i) { char next(text[++i]); if (next != '{') { - point = H(text[i + 0]) << 12 | H(text[i + 1]) << 8 | H(text[i + 2]) << 4 | H(text[i + 3]); + point = X(text[i + 0]) << 12 | X(text[i + 1]) << 8 | X(text[i + 2]) << 4 | X(text[i + 3]); i += 3; } else { point = 0; @@ -155,13 +156,36 @@ static void U(char *&local, const char *text, yy_size_t &i) { next = text[++i]; if (next == '}') break; - point = (point << 4) | H(next); + point = (point << 4) | X(next); } } U(local, point); } +#define CYLexBufferPoint(point) do { \ + std::back_insert_iterator > inserter(yyextra->buffer_); \ + U(inserter, point); \ +} while (false) + +#define CYLexBufferUnit(value) do { \ + yyextra->buffer_.push_back(value); \ +} while (false) + +#define CYLexBufferUnits(data, size) do { \ + yyextra->buffer_.insert(yyextra->buffer_.end(), data, data + size); \ +} while (false) + +#define CYLexBufferStart(condition) do { \ + yyextra->buffer_.clear(); \ + yy_push_state(condition, yyscanner); \ +} while (false) + +#define CYLexBufferEnd(type, Type, value, highlight) do { \ + yy_pop_state(yyscanner); \ + C I(type, Type(P.strmemdup(yyextra->buffer_.data(), yyextra->buffer_.size()), yyextra->buffer_.size()), value, highlight); \ +} while (false) + #define YY_INPUT(data, value, size) { \ if (yyextra->data_.eof()) \ value = YY_NULL; \ @@ -198,27 +222,26 @@ U0 [\x80-\xbf] U2 [\xc2-\xdf] U3 [\xe0-\xef] U4 [\xf0-\xf4] +UN [\xc0-\xc1\xf5-\xff] HexDigit [0-9a-fA-F] LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9] WhiteSpace [\x09\x0b\x0c\x20]|\xc2\xa0|\xef\xbb\xbf UnicodeEscape \\u({HexDigit}{4}|\{{HexDigit}+\}) -OctalEscape \\[1-7]|\\[4-7][0-7]|\\[0-3][0-7][0-7]? -StringEscape \\['"\\bfnrtv]|\\0|{OctalEscape}|\\x{HexDigit}{2}|{UnicodeEscape} -StringExtra {StringEscape}|\\{LineTerminatorSequence} -SingleString ([^'\\\n]|{StringExtra})* -DoubleString ([^"\\\n]|{StringExtra})* -StringPrefix '{SingleString}|\"{DoubleString} +@include NotLineTerminator.l +CommentCharacter [^*/]{-}[\r\n\x80-\xff]|{NotLineTerminator} +SingleCharacter [^'\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} +DoubleCharacter [^"\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} +PlateCharacter [^$`\\]{-}[\r\n\x80-\xff]|{NotLineTerminator} @include UnicodeIDStart.l @include UnicodeIDContinue.l - IdentifierMore [$_] UnicodeStart {IdentifierMore}|{UnicodeIDStart} UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue} -UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0} +UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}|{UN}|{U0} UnicodeScrap {UnicodePart}*{UnicodeFail}? IdentifierStart {UnicodeStart}|{UnicodeEscape} @@ -240,8 +263,19 @@ XMLNamePart [a-zA-Z0-9.-_:] XMLName {XMLNameStart}{XMLNamePart}* @end +%x MultiLine + +%x LegacySingleString +%x LegacyDoubleString + +%x StrictSingleString +%x StrictDoubleString +%x StrictAccentString + %s Div +%s DivOrTemplateTail %s RegExp +%s RegExpOrTemplateTail @begin E4X %x XMLContent @@ -251,19 +285,25 @@ XMLName {XMLNameStart}{XMLNamePart}* %% /* RegEx {{{ */ -\/{RegularExpressionBody}\/{RegularExpressionFlags} L C I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant); -\/{RegularExpressionBody}\/{RegularExpressionFlags}{UnicodeFail} L E("invalid flags") -\/{RegularExpressionBody}?\\? L E("unterminated regex") +{ + \/{RegularExpressionBody}\/{RegularExpressionFlags} L C I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant); + \/{RegularExpressionBody}\/{RegularExpressionFlags}{UnicodeFail} L E("invalid flags") + \/{RegularExpressionBody}?\\? L E("unterminated regex") +} /* }}} */ /* Comment {{{ */ #![^\n]* L M \/\/[^\n]* L M - /* http://ostermiller.org/findcomment.html */ - /* XXX: unify these two rules using !? */ -\/\*!([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V() C I(comment, Comment(Y), tk::Comment, hi::Comment); -\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V(N) M -\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\** V() E("invalid comment") +\/\* L yy_push_state(MultiLine, yyscanner); + +{ + \**\*\/ R yy_pop_state(yyscanner); M N + \**{LineTerminatorSequence} yylloc->end.lines(); yyextra->last_ = true; + \**{CommentCharacter}|\/ R + \**({UnicodeFail}|\*) R E("invalid comment"); + <> R E("invalid comment") +} /* }}} */ /* Element {{{ */ @begin E4X @@ -341,8 +381,8 @@ XMLName {XMLNameStart}{XMLNamePart}* "*=" L C F(tk::StarEqual, hi::Operator); "~" L C F(tk::Tilde, hi::Operator); -
"/" L C F(tk::Slash, hi::Operator); -
"/=" L C F(tk::SlashEqual, hi::Operator); +"/" L C F(tk::Slash, hi::Operator); +"/=" L C F(tk::SlashEqual, hi::Operator); ":" L C F(tk::Colon, hi::Structure); "," L C F(tk::Comma, hi::Structure); @@ -352,8 +392,8 @@ XMLName {XMLNameStart}{XMLNamePart}* "(" L C F(tk::OpenParen, hi::Structure); ")" L C F(tk::CloseParen, hi::Structure); -"{" L C F(yyextra->no_.OpenBrace ? tk::OpenBrace__ : yyextra->newline_ ? tk::OpenBrace_ : tk::OpenBrace, hi::Structure); -"}" L C F(tk::CloseBrace, hi::Structure); +"{" L yyextra->template_.push(false); C F(yyextra->no_.OpenBrace ? tk::OpenBrace__ : yyextra->newline_ ? tk::OpenBrace_ : tk::OpenBrace, hi::Structure); +"}" L yyextra->template_.pop(); C F(tk::CloseBrace, hi::Structure); "[" L C F(tk::OpenBracket, hi::Structure); "]" L C F(tk::CloseBracket, hi::Structure); @@ -514,69 +554,77 @@ XMLName {XMLNameStart}{XMLNamePart}* (\.?[0-9]|(0|[1-9][0-9]*)\.){IdentifierScrap} L E("invalid number") /* }}} */ /* String {{{ */ -'{SingleString}'|\"{DoubleString}\" L C { - char *value(A char[yyleng]); - char *local(value); +\' L CYLexBufferStart(LegacySingleString); +{ + \' R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); + {SingleCharacter}+ R CYLexBufferUnits(yytext, yyleng); + {SingleCharacter}*{UnicodeFail} R E("invalid character"); + {LineTerminatorSequence} R E("invalid newline"); +} - for (yy_size_t i(1), e(yyleng - 1); i != e; ++i) { - char next(yytext[i]); +\" L CYLexBufferStart(LegacyDoubleString); +{ + \" R CYLexBufferEnd(string, String, tk::StringLiteral, hi::Constant); + {DoubleCharacter}+ R CYLexBufferUnits(yytext, yyleng); + {DoubleCharacter}*{UnicodeFail} R E("invalid character"); + {LineTerminatorSequence} R E("invalid newline"); +} + /* }}} */ + /* Template {{{ */ +"`" L yyextra->tail_ = false; CYLexBufferStart(StrictAccentString); +"}" L yyextra->tail_ = true; yyextra->template_.pop(); CYLexBufferStart(StrictAccentString); - if (yytext[i] == '\\') - // XXX: support more line continuation characters - if (false) line: { - yylloc->end.lines(1); - yylloc->end.columns(yyleng - i); - } else switch (next = yytext[++i]) { - case '\n': goto line; - - case '\\': next = '\\'; break; - case '\'': next = '\''; break; - case '"': next = '"'; break; - case 'b': next = '\b'; break; - case 'f': next = '\f'; break; - case 'n': next = '\n'; break; - case 'r': next = '\r'; break; - case 't': next = '\t'; break; - case 'v': next = '\v'; break; - - case '0': case '1': case '2': case '3': - if (yytext[i + 1] < '0' || yytext[i + 1] > '7') - next = H(yytext[i]), i += 0; - else if (yytext[i + 2] < '0' || yytext[i + 2] > '7') - next = H(yytext[i]) << 3 | H(yytext[i + 1]), i += 1; - else - next = H(yytext[i]) << 6 | H(yytext[i + 1]) << 3 | H(yytext[i + 2]), i += 2; - break; +{ + "`" R CYLexBufferEnd(string, String, yyextra->tail_ ? tk::TemplateTail : tk::NoSubstitutionTemplate, hi::Constant); + "${" R yyextra->template_.push(true); CYLexBufferEnd(string, String, yyextra->tail_ ? tk::TemplateMiddle : tk::TemplateHead, hi::Constant); - case '4': case '5': case '6': case '7': - if (yytext[i + 1] < '0' || yytext[i + 1] > '7') - next = H(yytext[i]), i += 0; - else - next = H(yytext[i]) << 3 | H(yytext[i + 1]), i += 1; - break; + "$" R CYLexBufferUnit('$'); + + {PlateCharacter}+ R CYLexBufferUnits(yytext, yyleng); + {PlateCharacter}*{UnicodeFail} R E("invalid character"); + {LineTerminatorSequence} R E("invalid newline"); +} + /* }}} */ + /* Escapes {{{ */ +{ + \\[0-3][0-7][0-7] R CYLexBufferPoint(X(yytext[1]) << 6 | X(yytext[2]) << 3 | X(yytext[3])); + \\[0-7][0-7] R CYLexBufferUnit(X(yytext[1]) << 3 | X(yytext[2])); + \\[0-7] R CYLexBufferUnit(X(yytext[1])); +} - case 'x': - U(local, H(yytext[i + 1]) << 4 | H(yytext[i + 2])); - i += 2; - continue; +{ + \\0[0-7] R E("legacy escape"); + \\0 R CYLexBufferUnit('\0'); +} - case 'u': - U(local, yytext, i); - continue; - } +{ + \\b R CYLexBufferUnit('\b'); + \\f R CYLexBufferUnit('\f'); + \\n R CYLexBufferUnit('\n'); + \\r R CYLexBufferUnit('\r'); + \\t R CYLexBufferUnit('\t'); + \\v R CYLexBufferUnit('\v'); - *local++ = next; + \\x{HexDigit}{2} R CYLexBufferPoint(X(yytext[2]) << 4 | X(yytext[3])); + + \\u{HexDigit}{4} R CYLexBufferPoint(X(yytext[2]) << 12 | X(yytext[3]) << 8 | X(yytext[4]) << 4 | X(yytext[5])); + + \\u\{{HexDigit}+\} R { + unsigned point(0); + for (yy_size_t i(3); i != yyleng - 1; ++i) + point = point << 4 | X(yytext[i]); + CYLexBufferPoint(point); } - *local = '\0'; - I(string, String(value, local - value), tk::StringLiteral, hi::Constant); -} + \\{LineTerminatorSequence} yylloc->end.lines(); + \\(.|{NotLineTerminator}) R CYLexBufferUnits(yytext + 1, yyleng - 1); -{StringPrefix}\\(x.{0,2}|u([^{].{0,3}|\{[^}]*)?|{UnicodeFail})? L E("invalid escape") -{StringPrefix} L E("invalid string") + \\(x{HexDigit}{0,1}|u({HexDigit}{0,3}|\{{HexDigit}*)|{UnicodeFail})? R E("invalid escape"); + <> R E("invalid string"); +} /* }}} */ -{LineTerminatorSequence} yylloc->step(); yylloc->end.lines(); N +{LineTerminatorSequence} yylloc->step(); yylloc->end.lines(); yyextra->last_ = true; N {WhiteSpace} L <> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate(); @@ -594,27 +642,12 @@ void CYDriver::ScannerDestroy() { cylex_destroy(scanner_); } -CYDriver::Condition CYDriver::GetCondition() { - switch (yy_top_state(scanner_)) { - case RegExp: - return RegExpCondition; -@begin E4X - case XMLContent: - return XMLContentCondition; - case XMLTag: - return XMLTagCondition; -@end - default: - _assert(false); - } -} - void CYDriver::SetCondition(Condition condition) { struct yyguts_t *yyg(reinterpret_cast(scanner_)); switch (condition) { case RegExpCondition: - BEGIN(RegExp); + BEGIN(template_.top() ? RegExpOrTemplateTail : RegExp); break; @begin E4X case XMLContentCondition: diff --git a/Cycript.yy.in b/Cycript.yy.in index b656690..1215d39 100644 --- a/Cycript.yy.in +++ b/Cycript.yy.in @@ -48,7 +48,6 @@ %union { CYBoolean *boolean_; } %union { CYClause *clause_; } %union { cy::Syntax::Catch *catch_; } -%union { CYComment *comment_; } %union { CYComprehension *comprehension_; } %union { CYDeclaration *declaration_; } %union { CYDeclarations *declarations_; } @@ -70,6 +69,7 @@ %union { CYProperty *property_; } %union { CYPropertyName *propertyName_; } %union { CYRubyProc *rubyProc_; } +%union { CYSpan *span_; } %union { CYStatement *statement_; } %union { CYString *string_; } %union { CYThis *this_; } @@ -222,7 +222,7 @@ _finline int yylex(cy::parser::semantic_type *semantic, CYLocation *location, vo %token SemiColon ";" %token NewLine "\n" -%token Comment +%token Comment %token OpenParen "(" %token CloseParen ")" @@ -362,6 +362,11 @@ _finline int yylex(cy::parser::semantic_type *semantic, CYLocation *location, vo %token StringLiteral %token RegularExpressionLiteral +%token NoSubstitutionTemplate +%token TemplateHead +%token TemplateMiddle +%token TemplateTail + %type AdditiveExpression %type ArgumentList_ %type ArgumentList @@ -477,6 +482,8 @@ _finline int yylex(cy::parser::semantic_type *semantic, CYLocation *location, vo %type StatementListOpt %type StatementListItem %type SwitchStatement +%type TemplateLiteral +%type TemplateSpans %type ThrowStatement %type TryStatement %type UnaryExpression_ @@ -799,6 +806,7 @@ PrimaryExpression | ArrayLiteral { $$ = $1; } | ObjectLiteral { $$ = $1; } | RegularExpressionLiteral { $$ = $1; } + | TemplateLiteral { $$ = $1; } | CoverParenthesizedExpressionAndArrowParameterList { if ($1 == NULL) error(@1, "invalid parenthetical"); $$ = $1; } | AutoComplete { driver.mode_ = CYDriver::AutoPrimary; YYACCEPT; } ; @@ -888,8 +896,16 @@ InitializerOpt ; /* }}} */ /* 12.2.9 Template Literals {{{ */ -/* }}} */ +TemplateLiteral + : NoSubstitutionTemplate { $$ = CYNew CYTemplate($1, NULL); } + | TemplateHead TemplateSpans { $$ = CYNew CYTemplate($1, $2); } + ; +TemplateSpans + : Expression TemplateMiddle TemplateSpans { $$ = CYNew CYSpan($1, $2, $3); } + | Expression TemplateTail { $$ = CYNew CYSpan($1, $2, NULL); } + ; +/* }}} */ /* 12.3+ Left-Hand-Side Expressions {{{ */ MemberAccess @@ -1542,7 +1558,6 @@ ClassMessageDeclaration ClassMessageDeclarationListOpt : ClassMessageDeclarationListOpt ClassMessageDeclaration { $2->SetNext($1); $$ = $2; } - | ClassMessageDeclarationListOpt Comment { $$ = $1; } | { $$ = NULL; } ; @@ -1746,12 +1761,6 @@ Statement__ @end -/* YUI: Documentation Comments {{{ */ -Statement__ - : Comment { $$ = $1; } - ; -/* }}} */ - @begin E4X /* Lexer State {{{ */ LexPushRegExp diff --git a/Driver.cpp b/Driver.cpp index b944022..387a46d 100644 --- a/Driver.cpp +++ b/Driver.cpp @@ -29,7 +29,7 @@ CYDriver::CYDriver(CYPool &pool, std::istream &data, const std::string &filename data_(data), debug_(0), strict_(false), - commented_(false), + highlight_(false), filename_(filename), script_(NULL), auto_(false), @@ -38,6 +38,7 @@ CYDriver::CYDriver(CYPool &pool, std::istream &data, const std::string &filename { memset(&no_, 0, sizeof(no_)); in_.push(false); + template_.push(false); ScannerInit(); } diff --git a/Driver.hpp b/Driver.hpp index a4458a2..3f0a960 100644 --- a/Driver.hpp +++ b/Driver.hpp @@ -36,7 +36,11 @@ class _visible CYDriver { CYPool &pool_; void *scanner_; + std::vector buffer_; + bool tail_; + std::stack in_; + std::stack template_; bool newline_; bool last_; @@ -53,7 +57,7 @@ class _visible CYDriver { int debug_; bool strict_; - bool commented_; + bool highlight_; enum Condition { RegExpCondition, @@ -112,7 +116,6 @@ class _visible CYDriver { bool Parse(); void Replace(CYOptions &options); - Condition GetCondition(); void SetCondition(Condition condition); void PushCondition(Condition condition); diff --git a/Highlight.cpp b/Highlight.cpp index 3e8aa7b..9c95728 100644 --- a/Highlight.cpp +++ b/Highlight.cpp @@ -60,7 +60,7 @@ _visible void CYLexerHighlight(const char *data, size_t size, std::ostream &outp CYStream stream(data, data + size); CYDriver driver(pool, stream); - driver.commented_ = true; + driver.highlight_ = true; size_t offset(0); CYPosition current; diff --git a/Makefile.am b/Makefile.am index 0519e2d..a3da5f9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -105,8 +105,9 @@ Cycript.l: Cycript.l.in UnicodeIDStart.l UnicodeIDContinue.l CLEANFILES += lex.cy.cpp lex.cy.cpp: Cycript.l - $(FLEX) -o $@ $< + $(FLEX) -o $@ -T $< 2>lex.output || (grep -F '$<:' lex.output; false) grep -F 'No backing up.' lex.backup >/dev/null + ! grep -F ': warning, ' lex.output || true Console.$(OBJEXT) Cycript.tab.lo Driver.lo Handler.lo Highlight.lo Library.lo lex.cy.lo: Cycript.tab.hh diff --git a/Makefile.in b/Makefile.in index 94714ce..d9411fc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1331,8 +1331,9 @@ Cycript.yy: Cycript.yy.in Cycript.l: Cycript.l.in UnicodeIDStart.l UnicodeIDContinue.l $(srcdir)/Filter.sh $< >$@ $(filters) lex.cy.cpp: Cycript.l - $(FLEX) -o $@ $< + $(FLEX) -o $@ -T $< 2>lex.output || (grep -F '$<:' lex.output; false) grep -F 'No backing up.' lex.backup >/dev/null + ! grep -F ': warning, ' lex.output || true Console.$(OBJEXT) Cycript.tab.lo Driver.lo Handler.lo Highlight.lo Library.lo lex.cy.lo: Cycript.tab.hh Cycript.tab.cc Cycript.tab.hh stack.hh Cycript.output: Cycript.yy diff --git a/NotLineTerminator.l b/NotLineTerminator.l new file mode 100644 index 0000000..0c48e7a --- /dev/null +++ b/NotLineTerminator.l @@ -0,0 +1 @@ +NotLineTerminator [\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|\xe2\x80[\x80-\xa7\xa9-\xbf]|\xe2[\x81-\xbf][\x80-\xbf]|[\xe1\xe3-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|\xf4[\x80-\x8f][\x80-\xbf][\x80-\xbf]|[\xf1-\xf3][\x80-\xbf][\x80-\xbf][\x80-\xbf] diff --git a/Output.cpp b/Output.cpp index 95d2f19..9ec05c2 100644 --- a/Output.cpp +++ b/Output.cpp @@ -183,13 +183,6 @@ void Catch::Output(CYOutput &out) const { } } -void CYComment::Output(CYOutput &out, CYFlags flags) const { - out << '\r'; - out(value_); - out.right_ = true; - out << '\r'; -} - void CYCompound::Output(CYOutput &out, CYFlags flags) const { if (next_ == NULL) expression_->Output(out, flags); @@ -510,6 +503,10 @@ void CYStatement::Output(CYOutput &out) const { Multiple(out); } +void CYTemplate::Output(CYOutput &out, CYFlags flags) const { + _assert(false); +} + void CYTypeArrayOf::Output(CYOutput &out, CYIdentifier *identifier) const { next_->Output(out, Precedence(), identifier); out << '['; diff --git a/Parser.hpp b/Parser.hpp index 2d78a47..93d44cb 100644 --- a/Parser.hpp +++ b/Parser.hpp @@ -272,22 +272,6 @@ struct CYIdentifier : CYIdentifier *Replace(CYContext &context); }; -struct CYComment : - CYStatement -{ - const char *value_; - - CYComment(const char *value) : - value_(value) - { - } - - CYCompact(None) - - virtual CYStatement *Replace(CYContext &context); - virtual void Output(CYOutput &out, CYFlags flags) const; -}; - struct CYLabel : CYStatement { @@ -813,6 +797,43 @@ struct CYString : virtual void PropertyName(CYOutput &out) const; }; +struct CYElement; + +struct CYSpan : + CYNext +{ + CYExpression *expression_; + CYString *string_; + + CYSpan(CYExpression *expression, CYString *string, CYSpan *next) : + CYNext(next), + expression_(expression), + string_(string) + { + } + + CYElement *Replace(CYContext &context); +}; + +struct CYTemplate : + CYExpression +{ + CYString *string_; + CYSpan *spans_; + + CYTemplate(CYString *string, CYSpan *spans) : + string_(string), + spans_(spans) + { + } + + CYPrecedence(0) + CYRightHand(false) + + virtual CYExpression *Replace(CYContext &context); + virtual void Output(CYOutput &out, CYFlags flags) const; +}; + struct CYNumber : CYTrivial, CYPropertyName diff --git a/Replace.cpp b/Replace.cpp index 44a1143..79e18d4 100644 --- a/Replace.cpp +++ b/Replace.cpp @@ -151,10 +151,6 @@ void CYClause::Replace(CYContext &context) { $T() next_->Replace(context); } -CYStatement *CYComment::Replace(CYContext &context) { - return this; -} - CYExpression *CYCompound::Replace(CYContext &context) { context.Replace(expression_); context.Replace(next_); @@ -857,6 +853,10 @@ void CYScope::Close(CYContext &context, CYStatement *&statements) { } } +CYElement *CYSpan::Replace(CYContext &context) { $T(NULL) + return $ CYElement(expression_, $ CYElement(string_, next_->Replace(context))); +} + CYStatement *CYStatement::Return() { return this; } @@ -885,6 +885,10 @@ CYStatement *CYSwitch::Replace(CYContext &context) { return this; } +CYExpression *CYTemplate::Replace(CYContext &context) { + return $C2($M($M($M($V("String"), $S("prototype")), $S("concat")), $S("apply")), $S(""), $ CYArray($ CYElement(string_, spans_->Replace(context)))); +} + CYExpression *CYThis::Replace(CYContext &context) { if (context.this_ != NULL) return $V(context.this_->Identifier(context)); diff --git a/backtrack.sh b/backtrack.sh index a1eb8b4..20b250f 100755 --- a/backtrack.sh +++ b/backtrack.sh @@ -1,3 +1,5 @@ #!/bin/bash -./apple-make.sh -grep '^State' build.osx-i386/lex.backup | wc -l +./apple-make.sh build-osx-i386 +echo "backup $(grep -c '^State ' build.osx-i386/lex.backup)" +echo "states $(grep '^static .* yy_accept\[' build.osx-i386/lex.cy.cpp | sed -e 's/.*\[//;s/].*//') 3528" +echo "jammed $(grep -F 'accepts: ['"$(grep 'jammed' build.osx-i386/lex.cy.cpp -B 3 | head -n 1 | sed -e 's/:$//;s/.* //')"']' build.osx-i386/lex.output | sed -e 's/.* # //;s/ .*//')" diff --git a/unicode.mk b/unicode.mk index dc88024..2d99709 100644 --- a/unicode.mk +++ b/unicode.mk @@ -24,11 +24,14 @@ unicode := unicode.sh unicode.py unicode += DerivedCoreProperties.txt unicode += PropList.txt -all: UnicodeIDStart.l UnicodeIDContinue.l +all: NotLineTerminator.l UnicodeIDStart.l UnicodeIDContinue.l %.txt: wget -qc http://www.unicode.org/Public/UCD/latest/ucd/$@ +NotLineTerminator.l: unicode.py + printf '80..2027\n2029..10ffff\n' | ./unicode.py NotLineTerminator >$@ + UnicodeIDStart.l: $(unicode) ./unicode.sh UnicodeIDStart ID_Start DerivedCoreProperties.txt Other_ID_Start PropList.txt >$@ diff --git a/unicode.py b/unicode.py index eebc83f..d5a10bc 100755 --- a/unicode.py +++ b/unicode.py @@ -90,6 +90,13 @@ def build(index, tree, units): for i in range(0, index): item += '[\\x80-\\xbf]' + if False: + item = item.replace('[\\x00-\\x7f]', '{U1}') + item = item.replace('[\\x80-\\xbf]', '{U0}') + item = item.replace('[\\xc2-\\xdf]', '{U2}') + item = item.replace('[\\xe0-\\xef]', '{U3}') + item = item.replace('[\\xf0-\\xf4]', '{U4}') + items.append(item) return False -- 2.45.2