From: Jay Freeman (saurik) Date: Mon, 23 Nov 2015 11:11:12 +0000 (-0800) Subject: Optimize for lexer performance: stop backtracking. X-Git-Tag: v0.9.590~297 X-Git-Url: https://git.saurik.com/cycript.git/commitdiff_plain/e31ea4969cfa2a181b81fe24fd4e9a39b6def364 Optimize for lexer performance: stop backtracking. --- diff --git a/.gitignore b/.gitignore index b144ebe..dd1e7c5 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ stack.hh sysroot.ios sysroot.sim lex.cy.cpp +lex.backup /cycript Bridge.hpp package diff --git a/Cycript.l.in b/Cycript.l.in index 5a21631..27c4395 100644 --- a/Cycript.l.in +++ b/Cycript.l.in @@ -98,6 +98,14 @@ typedef cy::parser::token tk; } \ } +#define E(message) { \ + CYDriver::Error error; \ + error.location_ = *yylloc; \ + error.message_ = "syntax error, " message; \ + yyextra->errors_.push_back(error); \ + yyterminate(); \ +} + int H(char c) { if (c >= '0' && c <= '9') return c - '0'; @@ -131,12 +139,12 @@ int H(char c) { %option 8bit %option backup %option batch +%option full %option never-interactive %option pointer %option reentrant %option stack -Exponent [eE][+-]?[0-9]+ Escape \\[\\'"bfnrtv]|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n IdentifierStart [a-zA-Z$_] @@ -167,6 +175,7 @@ XMLName {XMLNameStart}{XMLNamePart}* %% \/{RegularExpressionBody}\/{RegularExpressionFlags} L C I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant); +\/{RegularExpressionBody}?\\? L E("unterminated regex") #![^\n]* L M @@ -176,6 +185,7 @@ XMLName {XMLNameStart}{XMLNamePart}* /* XXX: unify these two rules using !? */ \/\*!([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V() C I(comment, Comment(Y), tk::Comment, hi::Comment); \/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V(N) M +\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\** V() E("invalid comment") @begin E4X "<>" L F(tk::LeftRight, hi::Structure); @@ -200,6 +210,7 @@ XMLName {XMLNameStart}{XMLNamePart}* @end "..." L C F(tk::PeriodPeriodPeriod, hi::Meta); +".." L E("invalid operator") @begin E4X "::" L C F(tk::ColonColon, hi::Operator); @@ -383,12 +394,15 @@ XMLName {XMLNameStart}{XMLNamePart}* {IdentifierStart}{IdentifierPart}* L C I(identifier, Identifier(Y), tk::Identifier_, hi::Identifier); -(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? L C I(number, Number(strtod(yytext, NULL)), tk::NumericLiteral, hi::Constant); 0[xX][0-9a-fA-F]+ L C I(number, Number(strtoull(yytext + 2, NULL, 16)), tk::NumericLiteral, hi::Constant); 0[0-7]+ L C I(number, Number(strtoull(yytext + 1, NULL, 8)), tk::NumericLiteral, hi::Constant); 0[bB][0-1]+ L C I(number, Number(strtoull(yytext + 2, NULL, 2)), tk::NumericLiteral, hi::Constant); +(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?)([eE][+-]?[0-9]+)? L C I(number, Number(strtod(yytext, NULL)), tk::NumericLiteral, hi::Constant); +(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?)[eE][+-]?{IdentifierPart}* L E("invalid exponent") +(\.?[0-9]|(0|[1-9][0-9]*)?\.){IdentifierPart}* L E("invalid number") + \"([^"\\\n]|{Escape})*\"|'([^'\\\n]|{Escape})*' L C { char *value(A char[yyleng]); char *local(value); @@ -423,19 +437,15 @@ XMLName {XMLNameStart}{XMLNamePart}* I(string, String(value, local - value), tk::StringLiteral, hi::Constant); } +(\"([^"\\\n]|{Escape})*|'([^'\\\n]|{Escape})*)(\\(x.{0,2}|u.{0,4})?)? L E("invalid escape") + \r?\n|\r|\xe2\x80[\xa8\xa9] yylloc->step(); yylloc->end.lines(); N [ \t] L <> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate(); -. L { - CYDriver::Error error; - error.location_ = *yylloc; - error.message_ = "syntax error, unknown token"; - yyextra->errors_.push_back(error); - yyterminate(); -} +@{IdentifierPart}+|\xe2.|. L E("unknown token") %% diff --git a/Makefile.am b/Makefile.am index ee3b63b..76f44b2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -99,7 +99,8 @@ Cycript.l: Cycript.l.in CLEANFILES += lex.cy.cpp lex.cy.cpp: Cycript.l - $(FLEX) -t $< | $(SED) -e 's/int yyl;/yy_size_t yyl;/;s/int yyleng_r;/yy_size_t yyleng_r;/' >$@ + $(FLEX) -b -t $< | $(SED) -e 's/int yyl;/yy_size_t yyl;/;s/int yyleng_r;/yy_size_t yyleng_r;/;s/yyg =/yyg __attribute__((__unused__)) =/' >$@ + grep -F 'No backing up.' lex.backup >/dev/null Console.$(OBJEXT) Cycript.tab.lo Driver.lo Handler.lo Highlight.lo Library.lo lex.cy.lo: Cycript.tab.hh diff --git a/Makefile.in b/Makefile.in index baed839..1c86879 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1322,7 +1322,8 @@ Cycript.yy: Cycript.yy.in Cycript.l: Cycript.l.in $(srcdir)/Filter.sh <$< >$@ $(filters) lex.cy.cpp: Cycript.l - $(FLEX) -t $< | $(SED) -e 's/int yyl;/yy_size_t yyl;/;s/int yyleng_r;/yy_size_t yyleng_r;/' >$@ + $(FLEX) -b -t $< | $(SED) -e 's/int yyl;/yy_size_t yyl;/;s/int yyleng_r;/yy_size_t yyleng_r;/;s/yyg =/yyg __attribute__((__unused__)) =/' >$@ + grep -F 'No backing up.' lex.backup >/dev/null Console.$(OBJEXT) Cycript.tab.lo Driver.lo Handler.lo Highlight.lo Library.lo lex.cy.lo: Cycript.tab.hh Cycript.tab.cc Cycript.tab.hh stack.hh Cycript.output: Cycript.yy