X-Git-Url: https://git.saurik.com/cycript.git/blobdiff_plain/697d6fd2b2bf66a1f0bf1f28de0ced96fc14cbfc..56a66df3a7e954e0821180d4313b43c53e92180e:/Cycript.l.in?ds=sidebyside diff --git a/Cycript.l.in b/Cycript.l.in index ec2befc..e085233 100644 --- a/Cycript.l.in +++ b/Cycript.l.in @@ -1,6 +1,45 @@ -%{ -// XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf +/* Cycript - Inlining/Optimizing JavaScript Compiler + * Copyright (C) 2009 Jay Freeman (saurik) +*/ + +/* Modified BSD License {{{ */ +/* + * Redistribution and use in source and binary + * forms, with or without modification, are permitted + * provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the + * above copyright notice, this list of conditions + * and the following disclaimer. + * 2. Redistributions in binary form must reproduce the + * above copyright notice, this list of conditions + * and the following disclaimer in the documentation + * and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse + * or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* }}} */ + +/* XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf */ +%{ #define YYLTYPE cy::location #include "Cycript.tab.hh" typedef cy::parser::token tk; @@ -30,7 +69,7 @@ typedef cy::parser::token tk; yyextra->state_ = CYNewLine; \ } -#define M { \ +#define V(more) { \ if (const char *nl = reinterpret_cast(memchr(yytext, '\n', yyleng))) { \ unsigned lines(0); \ size_t left; \ @@ -42,7 +81,7 @@ typedef cy::parser::token tk; yylloc->end.lines(lines); \ yylloc->end.columns(left); \ yylloc->step(); \ - N \ + more \ } else L \ } @@ -68,7 +107,8 @@ int H(char c) { } else if (yyextra->size_ == 0) \ value = YY_NULL; \ else { \ - size_t copy(std::min(size, yyextra->size_)); \ + size_t copy(size); \ + copy = (std::min(copy, yyextra->size_)); \ memcpy(data, yyextra->data_, copy); \ yyextra->data_ += copy; \ yyextra->size_ -= copy; \ @@ -86,6 +126,7 @@ int H(char c) { %option nounput %option interactive %option reentrant +%option stack Exponent [eE][+-]?[0-9]+ Escape \\[\\'"bfnrtv]|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n @@ -101,27 +142,63 @@ RegularExpressionFlags {IdentifierPart}* RegularExpressionChars {RegularExpressionChar}* RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars} +@begin E4X +XMLNameStart [a-zA-Z_:] +XMLNamePart [a-zA-Z0-9.-_:] +XMLName {XMLNameStart}{XMLNamePart}* +@end + %s Div %s RegExp +@begin E4X +%x XMLContent +%x XMLTag +@end + %% \/{RegularExpressionBody}\/{RegularExpressionFlags} E("") \/\/[^\n]* L -\/\*(\n|[^\*]|\*[^/])*\*\/ M + + /* http://ostermiller.org/findcomment.html */ + /* XXX: unify these two rules using !? */ +\/\*!([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V() C yylval->comment_ = new(yyextra->pool_) CYComment(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Comment; +\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/ V(N) @begin E4X -\ -\])*]]> -\])*?> +"<>" L return tk::LeftRight; +"" L return tk::LeftSlashRight; -"@" L C return tk::At; +\])*]]> V() return tk::XMLCDATA; +\ V() return tk::XMLComment; +\])*?> V() return tk::XMLPI; + +"=" L return tk::Equal; +">" L return tk::Right; +"/>" L return tk::SlashRight; +"{" L return tk::OpenBrace; + +\"(\n|[^"])*\"|'(\n|[^'])*' V() { + return tk::XMLAttributeValue; +} + +{XMLName} L return tk::XMLName; +[ \t\r\n] V() return tk::XMLWhitespace; + +"{" L return tk::OpenBrace; +"<" L return tk::Left; +"" L C return tk::LeftRight; -"" L C return tk::LeftSlashRight; ".." L C return tk::PeriodPeriod; -"/>" L C return tk::SlashRight; +@end + +@begin E4X ObjectiveC +"@" L C return tk::At; @end "&" L C return tk::Ampersand; @@ -256,19 +333,24 @@ RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars} "each" L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("each"); return tk::Each; +@begin E4X +"namespace" L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("namespace"); return tk::Namespace; +"xml" L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("xml"); return tk::XML; +@end + {IdentifierStart}{IdentifierPart}* L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Identifier_; (\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? L C yylval->number_ = new(yyextra->pool_) CYNumber(strtod(yytext, NULL)); return tk::NumericLiteral; 0[xX][0-9a-fA-F]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 16)); return tk::NumericLiteral; - +0[0-7]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 1, NULL, 8)); return tk::NumericLiteral; 0[bB][0-1]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 2)); return tk::NumericLiteral; \"([^"\\\n]|{Escape})*\"|'([^'\\\n]|{Escape})*' L C { char *value(reinterpret_cast(apr_palloc(yyextra->pool_, yyleng))); char *local(value); - for (int i(1); i != yyleng - 1; ++i) { + for (yy_size_t i(1), e(yyleng - 1); i != e; ++i) { char next(yytext[i]); if (yytext[i] == '\\') @@ -323,14 +405,59 @@ void CYDriver::ScannerDestroy() { cylex_destroy(scanner_); } -void CYDriver::BeginCondition(Condition condition) { +CYDriver::Condition CYDriver::GetCondition() { + switch (yy_top_state(scanner_)) { + case RegExp: + return RegExpCondition; +@begin E4X + case XMLContent: + return XMLContentCondition; + case XMLTag: + return XMLTagCondition; +@end + default: + _assert(false); + } +} + +void CYDriver::SetCondition(Condition condition) { struct yyguts_t *yyg(reinterpret_cast(scanner_)); switch (condition) { case RegExpCondition: BEGIN(RegExp); break; +@begin E4X + case XMLContentCondition: + BEGIN(XMLContent); + break; + case XMLTagCondition: + BEGIN(XMLTag); + break; +@end default: _assert(false); } } + +void CYDriver::PushCondition(Condition condition) { + switch (condition) { + case RegExpCondition: + yy_push_state(RegExp, scanner_); + break; +@begin E4X + case XMLContentCondition: + yy_push_state(XMLContent, scanner_); + break; + case XMLTagCondition: + yy_push_state(XMLTag, scanner_); + break; +@end + default: + _assert(false); + } +} + +void CYDriver::PopCondition() { + yy_pop_state(scanner_); +}