]> git.saurik.com Git - cycript.git/blobdiff - Cycript.l.in
Updated todo list with some random exception-related thoughts.
[cycript.git] / Cycript.l.in
index f3201bb7f5a7383b2d4240b6f357c3186ae8df11..7323e251cb9b95c920346be8fbba69842af36c4e 100644 (file)
@@ -1,17 +1,56 @@
-%{
-// XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf
+/* Cycript - Inlining/Optimizing JavaScript Compiler
+ * Copyright (C) 2009  Jay Freeman (saurik)
+*/
+
+/* Modified BSD License {{{ */
+/*
+ *        Redistribution and use in source and binary
+ * forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer in the documentation
+ *    and/or other materials provided with the
+ *    distribution.
+ * 3. The name of the author may not be used to endorse
+ *    or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* }}} */
+
+/* XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf */
 
+%{
 #define YYLTYPE cy::location
 #include "Cycript.tab.hh"
 typedef cy::parser::token tk;
 
 #define YY_EXTRA_TYPE CYDriver *
 
-#define T yylval->newline_ = yyextra->state_ == CYNewLine;
+#define T yylval->newline_ = yyextra->state_ == CYNewLine; BEGIN(Div); /* record whether the driver state was CYNewLine when this token was seen, then enter the Div start condition, where a slash is lexed as a division operator */
 #define C T yyextra->state_ = CYClear;
 #define R T yyextra->state_ = CYRestricted;
 
-#define E(prefix) L C BEGIN(INITIAL); { \
+#define E(prefix) L C { \
     char *value(reinterpret_cast<char *>(apr_palloc(yyextra->pool_, yyleng + sizeof(prefix)))); \
     memcpy(value, prefix, sizeof(prefix) - 1); \
     memcpy(value + sizeof(prefix) - 1, yytext, yyleng); \
@@ -30,7 +69,7 @@ typedef cy::parser::token tk;
             yyextra->state_ = CYNewLine; \
     }
 
-#define M { \
+#define V(more) { \
     if (const char *nl = reinterpret_cast<const char *>(memchr(yytext, '\n', yyleng))) { \
         unsigned lines(0); \
         size_t left; \
@@ -42,7 +81,7 @@ typedef cy::parser::token tk;
         yylloc->end.lines(lines); \
         yylloc->end.columns(left); \
         yylloc->step(); \
-        N \
+        more \
     } else L \
 }
 
@@ -68,7 +107,8 @@ int H(char c) {
     } else if (yyextra->size_ == 0) \
         value = YY_NULL; \
     else { \
-        size_t copy(std::min(size, yyextra->size_)); \
+        size_t copy(size); \
+        copy = (std::min(copy, yyextra->size_)); \
         memcpy(data, yyextra->data_, copy); \
         yyextra->data_ += copy; \
         yyextra->size_ -= copy; \
@@ -86,6 +126,7 @@ int H(char c) {
 %option nounput
 %option interactive
 %option reentrant
+%option stack
 
 Exponent [eE][+-]?[0-9]+
 Escape   \\[\\'"bfnrtv]|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n
@@ -99,38 +140,61 @@ RegularExpressionFirstChar [^\n*\\/]|{BackslashSequence}
 RegularExpressionChar [^\n\\/]|{BackslashSequence}
 RegularExpressionFlags {IdentifierPart}*
 RegularExpressionChars {RegularExpressionChar}*
-RegularExpressionBody_ {RegularExpressionChars}
-RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionBody_}
+RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
+
+@begin E4X
+XMLNameStart [a-zA-Z_:]
+XMLNamePart [a-zA-Z0-9._:-]
+XMLName {XMLNameStart}{XMLNamePart}*
+@end
 
-RegularExpressionEnd_ \/{RegularExpressionFlags}
-RegularExpressionRest_ {RegularExpressionBody_}{RegularExpressionEnd_}
-RegularExpressionStart_ {RegularExpressionBody}{RegularExpressionEnd_}
+%s Div
+%s RegExp
 
-%x RegExp
-%x RegExpSlash
-%x RegExpSlashEqual
-%x RegExpSlashRight
+@begin E4X
+%x XMLContent
+%x XMLTag
+@end
 
 %%
 
-<RegExpSlash>{RegularExpressionStart_} E("/")
-<RegExpSlashEqual>{RegularExpressionRest_} E("/=")
-<RegExpSlashRight>{RegularExpressionRest_} E("/>")
+<RegExp>\/{RegularExpressionBody}\/{RegularExpressionFlags} E("")
 
 \/\/[^\n]* L
-\/\*(\n|[^\*]|\*[^/])*\*\/ M
+\/\*(\n|[^\*]|\*[^/])*\*\/ V(N)
 
 @begin E4X
-<RegExp>\<!--(\n|[^-]|-[^-])*--> 
-<RegExp>\<!\[CDATA\[(\n|[^[]|\[[^[]|\[\[[^>])*]]> 
-<RegExp>\<?(\n|[^?]|\?[^>])*?> 
+<RegExp>"<>"      L return tk::LeftRight;
+<XMLContent>"</>" L return tk::LeftSlashRight;
 
-"@"    L C return tk::At;
+<RegExp,XMLContent>\<!\[CDATA\[(\n|[^\]]|\][^\]]|\]\]+[^\]>])*\]\]+> V() return tk::XMLCDATA; /* body is delimited by closing brackets: the match ends at the first ]]> and extra closing brackets before it are allowed */
+<RegExp,XMLContent>\<!--(\n|[^-]|-[^-])*--> V() return tk::XMLComment;
+<RegExp,XMLContent>\<\?(\n|[^?]|\?+[^?>])*\?+> V() return tk::XMLPI; /* question marks are escaped so they match literally instead of acting as the optional operator; the match ends at the first closing delimiter */
+
+<XMLTag>"="  L return tk::Equal;
+<XMLTag>">"  L return tk::Right;
+<XMLTag>"/>" L return tk::SlashRight;
+<XMLTag>"{"  L return tk::OpenBrace;
+
+<XMLTag>\"(\n|[^"])*\"|'(\n|[^'])*' V() {
+    return tk::XMLAttributeValue;
+}
+
+<XMLTag>{XMLName} L return tk::XMLName;
+<XMLTag>[ \t\r\n] V() return tk::XMLWhitespace;
+
+<XMLContent>"{"  L return tk::OpenBrace;
+<XMLContent>"<"  L return tk::Left;
+<XMLContent>"</" L return tk::LeftSlash;
+@end
+
+@begin E4X
 "::"   L C return tk::ColonColon;
-"<>"   L C return tk::LeftRight;
-"</>"  L C return tk::LeftSlashRight;
 ".."   L C return tk::PeriodPeriod;
-"/>"   L C return tk::SlashRight;
+@end
+
+@begin E4X ObjectiveC
+"@"    L C return tk::At;
 @end
 
 "&"    L C return tk::Ampersand;
@@ -167,12 +231,13 @@ RegularExpressionStart_ {RegularExpressionBody}{RegularExpressionEnd_}
 ">>="  L C return tk::RightRightEqual;
 ">>>"  L C return tk::RightRightRight;
 ">>>=" L C return tk::RightRightRightEqual;
-"/"    L C return tk::Slash;
-"/="   L C return tk::SlashEqual;
 "*"    L C return tk::Star;
 "*="   L C return tk::StarEqual;
 "~"    L C return tk::Tilde;
 
+<Div>"/"  L C return tk::Slash;
+<Div>"/=" L C return tk::SlashEqual;
+
 ":"    L C return tk::Colon;
 ","    L C return tk::Comma;
 "?"    L C return tk::Question;
@@ -264,9 +329,14 @@ RegularExpressionStart_ {RegularExpressionBody}{RegularExpressionEnd_}
 
 "each"         L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("each"); return tk::Each;
 
-{IdentifierStart}{IdentifierPart}* yylval->identifier_ = new(yyextra->pool_) CYIdentifier(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); L C return tk::Identifier_;
+@begin E4X
+"namespace"    L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("namespace"); return tk::Namespace;
+"xml"          L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("xml"); return tk::XML;
+@end
+
+{IdentifierStart}{IdentifierPart}* L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Identifier_;
 
-(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? yylval->number_ = new(yyextra->pool_) CYNumber(strtod(yytext, NULL)); L C return tk::NumericLiteral;
+(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? L C yylval->number_ = new(yyextra->pool_) CYNumber(strtod(yytext, NULL)); return tk::NumericLiteral;
 
 0[xX][0-9a-fA-F]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 16)); return tk::NumericLiteral;
 
@@ -331,20 +401,59 @@ void CYDriver::ScannerDestroy() {
     cylex_destroy(scanner_);
 }
 
+CYDriver::Condition CYDriver::GetCondition() { // translates a flex start condition into the driver-level Condition enum
+    switch (yy_top_state(scanner_)) { // NOTE(review): flex documents yy_top_state as the top of the start-condition stack (the last state saved by a push), not the current YY_START; confirm that is the state wanted here and that the stack is never empty
+        case RegExp:
+            return RegExpCondition;
+@begin E4X
+        case XMLContent:
+            return XMLContentCondition;
+        case XMLTag:
+            return XMLTagCondition;
+@end
+        default: // any other start condition (INITIAL and Div are not listed above) trips the assertion
+            _assert(false); // NOTE(review): nothing is returned after this; presumably _assert does not return, confirm
+    }
+}
+
 void CYDriver::SetCondition(Condition condition) {
     struct yyguts_t *yyg(reinterpret_cast<struct yyguts_t *>(scanner_));
 
     switch (condition) {
-        case RegExpSlash:
-            BEGIN(RegExpSlash);
+        case RegExpCondition: // each case maps a driver-level Condition value onto the flex start condition of the same purpose
+            BEGIN(RegExp); // BEGIN replaces the current start condition in place; nothing is saved on the state stack
+            break;
+@begin E4X
+        case XMLContentCondition:
+            BEGIN(XMLContent);
+            break;
+        case XMLTagCondition:
+            BEGIN(XMLTag);
+            break;
+@end
+        default: // a Condition value with no case above trips the assertion
+            _assert(false);
+    }
+}
+
+void CYDriver::PushCondition(Condition condition) { // like SetCondition, but goes through flex's start-condition stack so the previous condition can be restored later
+    switch (condition) {
+        case RegExpCondition:
+            yy_push_state(RegExp, scanner_); // flex saves the current start condition on the stack, then switches to the new one
             break;
-        case RegExpSlashEqual:
-            BEGIN(RegExpSlashEqual);
+@begin E4X
+        case XMLContentCondition:
+            yy_push_state(XMLContent, scanner_);
             break;
-        case RegExpSlashRight:
-            BEGIN(RegExpSlashRight);
+        case XMLTagCondition:
+            yy_push_state(XMLTag, scanner_);
             break;
+@end
         default:
             _assert(false);
     }
 }
+
+void CYDriver::PopCondition() { // restores the start condition most recently saved on flex's state stack
+    yy_pop_state(scanner_); // flex pops the saved state and switches back to it; NOTE(review): presumably every call is paired with an earlier PushCondition, confirm against callers
+}