]> git.saurik.com Git - cycript.git/blobdiff - Cycript.l.in
I hate the Bison 2.3/2.4 split.
[cycript.git] / Cycript.l.in
index ec2befc4c78a2deb36e7be8ec57c80f719010d52..42ba069501d90e5f919a60075b36ee5fa7220c73 100644 (file)
@@ -1,6 +1,45 @@
-%{
-// XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf
+/* Cycript - Inlining/Optimizing JavaScript Compiler
+ * Copyright (C) 2009  Jay Freeman (saurik)
+*/
+
+/* Modified BSD License {{{ */
+/*
+ *        Redistribution and use in source and binary
+ * forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer in the documentation
+ *    and/or other materials provided with the
+ *    distribution.
+ * 3. The name of the author may not be used to endorse
+ *    or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* }}} */
+
+/* XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf */
 
+%{
 #define YYLTYPE cy::location
 #include "Cycript.tab.hh"
 typedef cy::parser::token tk;
@@ -30,7 +69,7 @@ typedef cy::parser::token tk;
             yyextra->state_ = CYNewLine; \
     }
 
-#define M { \
+#define V(more) { \
     if (const char *nl = reinterpret_cast<const char *>(memchr(yytext, '\n', yyleng))) { \
         unsigned lines(0); \
         size_t left; \
@@ -42,7 +81,7 @@ typedef cy::parser::token tk;
         yylloc->end.lines(lines); \
         yylloc->end.columns(left); \
         yylloc->step(); \
-        N \
+        more \
     } else L \
 }
 
@@ -68,7 +107,8 @@ int H(char c) {
     } else if (yyextra->size_ == 0) \
         value = YY_NULL; \
     else { \
-        size_t copy(std::min(size, yyextra->size_)); \
+        size_t copy(size); \
+        copy = (std::min(copy, yyextra->size_)); \
         memcpy(data, yyextra->data_, copy); \
         yyextra->data_ += copy; \
         yyextra->size_ -= copy; \
@@ -86,6 +126,7 @@ int H(char c) {
 %option nounput
 %option interactive
 %option reentrant
+%option stack
 
 Exponent [eE][+-]?[0-9]+
 Escape   \\[\\'"bfnrtv]|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n
@@ -101,27 +142,60 @@ RegularExpressionFlags {IdentifierPart}*
 RegularExpressionChars {RegularExpressionChar}*
 RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
 
+@begin E4X
+/* Character classes for E4X tag and attribute names. The hyphen is listed last
+ * in XMLNamePart so that flex reads it as a literal '-' rather than as a range
+ * operator (".-_" would span every character from '.' to '_', including
+ * '=', '>', '/' and '<'). */
+XMLNameStart [a-zA-Z_:]
+XMLNamePart [a-zA-Z0-9._:-]
+XMLName {XMLNameStart}{XMLNamePart}*
+@end
+
+/* Inclusive start conditions (%s): rules without a <...> prefix remain active while one of these is selected. */
 %s Div
 %s RegExp
 
+@begin E4X
+/* Exclusive start conditions (%x): only rules explicitly tagged <XMLContent> or <XMLTag> are active while one of these is selected. */
+%x XMLContent
+%x XMLTag
+@end
+
 %%
 
 <RegExp>\/{RegularExpressionBody}\/{RegularExpressionFlags} E("")
 
+    /* Line comments run to the end of the line. Block comments accept any run of '*' in
+     * the body as long as it is not followed by '/', and end on one or more '*' followed
+     * by '/', so forms such as "/***/" or a body ending in "**" close at the right place.
+     * The "/*!" form is tried first and is handed to the parser as a tk::Comment carrying
+     * a copy of its text; every other block comment goes through V(N). */
 \/\/[^\n]* L
-\/\*(\n|[^\*]|\*[^/])*\*\/ M
+\/\*!([^*]|\*+[^*/])*\*+\/ V() C yylval->comment_ = new(yyextra->pool_) CYComment(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Comment;
+\/\*([^*]|\*+[^*/])*\*+\/ V(N)
 
 @begin E4X
-<RegExp>\<!--(\n|[^-]|-[^-])*--> 
-<RegExp>\<!\[CDATA\[(\n|[^[]|\[[^[]|\[\[[^>])*]]> 
-<RegExp>\<?(\n|[^?]|\?[^>])*?> 
+<RegExp>"<>"      L return tk::LeftRight;
+<XMLContent>"</>" L return tk::LeftSlashRight;
 
-"@"    L C return tk::At;
+<RegExp,XMLContent>\<!\[CDATA\[(\n|[^[]|\[[^[]|\[\[[^>])*]]> V() return tk::XMLCDATA;
+<RegExp,XMLContent>\<!--(\n|[^-]|-[^-])*--> V() return tk::XMLComment;
+<RegExp,XMLContent>\<?(\n|[^?]|\?[^>])*?> V() return tk::XMLPI;
+
+<XMLTag>"="  L return tk::Equal;
+<XMLTag>">"  L return tk::Right;
+<XMLTag>"/>" L return tk::SlashRight;
+<XMLTag>"{"  L return tk::OpenBrace;
+
+<XMLTag>\"(\n|[^"])*\"|'(\n|[^'])*' V() {
+    return tk::XMLAttributeValue;
+}
+
+<XMLTag>{XMLName} L return tk::XMLName;
+<XMLTag>[ \t\r\n] V() return tk::XMLWhitespace;
+
+<XMLContent>"{"  L return tk::OpenBrace;
+<XMLContent>"<"  L return tk::Left;
+<XMLContent>"</" L return tk::LeftSlash;
+@end
+
+@begin E4X
+    /* Operators that exist only when the E4X section is kept by the @begin/@end filter. */
 "::"   L C return tk::ColonColon;
-"<>"   L C return tk::LeftRight;
-"</>"  L C return tk::LeftSlashRight;
 ".."   L C return tk::PeriodPeriod;
-"/>"   L C return tk::SlashRight;
+@end
+
+@begin E4X ObjectiveC
+    /* NOTE(review): presumably kept when either the E4X or the ObjectiveC filter is selected - confirm against the filter script. */
+"@"    L C return tk::At;
 @end
 
 "&"    L C return tk::Ampersand;
@@ -256,12 +330,17 @@ RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
 
 "each"         L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("each"); return tk::Each;
 
+@begin E4X
+    /* E4X words: each stores a CYIdentifier holding its own spelling in yylval, but is
+     * returned under a dedicated token kind (tk::Namespace / tk::XML). */
+"namespace"    L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("namespace"); return tk::Namespace;
+"xml"          L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("xml"); return tk::XML;
+@end
+
 {IdentifierStart}{IdentifierPart}* L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Identifier_;
 
+    /* Decimal literals (optional fraction and exponent) are converted with strtod. */
 (\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? L C yylval->number_ = new(yyextra->pool_) CYNumber(strtod(yytext, NULL)); return tk::NumericLiteral;
 
+    /* Radix-prefixed integers: the prefix ("0x", a leading "0", or "0b") is skipped and the
+     * remaining digits are converted by strtoull in base 16, 8 or 2 respectively. */
 0[xX][0-9a-fA-F]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 16)); return tk::NumericLiteral;
-
+0[0-7]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 1, NULL, 8)); return tk::NumericLiteral;
 0[bB][0-1]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 2)); return tk::NumericLiteral;
 
 \"([^"\\\n]|{Escape})*\"|'([^'\\\n]|{Escape})*' L C {
@@ -323,14 +402,59 @@ void CYDriver::ScannerDestroy() {
     cylex_destroy(scanner_);
 }
 
-void CYDriver::BeginCondition(Condition condition) {
+/* Translates a flex start condition into the driver's Condition enum.
+ * Only RegExp (and, in the E4X section, XMLContent and XMLTag) have a mapping;
+ * any other value reaches _assert(false).
+ * NOTE(review): yy_top_state() reports the entry on top of the start-condition
+ * stack. Per the flex manual that is the condition saved by the most recent
+ * yy_push_state(), not the currently active one (YY_START). Confirm this is the
+ * value callers want, and that the stack is never empty when this is called. */
+CYDriver::Condition CYDriver::GetCondition() {
+    switch (yy_top_state(scanner_)) {
+        case RegExp:
+            return RegExpCondition;
+@begin E4X
+        case XMLContent:
+            return XMLContentCondition;
+        case XMLTag:
+            return XMLTagCondition;
+@end
+        default:
+            // No return follows; presumably _assert does not return when its argument is false - TODO confirm.
+            _assert(false);
+    }
+}
+
+/* Switches the scanner to the start condition that corresponds to `condition`
+ * using BEGIN; the start-condition stack is not touched here.
+ * A Condition without a case below reaches _assert(false). */
+void CYDriver::SetCondition(Condition condition) {
+    // Flex's BEGIN macro refers to a local named yyg in reentrant scanners (assumed from %option reentrant - confirm).
     struct yyguts_t *yyg(reinterpret_cast<struct yyguts_t *>(scanner_));
 
     switch (condition) {
         case RegExpCondition:
             BEGIN(RegExp);
             break;
+@begin E4X
+        case XMLContentCondition:
+            BEGIN(XMLContent);
+            break;
+        case XMLTagCondition:
+            BEGIN(XMLTag);
+            break;
+@end
+        default:
+            _assert(false);
+    }
+}
+
+/* Enters the start condition that corresponds to `condition` through
+ * yy_push_state(), which (per the flex manual) saves the current condition on
+ * the start-condition stack first so that PopCondition() can restore it.
+ * A Condition without a case below reaches _assert(false). */
+void CYDriver::PushCondition(Condition condition) {
+    switch (condition) {
+        case RegExpCondition:
+            yy_push_state(RegExp, scanner_);
+            break;
+@begin E4X
+        case XMLContentCondition:
+            yy_push_state(XMLContent, scanner_);
+            break;
+        case XMLTagCondition:
+            yy_push_state(XMLTag, scanner_);
+            break;
+@end
         default:
             _assert(false);
     }
 }
+
+/* Leaves the current start condition by calling yy_pop_state(), which (per the
+ * flex manual) switches back to the condition on top of the start-condition stack. */
+void CYDriver::PopCondition() {
+    yy_pop_state(scanner_);
+}