I hate the Bison 2.3/2.4 split.

[cycript.git] / Cycript.l.in
diff --git a/Cycript.l.in b/Cycript.l.in

index ec2befc4c78a2deb36e7be8ec57c80f719010d52..42ba069501d90e5f919a60075b36ee5fa7220c73 100644 (file)
--- a/Cycript.l.in
+++ b/Cycript.l.in
@@ -1,6 +1,45 @@
-%{
-// XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf
+/* Cycript - Inlining/Optimizing JavaScript Compiler
+ * Copyright (C) 2009  Jay Freeman (saurik)
+*/
+
+/* Modified BSD License {{{ */
+/*
+ *        Redistribution and use in source and binary
+ * forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the
+ *    above copyright notice, this list of conditions
+ *    and the following disclaimer in the documentation
+ *    and/or other materials provided with the
+ *    distribution.
+ * 3. The name of the author may not be used to endorse
+ *    or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* }}} */
+
+/* XXX: supposedly I will be screwed on very very long multi-line comments and need to replace these with a manual lexer. http://websrv.cs.fsu.edu/~engelen/courses/COP5621/Pr2.pdf */
  
+%{
  #define YYLTYPE cy::location
  #include "Cycript.tab.hh"
  typedef cy::parser::token tk;
@@ -30,7 +69,7 @@ typedef cy::parser::token tk;
              yyextra->state_ = CYNewLine; \
      }
  
-#define M { \
+#define V(more) { \
      if (const char *nl = reinterpret_cast<const char *>(memchr(yytext, '\n', yyleng))) { \
          unsigned lines(0); \
          size_t left; \
@@ -42,7 +81,7 @@ typedef cy::parser::token tk;
          yylloc->end.lines(lines); \
          yylloc->end.columns(left); \
          yylloc->step(); \
-        N \
+        more \
      } else L \
  }
  
@@ -68,7 +107,8 @@ int H(char c) {
      } else if (yyextra->size_ == 0) \
          value = YY_NULL; \
      else { \
-        size_t copy(std::min(size, yyextra->size_)); \
+        size_t copy(size); \
+        copy = (std::min(copy, yyextra->size_)); \
          memcpy(data, yyextra->data_, copy); \
          yyextra->data_ += copy; \
          yyextra->size_ -= copy; \
@@ -86,6 +126,7 @@ int H(char c) {
  %option nounput
  %option interactive
  %option reentrant
+%option stack
  
  Exponent [eE][+-]?[0-9]+
  Escape   \\[\\'"bfnrtv]|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n
@@ -101,27 +142,60 @@ RegularExpressionFlags {IdentifierPart}*
  RegularExpressionChars {RegularExpressionChar}*
  RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
  
+@begin E4X
+XMLNameStart [a-zA-Z_:]
+XMLNamePart [a-zA-Z0-9._:-]
+XMLName {XMLNameStart}{XMLNamePart}*
+@end
+
  %s Div
  %s RegExp
  
+@begin E4X
+%x XMLContent
+%x XMLTag
+@end
+
  %%
  
  <RegExp>\/{RegularExpressionBody}\/{RegularExpressionFlags} E("")
  
  \/\/[^\n]* L
-\/\*(\n|[^\*]|\*[^/])*\*\/ M
+\/\*!([^*]|\*+[^*/])*\*+\/ V() C yylval->comment_ = new(yyextra->pool_) CYComment(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Comment; /* bang comment kept as a token; the body ends at the first star-slash, even after a run of stars */
+\/\*([^*]|\*+[^*/])*\*+\/ V(N) /* ordinary block comment; same terminator handling as the rule above it */
  
  @begin E4X
-<RegExp>\<!--(\n|[^-]|-[^-])*--> 
-<RegExp>\<!\[CDATA\[(\n|[^[]|\[[^[]|\[\[[^>])*]]> 
-<RegExp>\<?(\n|[^?]|\?[^>])*?> 
+<RegExp>"<>"      L return tk::LeftRight;
+<XMLContent>"</>" L return tk::LeftSlashRight;
  
-"@"    L C return tk::At;
+<RegExp,XMLContent>\<!\[CDATA\[([^\]]|\][^\]]|\]\]+[^\]>])*\]\]+> V() return tk::XMLCDATA; /* body may not contain the closing bracket-bracket-angle sequence, so the section ends at the first one */
+<RegExp,XMLContent>\<!--(\n|[^-]|-[^-])*--> V() return tk::XMLComment;
+<RegExp,XMLContent>\<\?([^?]|\?+[^?>])*\?+> V() return tk::XMLPI; /* question marks are escaped so they match literally instead of acting as the optional operator */
+
+<XMLTag>"="  L return tk::Equal;
+<XMLTag>">"  L return tk::Right;
+<XMLTag>"/>" L return tk::SlashRight;
+<XMLTag>"{"  L return tk::OpenBrace;
+
+<XMLTag>\"(\n|[^"])*\"|'(\n|[^'])*' V() {
+    return tk::XMLAttributeValue;
+}
+
+<XMLTag>{XMLName} L return tk::XMLName;
+<XMLTag>[ \t\r\n] V() return tk::XMLWhitespace;
+
+<XMLContent>"{"  L return tk::OpenBrace;
+<XMLContent>"<"  L return tk::Left;
+<XMLContent>"</" L return tk::LeftSlash;
+@end
+
+@begin E4X
  "::"   L C return tk::ColonColon;
-"<>"   L C return tk::LeftRight;
-"</>"  L C return tk::LeftSlashRight;
  ".."   L C return tk::PeriodPeriod;
-"/>"   L C return tk::SlashRight;
+@end
+
+@begin E4X ObjectiveC
+"@"    L C return tk::At;
  @end
  
  "&"    L C return tk::Ampersand;
@@ -256,12 +330,17 @@ RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
  
  "each"         L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("each"); return tk::Each;
  
+@begin E4X
+"namespace"    L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("namespace"); return tk::Namespace;
+"xml"          L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier("xml"); return tk::XML;
+@end
+
  {IdentifierStart}{IdentifierPart}* L C yylval->identifier_ = new(yyextra->pool_) CYIdentifier(apr_pstrmemdup(yyextra->pool_, yytext, yyleng)); return tk::Identifier_;
  
  (\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?){Exponent}? L C yylval->number_ = new(yyextra->pool_) CYNumber(strtod(yytext, NULL)); return tk::NumericLiteral;
  
  0[xX][0-9a-fA-F]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 16)); return tk::NumericLiteral;
-
+0[0-7]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 1, NULL, 8)); return tk::NumericLiteral;
  0[bB][0-1]+ L C yylval->number_ = new(yyextra->pool_) CYNumber(strtoull(yytext + 2, NULL, 2)); return tk::NumericLiteral;
  
  \"([^"\\\n]|{Escape})*\"|'([^'\\\n]|{Escape})*' L C {
@@ -323,14 +402,59 @@ void CYDriver::ScannerDestroy() {
      cylex_destroy(scanner_);
  }
  
-void CYDriver::BeginCondition(Condition condition) {
+CYDriver::Condition CYDriver::GetCondition() { // Translates the scanner state reported by yy_top_state() into the matching Condition value.
+    switch (yy_top_state(scanner_)) { // NOTE(review): flex documents yy_top_state() as the top of the start-condition stack (what a pop would restore), not the active state (YY_START) -- confirm this is intended and that the stack cannot be empty here.
+        case RegExp:
+            return RegExpCondition;
+@begin E4X
+        case XMLContent:
+            return XMLContentCondition;
+        case XMLTag:
+            return XMLTagCondition;
+@end
+        default: // any other start condition has no Condition counterpart
+            _assert(false);
+    }
+}
+
+void CYDriver::SetCondition(Condition condition) { // Switches the scanner to the start condition named by `condition` using BEGIN; the start-condition stack is not touched here.
      struct yyguts_t *yyg(reinterpret_cast<struct yyguts_t *>(scanner_));
  
      switch (condition) {
          case RegExpCondition:
              BEGIN(RegExp);
              break;
+@begin E4X
+        case XMLContentCondition:
+            BEGIN(XMLContent);
+            break;
+        case XMLTagCondition:
+            BEGIN(XMLTag);
+            break;
+@end
+        default: // unknown Condition value
+            _assert(false);
+    }
+}
+
+void CYDriver::PushCondition(Condition condition) { // Enters the start condition named by `condition` through yy_push_state(), which flex documents as saving the current condition on its stack first.
+    switch (condition) {
+        case RegExpCondition:
+            yy_push_state(RegExp, scanner_);
+            break;
+@begin E4X
+        case XMLContentCondition:
+            yy_push_state(XMLContent, scanner_);
+            break;
+        case XMLTagCondition:
+            yy_push_state(XMLTag, scanner_);
+            break;
+@end
          default:
              _assert(false);
      }
  }
+
+void CYDriver::PopCondition() { // Pops the scanner's start-condition stack and resumes the popped condition.
+    yy_pop_state(scanner_); // NOTE(review): presumably must be paired with an earlier yy_push_state(); flex treats popping an empty stack as a fatal error -- verify callers.
+}