Add extern "C" expression, for Functor's toCYON().

[cycript.git] / Analyze.cpp
diff --git a/Analyze.cpp b/Analyze.cpp

index 332903d2b7f64e80ca26ec8b103d4946d779ff9d..4f84e03e2fa1faa6d4df5bb1889325550a03e9b1 100644 (file)
--- a/Analyze.cpp
+++ b/Analyze.cpp
@@ -1,5 +1,5 @@
-/* Cycript - Optimizing JavaScript Compiler/Runtime
- * Copyright (C) 2009-2015  Jay Freeman (saurik)
+/* Cycript - The Truly Universal Scripting Language
+ * Copyright (C) 2009-2016  Jay Freeman (saurik)
  */
  
  /* GNU Affero General Public License, Version 3 {{{ */
@@ -19,6 +19,7 @@
  **/
  /* }}} */
  
+#include <cmath>
  #include <cstring>
  #include <iostream>
  #include <map>
@@ -68,6 +69,11 @@ struct CYCXString {
      {
      }
  
+    CYCXString(CXFile file) : // construct from a CXFile: holds the string returned by clang_getFileName
+        value_(clang_getFileName(file))
+    {
+    }
+
      CYCXString(CXTranslationUnit unit, CXToken token) :
          value_(clang_getTokenSpelling(unit, token))
      {
@@ -84,9 +90,50 @@ struct CYCXString {
      const char *Pool(CYPool &pool) const {
          return pool.strdup(*this);
      }
+
+    bool operator ==(const char *rhs) const { // compares this string's characters against a C string
+        const char *lhs(*this); // relies on this struct's conversion to const char *
+        return lhs == rhs || strcmp(lhs, rhs) == 0; // identical pointer short-circuits; otherwise compare contents
+    }
+};
+
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *) = clang_getSpellingLocation> // the location decoder is a template argument; defaults to clang_getSpellingLocation
+struct CYCXPosition {
+    CXFile file_; // file_, line_, column_ and offset_ are all filled in by clang_get_Location in the constructor
+    unsigned line_;
+    unsigned column_;
+    unsigned offset_;
+
+    CYCXPosition(CXSourceLocation location) {
+        clang_get_Location(location, &file_, &line_, &column_, &offset_);
+    }
+
+    CYCXPosition(CXTranslationUnit unit, CXToken token) :
+        CYCXPosition(clang_getTokenLocation(unit, token)) // delegate using the token's source location
+    {
+    }
+
+    CXSourceLocation Get(CXTranslationUnit unit) const {
+        return clang_getLocation(unit, file_, line_, column_); // rebuilt from file/line/column only; offset_ is not used here
+    }
  };
  
-typedef std::map<std::string, std::string> CYKeyMap;
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *)>
+std::ostream &operator <<(std::ostream &out, const CYCXPosition<clang_get_Location> &position) { // writes "[file]:line:column@offset" to out
+    if (position.file_ != NULL) // the "[file]:" prefix is omitted when the position has no file
+        out << "[" << CYCXString(position.file_) << "]:";
+    out << position.line_ << ":" << position.column_ << "@" << position.offset_;
+    return out;
+}
+
+struct CYKey { // mapped value of CYKeyMap: one entry per emitted name
+    unsigned priority_ = 0; // a candidate replaces the stored entry only when its priority is strictly greater
+
+    std::string code_; // generated code text for this name
+    unsigned flags_; // NOTE(review): unlike priority_, this has no default member initializer - confirm every writer sets it
+};
+
+typedef std::map<std::string, CYKey> CYKeyMap;
  
  struct CYChildBaton {
      CXTranslationUnit unit;
@@ -100,26 +147,68 @@ struct CYChildBaton {
  };
  
  struct CYTokens {
-    CXTranslationUnit unit;
-    CXToken *tokens;
-    unsigned count;
+  private:
+    CXTranslationUnit unit_;
+    CXToken *tokens_;
+    unsigned count_; // total number of tokens returned by clang_tokenize; used for disposal
+    unsigned valid_; // number of leading tokens accepted by the filter loop below; reported by size()
+
+  public:
+    CYTokens(CXTranslationUnit unit, CXSourceRange range) :
+        unit_(unit)
+    {
+        clang_tokenize(unit_, range, &tokens_, &count_); // fills tokens_ and count_
+
+
+        // libclang's tokenizer is horribly broken and returns "extra" tokens.
+        // this code goes back through the tokens and filters for good ones :/
+
+        CYCXPosition<> end(clang_getRangeEnd(range)); // position of the end of the requested range
+        CYCXString file(end.file_);
+
+        for (valid_ = 0; valid_ != count_; ++valid_) {
+            CYCXPosition<> position(unit, tokens_[valid_]);
+            _assert(CYCXString(position.file_) == file); // every inspected token must be in the same file as the range end
+            if (position.offset_ >= end.offset_) // first token at or past the range end: stop, leaving valid_ as the count before it
+                break;
+        }
+    }
  
      CYTokens(CXTranslationUnit unit, CXCursor cursor) :
-        unit(unit)
+        CYTokens(unit, clang_getCursorExtent(cursor)) // delegate to the range constructor with the cursor's extent
      {
-        CXSourceRange range(clang_getCursorExtent(cursor));
-        clang_tokenize(unit, range, &tokens, &count);
      }
  
      ~CYTokens() {
-        clang_disposeTokens(unit, tokens, count);
+        clang_disposeTokens(unit_, tokens_, count_); // disposes all count_ tokens, not only the valid_ ones
      }
  
      operator CXToken *() const {
-        return tokens;
+        return tokens_;
+    }
+
+    size_t size() const {
+        return valid_; // filtered count, which may be smaller than count_
      }
  };
  
+static CYUTF8String CYCXPoolUTF8Range(CYPool &pool, CXSourceRange range) { // returns the text between the range's start and end offsets, copied via pool.strndup
+    CYCXPosition<> start(clang_getRangeStart(range));
+    CYCXPosition<> end(clang_getRangeEnd(range));
+    CYCXString file(start.file_);
+    _assert(file == CYCXString(end.file_)); // the range must start and end in the same file
+
+    CYPool temp; // separate local pool passed to CYPoolFile; only the extracted slice is copied into the caller's pool
+    size_t size;
+    char *data(static_cast<char *>(CYPoolFile(temp, file, &size))); // NOTE(review): CYPoolFile is not shown here; presumably loads the file's bytes and stores their length in size - confirm
+    _assert(start.offset_ <= size && end.offset_ <= size && start.offset_ <= end.offset_); // both offsets within size, and ordered
+
+    CYUTF8String code;
+    code.size = end.offset_ - start.offset_;
+    code.data = pool.strndup(data + start.offset_, code.size); // copy just the [start, end) bytes
+    return code;
+}
+
  static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor cursor) {
      switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
          case CXCursor_CallExpr: {
@@ -140,10 +229,17 @@ static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor curs
          } break;
  
          case CXCursor_IntegerLiteral: {
-            CYTokens tokens(unit, cursor);
-            _assert(tokens.count != 0);
-            // XXX: I don't understand why this is often enormous :/
-            return $ CYNumber(CYCastDouble(CYCXString(unit, tokens[0])));
+            // libclang doesn't provide any reasonable way to do this
+            // note: clang_tokenize doesn't work if this is a macro
+            // the token range starts inside the macro but ends after it
+            // the tokenizer freaks out and either fails with 0 tokens
+            // or returns some massive number of tokens ending here :/
+
+            CYUTF8String token(CYCXPoolUTF8Range($pool, clang_getCursorExtent(cursor)));
+            double value(CYCastDouble(token));
+            if (std::isnan(value))
+                return $V(token.data);
+            return $ CYNumber(value);
          } break;
  
          case CXCursor_CStyleCastExpr:
@@ -199,48 +295,82 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
      CYCXString spelling(cursor);
      std::string name(spelling);
      std::ostringstream value;
+    unsigned priority(2);
+    unsigned flags(0);
  
      /*CXSourceLocation location(clang_getCursorLocation(cursor));
-
-    CXFile file;
-    unsigned line;
-    unsigned column;
-    unsigned offset;
-    clang_getSpellingLocation(location, &file, &line, &column, &offset);
-
-    if (file != NULL) {
-        CYCXString path(clang_getFileName(file));
-        std::cout << spelling << " " << path << ":" << line << std::endl;
-    }*/
+    CYCXPosition<> position(location);
+    std::cout << spelling << " " << position << std::endl;*/
  
      switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
          case CXCursor_EnumConstantDecl: {
              value << clang_getEnumConstantDeclValue(cursor);
          } break;
  
-        case CXCursor_MacroDefinition: {
-            CYTokens tokens(unit, cursor);
-            if (tokens.count <= 2)
-                goto skip;
+        case CXCursor_MacroDefinition: try {
+            CXSourceRange range(clang_getCursorExtent(cursor));
+            CYTokens tokens(unit, range);
+            _assert(tokens.size() != 0);
  
-            CXCursor cursors[tokens.count];
-            clang_annotateTokens(unit, tokens, tokens.count, cursors);
+            CXCursor cursors[tokens.size()];
+            clang_annotateTokens(unit, tokens, tokens.size(), cursors);
  
-            for (unsigned i(1); i != tokens.count - 1; ++i) {
+            CYLocalPool local;
+            CYList<CYFunctionParameter> parameters;
+            unsigned offset(1);
+
+            if (tokens.size() != 1) {
+                CYCXPosition<> start(clang_getRangeStart(range));
+                CYCXString first(unit, tokens[offset]);
+                if (first == "(") {
+                    CYCXPosition<> paren(unit, tokens[offset]);
+                    if (start.offset_ + strlen(spelling) == paren.offset_) {
+                        for (;;) {
+                            _assert(++offset != tokens.size());
+                            CYCXString token(unit, tokens[offset]);
+                            parameters->*$P($B($I(token.Pool($pool))));
+                            _assert(++offset != tokens.size());
+                            CYCXString comma(unit, tokens[offset]);
+                            if (comma == ")")
+                                break;
+                            _assert(comma == ",");
+                        }
+                        ++offset;
+                    }
+                }
+            }
+
+            std::ostringstream body;
+            for (unsigned i(offset); i != tokens.size(); ++i) {
                  CYCXString token(unit, tokens[i]);
-                if (i != 1)
-                    value << " ";
-                else if (strcmp(token, "(") == 0)
-                    goto skip;
-                value << token;
+                if (i != offset)
+                    body << " ";
+                body << token;
              }
+
+            if (!parameters)
+                value << body.str();
+            else {
+                CYOptions options;
+                CYOutput out(*value.rdbuf(), options);
+                out << '(' << "function" << '(';
+                out << parameters;
+                out << ')' << '{';
+                out << "return" << ' ';
+                value << body.str();
+                out << ';' << '}' << ')';
+            }
+        } catch (const CYException &error) {
+            CYPool pool;
+            //std::cerr << error.PoolCString(pool) << std::endl;
+            goto skip;
          } break;
  
          case CXCursor_StructDecl: {
-            if (!clang_isCursorDefinition(cursor))
-                goto skip;
              if (spelling[0] == '\0')
                  goto skip;
+            if (!clang_isCursorDefinition(cursor))
+                priority = 1;
  
              std::ostringstream types;
              std::ostringstream names;
@@ -253,8 +383,8 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
                  }
              }));
  
+            value << "new Type([" << types.str() << "],[" << names.str() << "]).withName(\"" << name << "\")";
              name += "$cy";
-            value << "new Type([" << types.str() << "],[" << names.str() << "])";
          } break;
  
          case CXCursor_TypedefDecl: {
@@ -292,7 +422,7 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
                          break;
  
                      default:
-                        std::cerr << "A:" << CYCXString(child) << std::endl;
+                        //std::cerr << "A:" << CYCXString(child) << std::endl;
                          break;
                  }
              }));
@@ -324,7 +454,14 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
          } break;
      }
  
-    baton.keys[name] = value.str();
+    {
+        CYKey &key(baton.keys[name]);
+        if (key.priority_ < priority) {
+            key.priority_ = priority;
+            key.code_ = value.str();
+            key.flags_ = flags;
+        }
+    }
  
    skip:
      return CXChildVisit_Continue;
@@ -353,11 +490,11 @@ int main(int argc, const char *argv[]) {
      clang_visitChildren(clang_getTranslationUnitCursor(unit), &CYChildVisit, &baton);
  
      for (CYKeyMap::const_iterator key(keys.begin()); key != keys.end(); ++key) {
-        std::string value(key->second);
-        for (size_t i(0), e(value.size()); i != e; ++i)
-            if (value[i] <= 0 || value[i] >= 0x7f || value[i] == '\n')
+        std::string code(key->second.code_);
+        for (size_t i(0), e(code.size()); i != e; ++i)
+            if (code[i] <= 0 || code[i] >= 0x7f || code[i] == '\n')
                  goto skip;
-        std::cout << key->first << "|\"" << value << "\"" << std::endl;
+        std::cout << key->first << "|" << key->second.flags_ << "\"" << code << "\"" << std::endl;
      skip:; }
  
      clang_disposeTranslationUnit(unit);