]> git.saurik.com Git - cycript.git/blobdiff - Analyze.cpp
Improve support for char values (from JS strings).
[cycript.git] / Analyze.cpp
index 332903d2b7f64e80ca26ec8b103d4946d779ff9d..f1ef1073652197e10ebad5b610b8d713ca96f610 100644 (file)
@@ -1,5 +1,5 @@
-/* Cycript - Optimizing JavaScript Compiler/Runtime
- * Copyright (C) 2009-2015  Jay Freeman (saurik)
+/* Cycript - The Truly Universal Scripting Language
+ * Copyright (C) 2009-2016  Jay Freeman (saurik)
 */
 
 /* GNU Affero General Public License, Version 3 {{{ */
@@ -19,6 +19,7 @@
 **/
 /* }}} */
 
+#include <cmath>
 #include <cstring>
 #include <iostream>
 #include <map>
@@ -27,6 +28,7 @@
 
 #include <clang-c/Index.h>
 
+#include "Bridge.hpp"
 #include "Functor.hpp"
 #include "Replace.hpp"
 #include "Syntax.hpp"
@@ -68,6 +70,11 @@ struct CYCXString {
     {
     }
 
+    CYCXString(CXFile file) :
+        value_(clang_getFileName(file))
+    {
+    }
+
     CYCXString(CXTranslationUnit unit, CXToken token) :
         value_(clang_getTokenSpelling(unit, token))
     {
@@ -84,9 +91,50 @@ struct CYCXString {
     const char *Pool(CYPool &pool) const {
         return pool.strdup(*this);
     }
+
+    bool operator ==(const char *rhs) const {
+        const char *lhs(*this);
+        return lhs == rhs || strcmp(lhs, rhs) == 0;
+    }
 };
 
-typedef std::map<std::string, std::string> CYKeyMap;
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *) = clang_getSpellingLocation>
+struct CYCXPosition {
+    CXFile file_;
+    unsigned line_;
+    unsigned column_;
+    unsigned offset_;
+
+    CYCXPosition(CXSourceLocation location) {
+        clang_get_Location(location, &file_, &line_, &column_, &offset_);
+    }
+
+    CYCXPosition(CXTranslationUnit unit, CXToken token) :
+        CYCXPosition(clang_getTokenLocation(unit, token))
+    {
+    }
+
+    CXSourceLocation Get(CXTranslationUnit unit) const {
+        return clang_getLocation(unit, file_, line_, column_);
+    }
+};
+
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *)>
+std::ostream &operator <<(std::ostream &out, const CYCXPosition<clang_get_Location> &position) {
+    if (position.file_ != NULL)
+        out << "[" << CYCXString(position.file_) << "]:";
+    out << position.line_ << ":" << position.column_ << "@" << position.offset_;
+    return out;
+}
+
+struct CYKey {
+    unsigned priority_ = 0;
+
+    std::string code_;
+    unsigned flags_;
+};
+
+typedef std::map<std::string, CYKey> CYKeyMap;
 
 struct CYChildBaton {
     CXTranslationUnit unit;
@@ -100,26 +148,68 @@ struct CYChildBaton {
 };
 
 struct CYTokens {
-    CXTranslationUnit unit;
-    CXToken *tokens;
-    unsigned count;
+  private:
+    CXTranslationUnit unit_;
+    CXToken *tokens_;
+    unsigned count_;
+    unsigned valid_;
+
+  public:
+    CYTokens(CXTranslationUnit unit, CXSourceRange range) :
+        unit_(unit)
+    {
+        clang_tokenize(unit_, range, &tokens_, &count_);
+
+
+        // libclang's tokenizer is horribly broken and returns "extra" tokens.
+        // this code goes back through the tokens and filters for good ones :/
+
+        CYCXPosition<> end(clang_getRangeEnd(range));
+        CYCXString file(end.file_);
+
+        for (valid_ = 0; valid_ != count_; ++valid_) {
+            CYCXPosition<> position(unit, tokens_[valid_]);
+            _assert(CYCXString(position.file_) == file);
+            if (position.offset_ >= end.offset_)
+                break;
+        }
+    }
 
     CYTokens(CXTranslationUnit unit, CXCursor cursor) :
-        unit(unit)
+        CYTokens(unit, clang_getCursorExtent(cursor))
     {
-        CXSourceRange range(clang_getCursorExtent(cursor));
-        clang_tokenize(unit, range, &tokens, &count);
     }
 
     ~CYTokens() {
-        clang_disposeTokens(unit, tokens, count);
+        clang_disposeTokens(unit_, tokens_, count_);
     }
 
     operator CXToken *() const {
-        return tokens;
+        return tokens_;
+    }
+
+    size_t size() const {
+        return valid_;
     }
 };
 
+static CYUTF8String CYCXPoolUTF8Range(CYPool &pool, CXSourceRange range) {
+    CYCXPosition<> start(clang_getRangeStart(range));
+    CYCXPosition<> end(clang_getRangeEnd(range));
+    CYCXString file(start.file_);
+    _assert(file == CYCXString(end.file_));
+
+    CYPool temp;
+    size_t size;
+    char *data(static_cast<char *>(CYPoolFile(temp, file, &size)));
+    _assert(start.offset_ <= size && end.offset_ <= size && start.offset_ <= end.offset_);
+
+    CYUTF8String code;
+    code.size = end.offset_ - start.offset_;
+    code.data = pool.strndup(data + start.offset_, code.size);
+    return code;
+}
+
 static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor cursor) {
     switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
         case CXCursor_CallExpr: {
@@ -140,10 +230,17 @@ static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor curs
         } break;
 
         case CXCursor_IntegerLiteral: {
-            CYTokens tokens(unit, cursor);
-            _assert(tokens.count != 0);
-            // XXX: I don't understand why this is often enormous :/
-            return $ CYNumber(CYCastDouble(CYCXString(unit, tokens[0])));
+            // libclang doesn't provide any reasonable way to do this
+            // note: clang_tokenize doesn't work if this is a macro
+            // the token range starts inside the macro but ends after it
+            // the tokenizer freaks out and either fails with 0 tokens
+            // or returns some massive number of tokens ending here :/
+
+            CYUTF8String token(CYCXPoolUTF8Range($pool, clang_getCursorExtent(cursor)));
+            double value(CYCastDouble(token));
+            if (std::isnan(value))
+                return $V(token.data);
+            return $ CYNumber(value);
         } break;
 
         case CXCursor_CStyleCastExpr:
@@ -192,78 +289,331 @@ static CYStatement *CYTranslateBlock(CXTranslationUnit unit, CXCursor cursor) {
     return $ CYBlock(statements);
 }
 
+static CYType *CYDecodeType(CXType type);
+static void CYParseType(CXType type, CYType *typed);
+
+static void CYParseEnumeration(CXCursor cursor, CYType *typed) {
+    CYList<CYEnumConstant> constants;
+
+    CYForChild(cursor, fun([&](CXCursor child) {
+        if (clang_getCursorKind(child) == CXCursor_EnumConstantDecl)
+            constants->*$ CYEnumConstant($I($pool.strdup(CYCXString(child))), $D(clang_getEnumConstantDeclValue(child)));
+    }));
+
+    CYType *integer(CYDecodeType(clang_getEnumDeclIntegerType(cursor)));
+    typed->specifier_ = $ CYTypeEnum(NULL, integer->specifier_, constants);
+}
+
+static void CYParseStructure(CXCursor cursor, CYType *typed) {
+    CYList<CYTypeStructField> fields;
+    CYForChild(cursor, fun([&](CXCursor child) {
+        if (clang_getCursorKind(child) == CXCursor_FieldDecl)
+            fields->*$ CYTypeStructField(CYDecodeType(clang_getCursorType(child)), $I(CYCXString(child).Pool($pool)));
+    }));
+
+    typed->specifier_ = $ CYTypeStruct(NULL, $ CYStructTail(fields));
+}
+
+static void CYParseCursor(CXType type, CXCursor cursor, CYType *typed) {
+    CYCXString spelling(cursor);
+
+    switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
+        case CXCursor_EnumDecl:
+            if (spelling[0] != '\0')
+                typed->specifier_ = $ CYTypeReference(CYTypeReferenceEnum, $I(spelling.Pool($pool)));
+            else
+                CYParseEnumeration(cursor, typed);
+        break;
+
+        case CXCursor_StructDecl: {
+            if (spelling[0] != '\0')
+                typed->specifier_ = $ CYTypeReference(CYTypeReferenceStruct, $I(spelling.Pool($pool)));
+            else
+                CYParseStructure(cursor, typed);
+        } break;
+
+        case CXCursor_UnionDecl: {
+            _assert(false);
+        } break;
+
+        default:
+            std::cerr << "C:" << CYCXString(kind) << std::endl;
+            _assert(false);
+            break;
+    }
+}
+
+static CYTypedParameter *CYParseSignature(CXType type, CYType *typed) {
+    CYParseType(clang_getResultType(type), typed);
+    CYList<CYTypedParameter> parameters;
+    for (int i(0), e(clang_getNumArgTypes(type)); i != e; ++i)
+        parameters->*$ CYTypedParameter(CYDecodeType(clang_getArgType(type, i)), NULL);
+    return parameters;
+}
+
+static void CYParseFunction(CXType type, CYType *typed) {
+    typed = typed->Modify($ CYTypeFunctionWith(clang_isFunctionTypeVariadic(type), CYParseSignature(type, typed)));
+}
+
+static void CYParseType(CXType type, CYType *typed) {
+    switch (CXTypeKind kind = type.kind) {
+        case CXType_Unexposed: {
+            CXType result(clang_getResultType(type));
+            if (result.kind == CXType_Invalid)
+                CYParseCursor(type, clang_getTypeDeclaration(type), typed);
+            else
+                // clang marks function pointers as Unexposed but still supports them
+                CYParseFunction(type, typed);
+        } break;
+
+        case CXType_Bool: typed->specifier_ = $ CYTypeVariable("bool"); break;
+        case CXType_Float: typed->specifier_ = $ CYTypeVariable("float"); break;
+        case CXType_Double: typed->specifier_ = $ CYTypeVariable("double"); break;
+
+        case CXType_Char_U: typed->specifier_ = $ CYTypeCharacter(CYTypeNeutral); break;
+        case CXType_Char_S: typed->specifier_ = $ CYTypeCharacter(CYTypeNeutral); break;
+        case CXType_SChar: typed->specifier_ = $ CYTypeCharacter(CYTypeSigned); break;
+        case CXType_UChar: typed->specifier_ = $ CYTypeCharacter(CYTypeUnsigned); break;
+
+        case CXType_Short: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 0); break;
+        case CXType_UShort: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 0); break;
+
+        case CXType_Int: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 1); break;
+        case CXType_UInt: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 1); break;
+
+        case CXType_Long: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 2); break;
+        case CXType_ULong: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 2); break;
+
+        case CXType_LongLong: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 3); break;
+        case CXType_ULongLong: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 3); break;
+
+        case CXType_Int128: typed->specifier_ = $ CYTypeInt128(CYTypeSigned); break;
+        case CXType_UInt128: typed->specifier_ = $ CYTypeInt128(CYTypeUnsigned); break;
+
+        case CXType_BlockPointer: {
+            CXType pointee(clang_getPointeeType(type));
+            _assert(!clang_isFunctionTypeVariadic(pointee));
+            typed = typed->Modify($ CYTypeBlockWith(CYParseSignature(pointee, typed)));
+        } break;
+
+        case CXType_ConstantArray:
+            CYParseType(clang_getArrayElementType(type), typed);
+            typed = typed->Modify($ CYTypeArrayOf($D(clang_getArraySize(type))));
+        break;
+
+        case CXType_Enum:
+            typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeSpelling(type))));
+        break;
+
+        case CXType_FunctionProto:
+            CYParseFunction(type, typed);
+        break;
+
+        case CXType_IncompleteArray:
+            // XXX: I probably should not decay to Pointer
+            CYParseType(clang_getArrayElementType(type), typed);
+            typed = typed->Modify($ CYTypePointerTo());
+        break;
+
+        case CXType_ObjCClass:
+            typed->specifier_ = $ CYTypeVariable("Class");
+        break;
+
+        case CXType_ObjCId:
+            typed->specifier_ = $ CYTypeVariable("id");
+        break;
+
+        case CXType_ObjCInterface:
+            typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeSpelling(type))));
+        break;
+
+        case CXType_ObjCObjectPointer: {
+            CXType pointee(clang_getPointeeType(type));
+            if (pointee.kind != CXType_Unexposed) {
+                CYParseType(pointee, typed);
+                typed = typed->Modify($ CYTypePointerTo());
+            } else
+                // Clang seems to have internal typedefs for id and Class that are awkward
+                _assert(false);
+        } break;
+
+        case CXType_ObjCSel:
+            typed->specifier_ = $ CYTypeVariable("SEL");
+        break;
+
+        case CXType_Pointer:
+            CYParseType(clang_getPointeeType(type), typed);
+            typed = typed->Modify($ CYTypePointerTo());
+        break;
+
+        case CXType_Record:
+            typed->specifier_ = $ CYTypeReference(CYTypeReferenceStruct, $I($pool.strdup(CYCXString(clang_getTypeSpelling(type)))));
+        break;
+
+        case CXType_Typedef:
+            // use the declaration in order to isolate the name of the typedef itself
+            typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeDeclaration(type))));
+        break;
+
+        case CXType_Vector:
+            _assert(false);
+        break;
+
+        case CXType_Void:
+            typed->specifier_ = $ CYTypeVoid();
+        break;
+
+        default:
+            std::cerr << "T:" << CYCXString(clang_getTypeKindSpelling(kind)) << std::endl;
+            std::cerr << "_: " << CYCXString(clang_getTypeSpelling(type)) << std::endl;
+            _assert(false);
+    }
+
+    if (clang_isConstQualifiedType(type))
+        typed = typed->Modify($ CYTypeConstant());
+}
+
+static CYType *CYDecodeType(CXType type) {
+    CYType *typed($ CYType(NULL));
+    CYParseType(type, typed);
+    return typed;
+}
+
 static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClientData arg) {
     CYChildBaton &baton(*static_cast<CYChildBaton *>(arg));
     CXTranslationUnit &unit(baton.unit);
 
+    CXChildVisitResult result(CXChildVisit_Continue);
     CYCXString spelling(cursor);
     std::string name(spelling);
     std::ostringstream value;
+    unsigned priority(2);
+    unsigned flags(CYBridgeHold);
 
     /*CXSourceLocation location(clang_getCursorLocation(cursor));
+    CYCXPosition<> position(location);
+    std::cerr << spelling << " " << position << std::endl;*/
 
-    CXFile file;
-    unsigned line;
-    unsigned column;
-    unsigned offset;
-    clang_getSpellingLocation(location, &file, &line, &column, &offset);
-
-    if (file != NULL) {
-        CYCXString path(clang_getFileName(file));
-        std::cout << spelling << " " << path << ":" << line << std::endl;
-    }*/
-
-    switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
+    try { switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
         case CXCursor_EnumConstantDecl: {
             value << clang_getEnumConstantDeclValue(cursor);
         } break;
 
-        case CXCursor_MacroDefinition: {
-            CYTokens tokens(unit, cursor);
-            if (tokens.count <= 2)
+        case CXCursor_EnumDecl: {
+            if (spelling[0] == '\0')
                 goto skip;
+            // XXX: this was blindly copied from StructDecl
+            if (!clang_isCursorDefinition(cursor))
+                priority = 1;
+
+            CYLocalPool pool;
+
+            CYType typed;
+            CYParseEnumeration(cursor, &typed);
+
+            CYOptions options;
+            CYOutput out(*value.rdbuf(), options);
+            CYTypeExpression(&typed).Output(out, CYNoBFC);
 
-            CXCursor cursors[tokens.count];
-            clang_annotateTokens(unit, tokens, tokens.count, cursors);
+            value << ".withName(\"" << name << "\")";
+            name = "$cye" + name;
+            flags = CYBridgeType;
 
-            for (unsigned i(1); i != tokens.count - 1; ++i) {
+            // the enum constants are implemented separately *also*
+            // XXX: maybe move output logic to function we can call
+            result = CXChildVisit_Recurse;
+        } break;
+
+        case CXCursor_MacroDefinition: {
+            CXSourceRange range(clang_getCursorExtent(cursor));
+            CYTokens tokens(unit, range);
+            _assert(tokens.size() != 0);
+
+            CXCursor cursors[tokens.size()];
+            clang_annotateTokens(unit, tokens, tokens.size(), cursors);
+
+            CYLocalPool local;
+            CYList<CYFunctionParameter> parameters;
+            unsigned offset(1);
+
+            if (tokens.size() != 1) {
+                CYCXPosition<> start(clang_getRangeStart(range));
+                CYCXString first(unit, tokens[offset]);
+                if (first == "(") {
+                    CYCXPosition<> paren(unit, tokens[offset]);
+                    if (start.offset_ + strlen(spelling) == paren.offset_) {
+                        for (;;) {
+                            _assert(++offset != tokens.size());
+                            CYCXString token(unit, tokens[offset]);
+                            parameters->*$P($B($I(token.Pool($pool))));
+                            _assert(++offset != tokens.size());
+                            CYCXString comma(unit, tokens[offset]);
+                            if (comma == ")")
+                                break;
+                            _assert(comma == ",");
+                        }
+                        ++offset;
+                    }
+                }
+            }
+
+            std::ostringstream body;
+            for (unsigned i(offset); i != tokens.size(); ++i) {
                 CYCXString token(unit, tokens[i]);
-                if (i != 1)
-                    value << " ";
-                else if (strcmp(token, "(") == 0)
-                    goto skip;
-                value << token;
+                if (i != offset)
+                    body << " ";
+                body << token;
+            }
+
+            if (!parameters)
+                value << body.str();
+            else {
+                CYOptions options;
+                CYOutput out(*value.rdbuf(), options);
+                out << '(' << "function" << '(';
+                out << parameters;
+                out << ')' << '{';
+                out << "return" << ' ';
+                value << body.str();
+                out << ';' << '}' << ')';
             }
         } break;
 
         case CXCursor_StructDecl: {
-            if (!clang_isCursorDefinition(cursor))
-                goto skip;
             if (spelling[0] == '\0')
                 goto skip;
+            if (!clang_isCursorDefinition(cursor))
+                priority = 1;
 
-            std::ostringstream types;
-            std::ostringstream names;
+            CYLocalPool pool;
 
-            CYForChild(cursor, fun([&](CXCursor child) {
-                if (clang_getCursorKind(child) == CXCursor_FieldDecl) {
-                    CXType type(clang_getCursorType(child));
-                    types << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << "),";
-                    names << "'" << CYCXString(child) << "',";
-                }
-            }));
+            CYType typed;
+            CYParseStructure(cursor, &typed);
 
-            name += "$cy";
-            value << "new Type([" << types.str() << "],[" << names.str() << "])";
+            CYOptions options;
+            CYOutput out(*value.rdbuf(), options);
+            CYTypeExpression(&typed).Output(out, CYNoBFC);
+
+            value << ".withName(\"" << name << "\")";
+            name = "$cys" + name;
+            flags = CYBridgeType;
         } break;
 
         case CXCursor_TypedefDecl: {
-            CXType type(clang_getTypedefDeclUnderlyingType(cursor));
-            value << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << ")";
+            CYLocalPool local;
+
+            CYType *typed(CYDecodeType(clang_getTypedefDeclUnderlyingType(cursor)));
+            if (typed->specifier_ == NULL)
+                value << "(typedef " << CYCXString(clang_getTypeSpelling(clang_getTypedefDeclUnderlyingType(cursor))) << ")";
+            else {
+                CYOptions options;
+                CYOutput out(*value.rdbuf(), options);
+                CYTypeExpression(typed).Output(out, CYNoBFC);
+            }
         } break;
 
         case CXCursor_FunctionDecl:
-        case CXCursor_VarDecl: try {
+        case CXCursor_VarDecl: {
             std::string label;
 
             CYList<CYFunctionParameter> parameters;
@@ -292,7 +642,7 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
                         break;
 
                     default:
-                        std::cerr << "A:" << CYCXString(child) << std::endl;
+                        //std::cerr << "A:" << CYCXString(child) << std::endl;
                         break;
                 }
             }));
@@ -313,21 +663,26 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien
                 function->Output(out, CYNoBFC);
                 //std::cerr << value.str() << std::endl;
             }
-        } catch (const CYException &error) {
-            CYPool pool;
-            //std::cerr << error.PoolCString(pool) << std::endl;
-            goto skip;
         } break;
 
-        default: {
-            return CXChildVisit_Recurse;
-        } break;
+        default:
+            result = CXChildVisit_Recurse;
+            goto skip;
+        break;
+    } {
+        CYKey &key(baton.keys[name]);
+        if (key.priority_ <= priority) {
+            key.priority_ = priority;
+            key.code_ = value.str();
+            key.flags_ = flags;
+        }
+    } } catch (const CYException &error) {
+        CYPool pool;
+        //std::cerr << error.PoolCString(pool) << std::endl;
     }
 
-    baton.keys[name] = value.str();
-
   skip:
-    return CXChildVisit_Continue;
+    return result;
 }
 
 int main(int argc, const char *argv[]) {
@@ -353,11 +708,11 @@ int main(int argc, const char *argv[]) {
     clang_visitChildren(clang_getTranslationUnitCursor(unit), &CYChildVisit, &baton);
 
     for (CYKeyMap::const_iterator key(keys.begin()); key != keys.end(); ++key) {
-        std::string value(key->second);
-        for (size_t i(0), e(value.size()); i != e; ++i)
-            if (value[i] <= 0 || value[i] >= 0x7f || value[i] == '\n')
+        std::string code(key->second.code_);
+        for (size_t i(0), e(code.size()); i != e; ++i)
+            if (code[i] <= 0 || code[i] >= 0x7f || code[i] == '\n')
                 goto skip;
-        std::cout << key->first << "|\"" << value << "\"" << std::endl;
+        std::cout << key->first << "|" << key->second.flags_ << "\"" << code << "\"" << std::endl;
     skip:; }
 
     clang_disposeTranslationUnit(unit);