-/* Cycript - Optimizing JavaScript Compiler/Runtime
- * Copyright (C) 2009-2015 Jay Freeman (saurik)
+/* Cycript - The Truly Universal Scripting Language
+ * Copyright (C) 2009-2016 Jay Freeman (saurik)
*/
/* GNU Affero General Public License, Version 3 {{{ */
**/
/* }}} */
+#include <cmath>
#include <cstring>
#include <iostream>
#include <map>
#include <clang-c/Index.h>
+#include "Bridge.hpp"
#include "Functor.hpp"
#include "Replace.hpp"
#include "Syntax.hpp"
{
}
+ CYCXString(CXFile file) :
+ value_(clang_getFileName(file))
+ {
+ }
+
CYCXString(CXTranslationUnit unit, CXToken token) :
value_(clang_getTokenSpelling(unit, token))
{
const char *Pool(CYPool &pool) const {
return pool.strdup(*this);
}
+
+ bool operator ==(const char *rhs) const {
+ const char *lhs(*this);
+ return lhs == rhs || strcmp(lhs, rhs) == 0;
+ }
};
-typedef std::map<std::string, std::string> CYKeyMap;
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *) = clang_getSpellingLocation>
+struct CYCXPosition { // decoded form of a CXSourceLocation; the decoder is the template argument (clang_getSpellingLocation by default)
+ CXFile file_; // file reported by the decoder (the stream operator for this type checks it against NULL)
+ unsigned line_; // line reported by the decoder
+ unsigned column_; // column reported by the decoder
+ unsigned offset_; // offset reported by the decoder; callers compare these to order positions
+
+ CYCXPosition(CXSourceLocation location) { // decode `location` into the four fields above
+ clang_get_Location(location, &file_, &line_, &column_, &offset_);
+ }
+
+ CYCXPosition(CXTranslationUnit unit, CXToken token) : // position of `token`: looks up its location, then delegates to the constructor above
+ CYCXPosition(clang_getTokenLocation(unit, token))
+ {
+ }
+
+ CXSourceLocation Get(CXTranslationUnit unit) const { // rebuild a CXSourceLocation inside `unit` from the stored file/line/column
+ return clang_getLocation(unit, file_, line_, column_); // offset_ is not consulted here
+ }
+};
+
+template <void (&clang_get_Location)(CXSourceLocation, CXFile *, unsigned *, unsigned *, unsigned *)>
+std::ostream &operator <<(std::ostream &out, const CYCXPosition<clang_get_Location> &position) { // debug printer: "[file]:line:column@offset"
+ if (position.file_ != NULL) // the "[file]:" prefix is omitted when the position has no file
+ out << "[" << CYCXString(position.file_) << "]:";
+ out << position.line_ << ":" << position.column_ << "@" << position.offset_;
+ return out; // returned so calls can be chained like any other stream insertion
+}
+
+struct CYKey { // value type of CYKeyMap: one generated entry, stored under its name
+ unsigned priority_ = 0; // starts at 0; the visitor in this file overwrites an entry only when its own priority is >= this value
+
+ std::string code_; // generated source text for the entry
+ unsigned flags_; // written next to the code when the map is dumped; NOTE(review): no default initializer, unlike priority_ — relies on the writer always setting it
+};
+
+typedef std::map<std::string, CYKey> CYKeyMap;
struct CYChildBaton {
CXTranslationUnit unit;
};
struct CYTokens {
- CXTranslationUnit unit;
- CXToken *tokens;
- unsigned count;
+ private: // CYTokens holds the clang_tokenize result for one range and disposes it in the destructor
+ CXTranslationUnit unit_; // translation unit the tokens came from; needed again by clang_disposeTokens
+ CXToken *tokens_; // token array filled in by clang_tokenize
+ unsigned count_; // number of tokens clang_tokenize returned (all of them are disposed)
+ unsigned valid_; // number of leading tokens that begin before the end of the requested range
+
+ public:
+ CYTokens(CXTranslationUnit unit, CXSourceRange range) : // tokenize `range`, then count how many leading tokens actually fall inside it
+ unit_(unit)
+ {
+ clang_tokenize(unit_, range, &tokens_, &count_);
+
+
+ // libclang's tokenizer is horribly broken and returns "extra" tokens.
+ // this code goes back through the tokens and filters for good ones :/
+
+ CYCXPosition<> end(clang_getRangeEnd(range)); // decoded end of the requested range
+ CYCXString file(end.file_); // name of the file containing the range end
+
+ for (valid_ = 0; valid_ != count_; ++valid_) { // stop at the first token starting at or past the range end
+ CYCXPosition<> position(unit, tokens_[valid_]);
+ _assert(CYCXString(position.file_) == file); // every token examined must be in the same file as the range end
+ if (position.offset_ >= end.offset_)
+ break;
+ }
+ }
CYTokens(CXTranslationUnit unit, CXCursor cursor) :
- unit(unit)
+ CYTokens(unit, clang_getCursorExtent(cursor)) // cursor form: delegates with the cursor's full extent
{
- CXSourceRange range(clang_getCursorExtent(cursor));
- clang_tokenize(unit, range, &tokens, &count);
}
~CYTokens() {
- clang_disposeTokens(unit, tokens, count);
+ clang_disposeTokens(unit_, tokens_, count_); // disposes count_ tokens, not just valid_; NOTE(review): copying is not disabled, so a copy would dispose twice
}
operator CXToken *() const {
- return tokens;
+ return tokens_; // raw array access; callers should bound their indexing by size()
+ }
+
+ size_t size() const { // number of usable tokens (valid_), which may be smaller than what libclang returned
+ return valid_;
}
};
+static CYUTF8String CYCXPoolUTF8Range(CYPool &pool, CXSourceRange range) { // copy the text lying between the start and end offsets of `range` into `pool`
+ CYCXPosition<> start(clang_getRangeStart(range));
+ CYCXPosition<> end(clang_getRangeEnd(range));
+ CYCXString file(start.file_);
+ _assert(file == CYCXString(end.file_)); // both ends of the range must name the same file
+
+ CYPool temp; // scratch pool for the file data; only the requested slice is copied into `pool`
+ size_t size;
+ char *data(static_cast<char *>(CYPoolFile(temp, file, &size))); // presumably loads the whole file and reports its length in `size` — confirm against CYPoolFile
+ _assert(start.offset_ <= size && end.offset_ <= size && start.offset_ <= end.offset_); // offsets must be ordered and inside the loaded data
+
+ CYUTF8String code;
+ code.size = end.offset_ - start.offset_; // length of the slice
+ code.data = pool.strndup(data + start.offset_, code.size); // duplicate into the caller's pool so the result does not point into `temp`
+ return code;
+}
+
static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor cursor) {
switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
case CXCursor_CallExpr: {
} break;
case CXCursor_IntegerLiteral: {
- CYTokens tokens(unit, cursor);
- _assert(tokens.count != 0);
- // XXX: I don't understand why this is often enormous :/
- return $ CYNumber(CYCastDouble(CYCXString(unit, tokens[0])));
+ // libclang doesn't provide any reasonable way to do this
+ // note: clang_tokenize doesn't work if this is a macro
+ // the token range starts inside the macro but ends after it
+ // the tokenizer freaks out and either fails with 0 tokens
+ // or returns some massive number of tokens ending here :/
+
+ CYUTF8String token(CYCXPoolUTF8Range($pool, clang_getCursorExtent(cursor)));
+ double value(CYCastDouble(token));
+ if (std::isnan(value))
+ return $V(token.data);
+ return $ CYNumber(value);
} break;
case CXCursor_CStyleCastExpr:
return $ CYBlock(statements);
}
+static CYType *CYDecodeType(CXType type);
+static void CYParseType(CXType type, CYType *typed);
+
+static void CYParseEnumeration(CXCursor cursor, CYType *typed) { // set typed->specifier_ to a CYTypeEnum built from the enum declaration at `cursor`
+ CYList<CYEnumConstant> constants; // constants collected from the declaration's children
+
+ CYForChild(cursor, fun([&](CXCursor child) { // callback handed to CYForChild together with `cursor`
+ if (clang_getCursorKind(child) == CXCursor_EnumConstantDecl) // other child kinds are ignored
+ constants->*$ CYEnumConstant($I($pool.strdup(CYCXString(child))), $D(clang_getEnumConstantDeclValue(child))); // pairs the constant's spelling with its declared value
+ }));
+
+ CYType *integer(CYDecodeType(clang_getEnumDeclIntegerType(cursor))); // decode the enum's underlying integer type
+ typed->specifier_ = $ CYTypeEnum(NULL, integer->specifier_, constants); // first argument is NULL (presumably the enum's name — confirm against CYTypeEnum)
+}
+
+static void CYParseStructure(CXCursor cursor, CYType *typed) { // set typed->specifier_ to a CYTypeStruct built from the struct declaration at `cursor`
+ CYList<CYTypeStructField> fields; // fields collected from the declaration's children
+ CYForChild(cursor, fun([&](CXCursor child) { // callback handed to CYForChild together with `cursor`
+ if (clang_getCursorKind(child) == CXCursor_FieldDecl) // other child kinds are ignored
+ fields->*$ CYTypeStructField(CYDecodeType(clang_getCursorType(child)), $I(CYCXString(child).Pool($pool))); // decoded field type plus the field's spelling
+ }));
+
+ typed->specifier_ = $ CYTypeStruct(NULL, $ CYStructTail(fields)); // first argument is NULL (presumably the struct's name — confirm against CYTypeStruct)
+}
+
+static void CYParseCursor(CXType type, CXCursor cursor, CYType *typed) { // set typed->specifier_ from the declaration at `cursor`; `type` is not read in this body
+ CYCXString spelling(cursor); // the declaration's name; empty for an anonymous enum/struct
+
+ switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
+ case CXCursor_EnumDecl:
+ if (spelling[0] != '\0') // named enum: refer to it by name
+ typed->specifier_ = $ CYTypeReference(CYTypeReferenceEnum, $I(spelling.Pool($pool)));
+ else // anonymous enum: expand its definition inline
+ CYParseEnumeration(cursor, typed);
+ break;
+
+ case CXCursor_StructDecl: {
+ if (spelling[0] != '\0') // named struct: refer to it by name
+ typed->specifier_ = $ CYTypeReference(CYTypeReferenceStruct, $I(spelling.Pool($pool)));
+ else // anonymous struct: expand its definition inline
+ CYParseStructure(cursor, typed);
+ } break;
+
+ case CXCursor_UnionDecl: { // unions are not handled: this always fails the assertion
+ _assert(false);
+ } break;
+
+ default: // any other declaration kind: log it to stderr, then fail the assertion
+ std::cerr << "C:" << CYCXString(kind) << std::endl;
+ _assert(false);
+ break;
+ }
+}
+
+static CYTypedParameter *CYParseSignature(CXType type, CYType *typed) { // parse the result type of `type` into `typed` and return its argument types as a parameter list
+ CYParseType(clang_getResultType(type), typed); // the result type is written into the caller's CYType
+ CYList<CYTypedParameter> parameters;
+ for (int i(0), e(clang_getNumArgTypes(type)); i != e; ++i) // one entry per argument type, in order
+ parameters->*$ CYTypedParameter(CYDecodeType(clang_getArgType(type, i)), NULL); // second argument is NULL (presumably the parameter name — confirm against CYTypedParameter)
+ return parameters; // the list is returned through the CYTypedParameter * return type
+}
+
+static void CYParseFunction(CXType type, CYType *typed) { // describe function type `type` in `typed`: result type first, then a CYTypeFunctionWith modifier
+ typed = typed->Modify($ CYTypeFunctionWith(clang_isFunctionTypeVariadic(type), CYParseSignature(type, typed))); // the assignment only changes the local pointer; the caller sees whatever Modify does to *typed
+}
+
+static void CYParseType(CXType type, CYType *typed) { // translate libclang type `type` into `typed`: a specifier for the base type, plus modifiers for derived types
+ switch (CXTypeKind kind = type.kind) {
+ case CXType_Unexposed: {
+ CXType result(clang_getResultType(type)); // used as a probe: an Invalid result kind means this is not a function type
+ if (result.kind == CXType_Invalid)
+ CYParseCursor(type, clang_getTypeDeclaration(type), typed); // fall back to the type's declaration
+ else
+ // clang marks function pointers as Unexposed but still supports them
+ CYParseFunction(type, typed);
+ } break;
+
+ case CXType_Bool: typed->specifier_ = $ CYTypeVariable("bool"); break; // bool and wchar_t are emitted as named type variables
+ case CXType_WChar: typed->specifier_ = $ CYTypeVariable("wchar_t"); break;
+ case CXType_Float: typed->specifier_ = $ CYTypeFloating(0); break; // CYTypeFloating argument: 0 = float, 1 = double, 2 = long double
+ case CXType_Double: typed->specifier_ = $ CYTypeFloating(1); break;
+ case CXType_LongDouble: typed->specifier_ = $ CYTypeFloating(2); break;
+
+ case CXType_Char_U: typed->specifier_ = $ CYTypeCharacter(CYTypeNeutral); break; // Char_U and Char_S both map to CYTypeNeutral
+ case CXType_Char_S: typed->specifier_ = $ CYTypeCharacter(CYTypeNeutral); break;
+ case CXType_SChar: typed->specifier_ = $ CYTypeCharacter(CYTypeSigned); break;
+ case CXType_UChar: typed->specifier_ = $ CYTypeCharacter(CYTypeUnsigned); break;
+
+ case CXType_Short: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 0); break; // CYTypeIntegral second argument: 0 = short, 1 = int, 2 = long, 3 = long long
+ case CXType_UShort: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 0); break;
+
+ case CXType_Int: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 1); break;
+ case CXType_UInt: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 1); break;
+
+ case CXType_Long: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 2); break;
+ case CXType_ULong: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 2); break;
+
+ case CXType_LongLong: typed->specifier_ = $ CYTypeIntegral(CYTypeSigned, 3); break;
+ case CXType_ULongLong: typed->specifier_ = $ CYTypeIntegral(CYTypeUnsigned, 3); break;
+
+ case CXType_Int128: typed->specifier_ = $ CYTypeInt128(CYTypeSigned); break;
+ case CXType_UInt128: typed->specifier_ = $ CYTypeInt128(CYTypeUnsigned); break;
+
+ case CXType_BlockPointer: {
+ CXType pointee(clang_getPointeeType(type)); // the function type the block points to
+ _assert(!clang_isFunctionTypeVariadic(pointee)); // variadic blocks are rejected
+ typed = typed->Modify($ CYTypeBlockWith(CYParseSignature(pointee, typed)));
+ } break;
+
+ case CXType_ConstantArray:
+ CYParseType(clang_getArrayElementType(type), typed); // element type first, then the array modifier carrying the size
+ typed = typed->Modify($ CYTypeArrayOf($D(clang_getArraySize(type))));
+ break;
+
+ case CXType_Enum: // emitted as a type variable named by the type's full spelling
+ typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeSpelling(type))));
+ break;
+
+ case CXType_FunctionProto:
+ CYParseFunction(type, typed);
+ break;
+
+ case CXType_IncompleteArray:
+ // XXX: I probably should not decay to Pointer
+ CYParseType(clang_getArrayElementType(type), typed);
+ typed = typed->Modify($ CYTypePointerTo());
+ break;
+
+ case CXType_ObjCClass:
+ typed->specifier_ = $ CYTypeVariable("Class");
+ break;
+
+ case CXType_ObjCId:
+ typed->specifier_ = $ CYTypeVariable("id");
+ break;
+
+ case CXType_ObjCInterface: // emitted as a type variable named by the type's spelling
+ typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeSpelling(type))));
+ break;
+
+ case CXType_ObjCObjectPointer: {
+ CXType pointee(clang_getPointeeType(type));
+ if (pointee.kind != CXType_Unexposed) { // ordinary case: pointee type plus a pointer modifier
+ CYParseType(pointee, typed);
+ typed = typed->Modify($ CYTypePointerTo());
+ } else
+ // Clang seems to have internal typedefs for id and Class that are awkward
+ _assert(false);
+ } break;
+
+ case CXType_ObjCSel:
+ typed->specifier_ = $ CYTypeVariable("SEL");
+ break;
+
+ case CXType_Pointer:
+ CYParseType(clang_getPointeeType(type), typed); // pointee first, then the pointer modifier
+ typed = typed->Modify($ CYTypePointerTo());
+ break;
+
+ case CXType_Record: // always emitted as a struct reference named by the type's spelling
+ typed->specifier_ = $ CYTypeReference(CYTypeReferenceStruct, $I($pool.strdup(CYCXString(clang_getTypeSpelling(type)))));
+ break;
+
+ case CXType_Typedef:
+ // use the declaration in order to isolate the name of the typedef itself
+ typed->specifier_ = $ CYTypeVariable($pool.strdup(CYCXString(clang_getTypeDeclaration(type))));
+ break;
+
+ case CXType_Vector: // vector types are not handled: this always fails the assertion
+ _assert(false);
+ break;
+
+ case CXType_Void:
+ typed->specifier_ = $ CYTypeVoid();
+ break;
+
+ default: // unknown kind: log the kind name and the type's spelling to stderr, then fail the assertion
+ std::cerr << "T:" << CYCXString(clang_getTypeKindSpelling(kind)) << std::endl;
+ std::cerr << "_: " << CYCXString(clang_getTypeSpelling(type)) << std::endl;
+ _assert(false);
+ }
+
+ if (clang_isConstQualifiedType(type)) // applied after the switch, so the const modifier is added last for any kind
+ typed = typed->Modify($ CYTypeConstant());
+}
+
+static CYType *CYDecodeType(CXType type) { // convenience wrapper: create a fresh CYType and fill it in from `type`
+ CYType *typed($ CYType(NULL)); // created with a NULL constructor argument; CYParseType fills it in
+ CYParseType(type, typed);
+ return typed;
+}
+
static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClientData arg) {
CYChildBaton &baton(*static_cast<CYChildBaton *>(arg));
CXTranslationUnit &unit(baton.unit);
+ CXChildVisitResult result(CXChildVisit_Continue);
CYCXString spelling(cursor);
std::string name(spelling);
std::ostringstream value;
+ unsigned priority(2);
+ unsigned flags(CYBridgeHold);
/*CXSourceLocation location(clang_getCursorLocation(cursor));
+ CYCXPosition<> position(location);
+ std::cerr << spelling << " " << position << std::endl;*/
- CXFile file;
- unsigned line;
- unsigned column;
- unsigned offset;
- clang_getSpellingLocation(location, &file, &line, &column, &offset);
-
- if (file != NULL) {
- CYCXString path(clang_getFileName(file));
- std::cout << spelling << " " << path << ":" << line << std::endl;
- }*/
-
- switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
+ try { switch (CXCursorKind kind = clang_getCursorKind(cursor)) {
case CXCursor_EnumConstantDecl: {
value << clang_getEnumConstantDeclValue(cursor);
} break;
- case CXCursor_MacroDefinition: {
- CYTokens tokens(unit, cursor);
- if (tokens.count <= 2)
+ case CXCursor_EnumDecl: {
+ // the enum constants are implemented separately *also*
+ // XXX: maybe move output logic to function we can call
+ result = CXChildVisit_Recurse;
+
+ if (spelling[0] == '\0')
goto skip;
+ // XXX: this was blindly copied from StructDecl
+ if (!clang_isCursorDefinition(cursor))
+ priority = 1;
+
+ CYLocalPool pool;
+
+ CYType typed;
+ CYParseEnumeration(cursor, &typed);
+
+ CYOptions options;
+ CYOutput out(*value.rdbuf(), options);
+ CYTypeExpression(&typed).Output(out, CYNoBFC);
- CXCursor cursors[tokens.count];
- clang_annotateTokens(unit, tokens, tokens.count, cursors);
+ value << ".withName(\"" << name << "\")";
+ name = "$cye" + name;
+ flags = CYBridgeType;
+ } break;
+
+ case CXCursor_MacroDefinition: {
+ CXSourceRange range(clang_getCursorExtent(cursor));
+ CYTokens tokens(unit, range);
+ _assert(tokens.size() != 0);
+
+ CXCursor cursors[tokens.size()];
+ clang_annotateTokens(unit, tokens, tokens.size(), cursors);
+
+ CYLocalPool local;
+ CYList<CYFunctionParameter> parameters;
+ unsigned offset(1);
+
+ if (tokens.size() != 1) {
+ CYCXPosition<> start(clang_getRangeStart(range));
+ CYCXString first(unit, tokens[offset]);
+ if (first == "(") {
+ CYCXPosition<> paren(unit, tokens[offset]);
+ if (start.offset_ + strlen(spelling) == paren.offset_) {
+ for (;;) {
+ _assert(++offset != tokens.size());
+ CYCXString token(unit, tokens[offset]);
+ parameters->*$P($B($I(token.Pool($pool))));
+ _assert(++offset != tokens.size());
+ CYCXString comma(unit, tokens[offset]);
+ if (comma == ")")
+ break;
+ _assert(comma == ",");
+ }
+ ++offset;
+ }
+ }
+ }
- for (unsigned i(1); i != tokens.count - 1; ++i) {
+ std::ostringstream body;
+ for (unsigned i(offset); i != tokens.size(); ++i) {
CYCXString token(unit, tokens[i]);
- if (i != 1)
- value << " ";
- else if (strcmp(token, "(") == 0)
- goto skip;
- value << token;
+ if (i != offset)
+ body << " ";
+ body << token;
+ }
+
+ if (!parameters)
+ value << body.str();
+ else {
+ CYOptions options;
+ CYOutput out(*value.rdbuf(), options);
+ out << '(' << "function" << '(';
+ out << parameters;
+ out << ')' << '{';
+ out << "return" << ' ';
+ value << body.str();
+ out << ';' << '}' << ')';
}
} break;
case CXCursor_StructDecl: {
- if (!clang_isCursorDefinition(cursor))
- goto skip;
if (spelling[0] == '\0')
goto skip;
+ if (!clang_isCursorDefinition(cursor))
+ priority = 1;
- std::ostringstream types;
- std::ostringstream names;
+ CYLocalPool pool;
- CYForChild(cursor, fun([&](CXCursor child) {
- if (clang_getCursorKind(child) == CXCursor_FieldDecl) {
- CXType type(clang_getCursorType(child));
- types << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << "),";
- names << "'" << CYCXString(child) << "',";
- }
- }));
+ CYType typed;
+ CYParseStructure(cursor, &typed);
- name += "$cy";
- value << "new Type([" << types.str() << "],[" << names.str() << "])";
+ CYOptions options;
+ CYOutput out(*value.rdbuf(), options);
+ CYTypeExpression(&typed).Output(out, CYNoBFC);
+
+ value << ".withName(\"" << name << "\")";
+ name = "$cys" + name;
+ flags = CYBridgeType;
} break;
case CXCursor_TypedefDecl: {
- CXType type(clang_getTypedefDeclUnderlyingType(cursor));
- value << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << ")";
+ CYLocalPool local;
+
+ CYType *typed(CYDecodeType(clang_getTypedefDeclUnderlyingType(cursor)));
+ if (typed->specifier_ == NULL)
+ value << "(typedef " << CYCXString(clang_getTypeSpelling(clang_getTypedefDeclUnderlyingType(cursor))) << ")";
+ else {
+ CYOptions options;
+ CYOutput out(*value.rdbuf(), options);
+ CYTypeExpression(typed).Output(out, CYNoBFC);
+ }
} break;
case CXCursor_FunctionDecl:
- case CXCursor_VarDecl: try {
+ case CXCursor_VarDecl: {
std::string label;
CYList<CYFunctionParameter> parameters;
break;
default:
- std::cerr << "A:" << CYCXString(child) << std::endl;
+ //std::cerr << "A:" << CYCXString(child) << std::endl;
break;
}
}));
goto skip;
if (code == NULL) {
+ value << "*";
CXType type(clang_getCursorType(cursor));
- value << "*(typedef " << CYCXString(clang_getTypeSpelling(type)) << ").pointerTo()(dlsym(RTLD_DEFAULT,'" << label.substr(1) << "'))";
+ CYType *typed(CYDecodeType(type));
+ CYOptions options;
+ CYOutput out(*value.rdbuf(), options);
+ CYTypeExpression(typed).Output(out, CYNoBFC);
+ value << ".pointerTo()(dlsym(RTLD_DEFAULT,'" << label.substr(1) << "'))";
} else {
CYOptions options;
CYOutput out(*value.rdbuf(), options);
function->Output(out, CYNoBFC);
//std::cerr << value.str() << std::endl;
}
- } catch (const CYException &error) {
- CYPool pool;
- //std::cerr << error.PoolCString(pool) << std::endl;
- goto skip;
} break;
- default: {
- return CXChildVisit_Recurse;
- } break;
+ default:
+ result = CXChildVisit_Recurse;
+ goto skip;
+ break;
+ } {
+ CYKey &key(baton.keys[name]);
+ if (key.priority_ <= priority) {
+ key.priority_ = priority;
+ key.code_ = value.str();
+ key.flags_ = flags;
+ }
+ } } catch (const CYException &error) {
+ CYPool pool;
+ //std::cerr << error.PoolCString(pool) << std::endl;
}
- baton.keys[name] = value.str();
-
skip:
- return CXChildVisit_Continue;
+ return result;
}
int main(int argc, const char *argv[]) {
clang_visitChildren(clang_getTranslationUnitCursor(unit), &CYChildVisit, &baton);
for (CYKeyMap::const_iterator key(keys.begin()); key != keys.end(); ++key) {
- std::string value(key->second);
- for (size_t i(0), e(value.size()); i != e; ++i)
- if (value[i] <= 0 || value[i] >= 0x7f || value[i] == '\n')
+ std::string code(key->second.code_);
+ for (size_t i(0), e(code.size()); i != e; ++i)
+ if (code[i] <= 0 || code[i] >= 0x7f || code[i] == '\n')
goto skip;
- std::cout << key->first << "|\"" << value << "\"" << std::endl;
+ std::cout << key->first << "|" << key->second.flags_ << "\"" << code << "\"" << std::endl;
skip:; }
clang_disposeTranslationUnit(unit);