X-Git-Url: https://git.saurik.com/cycript.git/blobdiff_plain/b7854baa232c47ae81470471e96d63fff09706e5..436a877be73ebe14fecd3ef0e9b7dd6b854d2e3b:/Analyze.cpp diff --git a/Analyze.cpp b/Analyze.cpp index c6e5777..4f84e03 100644 --- a/Analyze.cpp +++ b/Analyze.cpp @@ -1,3 +1,25 @@ +/* Cycript - The Truly Universal Scripting Language + * Copyright (C) 2009-2016 Jay Freeman (saurik) +*/ + +/* GNU Affero General Public License, Version 3 {{{ */ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/ +/* }}} */ + +#include #include #include #include @@ -6,6 +28,29 @@ #include +#include "Functor.hpp" +#include "Replace.hpp" +#include "Syntax.hpp" + +static CXChildVisitResult CYVisit(CXCursor cursor, CXCursor parent, CXClientData arg) { + (*reinterpret_cast *>(arg))(cursor); + return CXChildVisit_Continue; +} + +static unsigned CYForChild(CXCursor cursor, const Functor &visitor) { + return clang_visitChildren(cursor, &CYVisit, const_cast(static_cast(&visitor))); +} + +static bool CYOneChild(CXCursor cursor, const Functor &visitor) { + bool visited(false); + CYForChild(cursor, fun([&](CXCursor child) { + _assert(!visited); + visited = true; + visitor(child); + })); + return visited; +} + struct CYCXString { CXString value_; @@ -14,6 +59,26 @@ struct CYCXString { { } + CYCXString(CXCursor cursor) : + value_(clang_getCursorSpelling(cursor)) + { + } + + CYCXString(CXCursorKind kind) : + value_(clang_getCursorKindSpelling(kind)) + { + } + + CYCXString(CXFile file) : + value_(clang_getFileName(file)) + { + } + + CYCXString(CXTranslationUnit unit, CXToken token) : + value_(clang_getTokenSpelling(unit, token)) + { + } + ~CYCXString() { clang_disposeString(value_); } @@ -21,39 +86,54 @@ struct CYCXString { operator const char *() const { return clang_getCString(value_); } -}; -struct CYFieldBaton { - std::ostringstream types; - std::ostringstream names; + const char *Pool(CYPool &pool) const { + return pool.strdup(*this); + } + + bool operator ==(const char *rhs) const { + const char *lhs(*this); + return lhs == rhs || strcmp(lhs, rhs) == 0; + } }; -static CXChildVisitResult CYFieldVisit(CXCursor cursor, CXCursor parent, CXClientData arg) { - CYFieldBaton &baton(*static_cast(arg)); +template +struct CYCXPosition { + CXFile file_; + unsigned line_; + unsigned column_; + unsigned offset_; - if (clang_getCursorKind(cursor) == CXCursor_FieldDecl) { - CXType type(clang_getCursorType(cursor)); - baton.types << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << "),"; 
- baton.names << "'" << CYCXString(clang_getCursorSpelling(cursor)) << "',"; + CYCXPosition(CXSourceLocation location) { + clang_get_Location(location, &file_, &line_, &column_, &offset_); } - return CXChildVisit_Continue; -} + CYCXPosition(CXTranslationUnit unit, CXToken token) : + CYCXPosition(clang_getTokenLocation(unit, token)) + { + } -struct CYAttributeBaton { - std::string label; + CXSourceLocation Get(CXTranslationUnit unit) const { + return clang_getLocation(unit, file_, line_, column_); + } }; -static CXChildVisitResult CYAttributeVisit(CXCursor cursor, CXCursor parent, CXClientData arg) { - CYAttributeBaton &baton(*static_cast(arg)); +template +std::ostream &operator <<(std::ostream &out, const CYCXPosition &position) { + if (position.file_ != NULL) + out << "[" << CYCXString(position.file_) << "]:"; + out << position.line_ << ":" << position.column_ << "@" << position.offset_; + return out; +} - if (clang_getCursorKind(cursor) == CXCursor_AsmLabelAttr) - baton.label = CYCXString(clang_getCursorSpelling(cursor)); +struct CYKey { + unsigned priority_ = 0; - return CXChildVisit_Continue; -} + std::string code_; + unsigned flags_; +}; -typedef std::map CYKeyMap; +typedef std::map CYKeyMap; struct CYChildBaton { CXTranslationUnit unit; @@ -67,86 +147,244 @@ struct CYChildBaton { }; struct CYTokens { - CXTranslationUnit unit; - CXToken *tokens; - unsigned count; + private: + CXTranslationUnit unit_; + CXToken *tokens_; + unsigned count_; + unsigned valid_; + + public: + CYTokens(CXTranslationUnit unit, CXSourceRange range) : + unit_(unit) + { + clang_tokenize(unit_, range, &tokens_, &count_); + + + // libclang's tokenizer is horribly broken and returns "extra" tokens. 
+ // this code goes back through the tokens and filters for good ones :/ + + CYCXPosition<> end(clang_getRangeEnd(range)); + CYCXString file(end.file_); + + for (valid_ = 0; valid_ != count_; ++valid_) { + CYCXPosition<> position(unit, tokens_[valid_]); + _assert(CYCXString(position.file_) == file); + if (position.offset_ >= end.offset_) + break; + } + } CYTokens(CXTranslationUnit unit, CXCursor cursor) : - unit(unit) + CYTokens(unit, clang_getCursorExtent(cursor)) { - CXSourceRange range(clang_getCursorExtent(cursor)); - clang_tokenize(unit, range, &tokens, &count); } ~CYTokens() { - clang_disposeTokens(unit, tokens, count); + clang_disposeTokens(unit_, tokens_, count_); } operator CXToken *() const { - return tokens; + return tokens_; + } + + size_t size() const { + return valid_; } }; +static CYUTF8String CYCXPoolUTF8Range(CYPool &pool, CXSourceRange range) { + CYCXPosition<> start(clang_getRangeStart(range)); + CYCXPosition<> end(clang_getRangeEnd(range)); + CYCXString file(start.file_); + _assert(file == CYCXString(end.file_)); + + CYPool temp; + size_t size; + char *data(static_cast(CYPoolFile(temp, file, &size))); + _assert(start.offset_ <= size && end.offset_ <= size && start.offset_ <= end.offset_); + + CYUTF8String code; + code.size = end.offset_ - start.offset_; + code.data = pool.strndup(data + start.offset_, code.size); + return code; +} + +static CYExpression *CYTranslateExpression(CXTranslationUnit unit, CXCursor cursor) { + switch (CXCursorKind kind = clang_getCursorKind(cursor)) { + case CXCursor_CallExpr: { + CYExpression *function(NULL); + CYList arguments; + CYForChild(cursor, fun([&](CXCursor child) { + CYExpression *expression(CYTranslateExpression(unit, child)); + if (function == NULL) + function = expression; + else + arguments->*$C_(expression); + })); + return $C(function, arguments); + } break; + + case CXCursor_DeclRefExpr: { + return $V(CYCXString(cursor).Pool($pool)); + } break; + + case CXCursor_IntegerLiteral: { + // libclang doesn't 
provide any reasonable way to do this + // note: clang_tokenize doesn't work if this is a macro + // the token range starts inside the macro but ends after it + // the tokenizer freaks out and either fails with 0 tokens + // or returns some massive number of tokens ending here :/ + + CYUTF8String token(CYCXPoolUTF8Range($pool, clang_getCursorExtent(cursor))); + double value(CYCastDouble(token)); + if (std::isnan(value)) + return $V(token.data); + return $ CYNumber(value); + } break; + + case CXCursor_CStyleCastExpr: + // XXX: most of the time, this is a "NoOp" integer cast; but we should check it + + case CXCursor_UnexposedExpr: + // there is a very high probability that this is actually an "ImplicitCastExpr" + // "Douglas Gregor" err'd on the incorrect side of this one + // http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20110926/046998.html + + case CXCursor_ParenExpr: { + CYExpression *pass(NULL); + CYOneChild(cursor, fun([&](CXCursor child) { + pass = CYTranslateExpression(unit, child); + })); + return pass; + } break; + + default: + //std::cerr << "E:" << CYCXString(kind) << std::endl; + _assert(false); + } +} + +static CYStatement *CYTranslateStatement(CXTranslationUnit unit, CXCursor cursor) { + switch (CXCursorKind kind = clang_getCursorKind(cursor)) { + case CXCursor_ReturnStmt: { + CYExpression *value(NULL); + CYOneChild(cursor, fun([&](CXCursor child) { + value = CYTranslateExpression(unit, child); + })); + return $ CYReturn(value); + } break; + + default: + //std::cerr << "S:" << CYCXString(kind) << std::endl; + _assert(false); + } +} + +static CYStatement *CYTranslateBlock(CXTranslationUnit unit, CXCursor cursor) { + CYList statements; + CYForChild(cursor, fun([&](CXCursor child) { + statements->*CYTranslateStatement(unit, child); + })); + return $ CYBlock(statements); +} + static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClientData arg) { CYChildBaton &baton(*static_cast(arg)); CXTranslationUnit &unit(baton.unit); - 
CYCXString spelling(clang_getCursorSpelling(cursor)); + CYCXString spelling(cursor); std::string name(spelling); std::ostringstream value; + unsigned priority(2); + unsigned flags(0); /*CXSourceLocation location(clang_getCursorLocation(cursor)); + CYCXPosition<> position(location); + std::cout << spelling << " " << position << std::endl;*/ - CXFile file; - unsigned line; - unsigned column; - unsigned offset; - clang_getSpellingLocation(location, &file, &line, &column, &offset); - - if (file != NULL) { - CYCXString path(clang_getFileName(file)); - std::cout << spelling << " " << path << ":" << line << std::endl; - }*/ - - switch (clang_getCursorKind(cursor)) { + switch (CXCursorKind kind = clang_getCursorKind(cursor)) { case CXCursor_EnumConstantDecl: { value << clang_getEnumConstantDeclValue(cursor); } break; - case CXCursor_MacroDefinition: { - CYTokens tokens(unit, cursor); - if (tokens.count <= 2) - goto skip; + case CXCursor_MacroDefinition: try { + CXSourceRange range(clang_getCursorExtent(cursor)); + CYTokens tokens(unit, range); + _assert(tokens.size() != 0); + + CXCursor cursors[tokens.size()]; + clang_annotateTokens(unit, tokens, tokens.size(), cursors); + + CYLocalPool local; + CYList parameters; + unsigned offset(1); + + if (tokens.size() != 1) { + CYCXPosition<> start(clang_getRangeStart(range)); + CYCXString first(unit, tokens[offset]); + if (first == "(") { + CYCXPosition<> paren(unit, tokens[offset]); + if (start.offset_ + strlen(spelling) == paren.offset_) { + for (;;) { + _assert(++offset != tokens.size()); + CYCXString token(unit, tokens[offset]); + parameters->*$P($B($I(token.Pool($pool)))); + _assert(++offset != tokens.size()); + CYCXString comma(unit, tokens[offset]); + if (comma == ")") + break; + _assert(comma == ","); + } + ++offset; + } + } + } - CXCursor cursors[tokens.count]; - clang_annotateTokens(unit, tokens, tokens.count, cursors); + std::ostringstream body; + for (unsigned i(offset); i != tokens.size(); ++i) { + CYCXString 
token(unit, tokens[i]); + if (i != offset) + body << " "; + body << token; + } - for (unsigned i(1); i != tokens.count - 1; ++i) { - CYCXString token(clang_getTokenSpelling(unit, tokens[i])); - if (i != 1) - value << " "; - else if (strcmp(token, "(") == 0) - goto skip; - value << token; + if (!parameters) + value << body.str(); + else { + CYOptions options; + CYOutput out(*value.rdbuf(), options); + out << '(' << "function" << '('; + out << parameters; + out << ')' << '{'; + out << "return" << ' '; + value << body.str(); + out << ';' << '}' << ')'; } + } catch (const CYException &error) { + CYPool pool; + //std::cerr << error.PoolCString(pool) << std::endl; + goto skip; } break; case CXCursor_StructDecl: { - if (!clang_isCursorDefinition(cursor)) - goto skip; if (spelling[0] == '\0') goto skip; + if (!clang_isCursorDefinition(cursor)) + priority = 1; - CYFieldBaton baton; + std::ostringstream types; + std::ostringstream names; - baton.types << "["; - baton.names << "["; - clang_visitChildren(cursor, &CYFieldVisit, &baton); - baton.types << "]"; - baton.names << "]"; + CYForChild(cursor, fun([&](CXCursor child) { + if (clang_getCursorKind(child) == CXCursor_FieldDecl) { + CXType type(clang_getCursorType(child)); + types << "(typedef " << CYCXString(clang_getTypeSpelling(type)) << "),"; + names << "'" << CYCXString(child) << "',"; + } + })); + value << "new Type([" << types.str() << "],[" << names.str() << "]).withName(\"" << name << "\")"; name += "$cy"; - value << "new Type(" << baton.types.str() << "," << baton.names.str() << ")"; } break; case CXCursor_TypedefDecl: { @@ -155,18 +393,60 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien } break; case CXCursor_FunctionDecl: - case CXCursor_VarDecl: { - CYAttributeBaton baton; - clang_visitChildren(cursor, &CYAttributeVisit, &baton); - - if (baton.label.empty()) { - baton.label = spelling; - baton.label = '_' + baton.label; - } else if (baton.label[0] != '_') + case 
CXCursor_VarDecl: try { + std::string label; + + CYList parameters; + CYStatement *code(NULL); + + CYLocalPool local; + + CYForChild(cursor, fun([&](CXCursor child) { + switch (CXCursorKind kind = clang_getCursorKind(child)) { + case CXCursor_AsmLabelAttr: + label = CYCXString(child); + break; + + case CXCursor_CompoundStmt: + code = CYTranslateBlock(unit, child); + break; + + case CXCursor_ParmDecl: + parameters->*$P($B($I(CYCXString(child).Pool($pool)))); + break; + + case CXCursor_IntegerLiteral: + case CXCursor_ObjCClassRef: + case CXCursor_TypeRef: + case CXCursor_UnexposedAttr: + break; + + default: + //std::cerr << "A:" << CYCXString(child) << std::endl; + break; + } + })); + + if (label.empty()) { + label = spelling; + label = '_' + label; + } else if (label[0] != '_') goto skip; - CXType type(clang_getCursorType(cursor)); - value << "*(typedef " << CYCXString(clang_getTypeSpelling(type)) << ").pointerTo()(dlsym(RTLD_DEFAULT,'" << baton.label.substr(1) << "'))"; + if (code == NULL) { + CXType type(clang_getCursorType(cursor)); + value << "*(typedef " << CYCXString(clang_getTypeSpelling(type)) << ").pointerTo()(dlsym(RTLD_DEFAULT,'" << label.substr(1) << "'))"; + } else { + CYOptions options; + CYOutput out(*value.rdbuf(), options); + CYFunctionExpression *function($ CYFunctionExpression(NULL, parameters, code)); + function->Output(out, CYNoBFC); + //std::cerr << value.str() << std::endl; + } + } catch (const CYException &error) { + CYPool pool; + //std::cerr << error.PoolCString(pool) << std::endl; + goto skip; } break; default: { @@ -174,7 +454,14 @@ static CXChildVisitResult CYChildVisit(CXCursor cursor, CXCursor parent, CXClien } break; } - baton.keys[name] = value.str(); + { + CYKey &key(baton.keys[name]); + if (key.priority_ < priority) { + key.priority_ = priority; + key.code_ = value.str(); + key.flags_ = flags; + } + } skip: return CXChildVisit_Continue; @@ -190,7 +477,7 @@ int main(int argc, const char *argv[]) { argv[--offset] = "-ObjC++"; #endif 
- CXTranslationUnit unit(clang_parseTranslationUnit(index, file, argv + offset, argc - offset, NULL, 0, CXTranslationUnit_DetailedPreprocessingRecord | CXTranslationUnit_SkipFunctionBodies)); + CXTranslationUnit unit(clang_parseTranslationUnit(index, file, argv + offset, argc - offset, NULL, 0, CXTranslationUnit_DetailedPreprocessingRecord)); for (unsigned i(0), e(clang_getNumDiagnostics(unit)); i != e; ++i) { CXDiagnostic diagnostic(clang_getDiagnostic(unit, i)); @@ -203,11 +490,11 @@ int main(int argc, const char *argv[]) { clang_visitChildren(clang_getTranslationUnitCursor(unit), &CYChildVisit, &baton); for (CYKeyMap::const_iterator key(keys.begin()); key != keys.end(); ++key) { - std::string value(key->second); - for (size_t i(0), e(value.size()); i != e; ++i) - if (value[i] <= 0 || value[i] >= 0x7f || value[i] == '\n') + std::string code(key->second.code_); + for (size_t i(0), e(code.size()); i != e; ++i) + if (code[i] <= 0 || code[i] >= 0x7f || code[i] == '\n') goto skip; - std::cout << key->first << "|\"" << value << "\"" << std::endl; + std::cout << key->first << "|" << key->second.flags_ << "\"" << code << "\"" << std::endl; skip:; } clang_disposeTranslationUnit(unit);