From 9798e971607b495b0450dfd57a509ca0d2eb9be3 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 22 Oct 2021 17:29:23 +0200 Subject: [PATCH 01/30] Initial work on refining types --- include/rellic/AST/ASTBuilder.h | 10 ++++ include/rellic/AST/GenerateAST.h | 2 +- include/rellic/AST/IRToASTVisitor.h | 5 +- lib/AST/ASTBuilder.cpp | 8 +++ lib/AST/GenerateAST.cpp | 4 +- lib/AST/IRToASTVisitor.cpp | 86 +++++++++++++++++++++++++---- lib/AST/LocalDeclRenamer.cpp | 2 - tools/decomp/Decomp.cpp | 2 +- 8 files changed, 102 insertions(+), 17 deletions(-) diff --git a/include/rellic/AST/ASTBuilder.h b/include/rellic/AST/ASTBuilder.h index 7191a808..d15a7441 100644 --- a/include/rellic/AST/ASTBuilder.h +++ b/include/rellic/AST/ASTBuilder.h @@ -257,6 +257,16 @@ class ASTBuilder { clang::BreakStmt *CreateBreak(); // Return clang::ReturnStmt *CreateReturn(clang::Expr *retval = nullptr); + // Typedef declaration + clang::TypedefDecl *CreateTypedefDecl(clang::DeclContext *decl_ctx, + clang::IdentifierInfo *id, + clang::QualType type); + + clang::TypedefDecl *CreateTypedefDecl(clang::DeclContext *decl_ctx, + std::string name, + clang::QualType type) { + return CreateTypedefDecl(decl_ctx, CreateIdentifier(name), type); + } }; } // namespace rellic \ No newline at end of file diff --git a/include/rellic/AST/GenerateAST.h b/include/rellic/AST/GenerateAST.h index c87e507f..a935cd26 100644 --- a/include/rellic/AST/GenerateAST.h +++ b/include/rellic/AST/GenerateAST.h @@ -51,7 +51,7 @@ class GenerateAST : public llvm::ModulePass { public: static char ID; - GenerateAST(clang::ASTUnit &unit); + GenerateAST(clang::ASTUnit &unit, DebugInfoCollector &dic); IRToStmtMap &GetIRToStmtMap() { return ast_gen.GetIRToStmtMap(); } IRToValDeclMap &GetIRToValDeclMap() { return ast_gen.GetIRToValDeclMap(); } diff --git a/include/rellic/AST/IRToASTVisitor.h b/include/rellic/AST/IRToASTVisitor.h index 23435126..eec62973 100644 --- a/include/rellic/AST/IRToASTVisitor.h +++ b/include/rellic/AST/IRToASTVisitor.h @@ -21,6 +21,7 @@ #include "rellic/AST/ASTBuilder.h" #include "rellic/AST/Compat/ASTContext.h" +#include "rellic/AST/DebugInfoCollector.h" namespace rellic { @@ -37,8 +38,10 @@ class IRToASTVisitor : public llvm::InstVisitor { IRToTypeDeclMap type_decls; IRToValDeclMap value_decls; IRToStmtMap stmts; + DebugInfoCollector &dic; clang::Expr *GetOperandExpr(llvm::Value *val); + clang::QualType GetQualType(llvm::Type *type, llvm::DIType *ditype); clang::QualType GetQualType(llvm::Type *type); clang::Expr *CreateLiteralExpr(llvm::Constant *constant); @@ -46,7 +49,7 @@ class IRToASTVisitor : public llvm::InstVisitor { clang::Decl *GetOrCreateIntrinsic(llvm::InlineAsm *val); public: - IRToASTVisitor(clang::ASTUnit &unit); + IRToASTVisitor(clang::ASTUnit &unit, DebugInfoCollector &dic); clang::Stmt *GetOrCreateStmt(llvm::Value *val); clang::Decl *GetOrCreateDecl(llvm::Value *val); diff --git a/lib/AST/ASTBuilder.cpp b/lib/AST/ASTBuilder.cpp index cc0b7633..0c1f8f9d 100644 --- a/lib/AST/ASTBuilder.cpp +++ b/lib/AST/ASTBuilder.cpp @@ -442,4 +442,12 @@ clang::ReturnStmt *ASTBuilder::CreateReturn(clang::Expr *retval) { return CreateReturnStmt(ctx, retval); } +clang::TypedefDecl *ASTBuilder::CreateTypedefDecl(clang::DeclContext *decl_ctx, + clang::IdentifierInfo *id, + clang::QualType type) { + return clang::TypedefDecl::Create(ctx, decl_ctx, clang::SourceLocation(), + clang::SourceLocation(), id, + ctx.getTrivialTypeSourceInfo(type)); +} + } // namespace rellic \ No newline at end of file diff --git a/lib/AST/GenerateAST.cpp b/lib/AST/GenerateAST.cpp index d147c555..b2ecb6f1 100644 --- a/lib/AST/GenerateAST.cpp +++ b/lib/AST/GenerateAST.cpp @@ -367,10 +367,10 @@ clang::CompoundStmt *GenerateAST::StructureRegion(llvm::Region *region) { char GenerateAST::ID = 0; -GenerateAST::GenerateAST(clang::ASTUnit &unit) +GenerateAST::GenerateAST(clang::ASTUnit &unit, DebugInfoCollector &dic) : ModulePass(GenerateAST::ID), ast_ctx(&unit.getASTContext()), - ast_gen(unit), + ast_gen(unit, dic), ast(unit) {} void GenerateAST::getAnalysisUsage(llvm::AnalysisUsage &usage) const { diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 3b6e5775..ab8ce21b 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -5,7 +5,6 @@ * This source code is licensed in accordance with the terms specified in * the LICENSE file found in the root directory of this source tree. */ - #define GOOGLE_STRIP_LOG 1 #include "rellic/AST/IRToASTVisitor.h" @@ -22,10 +21,55 @@ namespace rellic { -IRToASTVisitor::IRToASTVisitor(clang::ASTUnit &unit) - : ast_ctx(unit.getASTContext()), ast(unit) {} +IRToASTVisitor::IRToASTVisitor(clang::ASTUnit &unit, DebugInfoCollector &dic) + : ast_ctx(unit.getASTContext()), ast(unit), dic(dic) {} clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { + return GetQualType(type, dic.GetIRTypeToDITypeMap()[type]); +} + +clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, + llvm::DIType *ditype) { + if (ditype) { + if (auto derived = llvm::dyn_cast(ditype)) { + auto base_type{derived->getBaseType()}; + switch (derived->getTag()) { + default: + break; + case llvm::dwarf::DW_TAG_const_type: + // FIXME(frabert): + // https://github.com/lifting-bits/rellic/issues/190#issuecomment-949694419 + if (!base_type) { + return ast_ctx.VoidTy; + } else { + return GetQualType(type, base_type); + } + case llvm::dwarf::DW_TAG_volatile_type: + if (!base_type) { + return ast_ctx.getVolatileType(ast_ctx.VoidTy); + } else { + return ast_ctx.getVolatileType(GetQualType(type, base_type)); + } + case llvm::dwarf::DW_TAG_restrict_type: + if (!base_type) { + return ast_ctx.getRestrictType(ast_ctx.VoidTy); + } else { + return ast_ctx.getRestrictType(GetQualType(type, base_type)); + } + case llvm::dwarf::DW_TAG_typedef: { + // TODO(frabert): typedefs need to be implemented in ASTPrinter first + // + // auto tudecl{ast_ctx.getTranslationUnitDecl()}; + // auto *tdef{ast.CreateTypedefDecl(tudecl, derived->getName().str(), + // GetQualType(type, base_type))}; + // tudecl->addDecl(tdef); + // return ast_ctx.getTypedefType(tdef); + return GetQualType(type, base_type); + } break; + } + } + } + DLOG(INFO) << "GetQualType: " << LLVMThingToString(type); clang::QualType result; switch (type->getTypeID()) { @@ -50,9 +94,15 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { break; case llvm::Type::IntegerTyID: { + int sign{0}; + if (ditype) { + auto inttype{llvm::cast(ditype)}; + sign = + inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + } auto size{type->getIntegerBitWidth()}; CHECK(size > 0) << "Integer bit width has to be greater than 0"; - result = ast.GetLeastIntTypeForBitWidth(size, /*sign=*/0); + result = ast.GetLeastIntTypeForBitWidth(size, sign); } break; case llvm::Type::FunctionTyID: { @@ -68,13 +118,23 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { } break; case llvm::Type::PointerTyID: { + auto derived{ditype ? llvm::cast(ditype) : nullptr}; + auto elem_ditype{derived ? derived->getBaseType() : nullptr}; auto ptr{llvm::cast(type)}; - result = ast_ctx.getPointerType(GetQualType(ptr->getElementType())); + auto elem_type{GetQualType(ptr->getElementType(), elem_ditype)}; + if (derived && !elem_ditype) { + result = ast_ctx.VoidPtrTy; + } else { + result = ast_ctx.getPointerType(elem_type); + } } break; case llvm::Type::ArrayTyID: { + auto derived{ditype ? llvm::cast(ditype) + : nullptr}; + auto elem_type{derived ? derived->getBaseType() : nullptr}; auto arr{llvm::cast(type)}; - auto elm{GetQualType(arr->getElementType())}; + auto elm{GetQualType(arr->getElementType(), elem_type)}; result = GetConstantArrayType(ast_ctx, elm, arr->getNumElements()); } break; @@ -376,7 +436,8 @@ void IRToASTVisitor::VisitGlobalVar(llvm::GlobalVariable &gvar) { name = "gvar" + std::to_string(GetNumDecls(tudecl)); } // Create a variable declaration - var = ast.CreateVarDecl(tudecl, GetQualType(type), name); + var = ast.CreateVarDecl( + tudecl, GetQualType(type, dic.GetIRToDITypeMap()[&gvar]), name); // Add to translation unit tudecl->addDecl(var); // Create an initalizer literal @@ -399,7 +460,8 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { auto func{arg.getParent()}; auto fdecl{clang::cast(GetOrCreateDecl(func))}; // Create a declaration - parm = ast.CreateParamDecl(fdecl, GetQualType(arg.getType()), name); + parm = ast.CreateParamDecl( + fdecl, GetQualType(arg.getType(), dic.GetIRToDITypeMap()[&arg]), name); } void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { @@ -418,7 +480,8 @@ void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { DLOG(INFO) << "Creating FunctionDecl for " << name; auto tudecl{ast_ctx.getTranslationUnitDecl()}; - auto type{GetQualType(func.getFunctionType())}; + auto ftype{func.getFunctionType()}; + auto type{GetQualType(ftype, dic.GetIRTypeToDITypeMap()[ftype])}; decl = ast.CreateFunctionDecl(tudecl, type, name); tudecl->addDecl(decl); @@ -634,7 +697,10 @@ void IRToASTVisitor::visitAllocaInst(llvm::AllocaInst &inst) { // (`varname_addr` being a common name used by clang for variables used as // storage for parameters e.g. a parameter named "foo" has a corresponding // local variable named "foo_addr"). - var = ast.CreateVarDecl(fdecl, GetQualType(inst.getAllocatedType()), name); + var = ast.CreateVarDecl( + fdecl, + GetQualType(inst.getAllocatedType(), dic.GetIRToDITypeMap()[&inst]), + name); fdecl->addDecl(var); } diff --git a/lib/AST/LocalDeclRenamer.cpp b/lib/AST/LocalDeclRenamer.cpp index cd6b13cd..451d3953 100644 --- a/lib/AST/LocalDeclRenamer.cpp +++ b/lib/AST/LocalDeclRenamer.cpp @@ -12,8 +12,6 @@ #include #include -#include - #include "rellic/AST/Compat/Stmt.h" namespace rellic { diff --git a/tools/decomp/Decomp.cpp b/tools/decomp/Decomp.cpp index c017064c..9b5833a6 100644 --- a/tools/decomp/Decomp.cpp +++ b/tools/decomp/Decomp.cpp @@ -209,7 +209,7 @@ static bool GeneratePseudocode(llvm::Module& module, auto ast_unit{clang::tooling::buildASTFromCodeWithArgs("", args, "out.c")}; llvm::legacy::PassManager pm_ast; - rellic::GenerateAST* gr{new rellic::GenerateAST(*ast_unit)}; + rellic::GenerateAST* gr{new rellic::GenerateAST(*ast_unit, dic)}; rellic::DeadStmtElim* dse{new rellic::DeadStmtElim(*ast_unit)}; rellic::LocalDeclRenamer* ldr{new rellic::LocalDeclRenamer( *ast_unit, dic.GetIRToNameMap(), gr->GetIRToValDeclMap())}; From 49e26017a9e0911ca1627b2cd198fa42736e6392 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 25 Oct 2021 16:51:01 +0200 Subject: [PATCH 02/30] Visit global variables, improve return types --- include/rellic/AST/DebugInfoCollector.h | 7 ++++++- lib/AST/DebugInfoCollector.cpp | 21 +++++++++++++++++++++ lib/AST/IRToASTVisitor.cpp | 3 ++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/rellic/AST/DebugInfoCollector.h b/include/rellic/AST/DebugInfoCollector.h index d007f450..19fe3c89 100644 --- a/include/rellic/AST/DebugInfoCollector.h +++ b/include/rellic/AST/DebugInfoCollector.h @@ -17,12 +17,14 @@ namespace rellic { using IRToNameMap = std::unordered_map; -using IRToScopeMap = std::unordered_map; +using IRToScopeMap = std::unordered_map; using IRToDITypeMap = std::unordered_map; using IRTypeToDITypeMap = std::unordered_map; using IRFuncToDITypeMap = std::unordered_map; using IRArgToDITypeMap = std::unordered_map; +using IRFuncTypeToDIRetTypeMap = + std::unordered_map; class DebugInfoCollector : public llvm::InstVisitor { private: @@ -32,6 +34,7 @@ class DebugInfoCollector : public llvm::InstVisitor { IRTypeToDITypeMap types; IRFuncToDITypeMap funcs; IRArgToDITypeMap args; + IRFuncTypeToDIRetTypeMap ret_types; void WalkType(llvm::Type *type, llvm::DIType *ditype); @@ -42,11 +45,13 @@ class DebugInfoCollector : public llvm::InstVisitor { IRTypeToDITypeMap &GetIRTypeToDITypeMap() { return types; } IRFuncToDITypeMap &GetIRFuncToDITypeMap() { return funcs; } IRArgToDITypeMap &GetIRArgToDITypeMap() { return args; } + IRFuncTypeToDIRetTypeMap &GetIRFuncTypeToDIRetTypeMap() { return ret_types; } void visitDbgDeclareInst(llvm::DbgDeclareInst &inst); void visitInstruction(llvm::Instruction &inst); void visitFunction(llvm::Function &func); + void visitModule(llvm::Module &module); }; } // namespace rellic \ No newline at end of file diff --git a/lib/AST/DebugInfoCollector.cpp b/lib/AST/DebugInfoCollector.cpp index 6905ea3b..edb65c7d 100644 --- a/lib/AST/DebugInfoCollector.cpp +++ b/lib/AST/DebugInfoCollector.cpp @@ -32,6 +32,26 @@ void DebugInfoCollector::visitDbgDeclareInst(llvm::DbgDeclareInst& inst) { WalkType(loc->getType(), var->getType()); } +void DebugInfoCollector::visitModule(llvm::Module& module) { + for (auto& gvar : module.globals()) { + llvm::SmallVector gves; + gvar.getDebugInfo(gves); + auto ptrtype{gvar.getType()}; + CHECK_LE(gves.size(), 1) + << "More than one DIGlobalVariableExpression for global variable"; + + if (gves.size() > 0) { + auto digve{gves[0]}; + auto digvar{digve->getVariable()}; + names[&gvar] = digvar->getName().str(); + scopes[&gvar] = digvar->getScope(); + valtypes[&gvar] = digvar->getType(); + + WalkType(ptrtype->getElementType(), digvar->getType()); + } + } +} + void DebugInfoCollector::visitInstruction(llvm::Instruction& inst) { if (auto loc{inst.getDebugLoc().get()}) { scopes[&inst] = loc->getScope(); @@ -88,6 +108,7 @@ void DebugInfoCollector::WalkType(llvm::Type* type, llvm::DIType* ditype) { std::copy(params.begin(), params.end(), std::back_inserter(type_array)); auto di_types{funcditype->getTypeArray()}; + ret_types[functype] = di_types[0]; if (type_array.size() != di_types.size()) { // Mismatch between bitcode and debug metadata, bail out break; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index ab8ce21b..07817bfa 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -107,7 +107,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - auto ret{GetQualType(func->getReturnType())}; + auto ret{GetQualType(func->getReturnType(), + dic.GetIRFuncTypeToDIRetTypeMap()[func])}; std::vector params; for (auto param : func->params()) { params.push_back(GetQualType(param)); From b32d10a330b50d26bc787d597ee6c0e06d9ca81f Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 25 Oct 2021 17:24:21 +0200 Subject: [PATCH 03/30] Implement typedef printing --- include/rellic/AST/ASTPrinter.h | 1 + lib/AST/ASTPrinter.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/rellic/AST/ASTPrinter.h b/include/rellic/AST/ASTPrinter.h index f90229f6..14458c09 100644 --- a/include/rellic/AST/ASTPrinter.h +++ b/include/rellic/AST/ASTPrinter.h @@ -101,6 +101,7 @@ class DeclTokenizer : public clang::DeclVisitor { void VisitTranslationUnitDecl(clang::TranslationUnitDecl *decl); void VisitFieldDecl(clang::FieldDecl *decl); void VisitRecordDecl(clang::RecordDecl *decl); + void VisitTypedefDecl(clang::TypedefDecl *decl); }; class StmtTokenizer : public clang::StmtVisitor { diff --git a/lib/AST/ASTPrinter.cpp b/lib/AST/ASTPrinter.cpp index 49bec0eb..e91e3ced 100644 --- a/lib/AST/ASTPrinter.cpp +++ b/lib/AST/ASTPrinter.cpp @@ -530,6 +530,26 @@ void DeclTokenizer::VisitRecordDecl(clang::RecordDecl *rdecl) { } } +void DeclTokenizer::VisitTypedefDecl(clang::TypedefDecl *decl) { + auto &policy{unit.getASTContext().getPrintingPolicy()}; + if (!policy.SuppressSpecifiers) { + out.push_back(Token::CreateMisc("typedef")); + Space(); + + if (decl->isModulePrivate()) { + out.push_back(Token::CreateMisc("__module_private__ ")); + } + } + clang::QualType type = decl->getTypeSourceInfo()->getType(); + + std::string buf{""}; + llvm::raw_string_ostream ss(buf); + type.print(ss, policy, decl->getName(), indent_level); + + out.push_back(Token::CreateDecl(decl, ss.str())); + PrintAttributes(decl); +} + void StmtTokenizer::Space() { SpaceImpl(out); } void StmtTokenizer::Indent() { IndentImpl(out, indent_level); } void StmtTokenizer::Newline() { NewlineImpl(out); } From 55d3a1346ce12587de237b51de2732dbe1b28993 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Tue, 26 Oct 2021 16:27:23 +0200 Subject: [PATCH 04/30] Improve type refinement for fields and arguments --- include/rellic/AST/IRToASTVisitor.h | 3 ++ lib/AST/IRToASTVisitor.cpp | 71 +++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/include/rellic/AST/IRToASTVisitor.h b/include/rellic/AST/IRToASTVisitor.h index eec62973..60dfb6db 100644 --- a/include/rellic/AST/IRToASTVisitor.h +++ b/include/rellic/AST/IRToASTVisitor.h @@ -28,6 +28,8 @@ namespace rellic { using IRToTypeDeclMap = std::unordered_map; using IRToValDeclMap = std::unordered_map; using IRToStmtMap = std::unordered_map; +using DIToTypedefMap = + std::unordered_map; class IRToASTVisitor : public llvm::InstVisitor { private: @@ -37,6 +39,7 @@ class IRToASTVisitor : public llvm::InstVisitor { IRToTypeDeclMap type_decls; IRToValDeclMap value_decls; + DIToTypedefMap typedef_decls; IRToStmtMap stmts; DebugInfoCollector &dic; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 07817bfa..d2a208e9 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -57,15 +57,18 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, return ast_ctx.getRestrictType(GetQualType(type, base_type)); } case llvm::dwarf::DW_TAG_typedef: { - // TODO(frabert): typedefs need to be implemented in ASTPrinter first - // - // auto tudecl{ast_ctx.getTranslationUnitDecl()}; - // auto *tdef{ast.CreateTypedefDecl(tudecl, derived->getName().str(), - // GetQualType(type, base_type))}; - // tudecl->addDecl(tdef); - // return ast_ctx.getTypedefType(tdef); - return GetQualType(type, base_type); + auto &tdef_decl{typedef_decls[derived]}; + if (!tdef_decl) { + auto tudecl{ast_ctx.getTranslationUnitDecl()}; + tdef_decl = ast.CreateTypedefDecl(tudecl, derived->getName().str(), + GetQualType(type, base_type)); + tudecl->addDecl(tdef_decl); + } + return ast_ctx.getTypedefType(tdef_decl); } break; + case llvm::dwarf::DW_TAG_member: { + ditype = derived->getBaseType(); + }; } } } @@ -96,9 +99,13 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::IntegerTyID: { int sign{0}; if (ditype) { - auto inttype{llvm::cast(ditype)}; - sign = - inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + // TODO(frabert): this path will not be taken when arguments will have + // been merged/split or when a struct passed by value has been optimized + // away + if (auto inttype = llvm::dyn_cast(ditype)) { + sign = + inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + } } auto size{type->getIntegerBitWidth()}; CHECK(size > 0) << "Integer bit width has to be greater than 0"; @@ -107,11 +114,21 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - auto ret{GetQualType(func->getReturnType(), - dic.GetIRFuncTypeToDIRetTypeMap()[func])}; + std::vector ditype_array{func->getNumParams() + 1}; + if (ditype) { + auto difunctype{llvm::cast(ditype)}; + auto arr{difunctype->getTypeArray()}; + if (arr.size() == ditype_array.size()) { + for (auto i{0UL}; i < arr.size(); ++i) { + ditype_array[i] = arr[i]; + } + } + } + auto ret{GetQualType(func->getReturnType(), ditype_array[0])}; std::vector params; + auto i{1UL}; for (auto param : func->params()) { - params.push_back(GetQualType(param)); + params.push_back(GetQualType(param, ditype_array[i++])); } auto epi{clang::FunctionProtoType::ExtProtoInfo()}; epi.Variadic = func->isVarArg(); @@ -145,6 +162,16 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, if (!decl) { auto tudecl{ast_ctx.getTranslationUnitDecl()}; auto strct{llvm::cast(type)}; + std::vector fields_ditype{strct->getNumElements()}; + if (ditype) { + auto strct_ditype{llvm::cast(ditype)}; + auto di_elems{strct_ditype->getElements()}; + if (di_elems.size() == fields_ditype.size()) { + for (auto i{0U}; i < di_elems.size(); ++i) { + fields_ditype[i] = llvm::cast(di_elems[i]); + } + } + } auto sname{strct->getName().str()}; if (sname.empty()) { auto num{GetNumDecls(tudecl)}; @@ -154,7 +181,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, decl = sdecl = ast.CreateStructDecl(tudecl, sname); // Add fields to the C struct for (auto ecnt{0U}; ecnt < strct->getNumElements(); ++ecnt) { - auto etype{GetQualType(strct->getElementType(ecnt))}; + auto etype{ + GetQualType(strct->getElementType(ecnt), fields_ditype[ecnt])}; auto fname{"field" + std::to_string(ecnt)}; sdecl->addDecl(ast.CreateFieldDecl(sdecl, etype, fname)); } @@ -175,7 +203,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, default: { if (type->isVectorTy()) { auto vtype{llvm::cast(type)}; - auto etype{GetQualType(vtype->getElementType())}; + auto etype{GetQualType(vtype->getElementType(), ditype)}; auto ecnt{GetNumElements(vtype)}; auto vkind{clang::VectorType::GenericVector}; result = ast_ctx.getVectorType(etype, ecnt, vkind); @@ -460,9 +488,16 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { // Get parent function declaration auto func{arg.getParent()}; auto fdecl{clang::cast(GetOrCreateDecl(func))}; + llvm::DIType *ditype{nullptr}; + auto difunctype{dic.GetIRFuncToDITypeMap()[func]}; + if (difunctype) { + auto ditype_array{difunctype->getTypeArray()}; + if (ditype_array.size() == func->getFunctionType()->getNumParams() + 1) { + ditype = ditype_array[arg.getArgNo() + 1]; + } + } // Create a declaration - parm = ast.CreateParamDecl( - fdecl, GetQualType(arg.getType(), dic.GetIRToDITypeMap()[&arg]), name); + parm = ast.CreateParamDecl(fdecl, GetQualType(arg.getType(), ditype), name); } void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { From 2b1e41de09fb64f7847d905ed2b508b23a4b0760 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Tue, 26 Oct 2021 17:58:35 +0200 Subject: [PATCH 05/30] Fix struct members --- lib/AST/IRToASTVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index d2a208e9..56ec5141 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -67,7 +67,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, return ast_ctx.getTypedefType(tdef_decl); } break; case llvm::dwarf::DW_TAG_member: { - ditype = derived->getBaseType(); + return GetQualType(type, derived->getBaseType()); }; } } From 7db935e46b48e99a7113c7f918c2e12db929a1f0 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 28 Oct 2021 14:22:07 +0200 Subject: [PATCH 06/30] Add explanation for checking argument count --- lib/AST/IRToASTVisitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 56ec5141..40103d93 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -118,6 +118,10 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, if (ditype) { auto difunctype{llvm::cast(ditype)}; auto arr{difunctype->getTypeArray()}; + // TODO(frabert): related to what happens a few lines above. + // Argument count between debug data and actual bitcode can differ + // due to ABI constraints. Need to figure out a way to reconcile the two + // views. if (arr.size() == ditype_array.size()) { for (auto i{0UL}; i < arr.size(); ++i) { ditype_array[i] = arr[i]; From 1067041e99fb0d036f18893d83127fbacbffc121 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 28 Oct 2021 15:11:57 +0200 Subject: [PATCH 07/30] Add unit test for `ASTBuilder::CreateTypedefDecl` --- unittests/AST/ASTBuilder.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/unittests/AST/ASTBuilder.cpp b/unittests/AST/ASTBuilder.cpp index f29b9287..a3f36098 100644 --- a/unittests/AST/ASTBuilder.cpp +++ b/unittests/AST/ASTBuilder.cpp @@ -8,6 +8,8 @@ #include "rellic/AST/ASTBuilder.h" +#include + #include "Util.h" namespace { @@ -1251,3 +1253,19 @@ TEST_SUITE("ASTBuilder::CreateReturn") { } } } + +TEST_SUITE("ASTBuilder::CreateTypedefDecl") { + SCENARIO("Create a typedef declaration") { + GIVEN("Empty translation unit") { + auto unit{GetASTUnit()}; + auto &ctx{unit->getASTContext()}; + rellic::ASTBuilder ast(*unit); + auto tudecl{ctx.getTranslationUnitDecl()}; + THEN("return a typedef int foo;") { + auto tdef_decl{ast.CreateTypedefDecl(tudecl, "foo", ctx.IntTy)}; + REQUIRE(tdef_decl != nullptr); + CHECK(clang::isa(tdef_decl)); + } + } + } +} \ No newline at end of file From 048643871382246caf47272c2461b46363a80e3f Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 29 Oct 2021 16:40:24 +0200 Subject: [PATCH 08/30] Fix varargs debug type analysis --- lib/AST/DebugInfoCollector.cpp | 7 +++---- lib/AST/IRToASTVisitor.cpp | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/AST/DebugInfoCollector.cpp b/lib/AST/DebugInfoCollector.cpp index edb65c7d..b006f8e0 100644 --- a/lib/AST/DebugInfoCollector.cpp +++ b/lib/AST/DebugInfoCollector.cpp @@ -109,7 +109,7 @@ void DebugInfoCollector::WalkType(llvm::Type* type, llvm::DIType* ditype) { auto di_types{funcditype->getTypeArray()}; ret_types[functype] = di_types[0]; - if (type_array.size() != di_types.size()) { + if (type_array.size() + functype->isVarArg() != di_types.size()) { // Mismatch between bitcode and debug metadata, bail out break; } @@ -160,8 +160,8 @@ void DebugInfoCollector::visitFunction(llvm::Function& func) { } auto ditype{subprogram->getType()}; - - if (func.arg_size() + 1 != ditype->getTypeArray().size()) { + auto type_array{ditype->getTypeArray()}; + if (func.arg_size() + func.isVarArg() + 1 != type_array.size()) { // Debug metadata is not compatible with bitcode, bail out // TODO(frabert): Find a way to reconcile differences return; @@ -169,7 +169,6 @@ void DebugInfoCollector::visitFunction(llvm::Function& func) { funcs[&func] = ditype; size_t i{1}; - auto type_array{ditype->getTypeArray()}; for (auto& arg : func.args()) { auto argtype{type_array[i++]}; args[&arg] = argtype; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 40103d93..4d64bdb9 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -114,7 +114,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - std::vector ditype_array{func->getNumParams() + 1}; + std::vector ditype_array{func->getNumParams() + + func->isVarArg() + 1}; if (ditype) { auto difunctype{llvm::cast(ditype)}; auto arr{difunctype->getTypeArray()}; From 171e90ec44b3519e8611878ff3fa22e1580bd0c0 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 29 Oct 2021 16:45:37 +0200 Subject: [PATCH 09/30] Use more debug info for prototypes --- scripts/roundtrip.py | 16 +++++++++++++++- tests/tools/decomp/conflicting_global.c | 1 + tests/tools/decomp/template_parameter_pack.cpp | 5 ++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/roundtrip.py b/scripts/roundtrip.py index 388eb78e..03eece98 100755 --- a/scripts/roundtrip.py +++ b/scripts/roundtrip.py @@ -118,7 +118,21 @@ class TestRoundtrip(unittest.TestCase): def test_generator(path): def test(self): roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only) - roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, ["-g3"]) + debug_flags = [ + "-O1", + "-g3", + "-gfull", + "-gdwarf-5", + #"-fno-eliminate-unused-debug-symbols", + "-fno-common", + "-fno-builtin", + "-ffreestanding", + "-nostdlib", + "-fno-inline", + "-fno-assume-sane-operator-new", + "-mno-inline-all-stringops" + ] + roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, debug_flags) return test diff --git a/tests/tools/decomp/conflicting_global.c b/tests/tools/decomp/conflicting_global.c index 9d46226b..08f79bb4 100644 --- a/tests/tools/decomp/conflicting_global.c +++ b/tests/tools/decomp/conflicting_global.c @@ -8,4 +8,5 @@ int main(void) { printf("%d\n", a); } printf("%d\n", a); + return 0; } diff --git a/tests/tools/decomp/template_parameter_pack.cpp b/tests/tools/decomp/template_parameter_pack.cpp index 7610c14f..fd78d235 100644 --- a/tests/tools/decomp/template_parameter_pack.cpp +++ b/tests/tools/decomp/template_parameter_pack.cpp @@ -10,4 +10,7 @@ T sum(T x, Ts... y) { return x + sum(y...); } -int main(void) { printf("%d\n", sum(1, 2, 3, 4, 5)); } \ No newline at end of file +int main(void) { + printf("%d\n", sum(1, 2, 3, 4, 5)); + return 0; +} \ No newline at end of file From 09f8a2ec695a673a322ff44d4b7959e7667a9e18 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:26:41 +0100 Subject: [PATCH 10/30] Fix function argument type refinement --- lib/AST/IRToASTVisitor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 4d64bdb9..b1b05145 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -497,7 +497,9 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { auto difunctype{dic.GetIRFuncToDITypeMap()[func]}; if (difunctype) { auto ditype_array{difunctype->getTypeArray()}; - if (ditype_array.size() == func->getFunctionType()->getNumParams() + 1) { + auto functype{func->getFunctionType()}; + if (ditype_array.size() == + functype->getNumParams() + functype->isVarArg() + 1) { ditype = ditype_array[arg.getArgNo() + 1]; } } @@ -522,7 +524,7 @@ void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { DLOG(INFO) << "Creating FunctionDecl for " << name; auto tudecl{ast_ctx.getTranslationUnitDecl()}; auto ftype{func.getFunctionType()}; - auto type{GetQualType(ftype, dic.GetIRTypeToDITypeMap()[ftype])}; + auto type{GetQualType(ftype, dic.GetIRFuncToDITypeMap()[&func])}; decl = ast.CreateFunctionDecl(tudecl, type, name); tudecl->addDecl(decl); From bdd21e40fe77caa069511bc54bd9b449faf5411a Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:26:56 +0100 Subject: [PATCH 11/30] Default to signed integers --- lib/AST/IRToASTVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index b1b05145..edf6475a 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -97,7 +97,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, break; case llvm::Type::IntegerTyID: { - int sign{0}; + int sign{1}; if (ditype) { // TODO(frabert): this path will not be taken when arguments will have // been merged/split or when a struct passed by value has been optimized From 7c474ebd45689e8df720e423428071c4883e1ca3 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:27:55 +0100 Subject: [PATCH 12/30] Fix tests --- scripts/roundtrip.py | 4 +++- tests/tools/decomp/template_parameter_pack.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/roundtrip.py b/scripts/roundtrip.py index 03eece98..6101f30a 100755 --- a/scripts/roundtrip.py +++ b/scripts/roundtrip.py @@ -130,7 +130,9 @@ def test(self): "-nostdlib", "-fno-inline", "-fno-assume-sane-operator-new", - "-mno-inline-all-stringops" + "-mno-inline-all-stringops", + "-Xclang", + "-disable-llvm-passes" ] roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, debug_flags) diff --git a/tests/tools/decomp/template_parameter_pack.cpp b/tests/tools/decomp/template_parameter_pack.cpp index fd78d235..5a7a0955 100644 --- a/tests/tools/decomp/template_parameter_pack.cpp +++ b/tests/tools/decomp/template_parameter_pack.cpp @@ -10,7 +10,7 @@ T sum(T x, Ts... y) { return x + sum(y...); } -int main(void) { +extern "C" int main(void) { printf("%d\n", sum(1, 2, 3, 4, 5)); return 0; } \ No newline at end of file From e0e6efe3986bac87ab02405904ff03da24e72e00 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 1 Nov 2021 14:32:27 +0100 Subject: [PATCH 13/30] Desugar types for Z3 conversion --- lib/AST/Z3ConvVisitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/AST/Z3ConvVisitor.cpp b/lib/AST/Z3ConvVisitor.cpp index f2f62fc2..ce33b4ab 100644 --- a/lib/AST/Z3ConvVisitor.cpp +++ b/lib/AST/Z3ConvVisitor.cpp @@ -6,7 +6,7 @@ * the LICENSE file found in the root directory of this source tree. */ -#define GOOGLE_STRIP_LOG 1 +#define GOOGLE_STRIP_LOG 0 #include "rellic/AST/Z3ConvVisitor.h" @@ -174,6 +174,7 @@ clang::ValueDecl *Z3ConvVisitor::GetCValDecl(z3::func_decl z_decl) { } z3::sort Z3ConvVisitor::GetZ3Sort(clang::QualType type) { + type = type.getDesugaredType(*c_ctx); // Void if (type->isVoidType()) { return z_ctx->uninterpreted_sort("void"); From 175c073db4cbf4bcdb439f1d38dc20322e43a55a Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 22 Oct 2021 17:29:23 +0200 Subject: [PATCH 14/30] Initial work on refining types --- include/rellic/AST/ASTBuilder.h | 10 ++++ include/rellic/AST/GenerateAST.h | 2 +- include/rellic/AST/IRToASTVisitor.h | 5 +- lib/AST/ASTBuilder.cpp | 8 +++ lib/AST/GenerateAST.cpp | 4 +- lib/AST/IRToASTVisitor.cpp | 86 +++++++++++++++++++++++++---- lib/AST/LocalDeclRenamer.cpp | 2 - tools/decomp/Decomp.cpp | 2 +- 8 files changed, 102 insertions(+), 17 deletions(-) diff --git a/include/rellic/AST/ASTBuilder.h b/include/rellic/AST/ASTBuilder.h index 7191a808..d15a7441 100644 --- a/include/rellic/AST/ASTBuilder.h +++ b/include/rellic/AST/ASTBuilder.h @@ -257,6 +257,16 @@ class ASTBuilder { clang::BreakStmt *CreateBreak(); // Return clang::ReturnStmt *CreateReturn(clang::Expr *retval = nullptr); + // Typedef declaration + clang::TypedefDecl *CreateTypedefDecl(clang::DeclContext *decl_ctx, + clang::IdentifierInfo *id, + clang::QualType type); + + clang::TypedefDecl *CreateTypedefDecl(clang::DeclContext *decl_ctx, + std::string name, + clang::QualType type) { + return CreateTypedefDecl(decl_ctx, CreateIdentifier(name), type); + } }; } // namespace rellic \ No newline at end of file diff --git a/include/rellic/AST/GenerateAST.h b/include/rellic/AST/GenerateAST.h index c87e507f..a935cd26 100644 --- a/include/rellic/AST/GenerateAST.h +++ b/include/rellic/AST/GenerateAST.h @@ -51,7 +51,7 @@ class GenerateAST : public llvm::ModulePass { public: static char ID; - GenerateAST(clang::ASTUnit &unit); + GenerateAST(clang::ASTUnit &unit, DebugInfoCollector &dic); IRToStmtMap &GetIRToStmtMap() { return ast_gen.GetIRToStmtMap(); } IRToValDeclMap &GetIRToValDeclMap() { return ast_gen.GetIRToValDeclMap(); } diff --git a/include/rellic/AST/IRToASTVisitor.h b/include/rellic/AST/IRToASTVisitor.h index 23435126..eec62973 100644 --- a/include/rellic/AST/IRToASTVisitor.h +++ b/include/rellic/AST/IRToASTVisitor.h @@ -21,6 +21,7 @@ #include "rellic/AST/ASTBuilder.h" #include "rellic/AST/Compat/ASTContext.h" +#include "rellic/AST/DebugInfoCollector.h" namespace rellic { @@ -37,8 +38,10 @@ class IRToASTVisitor : public llvm::InstVisitor { IRToTypeDeclMap type_decls; IRToValDeclMap value_decls; IRToStmtMap stmts; + DebugInfoCollector &dic; clang::Expr *GetOperandExpr(llvm::Value *val); + clang::QualType GetQualType(llvm::Type *type, llvm::DIType *ditype); clang::QualType GetQualType(llvm::Type *type); clang::Expr *CreateLiteralExpr(llvm::Constant *constant); @@ -46,7 +49,7 @@ class IRToASTVisitor : public llvm::InstVisitor { clang::Decl *GetOrCreateIntrinsic(llvm::InlineAsm *val); public: - IRToASTVisitor(clang::ASTUnit &unit); + IRToASTVisitor(clang::ASTUnit &unit, DebugInfoCollector &dic); clang::Stmt *GetOrCreateStmt(llvm::Value *val); clang::Decl *GetOrCreateDecl(llvm::Value *val); diff --git a/lib/AST/ASTBuilder.cpp b/lib/AST/ASTBuilder.cpp index cc0b7633..0c1f8f9d 100644 --- a/lib/AST/ASTBuilder.cpp +++ b/lib/AST/ASTBuilder.cpp @@ -442,4 +442,12 @@ clang::ReturnStmt *ASTBuilder::CreateReturn(clang::Expr *retval) { return CreateReturnStmt(ctx, retval); } +clang::TypedefDecl *ASTBuilder::CreateTypedefDecl(clang::DeclContext *decl_ctx, + clang::IdentifierInfo *id, + clang::QualType type) { + return clang::TypedefDecl::Create(ctx, decl_ctx, clang::SourceLocation(), + clang::SourceLocation(), id, + ctx.getTrivialTypeSourceInfo(type)); +} + } // namespace rellic \ No newline at end of file diff --git a/lib/AST/GenerateAST.cpp b/lib/AST/GenerateAST.cpp index d147c555..b2ecb6f1 100644 --- a/lib/AST/GenerateAST.cpp +++ b/lib/AST/GenerateAST.cpp @@ -367,10 +367,10 @@ clang::CompoundStmt *GenerateAST::StructureRegion(llvm::Region *region) { char GenerateAST::ID = 0; -GenerateAST::GenerateAST(clang::ASTUnit &unit) +GenerateAST::GenerateAST(clang::ASTUnit &unit, DebugInfoCollector &dic) : ModulePass(GenerateAST::ID), ast_ctx(&unit.getASTContext()), - ast_gen(unit), + ast_gen(unit, dic), ast(unit) {} void GenerateAST::getAnalysisUsage(llvm::AnalysisUsage &usage) const { diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 3b6e5775..ab8ce21b 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -5,7 +5,6 @@ * This source code is licensed in accordance with the terms specified in * the LICENSE file found in the root directory of this source tree. */ - #define GOOGLE_STRIP_LOG 1 #include "rellic/AST/IRToASTVisitor.h" @@ -22,10 +21,55 @@ namespace rellic { -IRToASTVisitor::IRToASTVisitor(clang::ASTUnit &unit) - : ast_ctx(unit.getASTContext()), ast(unit) {} +IRToASTVisitor::IRToASTVisitor(clang::ASTUnit &unit, DebugInfoCollector &dic) + : ast_ctx(unit.getASTContext()), ast(unit), dic(dic) {} clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { + return GetQualType(type, dic.GetIRTypeToDITypeMap()[type]); +} + +clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, + llvm::DIType *ditype) { + if (ditype) { + if (auto derived = llvm::dyn_cast(ditype)) { + auto base_type{derived->getBaseType()}; + switch (derived->getTag()) { + default: + break; + case llvm::dwarf::DW_TAG_const_type: + // FIXME(frabert): + // https://github.com/lifting-bits/rellic/issues/190#issuecomment-949694419 + if (!base_type) { + return ast_ctx.VoidTy; + } else { + return GetQualType(type, base_type); + } + case llvm::dwarf::DW_TAG_volatile_type: + if (!base_type) { + return ast_ctx.getVolatileType(ast_ctx.VoidTy); + } else { + return ast_ctx.getVolatileType(GetQualType(type, base_type)); + } + case llvm::dwarf::DW_TAG_restrict_type: + if (!base_type) { + return ast_ctx.getRestrictType(ast_ctx.VoidTy); + } else { + return ast_ctx.getRestrictType(GetQualType(type, base_type)); + } + case llvm::dwarf::DW_TAG_typedef: { + // TODO(frabert): typedefs need to be implemented in ASTPrinter first + // + // auto tudecl{ast_ctx.getTranslationUnitDecl()}; + // auto *tdef{ast.CreateTypedefDecl(tudecl, derived->getName().str(), + // GetQualType(type, base_type))}; + // tudecl->addDecl(tdef); + // return ast_ctx.getTypedefType(tdef); + return GetQualType(type, base_type); + } break; + } + } + } + DLOG(INFO) << "GetQualType: " << LLVMThingToString(type); clang::QualType result; switch (type->getTypeID()) { @@ -50,9 +94,15 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { break; case llvm::Type::IntegerTyID: { + int sign{0}; + if (ditype) { + auto inttype{llvm::cast(ditype)}; + sign = + inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + } auto size{type->getIntegerBitWidth()}; CHECK(size > 0) << "Integer bit width has to be greater than 0"; - result = ast.GetLeastIntTypeForBitWidth(size, /*sign=*/0); + result = ast.GetLeastIntTypeForBitWidth(size, sign); } break; case llvm::Type::FunctionTyID: { @@ -68,13 +118,23 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type) { } break; case llvm::Type::PointerTyID: { + auto derived{ditype ? llvm::cast(ditype) : nullptr}; + auto elem_ditype{derived ? derived->getBaseType() : nullptr}; auto ptr{llvm::cast(type)}; - result = ast_ctx.getPointerType(GetQualType(ptr->getElementType())); + auto elem_type{GetQualType(ptr->getElementType(), elem_ditype)}; + if (derived && !elem_ditype) { + result = ast_ctx.VoidPtrTy; + } else { + result = ast_ctx.getPointerType(elem_type); + } } break; case llvm::Type::ArrayTyID: { + auto derived{ditype ? llvm::cast(ditype) + : nullptr}; + auto elem_type{derived ? derived->getBaseType() : nullptr}; auto arr{llvm::cast(type)}; - auto elm{GetQualType(arr->getElementType())}; + auto elm{GetQualType(arr->getElementType(), elem_type)}; result = GetConstantArrayType(ast_ctx, elm, arr->getNumElements()); } break; @@ -376,7 +436,8 @@ void IRToASTVisitor::VisitGlobalVar(llvm::GlobalVariable &gvar) { name = "gvar" + std::to_string(GetNumDecls(tudecl)); } // Create a variable declaration - var = ast.CreateVarDecl(tudecl, GetQualType(type), name); + var = ast.CreateVarDecl( + tudecl, GetQualType(type, dic.GetIRToDITypeMap()[&gvar]), name); // Add to translation unit tudecl->addDecl(var); // Create an initalizer literal @@ -399,7 +460,8 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { auto func{arg.getParent()}; auto fdecl{clang::cast(GetOrCreateDecl(func))}; // Create a declaration - parm = ast.CreateParamDecl(fdecl, GetQualType(arg.getType()), name); + parm = ast.CreateParamDecl( + fdecl, GetQualType(arg.getType(), dic.GetIRToDITypeMap()[&arg]), name); } void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { @@ -418,7 +480,8 @@ void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { DLOG(INFO) << "Creating FunctionDecl for " << name; auto tudecl{ast_ctx.getTranslationUnitDecl()}; - auto type{GetQualType(func.getFunctionType())}; + auto ftype{func.getFunctionType()}; + auto type{GetQualType(ftype, dic.GetIRTypeToDITypeMap()[ftype])}; decl = ast.CreateFunctionDecl(tudecl, type, name); tudecl->addDecl(decl); @@ -634,7 +697,10 @@ void IRToASTVisitor::visitAllocaInst(llvm::AllocaInst &inst) { // (`varname_addr` being a common name used by clang for variables used as // storage for parameters e.g. a parameter named "foo" has a corresponding // local variable named "foo_addr"). - var = ast.CreateVarDecl(fdecl, GetQualType(inst.getAllocatedType()), name); + var = ast.CreateVarDecl( + fdecl, + GetQualType(inst.getAllocatedType(), dic.GetIRToDITypeMap()[&inst]), + name); fdecl->addDecl(var); } diff --git a/lib/AST/LocalDeclRenamer.cpp b/lib/AST/LocalDeclRenamer.cpp index cd6b13cd..451d3953 100644 --- a/lib/AST/LocalDeclRenamer.cpp +++ b/lib/AST/LocalDeclRenamer.cpp @@ -12,8 +12,6 @@ #include #include -#include - #include "rellic/AST/Compat/Stmt.h" namespace rellic { diff --git a/tools/decomp/Decomp.cpp b/tools/decomp/Decomp.cpp index 04a29e8b..66b795ea 100644 --- a/tools/decomp/Decomp.cpp +++ b/tools/decomp/Decomp.cpp @@ -209,7 +209,7 @@ static bool GeneratePseudocode(llvm::Module& module, auto ast_unit{clang::tooling::buildASTFromCodeWithArgs("", args, "out.c")}; llvm::legacy::PassManager pm_ast; - rellic::GenerateAST* gr{new rellic::GenerateAST(*ast_unit)}; + rellic::GenerateAST* gr{new rellic::GenerateAST(*ast_unit, dic)}; rellic::DeadStmtElim* dse{new rellic::DeadStmtElim(*ast_unit)}; rellic::LocalDeclRenamer* ldr{new rellic::LocalDeclRenamer( *ast_unit, dic.GetIRToNameMap(), gr->GetIRToValDeclMap())}; From b55a8d08ffa647d708ec69c8138d91afcbf7fa85 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 25 Oct 2021 16:51:01 +0200 Subject: [PATCH 15/30] Visit global variables, improve return types --- include/rellic/AST/DebugInfoCollector.h | 7 ++++++- lib/AST/DebugInfoCollector.cpp | 21 +++++++++++++++++++++ lib/AST/IRToASTVisitor.cpp | 3 ++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/rellic/AST/DebugInfoCollector.h b/include/rellic/AST/DebugInfoCollector.h index d007f450..19fe3c89 100644 --- a/include/rellic/AST/DebugInfoCollector.h +++ b/include/rellic/AST/DebugInfoCollector.h @@ -17,12 +17,14 @@ namespace rellic { using IRToNameMap = std::unordered_map; -using IRToScopeMap = std::unordered_map; +using IRToScopeMap = std::unordered_map; using IRToDITypeMap = std::unordered_map; using IRTypeToDITypeMap = std::unordered_map; using IRFuncToDITypeMap = std::unordered_map; using IRArgToDITypeMap = std::unordered_map; +using IRFuncTypeToDIRetTypeMap = + std::unordered_map; class DebugInfoCollector : public llvm::InstVisitor { private: @@ -32,6 +34,7 @@ class DebugInfoCollector : public llvm::InstVisitor { IRTypeToDITypeMap types; IRFuncToDITypeMap funcs; IRArgToDITypeMap args; + IRFuncTypeToDIRetTypeMap ret_types; void WalkType(llvm::Type *type, llvm::DIType *ditype); @@ -42,11 +45,13 @@ class DebugInfoCollector : public llvm::InstVisitor { IRTypeToDITypeMap &GetIRTypeToDITypeMap() { return types; } IRFuncToDITypeMap &GetIRFuncToDITypeMap() { return funcs; } IRArgToDITypeMap &GetIRArgToDITypeMap() { return args; } + IRFuncTypeToDIRetTypeMap &GetIRFuncTypeToDIRetTypeMap() { return ret_types; } void visitDbgDeclareInst(llvm::DbgDeclareInst &inst); void visitInstruction(llvm::Instruction &inst); void visitFunction(llvm::Function &func); + void visitModule(llvm::Module &module); }; } // namespace rellic \ No newline at end of file diff --git a/lib/AST/DebugInfoCollector.cpp b/lib/AST/DebugInfoCollector.cpp index 6905ea3b..edb65c7d 100644 --- a/lib/AST/DebugInfoCollector.cpp +++ b/lib/AST/DebugInfoCollector.cpp @@ -32,6 +32,26 @@ void DebugInfoCollector::visitDbgDeclareInst(llvm::DbgDeclareInst& inst) { WalkType(loc->getType(), var->getType()); } +void DebugInfoCollector::visitModule(llvm::Module& module) { + for (auto& gvar : module.globals()) { + llvm::SmallVector gves; + gvar.getDebugInfo(gves); + auto ptrtype{gvar.getType()}; + CHECK_LE(gves.size(), 1) + << "More than one DIGlobalVariableExpression for global variable"; + + if (gves.size() > 0) { + auto digve{gves[0]}; + auto digvar{digve->getVariable()}; + names[&gvar] = digvar->getName().str(); + scopes[&gvar] = digvar->getScope(); + valtypes[&gvar] = digvar->getType(); + + WalkType(ptrtype->getElementType(), digvar->getType()); + } + } +} + void DebugInfoCollector::visitInstruction(llvm::Instruction& inst) { if (auto loc{inst.getDebugLoc().get()}) { scopes[&inst] = loc->getScope(); @@ -88,6 +108,7 @@ void DebugInfoCollector::WalkType(llvm::Type* type, llvm::DIType* ditype) { std::copy(params.begin(), params.end(), std::back_inserter(type_array)); auto di_types{funcditype->getTypeArray()}; + ret_types[functype] = di_types[0]; if (type_array.size() != di_types.size()) { // Mismatch between bitcode and debug metadata, bail out break; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index ab8ce21b..07817bfa 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -107,7 +107,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - auto ret{GetQualType(func->getReturnType())}; + auto ret{GetQualType(func->getReturnType(), + dic.GetIRFuncTypeToDIRetTypeMap()[func])}; std::vector params; for (auto param : func->params()) { params.push_back(GetQualType(param)); From 287e1370c8c6e5fb02cd277fec111f74149ef20e Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 25 Oct 2021 17:24:21 +0200 Subject: [PATCH 16/30] Implement typedef printing --- include/rellic/AST/ASTPrinter.h | 1 + lib/AST/ASTPrinter.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/rellic/AST/ASTPrinter.h b/include/rellic/AST/ASTPrinter.h index f90229f6..14458c09 100644 --- a/include/rellic/AST/ASTPrinter.h +++ b/include/rellic/AST/ASTPrinter.h @@ -101,6 +101,7 @@ class DeclTokenizer : public clang::DeclVisitor { void VisitTranslationUnitDecl(clang::TranslationUnitDecl *decl); void VisitFieldDecl(clang::FieldDecl *decl); void VisitRecordDecl(clang::RecordDecl *decl); + void VisitTypedefDecl(clang::TypedefDecl *decl); }; class StmtTokenizer : public clang::StmtVisitor { diff --git a/lib/AST/ASTPrinter.cpp b/lib/AST/ASTPrinter.cpp index 032bbd19..4ec28dbe 100644 --- a/lib/AST/ASTPrinter.cpp +++ b/lib/AST/ASTPrinter.cpp @@ -531,6 +531,26 @@ void DeclTokenizer::VisitRecordDecl(clang::RecordDecl *rdecl) { } } +void DeclTokenizer::VisitTypedefDecl(clang::TypedefDecl *decl) { + auto &policy{unit.getASTContext().getPrintingPolicy()}; + if (!policy.SuppressSpecifiers) { + out.push_back(Token::CreateMisc("typedef")); + Space(); + + if (decl->isModulePrivate()) { + out.push_back(Token::CreateMisc("__module_private__ ")); + } + } + clang::QualType type = decl->getTypeSourceInfo()->getType(); + + std::string buf{""}; + llvm::raw_string_ostream ss(buf); + type.print(ss, policy, decl->getName(), indent_level); + + out.push_back(Token::CreateDecl(decl, ss.str())); + PrintAttributes(decl); +} + void StmtTokenizer::Space() { SpaceImpl(out); } void StmtTokenizer::Indent() { IndentImpl(out, indent_level); } void StmtTokenizer::Newline() { NewlineImpl(out); } From 229219358bfdcf362854c19150aa576da7a510d0 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Tue, 26 Oct 2021 16:27:23 +0200 Subject: [PATCH 17/30] Improve type refinement for fields and arguments --- include/rellic/AST/IRToASTVisitor.h | 3 ++ lib/AST/IRToASTVisitor.cpp | 71 +++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/include/rellic/AST/IRToASTVisitor.h b/include/rellic/AST/IRToASTVisitor.h index eec62973..60dfb6db 100644 --- a/include/rellic/AST/IRToASTVisitor.h +++ b/include/rellic/AST/IRToASTVisitor.h @@ -28,6 +28,8 @@ namespace rellic { using IRToTypeDeclMap = std::unordered_map; using IRToValDeclMap = std::unordered_map; using IRToStmtMap = std::unordered_map; +using DIToTypedefMap = + std::unordered_map; class IRToASTVisitor : public llvm::InstVisitor { private: @@ -37,6 +39,7 @@ class IRToASTVisitor : public llvm::InstVisitor { IRToTypeDeclMap type_decls; IRToValDeclMap value_decls; + DIToTypedefMap typedef_decls; IRToStmtMap stmts; DebugInfoCollector &dic; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 07817bfa..d2a208e9 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -57,15 +57,18 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, return ast_ctx.getRestrictType(GetQualType(type, base_type)); } case llvm::dwarf::DW_TAG_typedef: { - // TODO(frabert): typedefs need to be implemented in ASTPrinter first - // - // auto tudecl{ast_ctx.getTranslationUnitDecl()}; - // auto *tdef{ast.CreateTypedefDecl(tudecl, derived->getName().str(), - // GetQualType(type, base_type))}; - // tudecl->addDecl(tdef); - // return ast_ctx.getTypedefType(tdef); - return GetQualType(type, base_type); + auto &tdef_decl{typedef_decls[derived]}; + if (!tdef_decl) { + auto tudecl{ast_ctx.getTranslationUnitDecl()}; + tdef_decl = ast.CreateTypedefDecl(tudecl, derived->getName().str(), + GetQualType(type, base_type)); + tudecl->addDecl(tdef_decl); + } + return ast_ctx.getTypedefType(tdef_decl); } break; + case llvm::dwarf::DW_TAG_member: { + ditype = derived->getBaseType(); + }; } } } @@ -96,9 +99,13 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::IntegerTyID: { int sign{0}; if (ditype) { - auto inttype{llvm::cast(ditype)}; - sign = - inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + // TODO(frabert): this path will not be taken when arguments will have + // been merged/split or when a struct passed by value has been optimized + // away + if (auto inttype = llvm::dyn_cast(ditype)) { + sign = + inttype->getSignedness() == llvm::DIBasicType::Signedness::Signed; + } } auto size{type->getIntegerBitWidth()}; CHECK(size > 0) << "Integer bit width has to be greater than 0"; @@ -107,11 +114,21 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - auto ret{GetQualType(func->getReturnType(), - dic.GetIRFuncTypeToDIRetTypeMap()[func])}; + std::vector ditype_array{func->getNumParams() + 1}; + if (ditype) { + auto difunctype{llvm::cast(ditype)}; + auto arr{difunctype->getTypeArray()}; + if (arr.size() == ditype_array.size()) { + for (auto i{0UL}; i < arr.size(); ++i) { + ditype_array[i] = arr[i]; + } + } + } + auto ret{GetQualType(func->getReturnType(), ditype_array[0])}; std::vector params; + auto i{1UL}; for (auto param : func->params()) { - params.push_back(GetQualType(param)); + params.push_back(GetQualType(param, ditype_array[i++])); } auto epi{clang::FunctionProtoType::ExtProtoInfo()}; epi.Variadic = func->isVarArg(); @@ -145,6 +162,16 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, if (!decl) { auto tudecl{ast_ctx.getTranslationUnitDecl()}; auto strct{llvm::cast(type)}; + std::vector fields_ditype{strct->getNumElements()}; + if (ditype) { + auto strct_ditype{llvm::cast(ditype)}; + auto di_elems{strct_ditype->getElements()}; + if (di_elems.size() == fields_ditype.size()) { + for (auto i{0U}; i < di_elems.size(); ++i) { + fields_ditype[i] = llvm::cast(di_elems[i]); + } + } + } auto sname{strct->getName().str()}; if (sname.empty()) { auto num{GetNumDecls(tudecl)}; @@ -154,7 +181,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, decl = sdecl = ast.CreateStructDecl(tudecl, sname); // Add fields to the C struct for (auto ecnt{0U}; ecnt < strct->getNumElements(); ++ecnt) { - auto etype{GetQualType(strct->getElementType(ecnt))}; + auto etype{ + GetQualType(strct->getElementType(ecnt), fields_ditype[ecnt])}; auto fname{"field" + std::to_string(ecnt)}; sdecl->addDecl(ast.CreateFieldDecl(sdecl, etype, fname)); } @@ -175,7 +203,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, default: { if (type->isVectorTy()) { auto vtype{llvm::cast(type)}; - auto etype{GetQualType(vtype->getElementType())}; + auto etype{GetQualType(vtype->getElementType(), ditype)}; auto ecnt{GetNumElements(vtype)}; auto vkind{clang::VectorType::GenericVector}; result = ast_ctx.getVectorType(etype, ecnt, vkind); @@ -460,9 +488,16 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { // Get parent function declaration auto func{arg.getParent()}; auto fdecl{clang::cast(GetOrCreateDecl(func))}; + llvm::DIType *ditype{nullptr}; + auto difunctype{dic.GetIRFuncToDITypeMap()[func]}; + if (difunctype) { + auto ditype_array{difunctype->getTypeArray()}; + if (ditype_array.size() == func->getFunctionType()->getNumParams() + 1) { + ditype = ditype_array[arg.getArgNo() + 1]; + } + } // Create a declaration - parm = ast.CreateParamDecl( - fdecl, GetQualType(arg.getType(), dic.GetIRToDITypeMap()[&arg]), name); + parm = ast.CreateParamDecl(fdecl, GetQualType(arg.getType(), ditype), name); } void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { From c3da1955b8c3aa75d25f30be0ec71fe9ce1c1e73 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Tue, 26 Oct 2021 17:58:35 +0200 Subject: [PATCH 18/30] Fix struct members --- lib/AST/IRToASTVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index d2a208e9..56ec5141 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -67,7 +67,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, return ast_ctx.getTypedefType(tdef_decl); } break; case llvm::dwarf::DW_TAG_member: { - ditype = derived->getBaseType(); + return GetQualType(type, derived->getBaseType()); }; } } From 36cb7c81dc485970afee7db85bb20c7bf73e6df3 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 28 Oct 2021 14:22:07 +0200 Subject: [PATCH 19/30] Add explanation for checking argument count --- lib/AST/IRToASTVisitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 56ec5141..40103d93 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -118,6 +118,10 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, if (ditype) { auto difunctype{llvm::cast(ditype)}; auto arr{difunctype->getTypeArray()}; + // TODO(frabert): related to what happens a few lines above. + // Argument count between debug data and actual bitcode can differ + // due to ABI constraints. Need to figure out a way to reconcile the two + // views. if (arr.size() == ditype_array.size()) { for (auto i{0UL}; i < arr.size(); ++i) { ditype_array[i] = arr[i]; From 33294d39f10947ff867719bf75d221272cffc04c Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 28 Oct 2021 15:11:57 +0200 Subject: [PATCH 20/30] Add unit test for `ASTBuilder::CreateTypedefDecl` --- unittests/AST/ASTBuilder.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/unittests/AST/ASTBuilder.cpp b/unittests/AST/ASTBuilder.cpp index 8daf7453..933d8fc6 100644 --- a/unittests/AST/ASTBuilder.cpp +++ b/unittests/AST/ASTBuilder.cpp @@ -8,6 +8,8 @@ #include "rellic/AST/ASTBuilder.h" +#include + #include "Util.h" namespace { @@ -1251,3 +1253,19 @@ TEST_SUITE("ASTBuilder::CreateReturn") { } } } + +TEST_SUITE("ASTBuilder::CreateTypedefDecl") { + SCENARIO("Create a typedef declaration") { + GIVEN("Empty translation unit") { + auto unit{GetASTUnit()}; + auto &ctx{unit->getASTContext()}; + rellic::ASTBuilder ast(*unit); + auto tudecl{ctx.getTranslationUnitDecl()}; + THEN("return a typedef int foo;") { + auto tdef_decl{ast.CreateTypedefDecl(tudecl, "foo", ctx.IntTy)}; + REQUIRE(tdef_decl != nullptr); + CHECK(clang::isa(tdef_decl)); + } + } + } +} \ No newline at end of file From e878d42ce631206db241eb3918d4b1813aee6d22 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 29 Oct 2021 16:40:24 +0200 Subject: [PATCH 21/30] Fix varargs debug type analysis --- lib/AST/DebugInfoCollector.cpp | 7 +++---- lib/AST/IRToASTVisitor.cpp | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/AST/DebugInfoCollector.cpp b/lib/AST/DebugInfoCollector.cpp index edb65c7d..b006f8e0 100644 --- a/lib/AST/DebugInfoCollector.cpp +++ b/lib/AST/DebugInfoCollector.cpp @@ -109,7 +109,7 @@ void DebugInfoCollector::WalkType(llvm::Type* type, llvm::DIType* ditype) { auto di_types{funcditype->getTypeArray()}; ret_types[functype] = di_types[0]; - if (type_array.size() != di_types.size()) { + if (type_array.size() + functype->isVarArg() != di_types.size()) { // Mismatch between bitcode and debug metadata, bail out break; } @@ -160,8 +160,8 @@ void DebugInfoCollector::visitFunction(llvm::Function& func) { } auto ditype{subprogram->getType()}; - - if (func.arg_size() + 1 != ditype->getTypeArray().size()) { + auto type_array{ditype->getTypeArray()}; + if (func.arg_size() + func.isVarArg() + 1 != type_array.size()) { // Debug metadata is not compatible with bitcode, bail out // TODO(frabert): Find a way to reconcile differences return; @@ -169,7 +169,6 @@ void DebugInfoCollector::visitFunction(llvm::Function& func) { funcs[&func] = ditype; size_t i{1}; - auto type_array{ditype->getTypeArray()}; for (auto& arg : func.args()) { auto argtype{type_array[i++]}; args[&arg] = argtype; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 40103d93..4d64bdb9 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -114,7 +114,8 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, case llvm::Type::FunctionTyID: { auto func{llvm::cast(type)}; - std::vector ditype_array{func->getNumParams() + 1}; + std::vector ditype_array{func->getNumParams() + + func->isVarArg() + 1}; if (ditype) { auto difunctype{llvm::cast(ditype)}; auto arr{difunctype->getTypeArray()}; From 6b4d302ac942b3017309ad166a51bd93bc8d065d Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Fri, 29 Oct 2021 16:45:37 +0200 Subject: [PATCH 22/30] Use more debug info for prototypes --- scripts/roundtrip.py | 16 +++++++++++++++- tests/tools/decomp/conflicting_global.c | 1 + tests/tools/decomp/template_parameter_pack.cpp | 5 ++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/roundtrip.py b/scripts/roundtrip.py index 388eb78e..03eece98 100755 --- a/scripts/roundtrip.py +++ b/scripts/roundtrip.py @@ -118,7 +118,21 @@ class TestRoundtrip(unittest.TestCase): def test_generator(path): def test(self): roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only) - roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, ["-g3"]) + debug_flags = [ + "-O1", + "-g3", + "-gfull", + "-gdwarf-5", + #"-fno-eliminate-unused-debug-symbols", + "-fno-common", + "-fno-builtin", + "-ffreestanding", + "-nostdlib", + "-fno-inline", + "-fno-assume-sane-operator-new", + "-mno-inline-all-stringops" + ] + roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, debug_flags) return test diff --git a/tests/tools/decomp/conflicting_global.c b/tests/tools/decomp/conflicting_global.c index 9d46226b..08f79bb4 100644 --- a/tests/tools/decomp/conflicting_global.c +++ b/tests/tools/decomp/conflicting_global.c @@ -8,4 +8,5 @@ int main(void) { printf("%d\n", a); } printf("%d\n", a); + return 0; } diff --git a/tests/tools/decomp/template_parameter_pack.cpp b/tests/tools/decomp/template_parameter_pack.cpp index 7610c14f..fd78d235 100644 --- a/tests/tools/decomp/template_parameter_pack.cpp +++ b/tests/tools/decomp/template_parameter_pack.cpp @@ -10,4 +10,7 @@ T sum(T x, Ts... y) { return x + sum(y...); } -int main(void) { printf("%d\n", sum(1, 2, 3, 4, 5)); } \ No newline at end of file +int main(void) { + printf("%d\n", sum(1, 2, 3, 4, 5)); + return 0; +} \ No newline at end of file From b231b2483db4c77fcad4275d042d8b8f753d7976 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:26:41 +0100 Subject: [PATCH 23/30] Fix function argument type refinement --- lib/AST/IRToASTVisitor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 4d64bdb9..b1b05145 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -497,7 +497,9 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { auto difunctype{dic.GetIRFuncToDITypeMap()[func]}; if (difunctype) { auto ditype_array{difunctype->getTypeArray()}; - if (ditype_array.size() == func->getFunctionType()->getNumParams() + 1) { + auto functype{func->getFunctionType()}; + if (ditype_array.size() == + functype->getNumParams() + functype->isVarArg() + 1) { ditype = ditype_array[arg.getArgNo() + 1]; } } @@ -522,7 +524,7 @@ void IRToASTVisitor::VisitFunctionDecl(llvm::Function &func) { DLOG(INFO) << "Creating FunctionDecl for " << name; auto tudecl{ast_ctx.getTranslationUnitDecl()}; auto ftype{func.getFunctionType()}; - auto type{GetQualType(ftype, dic.GetIRTypeToDITypeMap()[ftype])}; + auto type{GetQualType(ftype, dic.GetIRFuncToDITypeMap()[&func])}; decl = ast.CreateFunctionDecl(tudecl, type, name); tudecl->addDecl(decl); From e02bd8a0a02339c9f74bd17a45f26db122b84369 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:26:56 +0100 Subject: [PATCH 24/30] Default to signed integers --- lib/AST/IRToASTVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index b1b05145..edf6475a 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -97,7 +97,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, break; case llvm::Type::IntegerTyID: { - int sign{0}; + int sign{1}; if (ditype) { // TODO(frabert): this path will not be taken when arguments will have // been merged/split or when a struct passed by value has been optimized From 3676aa56e790157735b160cce224479ee73ec679 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Sun, 31 Oct 2021 16:27:55 +0100 Subject: [PATCH 25/30] Fix tests --- scripts/roundtrip.py | 4 +++- tests/tools/decomp/template_parameter_pack.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/roundtrip.py b/scripts/roundtrip.py index 03eece98..6101f30a 100755 --- a/scripts/roundtrip.py +++ b/scripts/roundtrip.py @@ -130,7 +130,9 @@ def test(self): "-nostdlib", "-fno-inline", "-fno-assume-sane-operator-new", - "-mno-inline-all-stringops" + "-mno-inline-all-stringops", + "-Xclang", + "-disable-llvm-passes" ] roundtrip(self, args.rellic, path, args.clang, args.timeout, args.translate_only, debug_flags) diff --git a/tests/tools/decomp/template_parameter_pack.cpp b/tests/tools/decomp/template_parameter_pack.cpp index fd78d235..5a7a0955 100644 --- a/tests/tools/decomp/template_parameter_pack.cpp +++ b/tests/tools/decomp/template_parameter_pack.cpp @@ -10,7 +10,7 @@ T sum(T x, Ts... y) { return x + sum(y...); } -int main(void) { +extern "C" int main(void) { printf("%d\n", sum(1, 2, 3, 4, 5)); return 0; } \ No newline at end of file From a9b537156abe80777b55f02344d9964c59afaa38 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 1 Nov 2021 14:32:27 +0100 Subject: [PATCH 26/30] Desugar types for Z3 conversion --- lib/AST/Z3ConvVisitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/AST/Z3ConvVisitor.cpp b/lib/AST/Z3ConvVisitor.cpp index a70f547d..9f673fd9 100644 --- a/lib/AST/Z3ConvVisitor.cpp +++ b/lib/AST/Z3ConvVisitor.cpp @@ -6,8 +6,7 @@ * the LICENSE file found in the root directory of this source tree. */ -#include -#define GOOGLE_STRIP_LOG 1 +#define GOOGLE_STRIP_LOG 0 #include "rellic/AST/Z3ConvVisitor.h" @@ -175,6 +174,7 @@ clang::ValueDecl *Z3ConvVisitor::GetCValDecl(z3::func_decl z_decl) { } z3::sort Z3ConvVisitor::GetZ3Sort(clang::QualType type) { + type = type.getDesugaredType(*c_ctx); // Void if (type->isVoidType()) { return z_ctx->uninterpreted_sort("void"); From 8f39d9d1489d4e76216d4aadf2c78ce8ae53898f Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 4 Nov 2021 20:36:48 +0100 Subject: [PATCH 27/30] Add utility functions --- include/rellic/BC/Util.h | 1 + lib/BC/Util.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/rellic/BC/Util.h b/include/rellic/BC/Util.h index 4667fc47..697e2af3 100644 --- a/include/rellic/BC/Util.h +++ b/include/rellic/BC/Util.h @@ -24,6 +24,7 @@ namespace rellic { // Serialize an LLVM object into a string. std::string LLVMThingToString(llvm::Value *thing); std::string LLVMThingToString(llvm::Type *thing); +std::string LLVMThingToString(llvm::DIType *thing); // Try to verify a module. bool VerifyModule(llvm::Module *module); diff --git a/lib/BC/Util.cpp b/lib/BC/Util.cpp index 4e4e4744..94ca3cc1 100644 --- a/lib/BC/Util.cpp +++ b/lib/BC/Util.cpp @@ -55,6 +55,10 @@ std::string LLVMThingToString(llvm::Type *thing) { return DoLLVMThingToString(thing); } +std::string LLVMThingToString(llvm::DIType *thing) { + return DoLLVMThingToString(thing); +} + // Try to verify a module. bool VerifyModule(llvm::Module *module) { std::string error; From 35599baa993c12f4f4207c98dc780e00dbff4d54 Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Thu, 4 Nov 2021 20:37:38 +0100 Subject: [PATCH 28/30] Fix bugs --- lib/AST/IRToASTVisitor.cpp | 3 ++- lib/AST/StructFieldRenamer.cpp | 3 +++ tests/tools/decomp/byval_struct.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index f1c0d14b..b92645ec 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -66,6 +66,7 @@ clang::QualType IRToASTVisitor::GetQualType(llvm::Type *type, } return ast_ctx.getTypedefType(tdef_decl); } break; + case llvm::dwarf::DW_TAG_inheritance: case llvm::dwarf::DW_TAG_member: { return GetQualType(type, derived->getBaseType()); }; @@ -533,7 +534,7 @@ void IRToASTVisitor::VisitArgument(llvm::Argument &arg) { argtype = byval.getValueAsType(); } // Create a declaration - parm = ast.CreateParamDecl(fdecl, GetQualType(argtype), name); + parm = ast.CreateParamDecl(fdecl, GetQualType(argtype, ditype), name); } // This function fixes function types for those functions that have arguments diff --git a/lib/AST/StructFieldRenamer.cpp b/lib/AST/StructFieldRenamer.cpp index 3a9ba920..3202aa65 100644 --- a/lib/AST/StructFieldRenamer.cpp +++ b/lib/AST/StructFieldRenamer.cpp @@ -60,6 +60,9 @@ bool StructFieldRenamer::VisitRecordDecl(clang::RecordDecl *decl) { // FIXME(frabert): Is a clash between field names actually possible? // Can this mechanism actually be left out? auto name{di_field->getName().str()}; + if (di_field->getTag() == llvm::dwarf::DW_TAG_inheritance) { + name = di_field->getBaseType()->getName().str() + "_base"; + } if (seen_names.find(name) == seen_names.end()) { seen_names.insert(name); decl_field->setDeclName(ast.CreateIdentifier(name)); diff --git a/tests/tools/decomp/byval_struct.c b/tests/tools/decomp/byval_struct.c index 5fe25b0c..9e440e87 100644 --- a/tests/tools/decomp/byval_struct.c +++ b/tests/tools/decomp/byval_struct.c @@ -14,4 +14,5 @@ int main() { struct foo f = {atoi("1"), atoi("2"), atoi("3"), atoi("4")}; long long x = get_3x(f); printf("%lld %lld\n", f.x, x); + return 0; } \ No newline at end of file From dbd0d82047be048f2166212bb681512537c3a36c Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 8 Nov 2021 16:21:20 +0100 Subject: [PATCH 29/30] Add void to ptr casts --- lib/AST/IRToASTVisitor.cpp | 7 +++++++ lib/AST/Z3ConvVisitor.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 70234130..50647e54 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -825,6 +825,13 @@ void IRToASTVisitor::visitStoreInst(llvm::StoreInst &inst) { auto lhs{GetOperandExpr(inst.getPointerOperand())}; // Get the operand we're assigning from auto rhs{GetOperandExpr(inst.getValueOperand())}; + if (auto unop = clang::dyn_cast(rhs)) { + if (unop->getOpcode() == clang::UO_Deref && + unop->getSubExpr()->getType() == ast_ctx.VoidPtrTy) { + rhs = ast.CreateDeref( + ast.CreateCStyleCast(lhs->getType(), unop->getSubExpr())); + } + } // Create the assignemnt itself assign = ast.CreateAssign(ast.CreateDeref(lhs), rhs); } diff --git a/lib/AST/Z3ConvVisitor.cpp b/lib/AST/Z3ConvVisitor.cpp index 9f673fd9..db2d8c41 100644 --- a/lib/AST/Z3ConvVisitor.cpp +++ b/lib/AST/Z3ConvVisitor.cpp @@ -6,7 +6,7 @@ * the LICENSE file found in the root directory of this source tree. */ -#define GOOGLE_STRIP_LOG 0 +#define GOOGLE_STRIP_LOG 1 #include "rellic/AST/Z3ConvVisitor.h" From 38991a6a0eb7c4f8a37037a6ec1fe5983fbc87ff Mon Sep 17 00:00:00 2001 From: Francesco Bertolaccini Date: Mon, 8 Nov 2021 17:25:53 +0100 Subject: [PATCH 30/30] Use plain `char` when asking for `signed char` --- include/rellic/AST/ASTBuilder.h | 1 + lib/AST/ASTBuilder.cpp | 13 +++++++++++-- lib/AST/IRToASTVisitor.cpp | 12 ++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/rellic/AST/ASTBuilder.h b/include/rellic/AST/ASTBuilder.h index d15a7441..936fc21e 100644 --- a/include/rellic/AST/ASTBuilder.h +++ b/include/rellic/AST/ASTBuilder.h @@ -27,6 +27,7 @@ class ASTBuilder { public: ASTBuilder(clang::ASTUnit &unit); // Type helpers + clang::QualType GetIntTypeForBitWidth(unsigned size, unsigned sign); clang::QualType GetLeastIntTypeForBitWidth(unsigned size, unsigned sign); clang::QualType GetLeastRealTypeForBitWidth(unsigned size); // Literals diff --git a/lib/AST/ASTBuilder.cpp b/lib/AST/ASTBuilder.cpp index 0c1f8f9d..38aae04c 100644 --- a/lib/AST/ASTBuilder.cpp +++ b/lib/AST/ASTBuilder.cpp @@ -82,9 +82,18 @@ static unsigned GetOperatorPrecedence(clang::Expr *op) { ASTBuilder::ASTBuilder(clang::ASTUnit &unit) : unit(unit), ctx(unit.getASTContext()), sema(unit.getSema()) {} +clang::QualType ASTBuilder::GetIntTypeForBitWidth(unsigned size, + unsigned sign) { + if (ctx.getIntWidth(ctx.CharTy) == size && sign) { + return ctx.CharTy; + } + + return ctx.getIntTypeForBitwidth(size, sign); +} + clang::QualType ASTBuilder::GetLeastIntTypeForBitWidth(unsigned size, unsigned sign) { - auto result{ctx.getIntTypeForBitwidth(size, sign)}; + auto result{GetIntTypeForBitWidth(size, sign)}; if (!result.isNull()) { return result; } @@ -92,7 +101,7 @@ clang::QualType ASTBuilder::GetLeastIntTypeForBitWidth(unsigned size, auto target_type{ti.getLeastIntTypeByWidth(size, sign)}; CHECK(target_type != clang::TargetInfo::IntType::NoInt) << "Failed to infer clang::TargetInfo::IntType for bitwidth: " << size; - result = ctx.getIntTypeForBitwidth(ti.getTypeWidth(target_type), sign); + result = GetIntTypeForBitWidth(ti.getTypeWidth(target_type), sign); CHECK(!result.isNull()) << "Failed to infer clang::QualType for bitwidth: " << size; return result; diff --git a/lib/AST/IRToASTVisitor.cpp b/lib/AST/IRToASTVisitor.cpp index 50647e54..b7a060d4 100644 --- a/lib/AST/IRToASTVisitor.cpp +++ b/lib/AST/IRToASTVisitor.cpp @@ -870,8 +870,8 @@ void IRToASTVisitor::visitBinaryOperator(llvm::BinaryOperator &inst) { auto rhs{GetOperandExpr(inst.getOperand(1))}; // Sign-cast int operand auto IntSignCast{[this](clang::Expr *operand, bool sign) { - auto type{ast_ctx.getIntTypeForBitwidth( - ast_ctx.getTypeSize(operand->getType()), sign)}; + auto type{ast.GetIntTypeForBitWidth(ast_ctx.getTypeSize(operand->getType()), + sign)}; return ast.CreateCStyleCast(type, operand); }}; // Where the magic happens @@ -955,7 +955,7 @@ void IRToASTVisitor::visitCmpInst(llvm::CmpInst &inst) { // Sign-cast int operand auto IntSignCast{[this](clang::Expr *op, bool sign) { auto ot{op->getType()}; - auto rt{ast_ctx.getIntTypeForBitwidth(ast_ctx.getTypeSize(ot), sign)}; + auto rt{ast.GetIntTypeForBitWidth(ast_ctx.getTypeSize(ot), sign)}; return rt == ot ? op : ast.CreateCStyleCast(rt, op); }}; // Cast operands for signed predicates @@ -1026,17 +1026,17 @@ void IRToASTVisitor::visitCastInst(llvm::CastInst &inst) { case llvm::CastInst::Trunc: { auto bitwidth{ast_ctx.getTypeSize(type)}; auto sign{operand->getType()->isSignedIntegerType()}; - type = ast_ctx.getIntTypeForBitwidth(bitwidth, sign); + type = ast.GetIntTypeForBitWidth(bitwidth, sign); } break; case llvm::CastInst::ZExt: { auto bitwidth{ast_ctx.getTypeSize(type)}; - type = ast_ctx.getIntTypeForBitwidth(bitwidth, /*signed=*/0U); + type = ast.GetIntTypeForBitWidth(bitwidth, /*signed=*/0U); } break; case llvm::CastInst::SExt: { auto bitwidth{ast_ctx.getTypeSize(type)}; - type = ast_ctx.getIntTypeForBitwidth(bitwidth, /*signed=*/1U); + type = ast.GetIntTypeForBitWidth(bitwidth, /*signed=*/1U); } break; case llvm::CastInst::AddrSpaceCast: