diff --git a/CMakeLists.txt b/CMakeLists.txt index 30995d4..73f0185 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,7 @@ set(STACK_ANALYZER_SOURCES src/analysis/StackBufferAnalysis.cpp src/analysis/StackComputation.cpp src/analysis/StackPointerEscape.cpp + src/analysis/UninitializedVarAnalysis.cpp src/report/ReportSerialization.cpp src/mangle.cpp src/passes/ModulePasses.cpp diff --git a/README.md b/README.md index f2f10eb..5bb2043 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ LLVM_DIR=/opt/llvm/lib/cmake/llvm Clang_DIR=/opt/llvm/lib/cmake/clang ./build.sh --only-dir= or --only-dir filters by directory --only-function= or --only-function filters by function --only-func= alias for --only-function +--STL includes STL/system library functions (default excludes them) --dump-filter prints filter decisions (stderr) ``` diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 5cdc106..9a0bfce 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -46,6 +46,7 @@ namespace ctrace::stack std::vector onlyFiles; std::vector onlyDirs; std::vector onlyFunctions; + bool includeSTL = false; bool dumpFilter = false; std::string dumpIRPath; bool dumpIRIsDir = false; @@ -119,12 +120,13 @@ namespace ctrace::stack InvalidBaseReconstruction = 10, ConstParameterNotModified = 11, SizeMinusOneWrite = 12, - DuplicateIfCondition = 13 + DuplicateIfCondition = 13, + UninitializedLocalRead = 14 }; template <> struct EnumTraits { - static constexpr std::array names = {"None", + static constexpr std::array names = {"None", "StackBufferOverflow", "NegativeStackIndex", "VLAUsage", @@ -137,7 +139,8 @@ namespace ctrace::stack "InvalidBaseReconstruction", "ConstParameterNotModified", "SizeMinusOneWrite", - "DuplicateIfCondition"}; + "DuplicateIfCondition", + "UninitializedLocalRead"}; }; /* @@ -160,6 +163,8 @@ namespace ctrace::stack DiagnosticSeverity severity = DiagnosticSeverity::Warning; DescriptiveErrorCode errCode = 
namespace llvm
{
    class Function;
    class Instruction;
    class Module;
} // namespace llvm

namespace ctrace::stack::analysis
{
    /// How an uninitialized local variable was observed by the analysis.
    enum class UninitializedLocalIssueKind
    {
        /// A load may execute before the variable is definitely initialized.
        ReadBeforeDefiniteInit,
        /// A call may read the variable before it is definitely initialized.
        ReadBeforeDefiniteInitViaCall,
        /// No definite write to the variable was found in the function.
        NeverInitialized
    };

    /// One finding produced by analyzeUninitializedLocalReads().
    struct UninitializedLocalReadIssue
    {
        std::string funcName;                    ///< Function containing the finding.
        std::string varName;                     ///< Name of the affected local variable.
        const llvm::Instruction* inst = nullptr; ///< Offending instruction, when one is known.
        unsigned line = 0;                       ///< Source line; 0 means unknown.
        unsigned column = 0;                     ///< Source column; 0 means unknown.
        std::string calleeName;                  ///< Callee involved in a via-call finding; may be empty.
        UninitializedLocalIssueKind kind = UninitializedLocalIssueKind::ReadBeforeDefiniteInit;
    };

    /// Scans @p mod for reads of uninitialized local variables.
    ///
    /// @param mod            Module to inspect.
    /// @param shouldAnalyze  Predicate used to select functions.
    /// @return The list of findings.
    ///
    /// NOTE(review): the template arguments of the return type and of the
    /// predicate were missing from the reviewed text. They are reconstructed
    /// here from the forward declarations above and from the call site that
    /// passes a function filter; confirm the exact predicate signature.
    std::vector<UninitializedLocalReadIssue>
    analyzeUninitializedLocalReads(llvm::Module& mod,
                                   const std::function<bool(const llvm::Function&)>& shouldAnalyze);
} // namespace ctrace::stack::analysis
>= argc) diff --git a/run_test.py b/run_test.py index f2fe6de..82b0732 100755 --- a/run_test.py +++ b/run_test.py @@ -717,6 +717,18 @@ def check_cli_parsing_and_filters() -> bool: else: print(f" ✅ macro case OK: {' '.join(opt)}") + # STL toggle + for opt in [["--STL"], ["--stl"]]: + cmd = [str(ANALYZER), str(sample)] + opt + ["--only-function=transition"] + result = subprocess.run(cmd, capture_output=True, text=True) + output = (result.stdout or "") + (result.stderr or "") + if result.returncode != 0 or "Function:" not in output: + print(f" ❌ STL flag case failed: {' '.join(opt)}") + print(output) + ok = False + else: + print(f" ✅ STL flag case OK: {' '.join(opt)}") + print() return ok diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index bc9ca20..a862bcf 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -32,6 +32,7 @@ #include "analysis/StackBufferAnalysis.hpp" #include "analysis/StackComputation.hpp" #include "analysis/StackPointerEscape.hpp" +#include "analysis/UninitializedVarAnalysis.hpp" #include "passes/ModulePasses.hpp" namespace ctrace::stack @@ -922,6 +923,75 @@ namespace ctrace::stack } } + static void appendUninitializedLocalReadDiagnostics( + AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + unsigned line = issue.line; + unsigned column = issue.column; + bool haveLoc = (line != 0); + if (issue.inst) + { + llvm::DebugLoc DL = issue.inst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + std::ostringstream body; + if (issue.kind == analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInit) + { + body << " [!!] 
potential read of uninitialized local variable '" + << issue.varName << "'\n"; + body << " this load may execute before any definite initialization on " + "all control-flow paths\n"; + } + else if (issue.kind == + analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInitViaCall) + { + body << " [!!] potential read of uninitialized local variable '" + << issue.varName << "'\n"; + body + << " this call may read the value before any definite initialization"; + if (!issue.calleeName.empty()) + { + body << " in '" << issue.calleeName << "'"; + } + body << "\n"; + } + else + { + body << " [!] local variable '" << issue.varName << "' is never initialized\n"; + body << " declared without initializer and no definite write was found " + "in this function\n"; + } + + Diagnostic diag; + diag.funcName = issue.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::UninitializedLocalRead; + diag.ruleId = + (issue.kind == analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInit || + issue.kind == + analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInitViaCall) + ? "UninitializedLocalRead" + : "UninitializedLocalVariable"; + diag.confidence = + (issue.kind == analysis::UninitializedLocalIssueKind::NeverInitialized) ? 
/// Normalizes a path so that it can be compared textually.
///
/// Backslashes become '/', the path is lexically normalized, and an absolute
/// path is additionally passed through std::filesystem::weakly_canonical (the
/// lexical form is kept when that call reports an error). Trailing '/'
/// characters are removed, except for a lone "/".
///
/// @param input  Path to normalize; may be empty.
/// @return The normalized path in generic ('/'-separated) form, or an empty
///         string for empty input.
static std::string normalizePathForMatch(const std::string& input)
{
    if (input.empty())
        return {};

    std::string adjusted = input;
    for (char& c : adjusted)
    {
        if (c == '\\')
            c = '/';
    }

    std::filesystem::path norm = std::filesystem::path(adjusted).lexically_normal();
    if (norm.is_absolute())
    {
        std::error_code ec;
        std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(norm, ec);
        if (!ec)
            norm = std::move(canonicalPath);
    }

    std::string out = norm.generic_string();
    while (out.size() > 1 && out.back() == '/')
        out.pop_back();
    return out;
}

/// Returns a copy of @p input with every character lowered via std::tolower.
static std::string toLowerCopy(const std::string& input)
{
    std::string out;
    out.reserve(input.size());
    for (char c : input)
    {
        // std::tolower requires a value representable as unsigned char.
        out.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
    }
    return out;
}

/// Tests whether @p prefix is a directory-wise prefix of @p path.
///
/// The match must end on a component boundary: either both strings are equal,
/// the prefix itself ends in '/', or the character following the prefix in
/// @p path is '/'. An empty prefix never matches.
static bool pathHasPrefix(const std::string& path, const std::string& prefix)
{
    if (prefix.empty() || path.size() < prefix.size())
        return false;
    if (path.compare(0, prefix.size(), prefix) != 0)
        return false;
    if (path.size() == prefix.size())
        return true;
    // A prefix that already ends with a separator (e.g. "/" or "/usr/include/")
    // is on a component boundary by construction.
    return prefix.back() == '/' || path[prefix.size()] == '/';
}

/// Tests whether the non-empty @p fragment occurs anywhere in @p path.
static bool pathContainsFragment(const std::string& path, const std::string& fragment)
{
    return !fragment.empty() && path.find(fragment) != std::string::npos;
}

/// Heuristically decides whether @p path points into a system or toolchain
/// location.
///
/// The path is normalized and lowered, then compared against a fixed table of
/// directory prefixes and a fixed table of path fragments (both spelled in
/// lower case).
///
/// @return true when any prefix or fragment matches; false otherwise,
///         including for an empty path.
static bool isLikelySystemPath(const std::string& path)
{
    if (path.empty())
        return false;

    const std::string normalized = toLowerCopy(normalizePathForMatch(path));
    if (normalized.empty())
        return false;

    static constexpr std::array systemPrefixes = {
        "/usr/include",
        "/usr/lib",
        "/usr/local/include",
        "/usr/local/lib",
        "/opt/homebrew/include",
        "/opt/homebrew/lib",
        "/opt/homebrew/cellar",
        "/opt/local/include",
        "/opt/local/lib",
        "/library/developer/commandlinetools/usr/include",
        "/library/developer/commandlinetools/usr/lib",
        "/applications/xcode.app/contents/developer/toolchains",
        "/applications/xcode.app/contents/developer/platforms",
        "/nix/store",
        "c:/program files"};

    for (const char* prefix : systemPrefixes)
    {
        if (pathHasPrefix(normalized, prefix))
            return true;
    }

    static constexpr std::array systemFragments = {
        "/include/c++/", "/c++/v1/", "/lib/clang/", "/x86_64-linux-gnu/c++/",
        "/aarch64-linux-gnu/c++/"};

    for (const char* fragment : systemFragments)
    {
        if (pathContainsFragment(normalized, fragment))
            return true;
    }

    return false;
}
namespace + FunctionFilter buildFunctionFilter(const llvm::Module& mod, const AnalysisConfig& config) { FunctionFilter filter; filter.hasPathFilter = !config.onlyFiles.empty() || !config.onlyDirs.empty(); filter.hasFuncFilter = !config.onlyFunctions.empty(); - filter.hasFilter = filter.hasPathFilter || filter.hasFuncFilter; + filter.hasFilter = filter.hasPathFilter || filter.hasFuncFilter || !config.includeSTL; filter.moduleSourcePath = mod.getSourceFileName(); filter.config = &config; return filter; @@ -38,7 +154,41 @@ namespace ctrace::stack::analysis return false; } if (!hasPathFilter) + { + if (!cfg.includeSTL && !hasFuncFilter) + { + std::string path = getFunctionSourcePath(F); + std::string usedPath; + bool decision = true; + + if (!path.empty()) + { + usedPath = path; + decision = !isLikelySystemPath(usedPath); + } + else if (!moduleSourcePath.empty()) + { + usedPath = moduleSourcePath; + decision = !isLikelySystemPath(usedPath); + } + else + { + decision = !isCompilerRuntimeLikeName(F.getName()); + } + + if (cfg.dumpFilter) + { + llvm::errs() << "[filter] func=" << F.getName() << " file="; + if (usedPath.empty()) + llvm::errs() << ""; + else + llvm::errs() << usedPath; + llvm::errs() << " keep=" << (decision ? 
"yes" : "no") << "\n"; + } + return decision; + } return true; + } std::string path = getFunctionSourcePath(F); std::string usedPath; bool decision = false; diff --git a/src/analysis/UninitializedVarAnalysis.cpp b/src/analysis/UninitializedVarAnalysis.cpp new file mode 100644 index 0000000..b45e5af --- /dev/null +++ b/src/analysis/UninitializedVarAnalysis.cpp @@ -0,0 +1,1313 @@ +#include "analysis/UninitializedVarAnalysis.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "analysis/IRValueUtils.hpp" + +namespace ctrace::stack::analysis +{ + namespace + { + struct ByteRange + { + std::uint64_t begin = 0; + std::uint64_t end = 0; // [begin, end) + + bool operator==(const ByteRange& other) const + { + return begin == other.begin && end == other.end; + } + }; + + enum class InitLatticeState + { + Uninit, + Partial, + Init + }; + + enum class TrackedObjectKind + { + Alloca, + PointerParam + }; + + struct TrackedMemoryObject + { + TrackedObjectKind kind = TrackedObjectKind::Alloca; + const llvm::AllocaInst* alloca = nullptr; + const llvm::Argument* param = nullptr; + std::uint64_t sizeBytes = 0; // 0 means unknown upper bound. 
/// Range end used for tracked objects whose size is unknown (sizeBytes == 0).
/// NOTE(review): a quarter of the 64-bit maximum presumably leaves headroom
/// for offset arithmetic on top of it; confirm the intent.
static constexpr std::uint64_t kUnknownObjectFullRange =
    std::numeric_limits<std::uint64_t>::max() / 4;

/// Adds two unsigned 64-bit values, clamping to the maximum representable
/// value instead of wrapping around.
///
/// @return lhs + rhs, or the largest std::uint64_t when the sum would overflow.
static std::uint64_t saturatingAdd(std::uint64_t lhs, std::uint64_t rhs)
{
    constexpr std::uint64_t maxVal = std::numeric_limits<std::uint64_t>::max();
    // lhs + rhs > maxVal  <=>  lhs > maxVal - rhs (the right side cannot wrap).
    if (lhs > maxVal - rhs)
        return maxVal;
    return lhs + rhs;
}
+ { + if (!ty) + return 0; + llvm::TypeSize size = DL.getTypeStoreSize(const_cast(ty)); + if (size.isScalable()) + return 0; + return size.getFixedValue(); + } + + static void addRange(RangeSet& ranges, std::uint64_t begin, std::uint64_t end) + { + if (begin >= end) + return; + + auto it = std::lower_bound(ranges.begin(), ranges.end(), begin, + [](const ByteRange& r, std::uint64_t value) + { return r.end < value; }); + + if (it == ranges.end()) + { + ranges.push_back({begin, end}); + return; + } + + if (end < it->begin) + { + ranges.insert(it, {begin, end}); + return; + } + + it->begin = std::min(it->begin, begin); + it->end = std::max(it->end, end); + + auto next = it + 1; + while (next != ranges.end() && next->begin <= it->end) + { + it->end = std::max(it->end, next->end); + ++next; + } + ranges.erase(it + 1, next); + } + + static RangeSet intersectRanges(const RangeSet& lhs, const RangeSet& rhs) + { + RangeSet out; + std::size_t i = 0; + std::size_t j = 0; + while (i < lhs.size() && j < rhs.size()) + { + std::uint64_t begin = std::max(lhs[i].begin, rhs[j].begin); + std::uint64_t end = std::min(lhs[i].end, rhs[j].end); + if (begin < end) + out.push_back({begin, end}); + + if (lhs[i].end < rhs[j].end) + ++i; + else + ++j; + } + return out; + } + + static bool isRangeCovered(const RangeSet& initialized, std::uint64_t begin, + std::uint64_t end) + { + if (begin >= end) + return true; + for (const ByteRange& r : initialized) + { + if (r.begin <= begin && r.end >= end) + return true; + if (r.begin > begin) + return false; + } + return false; + } + + static InitLatticeState classifyInitState(const RangeSet& initialized, + std::uint64_t totalSize) + { + if (totalSize == 0 || initialized.empty()) + return InitLatticeState::Uninit; + if (initialized.size() == 1 && initialized.front().begin == 0 && + initialized.front().end >= totalSize) + { + return InitLatticeState::Init; + } + return InitLatticeState::Partial; + } + + static bool isAllocaObject(const 
TrackedMemoryObject& obj) + { + return obj.kind == TrackedObjectKind::Alloca; + } + + static bool isParamObject(const TrackedMemoryObject& obj) + { + return obj.kind == TrackedObjectKind::PointerParam; + } + + static std::uint64_t getObjectFullRangeEnd(const TrackedMemoryObject& obj) + { + return obj.sizeBytes == 0 ? kUnknownObjectFullRange : obj.sizeBytes; + } + + static std::string getTrackedObjectName(const TrackedMemoryObject& obj) + { + if (isAllocaObject(obj) && obj.alloca) + return deriveAllocaName(obj.alloca); + if (isParamObject(obj) && obj.param) + { + if (obj.param->hasName()) + return obj.param->getName().str(); + return "arg" + std::to_string(obj.param->getArgNo()); + } + return ""; + } + + static bool clipRangeToObject(const TrackedMemoryObject& obj, std::uint64_t begin, + std::uint64_t end, std::uint64_t& outBegin, + std::uint64_t& outEnd) + { + if (obj.sizeBytes == 0) + { + if (begin >= end) + return false; + outBegin = begin; + outEnd = end; + return true; + } + + if (begin >= obj.sizeBytes) + return false; + outBegin = begin; + outEnd = std::min(end, obj.sizeBytes); + return outBegin < outEnd; + } + + static bool lookupTrackedObjectIndex(const llvm::Value* base, + const TrackedObjectContext& tracked, + unsigned& outIndex) + { + if (auto* AI = llvm::dyn_cast(base)) + { + auto it = tracked.allocaIndex.find(AI); + if (it != tracked.allocaIndex.end()) + { + outIndex = it->second; + return true; + } + } + if (auto* arg = llvm::dyn_cast(base)) + { + auto it = tracked.paramIndex.find(arg); + if (it != tracked.paramIndex.end()) + { + outIndex = it->second; + return true; + } + } + return false; + } + + static const llvm::Value* peelPointerFromSingleStoreSlot(const llvm::Value* ptr) + { + const llvm::Value* current = ptr; + + for (unsigned depth = 0; depth < 4; ++depth) + { + const auto* LI = llvm::dyn_cast(current->stripPointerCasts()); + if (!LI) + break; + + const auto* slot = + llvm::dyn_cast(LI->getPointerOperand()->stripPointerCasts()); + if (!slot 
|| !slot->isStaticAlloca()) + break; + if (!slot->getAllocatedType()->isPointerTy()) + break; + + const llvm::StoreInst* uniqueStore = nullptr; + bool unsafeUse = false; + for (const llvm::Use& U : slot->uses()) + { + const auto* user = U.getUser(); + if (const auto* SI = llvm::dyn_cast(user)) + { + if (SI->getPointerOperand()->stripPointerCasts() != slot) + { + unsafeUse = true; + break; + } + if (uniqueStore && uniqueStore != SI) + { + uniqueStore = nullptr; + break; + } + uniqueStore = SI; + continue; + } + + if (const auto* LI = llvm::dyn_cast(user)) + { + if (LI->getPointerOperand()->stripPointerCasts() != slot) + { + unsafeUse = true; + break; + } + continue; + } + + if (const auto* II = llvm::dyn_cast(user)) + { + if (llvm::isa(II) || + llvm::isa(II)) + { + continue; + } + unsafeUse = true; + break; + } + + unsafeUse = true; + break; + } + + if (unsafeUse || !uniqueStore) + break; + + const llvm::Value* storedPtr = uniqueStore->getValueOperand()->stripPointerCasts(); + if (!storedPtr->getType()->isPointerTy()) + break; + + current = storedPtr; + } + + return current; + } + + static bool resolveTrackedObjectBase(const llvm::Value* ptr, + const TrackedObjectContext& tracked, + const llvm::DataLayout& DL, unsigned& outObjectIdx, + std::uint64_t& outOffset, bool& outHasConstOffset) + { + if (!ptr || !ptr->getType()->isPointerTy()) + return false; + + const llvm::Value* canonicalPtr = peelPointerFromSingleStoreSlot(ptr); + + int64_t signedOffset = 0; + const llvm::Value* base = llvm::GetPointerBaseWithConstantOffset( + canonicalPtr->stripPointerCasts(), signedOffset, DL, true); + if (base && signedOffset >= 0 && lookupTrackedObjectIndex(base, tracked, outObjectIdx)) + { + outOffset = static_cast(signedOffset); + outHasConstOffset = true; + return true; + } + + const llvm::Value* underlying = + llvm::getUnderlyingObject(canonicalPtr->stripPointerCasts(), 16); + if (!underlying) + return false; + if (!lookupTrackedObjectIndex(underlying, tracked, outObjectIdx)) + 
return false; + + outOffset = 0; + outHasConstOffset = false; + return true; + } + + static bool resolveAccessFromPointer(const llvm::Value* ptr, std::uint64_t accessSize, + const TrackedObjectContext& tracked, + const llvm::DataLayout& DL, MemoryAccess& out) + { + if (!ptr || !ptr->getType()->isPointerTy() || accessSize == 0) + return false; + + unsigned objectIdx = 0; + std::uint64_t offset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(ptr, tracked, DL, objectIdx, offset, hasConstOffset) || + !hasConstOffset) + { + return false; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + std::uint64_t begin = offset; + std::uint64_t end = saturatingAdd(offset, accessSize); + std::uint64_t clippedBegin = 0; + std::uint64_t clippedEnd = 0; + if (!clipRangeToObject(obj, begin, end, clippedBegin, clippedEnd)) + return false; + + out.objectIdx = objectIdx; + out.begin = clippedBegin; + out.end = clippedEnd; + return true; + } + + static void collectTrackedObjects(const llvm::Function& F, const llvm::DataLayout& DL, + TrackedObjectContext& tracked) + { + tracked.objects.clear(); + tracked.allocaIndex.clear(); + tracked.paramIndex.clear(); + + for (const llvm::BasicBlock& BB : F) + { + for (const llvm::Instruction& I : BB) + { + auto* AI = llvm::dyn_cast(&I); + if (!AI) + continue; + if (!shouldTrackAlloca(*AI)) + continue; + + std::uint64_t sizeBytes = getAllocaSizeBytes(*AI, DL); + if (sizeBytes == 0) + continue; + + unsigned idx = static_cast(tracked.objects.size()); + tracked.objects.push_back({TrackedObjectKind::Alloca, AI, nullptr, sizeBytes}); + tracked.allocaIndex[AI] = idx; + } + } + + for (const llvm::Argument& arg : F.args()) + { + if (!arg.getType()->isPointerTy()) + continue; + unsigned idx = static_cast(tracked.objects.size()); + tracked.objects.push_back({TrackedObjectKind::PointerParam, nullptr, &arg, 0}); + tracked.paramIndex[&arg] = idx; + } + } + + static InitRangeState makeBottomState(std::size_t trackedCount) + { + 
return InitRangeState(trackedCount); + } + + static InitRangeState makeTopState(const TrackedObjectContext& tracked) + { + InitRangeState top(tracked.objects.size()); + for (std::size_t i = 0; i < tracked.objects.size(); ++i) + { + addRange(top[i], 0, getObjectFullRangeEnd(tracked.objects[i])); + } + return top; + } + + static void meetMustState(InitRangeState& accum, const InitRangeState& incoming) + { + if (accum.size() != incoming.size()) + return; + for (std::size_t idx = 0; idx < accum.size(); ++idx) + { + accum[idx] = intersectRanges(accum[idx], incoming[idx]); + } + } + + static bool statesEqual(const InitRangeState& lhs, const InitRangeState& rhs) + { + return lhs == rhs; + } + + static void computeReachableBlocks(const llvm::Function& F, + llvm::DenseMap& reachable) + { + reachable.clear(); + if (F.empty()) + return; + + llvm::SmallVector worklist; + worklist.push_back(&F.getEntryBlock()); + reachable[&F.getEntryBlock()] = true; + + while (!worklist.empty()) + { + const llvm::BasicBlock* BB = worklist.pop_back_val(); + for (const llvm::BasicBlock* succ : llvm::successors(BB)) + { + if (!succ) + continue; + auto [it, inserted] = reachable.try_emplace(succ, true); + if (inserted) + { + worklist.push_back(succ); + } + else + { + it->second = true; + } + } + } + } + + static InitRangeState + computeInState(const llvm::BasicBlock& BB, const llvm::BasicBlock* entryBlock, + const llvm::DenseMap& reachable, + const llvm::DenseMap& outState, + const TrackedObjectContext& tracked) + { + InitRangeState in = makeBottomState(tracked.objects.size()); + if (&BB == entryBlock) + return in; + + bool havePred = false; + InitRangeState merged = makeTopState(tracked); + for (const llvm::BasicBlock* pred : llvm::predecessors(&BB)) + { + auto itReach = reachable.find(pred); + if (itReach == reachable.end() || !itReach->second) + continue; + havePred = true; + + auto itOut = outState.find(pred); + if (itOut == outState.end()) + { + merged = 
makeBottomState(tracked.objects.size()); + break; + } + meetMustState(merged, itOut->second); + } + + if (!havePred) + return in; + return merged; + } + + static const llvm::Instruction* getAllocaDebugAnchor(const llvm::AllocaInst* AI) + { + if (!AI) + return nullptr; + + for (const llvm::Use& U : AI->uses()) + { + auto* dvi = llvm::dyn_cast(U.getUser()); + if (!dvi) + continue; + if (dvi->getDebugLoc()) + return dvi; + } + return AI; + } + + static void getAllocaDeclarationLocation(const llvm::AllocaInst* AI, unsigned& line, + unsigned& column) + { + line = 0; + column = 0; + if (!AI) + return; + + auto* nonConstAI = const_cast(AI); + for (llvm::DbgDeclareInst* ddi : llvm::findDbgDeclares(nonConstAI)) + { + llvm::DebugLoc DL = llvm::getDebugValueLoc(ddi); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + return; + } + } + + for (llvm::DbgVariableRecord* dvr : llvm::findDVRDeclares(nonConstAI)) + { + llvm::DebugLoc DL = llvm::getDebugValueLoc(dvr); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + return; + } + } + } + + static FunctionSummary makeEmptySummary(const llvm::Function& F) + { + FunctionSummary summary; + summary.paramEffects.resize(F.arg_size()); + return summary; + } + + static PointerParamEffectSummary& getParamEffect(FunctionSummary& summary, + const llvm::Argument& arg) + { + const unsigned argNo = arg.getArgNo(); + assert(argNo < summary.paramEffects.size() && + "pointer parameter index must fit in summary vector"); + return summary.paramEffects[argNo]; + } + + static void applyCalleeSummaryAtCall( + const llvm::CallBase& CB, const llvm::Function& callee, + const FunctionSummary& calleeSummary, const TrackedObjectContext& tracked, + const llvm::DataLayout& DL, InitRangeState& initialized, llvm::BitVector* writeSeen, + llvm::BitVector* readBeforeInitSeen, FunctionSummary* currentSummary, + std::vector* emittedIssues) + { + const unsigned maxArgs = + std::min(static_cast(CB.arg_size()), + 
static_cast(calleeSummary.paramEffects.size())); + for (unsigned argIdx = 0; argIdx < maxArgs; ++argIdx) + { + const PointerParamEffectSummary& effect = calleeSummary.paramEffects[argIdx]; + if (!effect.hasAnyEffect()) + continue; + + const llvm::Value* actual = CB.getArgOperand(argIdx); + if (!actual || !actual->getType()->isPointerTy()) + continue; + + unsigned objectIdx = 0; + std::uint64_t baseOffset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(actual, tracked, DL, objectIdx, baseOffset, + hasConstOffset)) + { + continue; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + + bool hasReadBeforeWrite = false; + bool readWasUnknown = false; + RangeSet uncoveredReadRanges; + + if (hasConstOffset) + { + for (const ByteRange& rr : effect.readBeforeWriteRanges) + { + std::uint64_t mappedBegin = saturatingAdd(baseOffset, rr.begin); + std::uint64_t mappedEnd = saturatingAdd(baseOffset, rr.end); + std::uint64_t clippedBegin = 0; + std::uint64_t clippedEnd = 0; + if (!clipRangeToObject(obj, mappedBegin, mappedEnd, clippedBegin, + clippedEnd)) + { + continue; + } + if (!isRangeCovered(initialized[objectIdx], clippedBegin, clippedEnd)) + { + hasReadBeforeWrite = true; + uncoveredReadRanges.push_back({clippedBegin, clippedEnd}); + } + } + + if (effect.hasUnknownReadBeforeWrite) + { + InitLatticeState objectState = + classifyInitState(initialized[objectIdx], getObjectFullRangeEnd(obj)); + if (objectState != InitLatticeState::Init) + { + hasReadBeforeWrite = true; + readWasUnknown = true; + } + } + } + else + { + if (effect.hasUnknownReadBeforeWrite || !effect.readBeforeWriteRanges.empty()) + { + InitLatticeState objectState = + classifyInitState(initialized[objectIdx], getObjectFullRangeEnd(obj)); + if (objectState != InitLatticeState::Init) + { + hasReadBeforeWrite = true; + readWasUnknown = true; + } + } + } + + if (hasReadBeforeWrite) + { + if (isAllocaObject(obj)) + { + if (readBeforeInitSeen && objectIdx < 
readBeforeInitSeen->size()) + readBeforeInitSeen->set(objectIdx); + + if (emittedIssues) + { + emittedIssues->push_back( + {CB.getFunction()->getName().str(), getTrackedObjectName(obj), &CB, + 0, 0, callee.getName().str(), + UninitializedLocalIssueKind::ReadBeforeDefiniteInitViaCall}); + } + } + else if (currentSummary && obj.param) + { + PointerParamEffectSummary& current = + getParamEffect(*currentSummary, *obj.param); + for (const ByteRange& rr : uncoveredReadRanges) + { + addRange(current.readBeforeWriteRanges, rr.begin, rr.end); + } + if (readWasUnknown || + (!hasConstOffset && (!effect.readBeforeWriteRanges.empty() || + effect.hasUnknownReadBeforeWrite))) + { + current.hasUnknownReadBeforeWrite = true; + } + } + } + + bool wroteSomething = false; + bool writeWasUnknown = false; + if (hasConstOffset) + { + for (const ByteRange& wr : effect.writeRanges) + { + std::uint64_t mappedBegin = saturatingAdd(baseOffset, wr.begin); + std::uint64_t mappedEnd = saturatingAdd(baseOffset, wr.end); + std::uint64_t clippedBegin = 0; + std::uint64_t clippedEnd = 0; + if (!clipRangeToObject(obj, mappedBegin, mappedEnd, clippedBegin, + clippedEnd)) + { + continue; + } + addRange(initialized[objectIdx], clippedBegin, clippedEnd); + wroteSomething = true; + + if (currentSummary && isParamObject(obj) && obj.param) + { + PointerParamEffectSummary& current = + getParamEffect(*currentSummary, *obj.param); + addRange(current.writeRanges, clippedBegin, clippedEnd); + } + } + + if (effect.hasUnknownWrite) + { + wroteSomething = true; + writeWasUnknown = true; + } + } + else + { + if (effect.hasUnknownWrite || !effect.writeRanges.empty()) + { + wroteSomething = true; + writeWasUnknown = true; + } + } + + if (writeWasUnknown && currentSummary && isParamObject(obj) && obj.param) + { + getParamEffect(*currentSummary, *obj.param).hasUnknownWrite = true; + } + + if (wroteSomething && writeSeen && isAllocaObject(obj) && + objectIdx < writeSeen->size()) + { + writeSeen->set(objectIdx); + } + } + 
} + + static void + transferInstruction(const llvm::Instruction& I, const TrackedObjectContext& tracked, + const llvm::DataLayout& DL, const FunctionSummaryMap& summaries, + InitRangeState& initialized, llvm::BitVector* writeSeen, + llvm::BitVector* readBeforeInitSeen, FunctionSummary* currentSummary, + std::vector* emittedIssues) + { + if (auto* LI = llvm::dyn_cast(&I)) + { + MemoryAccess access; + std::uint64_t loadSize = getTypeStoreSizeBytes(LI->getType(), DL); + if (resolveAccessFromPointer(LI->getPointerOperand(), loadSize, tracked, DL, + access)) + { + const TrackedMemoryObject& obj = tracked.objects[access.objectIdx]; + bool isDefInit = + isRangeCovered(initialized[access.objectIdx], access.begin, access.end); + if (!isDefInit) + { + if (isAllocaObject(obj)) + { + if (emittedIssues) + { + emittedIssues->push_back( + {I.getFunction()->getName().str(), getTrackedObjectName(obj), + LI, 0, 0, "", + UninitializedLocalIssueKind::ReadBeforeDefiniteInit}); + } + if (readBeforeInitSeen && access.objectIdx < readBeforeInitSeen->size()) + readBeforeInitSeen->set(access.objectIdx); + } + else if (currentSummary && obj.param) + { + addRange( + getParamEffect(*currentSummary, *obj.param).readBeforeWriteRanges, + access.begin, access.end); + } + } + return; + } + + unsigned objectIdx = 0; + std::uint64_t offset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(LI->getPointerOperand(), tracked, DL, objectIdx, + offset, hasConstOffset)) + { + return; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + InitLatticeState stateKind = + classifyInitState(initialized[objectIdx], getObjectFullRangeEnd(obj)); + bool isDefInit = (stateKind == InitLatticeState::Init); + if (!isDefInit) + { + if (isAllocaObject(obj)) + { + if (emittedIssues) + { + emittedIssues->push_back( + {I.getFunction()->getName().str(), getTrackedObjectName(obj), LI, 0, + 0, "", UninitializedLocalIssueKind::ReadBeforeDefiniteInit}); + } + if (readBeforeInitSeen && objectIdx < 
readBeforeInitSeen->size()) + readBeforeInitSeen->set(objectIdx); + } + else if (currentSummary && obj.param) + { + getParamEffect(*currentSummary, *obj.param).hasUnknownReadBeforeWrite = + true; + } + } + return; + } + + if (auto* SI = llvm::dyn_cast(&I)) + { + if (llvm::isa(SI->getValueOperand()) || + llvm::isa(SI->getValueOperand())) + { + return; + } + + std::uint64_t storeSize = + getTypeStoreSizeBytes(SI->getValueOperand()->getType(), DL); + MemoryAccess access; + if (resolveAccessFromPointer(SI->getPointerOperand(), storeSize, tracked, DL, + access)) + { + const TrackedMemoryObject& obj = tracked.objects[access.objectIdx]; + addRange(initialized[access.objectIdx], access.begin, access.end); + if (isAllocaObject(obj)) + { + if (writeSeen && access.objectIdx < writeSeen->size()) + writeSeen->set(access.objectIdx); + } + else if (currentSummary && obj.param) + { + addRange(getParamEffect(*currentSummary, *obj.param).writeRanges, + access.begin, access.end); + } + return; + } + + unsigned objectIdx = 0; + std::uint64_t offset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(SI->getPointerOperand(), tracked, DL, objectIdx, + offset, hasConstOffset)) + { + return; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + if (isAllocaObject(obj)) + { + if (writeSeen && objectIdx < writeSeen->size()) + writeSeen->set(objectIdx); + } + else if (currentSummary && obj.param) + { + getParamEffect(*currentSummary, *obj.param).hasUnknownWrite = true; + } + return; + } + + auto* MI = llvm::dyn_cast(&I); + if (MI) + { + auto* len = llvm::dyn_cast(MI->getLength()); + if (len && len->isZero()) + return; + + bool isInitWrite = + llvm::isa(MI) || llvm::isa(MI); + if (!isInitWrite) + return; + + if (auto* MTI = llvm::dyn_cast(MI)) + { + if (len) + { + std::uint64_t readSize = len->getZExtValue(); + MemoryAccess srcAccess; + if (resolveAccessFromPointer(MTI->getSource(), readSize, tracked, DL, + srcAccess)) + { + const TrackedMemoryObject& srcObj = 
+ tracked.objects[srcAccess.objectIdx]; + bool srcDefInit = isRangeCovered(initialized[srcAccess.objectIdx], + srcAccess.begin, srcAccess.end); + if (!srcDefInit) + { + if (isAllocaObject(srcObj)) + { + if (emittedIssues) + { + emittedIssues->push_back( + {I.getFunction()->getName().str(), + getTrackedObjectName(srcObj), MI, 0, 0, "", + UninitializedLocalIssueKind::ReadBeforeDefiniteInit}); + } + if (readBeforeInitSeen && + srcAccess.objectIdx < readBeforeInitSeen->size()) + { + readBeforeInitSeen->set(srcAccess.objectIdx); + } + } + else if (currentSummary && srcObj.param) + { + addRange(getParamEffect(*currentSummary, *srcObj.param) + .readBeforeWriteRanges, + srcAccess.begin, srcAccess.end); + } + } + } + else + { + unsigned srcObjectIdx = 0; + std::uint64_t srcOffset = 0; + bool srcHasConstOffset = false; + if (resolveTrackedObjectBase(MTI->getSource(), tracked, DL, + srcObjectIdx, srcOffset, + srcHasConstOffset)) + { + const TrackedMemoryObject& srcObj = tracked.objects[srcObjectIdx]; + InitLatticeState stateKind = classifyInitState( + initialized[srcObjectIdx], getObjectFullRangeEnd(srcObj)); + if (stateKind != InitLatticeState::Init) + { + if (isAllocaObject(srcObj)) + { + if (emittedIssues) + { + emittedIssues->push_back( + {I.getFunction()->getName().str(), + getTrackedObjectName(srcObj), MI, 0, 0, "", + UninitializedLocalIssueKind:: + ReadBeforeDefiniteInit}); + } + if (readBeforeInitSeen && + srcObjectIdx < readBeforeInitSeen->size()) + { + readBeforeInitSeen->set(srcObjectIdx); + } + } + else if (currentSummary && srcObj.param) + { + getParamEffect(*currentSummary, *srcObj.param) + .hasUnknownReadBeforeWrite = true; + } + } + } + } + } + else + { + unsigned srcObjectIdx = 0; + std::uint64_t srcOffset = 0; + bool srcHasConstOffset = false; + if (resolveTrackedObjectBase(MTI->getSource(), tracked, DL, srcObjectIdx, + srcOffset, srcHasConstOffset)) + { + const TrackedMemoryObject& srcObj = tracked.objects[srcObjectIdx]; + InitLatticeState stateKind = 
classifyInitState( + initialized[srcObjectIdx], getObjectFullRangeEnd(srcObj)); + if (stateKind != InitLatticeState::Init) + { + if (isAllocaObject(srcObj)) + { + if (emittedIssues) + { + emittedIssues->push_back( + {I.getFunction()->getName().str(), + getTrackedObjectName(srcObj), MI, 0, 0, "", + UninitializedLocalIssueKind::ReadBeforeDefiniteInit}); + } + if (readBeforeInitSeen && + srcObjectIdx < readBeforeInitSeen->size()) + { + readBeforeInitSeen->set(srcObjectIdx); + } + } + else if (currentSummary && srcObj.param) + { + getParamEffect(*currentSummary, *srcObj.param) + .hasUnknownReadBeforeWrite = true; + } + } + } + } + } + + if (len) + { + std::uint64_t writeSize = len->getZExtValue(); + MemoryAccess access; + if (resolveAccessFromPointer(MI->getDest(), writeSize, tracked, DL, access)) + { + const TrackedMemoryObject& obj = tracked.objects[access.objectIdx]; + addRange(initialized[access.objectIdx], access.begin, access.end); + if (isAllocaObject(obj)) + { + if (writeSeen && access.objectIdx < writeSeen->size()) + writeSeen->set(access.objectIdx); + } + else if (currentSummary && obj.param) + { + addRange(getParamEffect(*currentSummary, *obj.param).writeRanges, + access.begin, access.end); + } + return; + } + } + + unsigned objectIdx = 0; + std::uint64_t offset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(MI->getDest(), tracked, DL, objectIdx, offset, + hasConstOffset)) + { + return; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + if (isAllocaObject(obj)) + { + if (writeSeen && objectIdx < writeSeen->size()) + writeSeen->set(objectIdx); + } + else if (currentSummary && obj.param) + { + getParamEffect(*currentSummary, *obj.param).hasUnknownWrite = true; + } + return; + } + + auto* CB = llvm::dyn_cast(&I); + if (!CB) + return; + + const llvm::Function* callee = CB->getCalledFunction(); + if (!callee || callee->isDeclaration()) + return; + + auto itSummary = summaries.find(callee); + if (itSummary == 
summaries.end()) + return; + + applyCalleeSummaryAtCall(*CB, *callee, itSummary->second, tracked, DL, initialized, + writeSeen, readBeforeInitSeen, currentSummary, emittedIssues); + } + + static void analyzeFunction(const llvm::Function& F, const llvm::DataLayout& DL, + const FunctionSummaryMap& summaries, + FunctionSummary* outSummary, + std::vector* outIssues) + { + TrackedObjectContext tracked; + collectTrackedObjects(F, DL, tracked); + if (tracked.objects.empty()) + return; + + const unsigned trackedCount = static_cast(tracked.objects.size()); + + llvm::DenseMap reachable; + computeReachableBlocks(F, reachable); + + llvm::DenseMap inState; + llvm::DenseMap outState; + + for (const llvm::BasicBlock& BB : F) + { + if (!reachable.lookup(&BB)) + continue; + const bool isEntry = (&BB == &F.getEntryBlock()); + inState[&BB] = isEntry ? makeBottomState(trackedCount) : makeTopState(tracked); + outState[&BB] = isEntry ? makeBottomState(trackedCount) : makeTopState(tracked); + } + + const unsigned reachableBlocks = static_cast(outState.size()); + const unsigned maxIterations = std::max(64u, reachableBlocks * 16u); + bool changed = true; + unsigned iteration = 0; + while (changed && iteration < maxIterations) + { + ++iteration; + changed = false; + + for (const llvm::BasicBlock& BB : F) + { + if (!reachable.lookup(&BB)) + continue; + + InitRangeState newIn = + computeInState(BB, &F.getEntryBlock(), reachable, outState, tracked); + + InitRangeState state = newIn; + for (const llvm::Instruction& I : BB) + { + transferInstruction(I, tracked, DL, summaries, state, nullptr, nullptr, + nullptr, nullptr); + } + + InitRangeState& oldIn = inState[&BB]; + InitRangeState& oldOut = outState[&BB]; + if (!statesEqual(oldIn, newIn)) + { + oldIn = std::move(newIn); + changed = true; + } + if (!statesEqual(oldOut, state)) + { + oldOut = std::move(state); + changed = true; + } + } + } + + if (outSummary) + { + for (const llvm::BasicBlock& BB : F) + { + if (!reachable.lookup(&BB)) + 
continue; + + InitRangeState state = inState[&BB]; + for (const llvm::Instruction& I : BB) + { + transferInstruction(I, tracked, DL, summaries, state, nullptr, nullptr, + outSummary, nullptr); + } + } + return; + } + + llvm::BitVector writeSeen(trackedCount, false); + llvm::BitVector readBeforeInitSeen(trackedCount, false); + + for (const llvm::BasicBlock& BB : F) + { + if (!reachable.lookup(&BB)) + continue; + + InitRangeState state = inState[&BB]; + for (const llvm::Instruction& I : BB) + { + transferInstruction(I, tracked, DL, summaries, state, &writeSeen, + &readBeforeInitSeen, nullptr, outIssues); + } + } + + for (unsigned idx = 0; idx < trackedCount; ++idx) + { + const TrackedMemoryObject& obj = tracked.objects[idx]; + if (!isAllocaObject(obj)) + continue; + if (writeSeen.test(idx)) + continue; + if (readBeforeInitSeen.test(idx)) + continue; + + const llvm::AllocaInst* AI = obj.alloca; + if (!AI) + continue; + + const std::string varName = deriveAllocaName(AI); + if (varName.empty() || varName == "") + continue; + + unsigned line = 0; + unsigned column = 0; + getAllocaDeclarationLocation(AI, line, column); + if (outIssues) + { + outIssues->push_back({F.getName().str(), varName, getAllocaDebugAnchor(AI), + line, column, "", + UninitializedLocalIssueKind::NeverInitialized}); + } + } + } + + static FunctionSummaryMap + computeFunctionSummaries(llvm::Module& mod, + const std::function& shouldAnalyze) + { + FunctionSummaryMap summaries; + for (const llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + summaries[&F] = makeEmptySummary(F); + } + + bool changed = true; + unsigned guard = 0; + while (changed && guard < 64) + { + changed = false; + ++guard; + + for (const llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + + FunctionSummary next = makeEmptySummary(F); + analyzeFunction(F, mod.getDataLayout(), summaries, &next, nullptr); + FunctionSummary& cur = 
summaries[&F]; + if (!(cur == next)) + { + cur = std::move(next); + changed = true; + } + } + } + + return summaries; + } + } // namespace + + std::vector + analyzeUninitializedLocalReads(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + + FunctionSummaryMap summaries = computeFunctionSummaries(mod, shouldAnalyze); + + for (const llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + + analyzeFunction(F, mod.getDataLayout(), summaries, nullptr, &issues); + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/report/ReportSerialization.cpp b/src/report/ReportSerialization.cpp index fd88046..afee8ce 100644 --- a/src/report/ReportSerialization.cpp +++ b/src/report/ReportSerialization.cpp @@ -1,8 +1,11 @@ #include "StackUsageAnalyzer.hpp" +#include #include // std::snprintf +#include #include #include +#include #include namespace ctrace::stack @@ -81,6 +84,20 @@ namespace ctrace::stack return "note"; } + static std::string resolveRuleId(const Diagnostic& d) + { + if (!d.ruleId.empty()) + return d.ruleId; + return std::string(ctrace::stack::enumToString(d.errCode)); + } + + static std::string formatConfidence(double confidence) + { + std::ostringstream os; + os << std::fixed << std::setprecision(2) << confidence; + return os.str(); + } + } // anonymous namespace static std::string toJsonImpl(const AnalysisResult& result, const std::string* inputFile, @@ -190,9 +207,20 @@ namespace ctrace::stack os << " {\n"; os << " \"id\": \"diag-" << (i + 1) << "\",\n"; os << " \"severity\": \"" << ctrace::stack::enumToString(d.severity) << "\",\n"; - const std::string ruleId = - d.ruleId.empty() ? 
std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; + const std::string ruleId = resolveRuleId(d); os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; + os << " \"confidence\": "; + if (d.confidence >= 0.0) + os << formatConfidence(d.confidence); + else + os << "null"; + os << ",\n"; + os << " \"cwe\": "; + if (!d.cweId.empty()) + os << "\"" << jsonEscape(d.cweId) << "\""; + else + os << "null"; + os << ",\n"; std::string diagFilePath = d.filePath; if (diagFilePath.empty() && inputFile) @@ -242,6 +270,32 @@ namespace ctrace::stack std::string toSarif(const AnalysisResult& result, const std::string& inputFile, const std::string& toolName, const std::string& toolVersion) { + struct SarifRuleEntry + { + std::string id; + std::string cweId; + }; + + std::vector rules; + std::unordered_map ruleIndices; + for (const auto& d : result.diagnostics) + { + const std::string rid = resolveRuleId(d); + auto it = ruleIndices.find(rid); + if (it == ruleIndices.end()) + { + ruleIndices.emplace(rid, rules.size()); + rules.push_back({rid, d.cweId}); + } + else if (rules[it->second].cweId.empty() && !d.cweId.empty()) + { + rules[it->second].cweId = d.cweId; + } + } + std::sort(rules.begin(), rules.end(), + [](const SarifRuleEntry& lhs, const SarifRuleEntry& rhs) + { return lhs.id < rhs.id; }); + std::ostringstream os; os << "{\n"; os << " \"version\": \"2.1.0\",\n"; @@ -252,7 +306,32 @@ namespace ctrace::stack os << " \"tool\": {\n"; os << " \"driver\": {\n"; os << " \"name\": \"" << jsonEscape(toolName) << "\",\n"; - os << " \"version\": \"" << jsonEscape(toolVersion) << "\"\n"; + os << " \"version\": \"" << jsonEscape(toolVersion) << "\",\n"; + os << " \"rules\": [\n"; + for (std::size_t i = 0; i < rules.size(); ++i) + { + const auto& rule = rules[i]; + os << " {\n"; + os << " \"id\": \"" << jsonEscape(rule.id) << "\",\n"; + os << " \"shortDescription\": { \"text\": \"" << jsonEscape(rule.id) + << "\" }"; + if (!rule.cweId.empty()) + { + os << ",\n"; + os << " 
\"properties\": {\n"; + os << " \"tags\": [\"" << jsonEscape(rule.cweId) << "\"]\n"; + os << " }\n"; + } + else + { + os << "\n"; + } + os << " }"; + if (i + 1 < rules.size()) + os << ","; + os << "\n"; + } + os << " ]\n"; os << " }\n"; os << " },\n"; os << " \"results\": [\n"; @@ -261,12 +340,30 @@ namespace ctrace::stack { const auto& d = result.diagnostics[i]; os << " {\n"; - // For now, use a single generic ruleId; you can specialize it later. - const std::string ruleId = - d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; + const std::string ruleId = resolveRuleId(d); os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; os << " \"level\": \"" << severityToSarifLevel(d.severity) << "\",\n"; os << " \"message\": { \"text\": \"" << jsonEscape(d.message) << "\" },\n"; + bool hasConfidence = d.confidence >= 0.0; + bool hasCwe = !d.cweId.empty(); + if (hasConfidence || hasCwe) + { + os << " \"properties\": {\n"; + bool needComma = false; + if (hasConfidence) + { + os << " \"confidence\": " << formatConfidence(d.confidence); + needComma = true; + } + if (hasCwe) + { + if (needComma) + os << ",\n"; + os << " \"cwe\": \"" << jsonEscape(d.cweId) << "\""; + } + os << "\n"; + os << " },\n"; + } os << " \"locations\": [\n"; os << " {\n"; os << " \"physicalLocation\": {\n"; diff --git a/test/uninitialized-variable/uninitialized-local-argument-never-used.c b/test/uninitialized-variable/uninitialized-local-argument-never-used.c new file mode 100644 index 0000000..0ef83bd --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-argument-never-used.c @@ -0,0 +1,10 @@ +#include + +int main(int argc, char** argv) +{ + int value_int; + bool value_bool; + void* value_ptr; + + return 0; +} diff --git a/test/uninitialized-variable/uninitialized-local-argument.c b/test/uninitialized-variable/uninitialized-local-argument.c new file mode 100644 index 0000000..a9044dc --- /dev/null +++ 
b/test/uninitialized-variable/uninitialized-local-argument.c @@ -0,0 +1,21 @@ +#include + +int main(int argc, char** argv) +{ + int value; + + for (int i = 1; i < argc; ++i) + { + printf("Argument: %s\n", argv[i]); + value = i; // just to use 'value' and avoid unused variable warning + } + printf("%d\n", value); + return 0; +} + +// at line 12, column 20 +// [!!] potential read of uninitialized local variable 'value' +// this load may execute before any definite initialization on all control-flow paths + +// not contains: potential read of uninitialized local variable 'argc.addr' +// not contains: potential read of uninitialized local variable 'argv.addr' diff --git a/test/uninitialized-variable/uninitialized-local-argument.cpp b/test/uninitialized-variable/uninitialized-local-argument.cpp new file mode 100644 index 0000000..d3f5081 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-argument.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + +int main(int argc, char** argv) +{ + std::string value; + std::uint16_t value16 = 0; + + for (int i = 1; i < argc; ++i) + { + const char* arg = argv[i]; + if (std::strcmp(arg, "--help") == 0 || std::strcmp(arg, "-h") == 0) + { + return 1; + } + } + std::cout << value << std::endl; + return 0; +} + +// not contains: potential read of uninitialized local variable 'argc.addr' +// not contains: potential read of uninitialized local variable 'argv.addr' diff --git a/test/uninitialized-variable/uninitialized-local-array-fully-initialized.c b/test/uninitialized-variable/uninitialized-local-array-fully-initialized.c new file mode 100644 index 0000000..41128f1 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-array-fully-initialized.c @@ -0,0 +1,9 @@ +int read_array_fully_initialized(void) +{ + int arr[2]; + arr[0] = 10; + arr[1] = 20; + return arr[1]; +} + +// not contains: potential read of uninitialized local variable 'arr' diff --git 
a/test/uninitialized-variable/uninitialized-local-array-partial.c b/test/uninitialized-variable/uninitialized-local-array-partial.c new file mode 100644 index 0000000..c0dd96d --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-array-partial.c @@ -0,0 +1,10 @@ +int read_array_partial(void) +{ + int arr[2]; + arr[0] = 42; + return arr[1]; +} + +// at line 5, column 12 +// [!!] potential read of uninitialized local variable 'arr' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-array.c b/test/uninitialized-variable/uninitialized-local-array.c new file mode 100644 index 0000000..cc9f50f --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-array.c @@ -0,0 +1,9 @@ +int read_array_elem(void) +{ + int arr[4]; + return arr[2]; +} + +// at line 4, column 12 +// [!!] potential read of uninitialized local variable 'arr' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-basic.c b/test/uninitialized-variable/uninitialized-local-basic.c new file mode 100644 index 0000000..1b0c429 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-basic.c @@ -0,0 +1,8 @@ +int read_uninitialized_basic(void) +{ + int value; + // at line 7, column 12 + // [!!] potential read of uninitialized local variable 'value' + // this load may execute before any definite initialization on all control-flow paths + return value; +} diff --git a/test/uninitialized-variable/uninitialized-local-branch.c b/test/uninitialized-variable/uninitialized-local-branch.c new file mode 100644 index 0000000..cae1ed8 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-branch.c @@ -0,0 +1,13 @@ +int read_uninitialized_branch(int cond) +{ + int x; + + if (cond) + { + x = 42; + } + // at line 12, column 12 + // [!!] 
potential read of uninitialized local variable 'x' + // this load may execute before any definite initialization on all control-flow paths + return x; +} diff --git a/test/uninitialized-variable/uninitialized-local-goto.c b/test/uninitialized-variable/uninitialized-local-goto.c new file mode 100644 index 0000000..4c0c0b4 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-goto.c @@ -0,0 +1,13 @@ +int read_goto(int cond) +{ + int value; + if (cond) + goto done; + value = 1; +done: + return value; +} + +// at line 8, column 12 +// [!!] potential read of uninitialized local variable 'value' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-initialized.c b/test/uninitialized-variable/uninitialized-local-initialized.c new file mode 100644 index 0000000..cef41fb --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-initialized.c @@ -0,0 +1,11 @@ +int read_initialized_ok(int cond) +{ + int x = 0; + if (cond) + { + x = 7; + } + return x; +} + +// not contains: potential read of uninitialized local variable diff --git a/test/uninitialized-variable/uninitialized-local-interproc-read-before-write-chain.c b/test/uninitialized-variable/uninitialized-local-interproc-read-before-write-chain.c new file mode 100644 index 0000000..379bdd2 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-interproc-read-before-write-chain.c @@ -0,0 +1,19 @@ +static int read_leaf(const int* p) +{ + return *p; +} + +static int read_mid(const int* p) +{ + return read_leaf(p); +} + +int read_via_call_chain_before_init(void) +{ + int value; + return read_mid(&value); +} + +// at line 14, column 12 +// [!!] 
potential read of uninitialized local variable 'value' +// this call may read the value before any definite initialization in 'read_mid' diff --git a/test/uninitialized-variable/uninitialized-local-interproc-read-before-write.c b/test/uninitialized-variable/uninitialized-local-interproc-read-before-write.c new file mode 100644 index 0000000..146059a --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-interproc-read-before-write.c @@ -0,0 +1,14 @@ +static int read_value(const int* p) +{ + return *p; +} + +int read_via_call_before_init(void) +{ + int value; + return read_value(&value); +} + +// at line 9, column 12 +// [!!] potential read of uninitialized local variable 'value' +// this call may read the value before any definite initialization in 'read_value' diff --git a/test/uninitialized-variable/uninitialized-local-interproc-struct-partial.c b/test/uninitialized-variable/uninitialized-local-interproc-struct-partial.c new file mode 100644 index 0000000..601d267 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-interproc-struct-partial.c @@ -0,0 +1,21 @@ +struct Pair +{ + int x; + int y; +}; + +static void init_x(struct Pair* p) +{ + p->x = 1; +} + +int read_struct_partial_via_call(void) +{ + struct Pair p; + init_x(&p); + return p.y; +} + +// at line 16, column 14 +// [!!] 
potential read of uninitialized local variable 'p' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-interproc-write-chain.c b/test/uninitialized-variable/uninitialized-local-interproc-write-chain.c new file mode 100644 index 0000000..e89be88 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-interproc-write-chain.c @@ -0,0 +1,18 @@ +static void init_leaf(int* p) +{ + *p = 7; +} + +static void init_mid(int* p) +{ + init_leaf(p); +} + +int read_after_init_call_chain(void) +{ + int value; + init_mid(&value); + return value; +} + +// not contains: potential read of uninitialized local variable 'value' diff --git a/test/uninitialized-variable/uninitialized-local-interproc-write.c b/test/uninitialized-variable/uninitialized-local-interproc-write.c new file mode 100644 index 0000000..5ece5ae --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-interproc-write.c @@ -0,0 +1,13 @@ +static void init_value(int* p) +{ + *p = 42; +} + +int read_after_init_call(void) +{ + int value; + init_value(&value); + return value; +} + +// not contains: potential read of uninitialized local variable 'value' diff --git a/test/uninitialized-variable/uninitialized-local-memcpy-initialized.c b/test/uninitialized-variable/uninitialized-local-memcpy-initialized.c new file mode 100644 index 0000000..dff7256 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-memcpy-initialized.c @@ -0,0 +1,9 @@ +int memcpy_reads_initialized_source(void) +{ + int src = 7; + int dst = 0; + __builtin_memcpy(&dst, &src, sizeof(src)); + return dst; +} + +// not contains: potential read of uninitialized local variable 'src' diff --git a/test/uninitialized-variable/uninitialized-local-memcpy-read.c b/test/uninitialized-variable/uninitialized-local-memcpy-read.c new file mode 100644 index 0000000..19933f6 --- /dev/null +++ 
b/test/uninitialized-variable/uninitialized-local-memcpy-read.c @@ -0,0 +1,11 @@ +int memcpy_reads_uninitialized_source(void) +{ + int src; + int dst = 0; + __builtin_memcpy(&dst, &src, sizeof(src)); + return dst; +} + +// at line 5, column 5 +// [!!] potential read of uninitialized local variable 'src' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-nested-loops.c b/test/uninitialized-variable/uninitialized-local-nested-loops.c new file mode 100644 index 0000000..de514f6 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-nested-loops.c @@ -0,0 +1,16 @@ +int read_nested_loops(int n, int m) +{ + int value; + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < m; ++j) + { + value = i + j; + } + } + return value; +} + +// at line 11, column 12 +// [!!] potential read of uninitialized local variable 'value' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-pointer-redirect.c b/test/uninitialized-variable/uninitialized-local-pointer-redirect.c new file mode 100644 index 0000000..47fc83e --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-pointer-redirect.c @@ -0,0 +1,15 @@ +static void redirect_ptr(int** out, int* target) +{ + *out = target; +} + +int read_after_pointer_redirect(void) +{ + int uninit; + int init = 42; + int* ptr = &uninit; + redirect_ptr(&ptr, &init); + return *ptr; +} + +// not contains: potential read of uninitialized local variable 'uninit' diff --git a/test/uninitialized-variable/uninitialized-local-pointer.c b/test/uninitialized-variable/uninitialized-local-pointer.c new file mode 100644 index 0000000..5c41bbd --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-pointer.c @@ -0,0 +1,12 @@ +int read_pointer_target(int cond) +{ + int fallback = 0; + int* ptr; + if (cond) + ptr = &fallback; + return *ptr; +} 
+ +// at line 7, column 13 +// [!!] potential read of uninitialized local variable 'ptr' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-reference.cpp b/test/uninitialized-variable/uninitialized-local-reference.cpp new file mode 100644 index 0000000..b0b51a6 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-reference.cpp @@ -0,0 +1,14 @@ +int read_cpp_ref(int cond, int& out) +{ + int value; + if (cond) + { + value = 42; + } + out = value; + return out; +} + +// at line 8, column 11 +// [!!] potential read of uninitialized local variable 'value' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-struct-fully-initialized.c b/test/uninitialized-variable/uninitialized-local-struct-fully-initialized.c new file mode 100644 index 0000000..e6a4eea --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-struct-fully-initialized.c @@ -0,0 +1,15 @@ +struct Point +{ + int x; + int y; +}; + +int read_struct_fully_initialized(void) +{ + struct Point p; + p.x = 1; + p.y = 2; + return p.y; +} + +// not contains: potential read of uninitialized local variable 'p' diff --git a/test/uninitialized-variable/uninitialized-local-struct-partial.c b/test/uninitialized-variable/uninitialized-local-struct-partial.c new file mode 100644 index 0000000..172b660 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-struct-partial.c @@ -0,0 +1,16 @@ +struct Pair +{ + int x; + int y; +}; + +int read_struct_partial(void) +{ + struct Pair p; + p.x = 7; + return p.y; +} + +// at line 11, column 14 +// [!!] 
potential read of uninitialized local variable 'p' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-struct.c b/test/uninitialized-variable/uninitialized-local-struct.c new file mode 100644 index 0000000..50deb37 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-struct.c @@ -0,0 +1,15 @@ +struct Pair +{ + int x; + int y; +}; + +int read_struct_field(void) +{ + struct Pair p; + return p.x; +} + +// at line 10, column 14 +// [!!] potential read of uninitialized local variable 'p' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-switch.c b/test/uninitialized-variable/uninitialized-local-switch.c new file mode 100644 index 0000000..d1acbf5 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-switch.c @@ -0,0 +1,20 @@ +int read_switch(int tag) +{ + int value; + switch (tag) + { + case 0: + value = 10; + break; + case 1: + break; + default: + value = 20; + break; + } + return value; +} + +// at line 15, column 12 +// [!!] potential read of uninitialized local variable 'value' +// this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-unused.c b/test/uninitialized-variable/uninitialized-local-unused.c new file mode 100644 index 0000000..0ff3ec9 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-unused.c @@ -0,0 +1,5 @@ +int main(void) +{ + int value; + return 0; +}