diff --git a/.ctrace-analyzer.cfg b/.ctrace-analyzer.cfg index 63e9ba5..3103554 100644 --- a/.ctrace-analyzer.cfg +++ b/.ctrace-analyzer.cfg @@ -10,6 +10,7 @@ buffer-model=models/buffer-overflow/generic.txt # compile-commands=build/compile_commands.json analysis-profile=full jobs=auto +compile-ir-cache-dir=.cache/compile-ir # Output behavior warnings-only=true @@ -19,8 +20,8 @@ timing=false # Inter-TU analysis resource-cross-tu=true uninitialized-cross-tu=true -#resource-summary-cache-dir=.cache/resource-lifetime -resource-summary-cache-memory-only=true +resource-summary-cache-dir=.cache/resource-lifetime +resource-summary-cache-memory-only=false # SMT configuration smt=on diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 54e213e..5d506c4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,14 +1,16 @@ name: Build on: - push: - branches: - - "**" pull_request: - branches: - - "**" + branches: [main] + push: + branches: [main] workflow_dispatch: +concurrency: + group: ci-${{ github.workflow }}-${{ github.event.pull_request.head.repo.full_name || github.repository }}-${{ github.event.pull_request.head.ref || github.ref_name }} + cancel-in-progress: true + env: CCACHE_DIR: ${{ github.workspace }}/.ccache diff --git a/CMakeLists.txt b/CMakeLists.txt index cc6087e..6888aad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ project(stack_usage_analyzer) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") include(CheckLLVMVersion OPTIONAL) +include(CheckCXXCompilerFlag) if(COMMAND check_llvm_version) set(LLVM_MIN_REQUIRED_VERSION "19" CACHE STRING "Minimum required LLVM version") @@ -51,9 +52,15 @@ message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") # Options de build option(BUILD_CLI "Build stack_usage_analyzer CLI tool" ON) option(BUILD_SHARED_LIB "Build shared library variant" ON) -option(ENABLE_STACK_USAGE "Emit per-function stack usage (.su) files" OFF) +option(ENABLE_STACK_USAGE "Emit 
per-function stack usage (.su) files" ON) +option(ENABLE_WARN_PADDED "Enable -Wpadded warnings" ON) +option(ENABLE_WARN_REORDER_INIT_LIST "Enable -Wreorder-init-list when supported" ON) option(ENABLE_Z3_BACKEND "Enable optional Z3 SMT backend if Z3 is available" ON) +if(ENABLE_WARN_REORDER_INIT_LIST) + check_cxx_compiler_flag("-Wreorder-init-list" CTRACE_STACK_HAS_WREORDER_INIT_LIST) +endif() + # =========================== # Communs Sources # =========================== @@ -114,7 +121,6 @@ if(ENABLE_Z3_BACKEND) endif() endif() -include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) # llvm_map_components_to_libnames(llvm_libs @@ -128,6 +134,10 @@ add_library(stack_usage_analyzer_lib STATIC ${STACK_ANALYZER_SOURCES} ) +if(ENABLE_WARN_REORDER_INIT_LIST AND CTRACE_STACK_HAS_WREORDER_INIT_LIST) + target_compile_options(stack_usage_analyzer_lib PRIVATE -Wreorder-init-list) +endif() + if(CTRACE_STACK_HAVE_Z3_BACKEND) target_compile_definitions(stack_usage_analyzer_lib PUBLIC CTRACE_STACK_ENABLE_Z3_BACKEND=1) if(TARGET z3::libz3) @@ -137,7 +147,7 @@ if(CTRACE_STACK_HAVE_Z3_BACKEND) elseif(DEFINED Z3_LIBRARIES) target_link_libraries(stack_usage_analyzer_lib PUBLIC ${Z3_LIBRARIES}) if(DEFINED Z3_INCLUDE_DIRS) - target_include_directories(stack_usage_analyzer_lib PUBLIC ${Z3_INCLUDE_DIRS}) + target_include_directories(stack_usage_analyzer_lib SYSTEM PUBLIC ${Z3_INCLUDE_DIRS}) endif() endif() endif() @@ -162,9 +172,21 @@ target_include_directories(stack_usage_analyzer_lib PUBLIC $ $ +) + +target_include_directories(stack_usage_analyzer_lib SYSTEM + PUBLIC ${LLVM_INCLUDE_DIRS} ) +# Treat external dependency headers as system includes to reduce warning noise +# from third-party code when project warning levels are strict. 
+target_include_directories(stack_usage_analyzer_lib SYSTEM + PUBLIC + $ + $ +) + # ALIAS FOR USE WITH FETCHCONTENT add_library(coretrace::stack_usage_analyzer_lib ALIAS stack_usage_analyzer_lib) @@ -215,6 +237,10 @@ if(BUILD_CLI) main.cpp ) + if(ENABLE_WARN_REORDER_INIT_LIST AND CTRACE_STACK_HAS_WREORDER_INIT_LIST) + target_compile_options(stack_usage_analyzer PRIVATE -Wreorder-init-list) + endif() + target_link_libraries(stack_usage_analyzer PRIVATE stack_usage_analyzer_lib @@ -226,6 +252,11 @@ if(BUILD_CLI) target_compile_options(stack_usage_analyzer PRIVATE -fstack-usage) endif() + if(ENABLE_WARN_PADDED AND CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") + target_compile_options(stack_usage_analyzer_lib PRIVATE -Wpadded) + target_compile_options(stack_usage_analyzer PRIVATE -Wpadded) + endif() + if(ENABLE_DEBUG_ASAN) target_compile_options(stack_usage_analyzer PRIVATE -fsanitize=address -fno-omit-frame-pointer -g) target_link_options(stack_usage_analyzer PRIVATE -fsanitize=address) diff --git a/README.md b/README.md index 224bac1..d6afb27 100644 --- a/README.md +++ b/README.md @@ -239,6 +239,7 @@ Ready-to-adapt workflow examples: --no-resource-cross-tu disables cross-TU resource summaries --resource-summary-cache-dir= sets cache directory for cross-TU resource summaries (default: .cache/resource-lifetime) --resource-summary-cache-memory-only keeps cross-TU summary cache in memory only (process-local, no files) +--compile-ir-cache-dir= enables dependency-aware LLVM IR compile cache for unchanged source files --timing prints compile/analysis timings to stderr --config= loads optional key=value config file (CLI flags override config values) --print-effective-config prints resolved runtime config to stderr @@ -268,6 +269,8 @@ To generate `compile_commands.json` with CMake, configure with If analysis feels slow, `--compdb-fast` disables heavy flags (optimizations, sanitizers, profiling) while keeping include paths and macros. 
For multi-file runs, `--jobs=` parallelizes input loading; with cross-TU enabled it also parallelizes summary construction. +`--compile-ir-cache-dir=` reuses compiled LLVM IR for unchanged translation units +based on source/dependency stamps, which reduces repeated C/C++ frontend cost across runs. When inputs are auto-discovered from `compile_commands.json`, `_deps` entries are skipped by default to keep analysis focused on project code; use `--include-compdb-deps` to opt back in. @@ -305,6 +308,7 @@ Supported keys: - `uninitialized-cross-tu` - `resource-summary-cache-dir` - `resource-summary-cache-memory-only` +- `compile-ir-cache-dir` Example file: @@ -315,6 +319,7 @@ buffer-model=models/buffer-overflow/generic.txt compile-commands=build/compile_commands.json analysis-profile=full jobs=auto +compile-ir-cache-dir=.cache/compile-ir smt=on smt-backend=z3 smt-rules=recursion,integer-overflow,size-minus-k,stack-buffer,oob-read diff --git a/docs/architecture/smt-implementation-plans.md b/docs/architecture/smt-implementation-plans.md new file mode 100644 index 0000000..b311bbc --- /dev/null +++ b/docs/architecture/smt-implementation-plans.md @@ -0,0 +1,151 @@ +Voici comment je vois l'évolution, ordonnée par ROI décroissant et dépendances techniques. + +--- + +## Phase 1 — Encoder LLVM IR → ConstraintIR expressif +*Le verrou qui bloque tout le reste* + +Aujourd'hui `encodeRangeConstraints` ne sait encoder que des intervalles. Pour que Z3 apporte une vraie valeur sur les autres analyses, il faut un encoder qui traduit les instructions LLVM en expressions symboliques. 
+ +**Ce qu'il faut couvrir :** + +| LLVM IR | ConstraintIR | +|---|---| +| `add nsw i32 %a, %b` | `Add(sym_a, sym_b, 32)` + assertion overflow = poison | +| `icmp slt i32 %x, 42` | `Slt(sym_x, Const(42, 32))` | +| `br i1 %cond, label %then, label %else` | Edge constraint sur les deux branches | +| `phi [%v1, %bb1], [%v2, %bb2]` | Disjonction : `Or(And(from_bb1, eq(sym, v1)), And(from_bb2, eq(sym, v2)))` | +| `getelementptr [10 x i32], ptr %p, i64 0, i64 %idx` | `Add(base, Mul(idx, 4))` avec bound `0 ≤ idx < 10` | +| `select i1 %c, i32 %a, i32 %b` | `ITE(cond, sym_a, sym_b)` (nécessite ajout `ExprKind::Select`) | +| `llvm.assume(i1 %x)` | Assertion directe | +| `freeze` | Marquer le symbole comme "valeur définie" | + +**Livrable :** un `LlvmConstraintEncoder` qui prend un chemin de blocs basiques et produit un `ConstraintIR` complet. Le `ConstraintIrBuilder` est déjà prêt — c'est la traversée LLVM qui manque. + +--- + +## Phase 2 — Onboarding IntegerOverflow + SizeMinusK +*Le plus gros gisement de FP réductibles* + +Avec l'encoder de la phase 1, on peut brancher le SMT sur les analyses à fort FP : + +**IntegerOverflowAnalysis** — Aujourd'hui l'analyse détecte les overflows sur les arguments de `malloc`/`memcpy`. Le pattern type de FP : +```c +size_t n = get_count(); // range inconnu +void *p = malloc(n * sizeof(int)); // flaggé overflow +``` +Avec Z3 : encoder `n * 4` en bitvector 64 bits, vérifier si l'overflow est satisfiable sous les contraintes de chemin. Si `UNSAT` → supprimer le diagnostic. + +**SizeMinusKWrites** — Même principe : les off-by-one sont souvent des FP quand la taille est prouvablement > k. + +**Point d'intégration :** après la passe heuristique existante, avant émission. Exactement ce que le doc prévoit. Le `RecursionConstraintEvaluator` sert de modèle — créer un `OverflowConstraintEvaluator` analogue. 
+ +--- + +## Phase 3 — StackBuffer + OOBRead path-sensitive +*Précision sur les accès mémoire* + +L'analyse de buffer overflow actuelle est flow-insensitive — elle ne suit pas les contraintes de chemin sur les indices. Avec le SMT : + +```c +int buf[10]; +int idx = get_index(); +if (idx >= 0 && idx < 10) { + buf[idx] = 42; // aujourd'hui : flaggé potentiel overflow + // avec SMT : UNSAT → sûr +} +``` + +C'est le même pattern que la récursion : BFS avec accumulation de contraintes, sauf que la question est "cet accès est-il dans les bornes ?" au lieu de "cette base case est-elle atteignable ?". + +**Possibilité de factoriser** la traversée path-sensitive de `hasFeasibleNonRecursiveReturnPath` en un framework réutilisable : + +``` +PathExplorer + - traverse le CFG en BFS + - accumule les contraintes + - pose une Question à chaque point d'intérêt + - retourne Feasible / Infeasible / Inconclusive +``` + +--- + +## Phase 4 — TypeConfusion avec raisonnement struct-layout +*Les 91 FPs documentés* + +C'est le plus gros lot de FP en volume mais le plus complexe à encoder. Il faut : + +1. Encoder les layouts de structs (offsets des champs, tailles) comme contraintes +2. Pour chaque "conflit de type" détecté, vérifier si les deux vues sont sur des sous-objets légalement imbriqués +3. Si Z3 prouve que `offset_A + size_A ≤ offset_B` ou que les deux accès sont au même sous-objet → `UNSAT` → supprimer + +Ça nécessite probablement un ajout au `ConstraintIR` : des contraintes de type mémoire/layout, pas seulement arithmétiques. 
+ +--- + +## Phase 5 — DiagnosticRefiner comme composant autonome +*Quand plusieurs analyses utilisent le SMT* + +Dès que 3+ analyses passent par le SMT, la logique de décision (keep/suppress/downgrade/tag inconclusive) mérite d'être extraite : + +```cpp +class DiagnosticRefiner { + RefinementDecision refine(const Diagnostic& original, + const SmtDecision& decision, + const RulePolicy& policy) const; +}; + +enum class RefinementDecision { + Keep, // SAT ou pas de SMT + Suppress, // UNSAT confirmé + Downgrade, // UNSAT mais confidence < seuil → Warning→Info + MarkInconclusive // Unknown/Timeout +}; +``` + +Avec des politiques par rule : +- `recursion` : suppress agressif (UNSAT = pas de bug) +- `integer-overflow` : downgrade plutôt que suppress (risque d'encoding bug) +- `type-confusion` : suppress seulement en dual-consensus + +--- + +## Phase 6 — Cache + Observabilité +*Quand le volume de queries justifie l'investissement* + +**Cache :** +- Clé : hash du `ConstraintIR` normalisé + mode/backend +- Valeur : `SmtStatus` +- Portée : process-local (un seul run d'analyse) +- ROI : significatif en mode portfolio/cross-check ou quand plusieurs analyses posent des questions similaires sur les mêmes fonctions + +**Observabilité :** +- Compteurs : queries total / sat / unsat / unknown / timeout / error +- Latence : P50/P95/P99 par backend +- Suppressions : count par rule +- Format : JSON ou intégration dans le `--timing` existant + +--- + +## Phase 7 — Améliorations Z3 avancées +*Optimisations pour passer à l'échelle* + +- **Incremental solving** : réutiliser le `z3::solver` avec `push/pop` au lieu de recréer un contexte par query. 
Gros gain sur les traversées de chemin où les queries partagent un préfixe commun +- **Tactics** : utiliser `z3::tactic("simplify") & z3::tactic("solve-eqs") & z3::tactic("bit-blast") & z3::tactic("sat")` pour les formules bitvector plutôt que le solveur par défaut +- **Parallel portfolio réel** : `std::async` pour lancer Z3 et cvc5 en parallèle dans le mode portfolio (aujourd'hui c'est séquentiel) + +--- + +## Résumé visuel + +``` +Phase 1: Encoder LLVM→ConstraintIR ← verrou technique +Phase 2: IntegerOverflow + SizeMinusK ← plus gros ROI en FP +Phase 3: StackBuffer + OOBRead ← path-sensitive generalisé +Phase 4: TypeConfusion ← 91 FPs mais complexe +Phase 5: DiagnosticRefiner autonome ← quand 3+ rules utilisent SMT +Phase 6: Cache + Observabilité ← quand le volume le justifie +Phase 7: Incrémental + tactics + // ← scaling +``` + +Les phases 1-2 sont les plus urgentes. Les phases 3-4 dépendent de la qualité de l'encoder. Les phases 5-7 sont de l'optimisation. diff --git a/docs/architecture/smt-solver-integration.md b/docs/architecture/smt-solver-integration.md new file mode 100644 index 0000000..49e9bfc --- /dev/null +++ b/docs/architecture/smt-solver-integration.md @@ -0,0 +1,297 @@ +# SMT Solver Integration Architecture + +## Context + +This document defines a generic architecture to integrate SMT solving (Z3-style) +into `coretrace-stack-analyzer` while keeping: + +- backend interchangeability (Z3, cvc5, etc.), +- coupled execution modes (single, portfolio, cross-check), +- controlled performance cost, +- predictable diagnostics behavior. + +This design targets C++20 and avoids hard-coded backend logic in analysis passes. + +## Goals + +1. Reduce false positives on ambiguous findings. +2. Keep current fast heuristic analysis as first-pass filter. +3. Allow changing solver backend without refactoring analysis code. +4. Allow running multiple backends together for stronger confidence. +5. Keep failure modes safe (`UNKNOWN` must not hide real issues). 
+ +## Non-goals + +1. Replacing all existing analyses with full symbolic execution. +2. Solving every finding with SMT by default. +3. Depending on solver-specific AST types inside domain logic. + +## Design Principles + +1. **Layered architecture**: analysis passes never call solver APIs directly. +2. **Strategy pattern**: solver execution policy is selectable at runtime. +3. **Adapter pattern**: each SMT backend is an adapter behind a shared interface. +4. **Fail-safe defaults**: timeout/error/unknown keep diagnostics conservative. +5. **Bit-precise semantics**: encoding follows LLVM integer widths and signedness. + +## High-level Architecture + +```text +LLVM IR + -> Analysis Pass (existing heuristic pass) + -> Constraint Builder (LLVM -> ConstraintIR) + -> Solver Orchestrator (execution mode policy) + -> ISmtBackend (Z3Backend / Cvc5Backend / ...) + -> Diagnostic Refiner (keep/suppress/downgrade) +``` + +## Proposed Module Split + +```text +include/analysis/smt/ + ConstraintIR.hpp + SolverTypes.hpp + ISmtBackend.hpp + ISolverStrategy.hpp + SolverOrchestrator.hpp + DiagnosticRefiner.hpp + +src/analysis/smt/ + ConstraintIR.cpp + SmtEncoding.cpp + SolverOrchestrator.cpp + strategies/SingleSolverStrategy.cpp + strategies/PortfolioSolverStrategy.cpp + strategies/CrossCheckSolverStrategy.cpp + backends/Z3Backend.cpp + backends/Cvc5Backend.cpp +``` + +## Core Contracts + +### Solver status + +```cpp +enum class SmtStatus { Sat, Unsat, Unknown, Timeout, Error }; +``` + +### Query and answer + +```cpp +struct SmtQuery +{ + ConstraintIr ir; + std::string ruleId; + std::uint32_t timeoutMs = 50; + std::uint64_t budgetNodes = 10000; +}; + +struct SmtAnswer +{ + SmtStatus status = SmtStatus::Unknown; + std::string backendName; + std::optional model; + std::optional reason; +}; +``` + +### Backend interface + +```cpp +class ISmtBackend +{ +public: + virtual ~ISmtBackend() = default; + virtual std::string name() const = 0; + virtual SmtAnswer solve(const SmtQuery& query) = 0; 
+}; +``` + +### Strategy interface + +```cpp +class ISolverStrategy +{ +public: + virtual ~ISolverStrategy() = default; + virtual std::vector run(const SmtQuery& query) = 0; +}; +``` + +## ConstraintIR (backend-agnostic) + +`ConstraintIR` is an internal logical IR. It isolates analysis semantics from +solver syntax. + +### Required expression set (v1) + +1. Bit-vector constants and variables. +2. Arithmetic ops: add/sub/mul, shifts. +3. Comparisons: signed and unsigned variants. +4. Boolean ops: and/or/not/implies. +5. Cast-like ops: zext/sext/trunc. +6. Optional support for arrays/memory in later iteration. + +## Solver Orchestration Modes + +### 1) Single mode + +- One configured backend. +- Lowest cost. + +### 2) Portfolio mode (coupled) + +- Run multiple backends in parallel. +- Configurable decision policy: + - `first_unsat`, + - `first_sat`, + - `quorum`. + +### 3) Cross-check mode (coupled) + +- Primary backend solves first. +- Secondary backend only runs on: + - `Unknown`, + - timeout, + - selected high-risk rules. + +### 4) Dual-consensus mode (strict FP reduction) + +- Suppress finding only if all selected backends conclude `Unsat` + for bug-feasibility query. + +## Diagnostic Refinement Policy + +Given bug-feasibility query: + +1. `Sat`: keep diagnostic (bug path feasible). +2. `Unsat`: suppress or downgrade according to rule policy. +3. `Unknown` / `Timeout` / `Error`: keep diagnostic and mark as inconclusive. + +This avoids unsound suppression. + +## Integration in Existing Pipeline + +Integration point: after heuristic detection, before final emission. + +Suggested first adopters: + +1. `IntegerOverflowAnalysis` +2. `SizeMinusKWrites` +3. `StackBufferAnalysis` ambiguous index-range cases + +Rationale: + +- These passes already expose constraint-like logic and are sensitive to FP. +- They can benefit early from feasibility checks with bounded cost. 
+ +## Configuration Surface (CLI) + +Suggested options: + +```text +--smt=off|on +--smt-backend=z3|cvc5 +--smt-mode=single|portfolio|cross-check|dual-consensus +--smt-secondary-backend= +--smt-timeout-ms= +--smt-budget-nodes= +--smt-rules= +``` + +Default profile recommendation: + +- `--smt=on` only for selected rule set. +- short timeout per query. +- max query size budget. + +## Caching + +Add process-local query cache: + +- key: normalized `ConstraintIR` hash + solver mode/profile. +- value: `SmtStatus` (+ optional model fingerprint). + +Benefits: + +- avoid repeating equivalent queries, +- keep portfolio mode cost acceptable. + +## Observability + +Add counters and timing: + +1. total SMT queries, +2. per-status counts, +3. per-backend latency percentiles, +4. suppression count by rule, +5. timeout/error rate. + +This is required to track precision/performance tradeoffs. + +## Validation Strategy + +1. Keep all current regression tests unchanged. +2. Add SMT-focused fixtures in new files only. +3. For each onboarded rule: + - one SAT expected case, + - one UNSAT expected case (FP suppression), + - one UNKNOWN/timeout fallback case. +4. Run parity checks for `--smt=off` vs `--smt=on` where expected. + +## Rollout Plan + +### Phase 1: Infrastructure + +- Add interfaces, `ConstraintIR`, one backend adapter, single mode only. +- Keep feature behind `--smt=on`. + +### Phase 2: First rule integration + +- Integrate into integer-overflow related findings. +- Measure FP delta and runtime overhead. + +### Phase 3: Coupled modes + +- Add portfolio/cross-check strategy implementations. +- Add cache + telemetry. + +### Phase 4: Extend to other rules + +- Integrate selected stack-buffer and size-arg ambiguous diagnostics. +- Tune budgets per rule category. + +## Current Implementation Status (March 2026) + +Implemented in codebase: + +1. Generic solver contracts (`ConstraintIR`, `ISmtBackend`, `ISolverStrategy`, orchestrator). +2. 
Runtime solver modes (`single`, `portfolio`, `cross-check`, `dual-consensus`). +3. CLI/config surface (`--smt=*`, backend/mode/timeout/budget/rules). +4. Recursion rule onboarding using a dedicated encoder (`LLVM range state -> ConstraintIR`). +5. Conservative fallback policy for recursion (`Unknown`/`Timeout`/`Error` never suppresses baseline diagnostics). +6. Optional Z3 backend integration with CMake auto-detection and safe fallback when unavailable. + +Planned next: + +1. Extend encoder coverage beyond interval-derived constraints for richer path conditions. +2. Add query cache and telemetry counters (query count/status/latency). +3. Onboard SMT to additional high-FP rules (integer overflow, size-minus-k, ambiguous stack buffer cases). + +## Risk Register + +1. **Runtime blowup** on large formulas: + - mitigate with query size budget + timeout + staged solving. +2. **Unsound suppression due to encoding bug**: + - mitigate with dual-consensus for suppressions and targeted tests. +3. **Backend drift** (different solver behavior): + - mitigate with cross-check mode and backend-specific CI jobs. + +## Why this approach + +Compared to embedding Z3 calls directly in each pass, this architecture: + +1. keeps domain logic independent from solver SDKs, +2. allows backend replacement without refactoring analyses, +3. enables coupled execution modes as first-class policies, +4. scales progressively with controlled technical risk. 
diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 397c1ec..b4e93e8 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -19,6 +19,7 @@ namespace llvm namespace ctrace::stack::analysis { class CompilationDatabase; + struct GlobalReadBeforeWriteSummaryIndex; struct ResourceSummaryIndex; struct UninitializedSummaryIndex; } // namespace ctrace::stack::analysis @@ -28,58 +29,70 @@ namespace ctrace::stack using StackSize = std::uint64_t; - enum class AnalysisMode + enum class AnalysisMode : std::uint8_t { - IR, - ABI + IR = 0, + ABI = 1 }; - enum class AnalysisProfile + enum class AnalysisProfile : std::uint8_t { - Fast, - Full + Fast = 0, + Full = 1 }; // Analysis configuration (mode + stack limit). struct AnalysisConfig { - AnalysisMode mode = AnalysisMode::IR; - AnalysisProfile profile = AnalysisProfile::Full; StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB default - bool quiet = false; - bool warningsOnly = false; - std::vector extraCompileArgs; + std::uint64_t smtBudgetNodes = 10000; + std::shared_ptr compilationDatabase; - bool requireCompilationDatabase = false; - bool compdbFast = false; - unsigned jobs = 1; - bool jobsAuto = false; - bool timing = false; - std::vector onlyFiles; - std::vector onlyDirs; + std::shared_ptr resourceSummaryIndex; + std::shared_ptr uninitializedSummaryIndex; + std::shared_ptr + globalReadBeforeWriteSummaryIndex; + std::vector excludeDirs; + std::vector extraCompileArgs; + std::vector onlyDirs; + std::vector onlyFiles; std::vector onlyFunctions; - bool includeSTL = false; - bool dumpFilter = false; + + std::vector smtRules; + std::string compileIRCacheDir; + std::string smtSecondaryBackend; + std::string smtBackend = "interval"; std::string dumpIRPath; - bool dumpIRIsDir = false; - bool demangle = false; std::string escapeModelPath; std::string bufferModelPath; std::string resourceModelPath; - bool resourceCrossTU = true; std::string resourceSummaryCacheDir = 
".cache/resource-lifetime"; - bool resourceSummaryMemoryOnly = false; - std::shared_ptr resourceSummaryIndex; - bool uninitializedCrossTU = true; - std::shared_ptr uninitializedSummaryIndex; - bool smtEnabled = false; - std::string smtBackend = "interval"; - std::string smtSecondaryBackend; - analysis::smt::SolverMode smtMode = analysis::smt::SolverMode::Single; + std::uint32_t smtTimeoutMs = 50; - std::uint64_t smtBudgetNodes = 10000; - std::vector smtRules; + std::uint32_t jobs = 1; + + analysis::smt::SolverMode smtMode = analysis::smt::SolverMode::Single; + AnalysisMode mode = AnalysisMode::IR; + AnalysisProfile profile = AnalysisProfile::Full; + + bool compdbFast : 1 = false; + bool demangle : 1 = false; + bool dumpFilter : 1 = false; + bool dumpIRIsDir : 1 = false; + bool includeSTL : 1 = false; + bool requireCompilationDatabase : 1 = false; + bool jobsAuto : 1 = false; + bool quiet : 1 = false; + bool smtEnabled : 1 = false; + bool timing : 1 = false; + bool uninitializedCrossTU : 1 = true; + bool resourceCrossTU : 1 = true; + bool resourceSummaryMemoryOnly : 1 = false; + bool warningsOnly : 1 = false; + bool reservedFlags0 : 1 = false; + bool reservedFlags1 : 1 = false; + std::uint32_t reservedPadding = 0; }; // Per-function result @@ -87,15 +100,16 @@ namespace ctrace::stack { std::string filePath; std::string name; - StackSize localStack = 0; // local frame size (depends on mode) - StackSize maxStack = 0; // max stack including callees - bool localStackUnknown = false; // unknown local size (dynamic alloca) - bool maxStackUnknown = false; // unknown max stack (propagated via calls) - bool hasDynamicAlloca = false; // dynamic alloca detected in the function - - bool isRecursive = false; // part of a cycle F <-> G ... 
- bool hasInfiniteSelfRecursion = false; // DominatorTree heuristic - bool exceedsLimit = false; // maxStack > config.stackLimit + StackSize localStack = 0; // local frame size (depends on mode) + StackSize maxStack = 0; // max stack including callees + std::uint64_t localStackUnknown : 1 = false; // unknown local size (dynamic alloca) + std::uint64_t maxStackUnknown : 1 = false; // unknown max stack (propagated via calls) + std::uint64_t hasDynamicAlloca : 1 = false; // dynamic alloca detected in the function + + std::uint64_t isRecursive : 1 = false; // part of a cycle F <-> G ... + std::uint64_t hasInfiniteSelfRecursion : 1 = false; // DominatorTree heuristic + std::uint64_t exceedsLimit : 1 = false; // maxStack > config.stackLimit + std::uint64_t reservedFlags : 58 = 0; }; /* diff --git a/include/analysis/AllocaUsage.hpp b/include/analysis/AllocaUsage.hpp index 1d50512..e03cc9d 100644 --- a/include/analysis/AllocaUsage.hpp +++ b/include/analysis/AllocaUsage.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -23,14 +24,15 @@ namespace ctrace::stack::analysis std::string varName; const llvm::AllocaInst* allocaInst = nullptr; - bool userControlled = false; // size derived from argument / non-local value - bool sizeIsConst = false; // size known exactly - bool hasUpperBound = false; // bounded size (from ICmp-derived range) - bool isRecursive = false; // function participates in a recursion cycle - bool isInfiniteRecursive = false; // unconditional self recursion - StackSize sizeBytes = 0; // exact size in bytes (if sizeIsConst) StackSize upperBoundBytes = 0; // upper bound in bytes (if hasUpperBound) + + std::uint64_t userControlled : 1 = false; // size derived from argument / non-local value + std::uint64_t sizeIsConst : 1 = false; // size known exactly + std::uint64_t hasUpperBound : 1 = false; // bounded size (from ICmp-derived range) + std::uint64_t isRecursive : 1 = false; // function participates in a recursion cycle + std::uint64_t 
isInfiniteRecursive : 1 = false; // unconditional self recursion + std::uint64_t reservedFlags : 59 = 0; }; std::vector diff --git a/include/analysis/BufferWriteModel.hpp b/include/analysis/BufferWriteModel.hpp index ed7d70f..f09765b 100644 --- a/include/analysis/BufferWriteModel.hpp +++ b/include/analysis/BufferWriteModel.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -11,18 +12,18 @@ namespace llvm namespace ctrace::stack::analysis { - enum class BufferWriteRuleKind + enum class BufferWriteRuleKind : std::uint64_t { - BoundedWrite, - UnboundedWrite + BoundedWrite = 0, + UnboundedWrite = 1 }; struct BufferWriteRule { - BufferWriteRuleKind kind = BufferWriteRuleKind::BoundedWrite; std::string functionPattern; unsigned destArgIndex = 0; unsigned sizeArgIndex = 0; // only used for BoundedWrite + BufferWriteRuleKind kind = BufferWriteRuleKind::BoundedWrite; }; struct BufferWriteModel diff --git a/include/analysis/ConstParamAnalysis.hpp b/include/analysis/ConstParamAnalysis.hpp index ca1ee14..cfcf118 100644 --- a/include/analysis/ConstParamAnalysis.hpp +++ b/include/analysis/ConstParamAnalysis.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -19,11 +20,12 @@ namespace ctrace::stack::analysis std::string currentType; std::string suggestedType; std::string suggestedTypeAlt; - bool pointerConstOnly = false; // ex: T * const param - bool isReference = false; - bool isRvalueRef = false; unsigned line = 0; unsigned column = 0; + std::uint64_t pointerConstOnly : 1 = false; // ex: T * const param + std::uint64_t isReference : 1 = false; + std::uint64_t isRvalueRef : 1 = false; + std::uint64_t reservedFlags : 61 = 0; }; std::vector diff --git a/include/analysis/FunctionFilter.hpp b/include/analysis/FunctionFilter.hpp index 2bd1a0c..538c4f3 100644 --- a/include/analysis/FunctionFilter.hpp +++ b/include/analysis/FunctionFilter.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include "StackUsageAnalyzer.hpp" @@ -14,11 +15,12 @@ 
namespace ctrace::stack::analysis { struct FunctionFilter { - bool hasPathFilter = false; - bool hasFuncFilter = false; - bool hasFilter = false; std::string moduleSourcePath; const AnalysisConfig* config = nullptr; + std::uint64_t hasPathFilter : 1 = false; + std::uint64_t hasFuncFilter : 1 = false; + std::uint64_t hasFilter : 1 = false; + std::uint64_t reservedFlags : 61 = 0; bool shouldAnalyze(const llvm::Function& F) const; }; diff --git a/include/analysis/GlobalReadBeforeWriteAnalysis.hpp b/include/analysis/GlobalReadBeforeWriteAnalysis.hpp index d4d595e..565f653 100644 --- a/include/analysis/GlobalReadBeforeWriteAnalysis.hpp +++ b/include/analysis/GlobalReadBeforeWriteAnalysis.hpp @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include namespace llvm @@ -13,15 +15,41 @@ namespace llvm namespace ctrace::stack::analysis { + struct GlobalReadBeforeWriteGlobalSummary + { + bool zeroInitializedArray = false; + bool hasAnyWrite = false; + }; + + struct GlobalReadBeforeWriteSummaryIndex + { + std::unordered_map globals; + }; + + enum class GlobalReadBeforeWriteKind : std::uint64_t + { + BeforeFirstLocalWrite = 0, + WithoutLocalWrite = 1 + }; + struct GlobalReadBeforeWriteIssue { std::string funcName; std::string globalName; const llvm::Instruction* readInst = nullptr; const llvm::Instruction* firstWriteInst = nullptr; + GlobalReadBeforeWriteKind kind = GlobalReadBeforeWriteKind::BeforeFirstLocalWrite; + std::uint64_t hasNonLocalWrite : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; - std::vector - analyzeGlobalReadBeforeWrites(llvm::Module& mod, - const std::function& shouldAnalyze); + GlobalReadBeforeWriteSummaryIndex buildGlobalReadBeforeWriteSummaryIndex( + llvm::Module& mod, const std::function& shouldAnalyze); + + bool mergeGlobalReadBeforeWriteSummaryIndex(GlobalReadBeforeWriteSummaryIndex& dst, + const GlobalReadBeforeWriteSummaryIndex& src); + + std::vector analyzeGlobalReadBeforeWrites( + llvm::Module& mod, const std::function& 
shouldAnalyze, + const GlobalReadBeforeWriteSummaryIndex* externalSummaries = nullptr); } // namespace ctrace::stack::analysis diff --git a/include/analysis/InputPipeline.hpp b/include/analysis/InputPipeline.hpp index 1d2bfd7..e017453 100644 --- a/include/analysis/InputPipeline.hpp +++ b/include/analysis/InputPipeline.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -19,6 +20,7 @@ namespace ctrace::stack::analysis { std::unique_ptr module; LanguageType language = LanguageType::Unknown; + std::uint32_t reservedLanguagePadding = 0; std::vector frontendDiagnostics; std::string error; }; diff --git a/include/analysis/IntRanges.hpp b/include/analysis/IntRanges.hpp index 60ced92..bf323db 100644 --- a/include/analysis/IntRanges.hpp +++ b/include/analysis/IntRanges.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include namespace llvm @@ -12,10 +13,11 @@ namespace ctrace::stack::analysis { struct IntRange { - bool hasLower = false; long long lower = 0; - bool hasUpper = false; long long upper = 0; + std::uint64_t hasLower : 1 = false; + std::uint64_t hasUpper : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; std::map computeIntRangesFromICmps(llvm::Function& F); diff --git a/include/analysis/IntegerOverflowAnalysis.hpp b/include/analysis/IntegerOverflowAnalysis.hpp index fdd5af4..a962df6 100644 --- a/include/analysis/IntegerOverflowAnalysis.hpp +++ b/include/analysis/IntegerOverflowAnalysis.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -15,7 +16,7 @@ namespace llvm namespace ctrace::stack::analysis { - enum class IntegerOverflowIssueKind + enum class IntegerOverflowIssueKind : std::uint64_t { ArithmeticInSizeComputation, SignedToUnsignedSize, @@ -29,8 +30,8 @@ namespace ctrace::stack::analysis std::string filePath; std::string sinkName; std::string operation; - IntegerOverflowIssueKind kind = IntegerOverflowIssueKind::ArithmeticInSizeComputation; const llvm::Instruction* inst = nullptr; + IntegerOverflowIssueKind kind = 
IntegerOverflowIssueKind::ArithmeticInSizeComputation; }; std::vector diff --git a/include/analysis/InvalidBaseReconstruction.hpp b/include/analysis/InvalidBaseReconstruction.hpp index e9191cd..a1da47d 100644 --- a/include/analysis/InvalidBaseReconstruction.hpp +++ b/include/analysis/InvalidBaseReconstruction.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -19,12 +20,13 @@ namespace ctrace::stack::analysis struct InvalidBaseReconstructionIssue { std::string funcName; - std::string varName; // alloca variable name (stack object) - std::string sourceMember; // source member (e.g., "b") - int64_t offsetUsed = 0; // offset used in the calculation (can be negative) - std::string targetType; // target cast type (e.g., "struct A*") - bool isOutOfBounds = false; // true if we can prove it is out of bounds + std::string varName; // alloca variable name (stack object) + std::string sourceMember; // source member (e.g., "b") + int64_t offsetUsed = 0; // offset used in the calculation (can be negative) + std::string targetType; // target cast type (e.g., "struct A*") const llvm::Instruction* inst = nullptr; + std::uint64_t isOutOfBounds : 1 = false; // true if we can prove it is out of bounds + std::uint64_t reservedFlags : 63 = 0; }; std::vector analyzeInvalidBaseReconstructions( diff --git a/include/analysis/MemIntrinsicOverflow.hpp b/include/analysis/MemIntrinsicOverflow.hpp index 8fafdb4..657f35e 100644 --- a/include/analysis/MemIntrinsicOverflow.hpp +++ b/include/analysis/MemIntrinsicOverflow.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -23,8 +24,9 @@ namespace ctrace::stack::analysis std::string intrinsicName; StackSize destSizeBytes = 0; StackSize lengthBytes = 0; - bool hasExplicitLength = false; const llvm::Instruction* inst = nullptr; + std::uint64_t hasExplicitLength : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; std::vector diff --git a/include/analysis/NullDerefAnalysis.hpp 
b/include/analysis/NullDerefAnalysis.hpp index 0a9fb67..a5a1738 100644 --- a/include/analysis/NullDerefAnalysis.hpp +++ b/include/analysis/NullDerefAnalysis.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -13,7 +14,7 @@ namespace llvm namespace ctrace::stack::analysis { - enum class NullDerefIssueKind + enum class NullDerefIssueKind : std::uint64_t { DirectNullPointer, NullBranchDereference, @@ -26,8 +27,8 @@ namespace ctrace::stack::analysis std::string funcName; std::string filePath; std::string pointerName; - NullDerefIssueKind kind = NullDerefIssueKind::DirectNullPointer; const llvm::Instruction* inst = nullptr; + NullDerefIssueKind kind = NullDerefIssueKind::DirectNullPointer; }; std::vector diff --git a/include/analysis/OOBReadAnalysis.hpp b/include/analysis/OOBReadAnalysis.hpp index 6a59238..8b96dde 100644 --- a/include/analysis/OOBReadAnalysis.hpp +++ b/include/analysis/OOBReadAnalysis.hpp @@ -17,7 +17,7 @@ namespace llvm namespace ctrace::stack::analysis { - enum class OOBReadIssueKind + enum class OOBReadIssueKind : std::uint64_t { MissingNullTerminator, HeapIndexOutOfBounds @@ -29,11 +29,11 @@ namespace ctrace::stack::analysis std::string filePath; std::string bufferName; std::string apiName; - OOBReadIssueKind kind = OOBReadIssueKind::HeapIndexOutOfBounds; std::uint64_t bufferSizeBytes = 0; std::uint64_t writeSizeBytes = 0; std::uint64_t capacityElements = 0; const llvm::Instruction* inst = nullptr; + OOBReadIssueKind kind = OOBReadIssueKind::HeapIndexOutOfBounds; }; std::vector diff --git a/include/analysis/ResourceLifetimeAnalysis.hpp b/include/analysis/ResourceLifetimeAnalysis.hpp index 87e2dcf..08b9ee7 100644 --- a/include/analysis/ResourceLifetimeAnalysis.hpp +++ b/include/analysis/ResourceLifetimeAnalysis.hpp @@ -27,7 +27,8 @@ namespace ctrace::stack::analysis ResourceSummaryAction action = ResourceSummaryAction::AcquireOut; unsigned argIndex = 0; std::uint64_t offset = 0; - bool viaPointerSlot = false; + std::uint64_t 
viaPointerSlot : 1 = false; + std::uint64_t reservedFlags : 63 = 0; std::string resourceKind; }; @@ -41,7 +42,7 @@ namespace ctrace::stack::analysis std::unordered_map functions; }; - enum class ResourceLifetimeIssueKind + enum class ResourceLifetimeIssueKind : std::uint64_t { MissingRelease, DoubleRelease, diff --git a/include/analysis/SizeMinusKWrites.hpp b/include/analysis/SizeMinusKWrites.hpp index 3cf89f5..0eaa258 100644 --- a/include/analysis/SizeMinusKWrites.hpp +++ b/include/analysis/SizeMinusKWrites.hpp @@ -21,19 +21,20 @@ namespace ctrace::stack::analysis { std::string funcName; std::string sinkName; // call name or "store" - bool ptrNonNull = false; - bool sizeAboveK = false; - bool hasPointerDest = true; int64_t k = 1; const llvm::Instruction* inst = nullptr; + std::uint64_t ptrNonNull : 1 = false; + std::uint64_t sizeAboveK : 1 = false; + std::uint64_t hasPointerDest : 1 = true; + std::uint64_t reservedFlags : 61 = 0; }; std::vector analyzeSizeMinusKWrites( llvm::Module& mod, const llvm::DataLayout& DL, const std::function& shouldAnalyzeFunction); - std::vector analyzeSizeMinusKWrites( - llvm::Module& mod, const llvm::DataLayout& DL, - const std::function& shouldAnalyzeFunction, - const AnalysisConfig& config); + std::vector + analyzeSizeMinusKWrites(llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyzeFunction, + const AnalysisConfig& config); } // namespace ctrace::stack::analysis diff --git a/include/analysis/StackBufferAnalysis.hpp b/include/analysis/StackBufferAnalysis.hpp index 419e508..82bda06 100644 --- a/include/analysis/StackBufferAnalysis.hpp +++ b/include/analysis/StackBufferAnalysis.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -16,7 +17,7 @@ namespace llvm namespace ctrace::stack::analysis { - enum class BufferStorageClass + enum class BufferStorageClass : std::uint64_t { Stack, Global, @@ -26,19 +27,18 @@ namespace ctrace::stack::analysis { std::string funcName; std::string 
varName; + std::string aliasPath; // ex: "pp -> ptr -> buf" + std::vector aliasPathVec; // {"pp", "ptr", "buf"} + const llvm::Instruction* inst = nullptr; StackSize arraySize = 0; StackSize indexOrUpperBound = 0; // used for upper bounds (UB) or constant index - bool isWrite = false; - bool indexIsConstant = false; + long long lowerBound = 0; // deduced lower bound (signed) BufferStorageClass storageClass = BufferStorageClass::Stack; - const llvm::Instruction* inst = nullptr; - + std::uint64_t isWrite : 1 = false; + std::uint64_t indexIsConstant : 1 = false; // Violation based on a lower bound (index potentially negative) - bool isLowerBoundViolation = false; - long long lowerBound = 0; // deduced lower bound (signed) - - std::string aliasPath; // ex: "pp -> ptr -> buf" - std::vector aliasPathVec; // {"pp", "ptr", "buf"} + std::uint64_t isLowerBoundViolation : 1 = false; + std::uint64_t reservedFlags : 61 = 0; // Optional : helper for sync string <- vector void rebuildAliasPathString(const std::string& sep = " -> ") { diff --git a/include/analysis/StackComputation.hpp b/include/analysis/StackComputation.hpp index 396a05a..a132167 100644 --- a/include/analysis/StackComputation.hpp +++ b/include/analysis/StackComputation.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -20,15 +21,17 @@ namespace ctrace::stack::analysis struct StackEstimate { StackSize bytes = 0; - bool unknown = false; + std::uint64_t unknown : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; struct LocalStackInfo { StackSize bytes = 0; - bool unknown = false; - bool hasDynamicAlloca = false; std::vector> localAllocas; + std::uint64_t unknown : 1 = false; + std::uint64_t hasDynamicAlloca : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; struct InternalAnalysisState diff --git a/include/analysis/UninitializedVarAnalysis.hpp b/include/analysis/UninitializedVarAnalysis.hpp index 2ea4b64..4d2e5a4 100644 --- a/include/analysis/UninitializedVarAnalysis.hpp +++ 
b/include/analysis/UninitializedVarAnalysis.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -15,6 +16,19 @@ namespace llvm namespace ctrace::stack::analysis { + struct PreparedUninitializedExternalSummariesOpaque; + struct PreparedUninitializedModuleContextOpaque; + + struct PreparedUninitializedExternalSummaries + { + std::shared_ptr opaque; + }; + + struct PreparedUninitializedModuleContext + { + std::shared_ptr opaque; + }; + struct UninitializedSummaryRange { std::uint64_t begin = 0; @@ -32,8 +46,9 @@ namespace ctrace::stack::analysis std::vector readBeforeWriteRanges; std::vector writeRanges; std::vector pointerSlotWrites; - bool hasUnknownReadBeforeWrite = false; - bool hasUnknownWrite = false; + std::uint64_t hasUnknownReadBeforeWrite : 1 = false; + std::uint64_t hasUnknownWrite : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; struct UninitializedSummaryFunction @@ -46,7 +61,7 @@ namespace ctrace::stack::analysis std::unordered_map functions; }; - enum class UninitializedLocalIssueKind + enum class UninitializedLocalIssueKind : std::uint64_t { ReadBeforeDefiniteInit, ReadBeforeDefiniteInitViaCall, @@ -70,6 +85,22 @@ namespace ctrace::stack::analysis const std::function& shouldAnalyze, const UninitializedSummaryIndex* externalSummaries = nullptr); + PreparedUninitializedExternalSummaries + prepareUninitializedExternalSummaries(const UninitializedSummaryIndex* externalSummaries); + + PreparedUninitializedModuleContext prepareUninitializedModuleContext( + llvm::Module& mod, const std::function& shouldAnalyze); + + UninitializedSummaryIndex + buildUninitializedSummaryIndex(llvm::Module& mod, + const std::function& shouldAnalyze, + const PreparedUninitializedExternalSummaries* preparedExternal); + + UninitializedSummaryIndex + buildUninitializedSummaryIndex(llvm::Module& mod, + const PreparedUninitializedModuleContext* preparedModule, + const PreparedUninitializedExternalSummaries* preparedExternal); + bool 
mergeUninitializedSummaryIndex(UninitializedSummaryIndex& dst, const UninitializedSummaryIndex& src); diff --git a/include/analysis/smt/ConstraintIR.hpp b/include/analysis/smt/ConstraintIR.hpp index 84cf0ba..3af9218 100644 --- a/include/analysis/smt/ConstraintIR.hpp +++ b/include/analysis/smt/ConstraintIR.hpp @@ -10,7 +10,7 @@ namespace ctrace::stack::analysis::smt using SymbolId = std::uint64_t; using ExprId = std::uint32_t; - enum class ExprKind + enum class ExprKind : std::uint64_t { Symbol, Constant, @@ -42,12 +42,14 @@ namespace ctrace::stack::analysis::smt struct ExprNode { ExprKind kind = ExprKind::Constant; - std::uint32_t bitWidth = 1; SymbolId symbol = 0; std::int64_t constant = 0; + std::uint32_t bitWidth = 1; ExprId lhs = 0; ExprId rhs = 0; ExprId extra = 0; + std::uint32_t reserved0 = 0; + std::uint32_t reserved1 = 0; }; struct SymbolInfo @@ -60,10 +62,11 @@ namespace ctrace::stack::analysis::smt struct IntervalConstraint { SymbolId symbol = 0; - bool hasLower = false; std::int64_t lower = 0; - bool hasUpper = false; std::int64_t upper = 0; + std::uint64_t hasLower : 1 = false; + std::uint64_t hasUpper : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; struct ConstraintIR diff --git a/include/analysis/smt/ISmtBackend.hpp b/include/analysis/smt/ISmtBackend.hpp index 4a121f4..fce010f 100644 --- a/include/analysis/smt/ISmtBackend.hpp +++ b/include/analysis/smt/ISmtBackend.hpp @@ -14,4 +14,3 @@ namespace ctrace::stack::analysis::smt virtual SmtAnswer solve(const SmtQuery& query) const = 0; }; } // namespace ctrace::stack::analysis::smt - diff --git a/include/analysis/smt/ISolverStrategy.hpp b/include/analysis/smt/ISolverStrategy.hpp index 67542d6..d883a1c 100644 --- a/include/analysis/smt/ISolverStrategy.hpp +++ b/include/analysis/smt/ISolverStrategy.hpp @@ -12,7 +12,7 @@ namespace ctrace::stack::analysis::smt public: virtual ~ISolverStrategy() = default; virtual std::vector - run(const SmtQuery& query, const std::vector>& backends) const = 0; + 
run(const SmtQuery& query, + const std::vector>& backends) const = 0; }; } // namespace ctrace::stack::analysis::smt - diff --git a/include/analysis/smt/SmtEncoding.hpp b/include/analysis/smt/SmtEncoding.hpp index d49ac1f..c318cd4 100644 --- a/include/analysis/smt/SmtEncoding.hpp +++ b/include/analysis/smt/SmtEncoding.hpp @@ -12,7 +12,7 @@ namespace llvm class BinaryOperator; class Instruction; class Value; -} +} // namespace llvm namespace ctrace::stack::analysis::smt { @@ -27,15 +27,17 @@ namespace ctrace::stack::analysis::smt ConstraintIR encodeRangeConstraints(const std::map& ranges); - ConstraintIR encodeSignedOverflowFeasibility( - const std::map& ranges, - const llvm::BinaryOperator& binaryOperation, const llvm::Instruction* contextInst = nullptr); + ConstraintIR + encodeSignedOverflowFeasibility(const std::map& ranges, + const llvm::BinaryOperator& binaryOperation, + const llvm::Instruction* contextInst = nullptr); - ConstraintIR encodeUnsignedOverflowFeasibility( - const std::map& ranges, - const llvm::BinaryOperator& binaryOperation, const llvm::Instruction* contextInst = nullptr); + ConstraintIR + encodeUnsignedOverflowFeasibility(const std::map& ranges, + const llvm::BinaryOperator& binaryOperation, + const llvm::Instruction* contextInst = nullptr); ConstraintIR encodeSignedComparisonFeasibility( const std::map& ranges, const llvm::Value& lhs, std::int64_t rhsConstant, bool greaterThan, const llvm::Instruction* contextInst = nullptr); -} +} // namespace ctrace::stack::analysis::smt diff --git a/include/analysis/smt/SmtRefinement.hpp b/include/analysis/smt/SmtRefinement.hpp index 39ef947..780c934 100644 --- a/include/analysis/smt/SmtRefinement.hpp +++ b/include/analysis/smt/SmtRefinement.hpp @@ -40,18 +40,17 @@ namespace ctrace::stack::analysis::smt { public: SmtConstraintEvaluator(const ctrace::stack::AnalysisConfig& config, std::string ruleId) - : ruleId_(std::move(ruleId)) - , timeoutMs_(config.smtTimeoutMs) - , budgetNodes_(config.smtBudgetNodes) + : 
ruleId_(std::move(ruleId)), timeoutMs_(config.smtTimeoutMs), + budgetNodes_(config.smtBudgetNodes) { if (smtRuleEnabled(config, ruleId_)) { - orchestrator_.emplace(SolverOrchestratorConfig{.mode = config.smtMode, - .primaryBackend = config.smtBackend, - .secondaryBackend = - config.smtSecondaryBackend, - .timeoutMs = config.smtTimeoutMs, - .budgetNodes = config.smtBudgetNodes}); + orchestrator_.emplace( + SolverOrchestratorConfig{.primaryBackend = config.smtBackend, + .secondaryBackend = config.smtSecondaryBackend, + .mode = config.smtMode, + .budgetNodes = config.smtBudgetNodes, + .timeoutMs = config.smtTimeoutMs}); } } @@ -85,7 +84,8 @@ namespace ctrace::stack::analysis::smt private: std::string ruleId_; std::optional orchestrator_; - std::uint32_t timeoutMs_ = 50; std::uint64_t budgetNodes_ = 10000; + std::uint32_t timeoutMs_ = 50; + std::uint32_t reserved_ = 0; }; } // namespace ctrace::stack::analysis::smt diff --git a/include/analysis/smt/SolverOrchestrator.hpp b/include/analysis/smt/SolverOrchestrator.hpp index 7a25742..c96011d 100644 --- a/include/analysis/smt/SolverOrchestrator.hpp +++ b/include/analysis/smt/SolverOrchestrator.hpp @@ -2,17 +2,19 @@ #include "analysis/smt/SolverTypes.hpp" +#include #include namespace ctrace::stack::analysis::smt { struct SolverOrchestratorConfig { - SolverMode mode = SolverMode::Single; std::string primaryBackend = "interval"; std::string secondaryBackend; - std::uint32_t timeoutMs = 50; + SolverMode mode = SolverMode::Single; std::uint64_t budgetNodes = 10000; + std::uint32_t timeoutMs = 50; + std::uint32_t reserved = 0; }; class SolverOrchestrator diff --git a/include/analysis/smt/SolverTypes.hpp b/include/analysis/smt/SolverTypes.hpp index 250cf7e..59c0e91 100644 --- a/include/analysis/smt/SolverTypes.hpp +++ b/include/analysis/smt/SolverTypes.hpp @@ -9,7 +9,7 @@ namespace ctrace::stack::analysis::smt { - enum class SolverMode + enum class SolverMode : std::uint64_t { Single, Portfolio, @@ -17,7 +17,7 @@ namespace 
ctrace::stack::analysis::smt DualConsensus }; - enum class SmtStatus + enum class SmtStatus : std::uint64_t { Sat, Unsat, @@ -30,20 +30,21 @@ namespace ctrace::stack::analysis::smt { ConstraintIR ir; std::string ruleId; - std::uint32_t timeoutMs = 50; std::uint64_t budgetNodes = 10000; + std::uint32_t timeoutMs = 50; + std::uint32_t reserved = 0; }; struct SmtAnswer { - SmtStatus status = SmtStatus::Unknown; std::string backendName; std::optional reason; + SmtStatus status = SmtStatus::Unknown; }; struct SmtDecision { - SmtStatus status = SmtStatus::Unknown; std::vector answers; + SmtStatus status = SmtStatus::Unknown; }; } // namespace ctrace::stack::analysis::smt diff --git a/include/analyzer/DiagnosticEmitter.hpp b/include/analyzer/DiagnosticEmitter.hpp index bfefef9..f81f42f 100644 --- a/include/analyzer/DiagnosticEmitter.hpp +++ b/include/analyzer/DiagnosticEmitter.hpp @@ -83,8 +83,7 @@ namespace ctrace::stack::analyzer AnalysisResult& result, const std::vector& issues); void appendGlobalReadBeforeWriteDiagnostics( - AnalysisResult& result, - const std::vector& issues); + AnalysisResult& result, const std::vector& issues); void appendInvalidBaseReconstructionDiagnostics( AnalysisResult& result, diff --git a/include/analyzer/LocationResolver.hpp b/include/analyzer/LocationResolver.hpp index e14756f..b68b2ba 100644 --- a/include/analyzer/LocationResolver.hpp +++ b/include/analyzer/LocationResolver.hpp @@ -1,5 +1,7 @@ #pragma once +#include + namespace llvm { class AllocaInst; @@ -17,7 +19,8 @@ namespace ctrace::stack::analyzer unsigned startColumn = 0; unsigned endLine = 0; unsigned endColumn = 0; - bool hasLocation = false; + std::uint32_t hasLocation : 1 = false; + std::uint32_t reservedFlags : 31 = 0; }; ResolvedLocation resolveFromInstruction(const llvm::Instruction* inst, diff --git a/include/app/AnalyzerApp.hpp b/include/app/AnalyzerApp.hpp index cca1f5f..b7f8915 100644 --- a/include/app/AnalyzerApp.hpp +++ b/include/app/AnalyzerApp.hpp @@ -14,13 +14,14 @@ 
namespace ctrace::stack::app struct RunResult { - int exitCode = 1; std::string error; + int exitCode = 1; bool isOk() const { return error.empty(); } + char padded[4]; }; RunResult runAnalyzerApp(cli::ParsedArguments parsedArgs, llvm::LLVMContext& context); diff --git a/include/cli/ArgParser.hpp b/include/cli/ArgParser.hpp index 968e0c2..c14215b 100644 --- a/include/cli/ArgParser.hpp +++ b/include/cli/ArgParser.hpp @@ -2,46 +2,56 @@ #include "StackUsageAnalyzer.hpp" +#include #include #include +// char dummyGlobal[16]; +// int a[10]; + namespace ctrace::stack::cli { - enum class OutputFormat + enum class OutputFormat : std::uint64_t { - Human, - Json, - Sarif + Human = 0, + Json = 1, + Sarif = 2 }; struct ParsedArguments { AnalysisConfig config; + std::vector inputFilenames; - OutputFormat outputFormat = OutputFormat::Human; + std::string sarifBaseDir; std::string configPath; std::string compileCommandsPath; - bool compileCommandsExplicit = false; - bool analysisProfileExplicit = false; - bool includeCompdbDeps = false; - bool printEffectiveConfig = false; - bool verbose = false; + + OutputFormat outputFormat = OutputFormat::Human; + + std::uint64_t compileCommandsExplicit : 1 = false; + std::uint64_t analysisProfileExplicit : 1 = false; + std::uint64_t includeCompdbDeps : 1 = false; + std::uint64_t printEffectiveConfig : 1 = false; + std::uint64_t verbose : 1 = false; + std::uint64_t reservedFlags : 59 = 0; }; - enum class ParseStatus + enum class ParseStatus : std::uint8_t { - Ok, - Help, - Error + Ok = 0, + Help = 1, + Error = 2, }; struct ParseResult { - ParseStatus status = ParseStatus::Ok; - ParsedArguments parsed; std::string error; + ParsedArguments parsed; + ParseStatus status = ParseStatus::Ok; + char padded[7]; }; ParseResult parseArguments(int argc, char** argv); diff --git a/main.cpp b/main.cpp index 0d44923..15c6309 100644 --- a/main.cpp +++ b/main.cpp @@ -71,6 +71,8 @@ static void printHelp() << " --resource-cross-tu Enable cross-TU resource 
summaries (default: on)\n" << " --no-resource-cross-tu Disable cross-TU resource summaries\n" << " --resource-summary-cache-dir= Cache directory for cross-TU summaries\n" + << " --compile-ir-cache-dir= Cache directory for compiled LLVM IR per source " + "file\n" << " --resource-summary-cache-memory-only Use in-memory cache only for cross-TU " "summaries\n" << " --uninitialized-cross-tu Enable cross-TU uninitialized summaries (default: on)\n" @@ -134,13 +136,13 @@ static void printEffectiveConfig(const ctrace::stack::cli::ParsedArguments& pars { const AnalysisConfig& cfg = parsed.config; llvm::errs() << "=== Effective Analyzer Configuration ===\n"; - llvm::errs() << "config-file: " - << (parsed.configPath.empty() ? "" : parsed.configPath) << "\n"; + llvm::errs() << "config-file: " << (parsed.configPath.empty() ? "" : parsed.configPath) + << "\n"; llvm::errs() << "compile-commands: " << (parsed.compileCommandsPath.empty() ? "" : parsed.compileCommandsPath) << "\n"; - llvm::errs() << "analysis-profile: " - << (cfg.profile == AnalysisProfile::Fast ? "fast" : "full") << "\n"; + llvm::errs() << "analysis-profile: " << (cfg.profile == AnalysisProfile::Fast ? "fast" : "full") + << "\n"; if (cfg.jobsAuto) llvm::errs() << "jobs: auto\n"; else @@ -154,6 +156,8 @@ static void printEffectiveConfig(const ctrace::stack::cli::ParsedArguments& pars << (cfg.escapeModelPath.empty() ? "" : cfg.escapeModelPath) << "\n"; llvm::errs() << "buffer-model: " << (cfg.bufferModelPath.empty() ? "" : cfg.bufferModelPath) << "\n"; + llvm::errs() << "compile-ir-cache-dir: " + << (cfg.compileIRCacheDir.empty() ? "" : cfg.compileIRCacheDir) << "\n"; llvm::errs() << "smt-enabled: " << (cfg.smtEnabled ? 
"true" : "false") << "\n"; llvm::errs() << "smt-backend: " << cfg.smtBackend << "\n"; llvm::errs() << "smt-secondary-backend: " @@ -161,8 +165,8 @@ static void printEffectiveConfig(const ctrace::stack::cli::ParsedArguments& pars llvm::errs() << "smt-mode: " << solverModeName(cfg.smtMode) << "\n"; llvm::errs() << "smt-timeout-ms: " << cfg.smtTimeoutMs << "\n"; llvm::errs() << "smt-budget-nodes: " << cfg.smtBudgetNodes << "\n"; - llvm::errs() << "smt-rules: " - << (cfg.smtRules.empty() ? "" : joinCsv(cfg.smtRules)) << "\n"; + llvm::errs() << "smt-rules: " << (cfg.smtRules.empty() ? "" : joinCsv(cfg.smtRules)) + << "\n"; llvm::errs() << "========================================\n"; } diff --git a/src/analysis/AnalyzerUtils.cpp b/src/analysis/AnalyzerUtils.cpp index 6b0177e..458d84e 100644 --- a/src/analysis/AnalyzerUtils.cpp +++ b/src/analysis/AnalyzerUtils.cpp @@ -91,6 +91,7 @@ namespace ctrace::stack::analysis const llvm::Function* bestCallee = nullptr; StackEstimate bestStack{}; + StackSize bestStackBytes = 0; auto itCG = CG.find(current); if (itCG == CG.end()) @@ -101,10 +102,13 @@ namespace ctrace::stack::analysis auto itTotal = state.TotalStack.find(callee); StackEstimate est = (itTotal != state.TotalStack.end()) ? itTotal->second : StackEstimate{}; + const StackSize estBytes = + (itTotal != state.TotalStack.end()) ? 
itTotal->second.bytes : 0; if (!bestCallee || est.bytes > bestStack.bytes) { bestCallee = callee; bestStack = est; + bestStackBytes = estBytes; } } diff --git a/src/analysis/ConstParamAnalysis.cpp b/src/analysis/ConstParamAnalysis.cpp index 61b89a0..3b03f53 100644 --- a/src/analysis/ConstParamAnalysis.cpp +++ b/src/analysis/ConstParamAnalysis.cpp @@ -19,9 +19,10 @@ namespace ctrace::stack::analysis { struct TypeQualifiers { - bool isConst = false; - bool isVolatile = false; - bool isRestrict = false; + std::uint64_t isConst : 1 = false; + std::uint64_t isVolatile : 1 = false; + std::uint64_t isRestrict : 1 = false; + std::uint64_t reservedFlags : 61 = 0; }; struct StrippedDIType @@ -43,18 +44,19 @@ namespace ctrace::stack::analysis const llvm::DIType* originalType = nullptr; const llvm::DIType* pointeeType = nullptr; // unqualified, typedefs stripped const llvm::DIType* pointeeDisplayType = nullptr; // unqualified, typedefs preserved - bool isPointer = false; - bool isReference = false; - bool isRvalueReference = false; - bool pointerConst = false; - bool pointerVolatile = false; - bool pointerRestrict = false; - bool pointeeConst = false; - bool pointeeVolatile = false; - bool pointeeRestrict = false; - bool isDoublePointer = false; - bool isVoid = false; - bool isFunctionPointer = false; + std::uint64_t isPointer : 1 = false; + std::uint64_t isReference : 1 = false; + std::uint64_t isRvalueReference : 1 = false; + std::uint64_t pointerConst : 1 = false; + std::uint64_t pointerVolatile : 1 = false; + std::uint64_t pointerRestrict : 1 = false; + std::uint64_t pointeeConst : 1 = false; + std::uint64_t pointeeVolatile : 1 = false; + std::uint64_t pointeeRestrict : 1 = false; + std::uint64_t isDoublePointer : 1 = false; + std::uint64_t isVoid : 1 = false; + std::uint64_t isFunctionPointer : 1 = false; + std::uint64_t reservedFlags : 52 = 0; }; static const llvm::DIType* stripTypedefs(const llvm::DIType* type) diff --git a/src/analysis/DuplicateIfCondition.cpp 
b/src/analysis/DuplicateIfCondition.cpp index 2a34883..4a744e6 100644 --- a/src/analysis/DuplicateIfCondition.cpp +++ b/src/analysis/DuplicateIfCondition.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -268,7 +269,8 @@ namespace ctrace::stack::analysis struct MemoryOperand { const llvm::Value* ptr = nullptr; - bool precise = false; // true if we can reason about direct stores only + std::uint64_t precise : 1 = false; // true if we can reason about direct stores only + std::uint64_t reservedFlags : 63 = 0; }; enum class ConditionKind @@ -285,14 +287,16 @@ namespace ctrace::stack::analysis llvm::Value* lhs = nullptr; llvm::Value* rhs = nullptr; llvm::Value* boolValue = nullptr; - bool valid = false; llvm::SmallVector memoryOperands; + std::uint64_t valid : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; struct ConditionAtom { ConditionKey key; - bool polarity = true; + std::uint64_t polarity : 1 = true; + std::uint64_t reservedFlags : 63 = 0; }; using ConditionSignature = llvm::SmallVector; @@ -1196,6 +1200,23 @@ namespace ctrace::stack::analysis if (!elsePathSucc || dominatingSuccCount != 1) continue; + bool bypassesDominatingElsePath = false; + llvm::SmallPtrSet exclusionSet; + exclusionSet.insert(const_cast(domBlock)); + for (unsigned succIndex = 0; succIndex < domTerm->getNumSuccessors(); ++succIndex) + { + const llvm::BasicBlock* succ = domTerm->getSuccessor(succIndex); + if (!succ || succ == elsePathSucc) + continue; + if (llvm::isPotentiallyReachable(succ, curBlock, &exclusionSet, &DT)) + { + bypassesDominatingElsePath = true; + break; + } + } + if (bypassesDominatingElsePath) + continue; + ConditionSignature domSig = buildConditionSignature(domTerm); if (domSig.empty() || !conditionSignaturesEquivalent(domSig, curSig)) continue; diff --git a/src/analysis/FrontendDiagnostics.cpp b/src/analysis/FrontendDiagnostics.cpp index 3a219a1..67c8583 100644 --- a/src/analysis/FrontendDiagnostics.cpp +++ b/src/analysis/FrontendDiagnostics.cpp @@ 
-19,7 +19,7 @@ namespace ctrace::stack::analysis { namespace { - enum class ParsedSeverity + enum class ParsedSeverity : std::uint64_t { Warning, Error @@ -28,10 +28,10 @@ namespace ctrace::stack::analysis struct ParsedFrontendWarning { std::string filePath; + std::string message; + ParsedSeverity severity = ParsedSeverity::Warning; unsigned line = 0; unsigned column = 0; - ParsedSeverity severity = ParsedSeverity::Warning; - std::string message; }; struct Classification diff --git a/src/analysis/GlobalReadBeforeWriteAnalysis.cpp b/src/analysis/GlobalReadBeforeWriteAnalysis.cpp index 989fff6..712a4c1 100644 --- a/src/analysis/GlobalReadBeforeWriteAnalysis.cpp +++ b/src/analysis/GlobalReadBeforeWriteAnalysis.cpp @@ -1,6 +1,7 @@ #include "analysis/GlobalReadBeforeWriteAnalysis.hpp" #include +#include #include #include @@ -20,8 +21,18 @@ namespace ctrace::stack::analysis { constexpr unsigned kUnderlyingObjectLookupLimit = 32; - const llvm::GlobalVariable* - resolveTrackedGlobalBuffer(const llvm::Value* pointerOperand) + bool isTrackedDefinitionGlobal(const llvm::GlobalVariable& global) + { + if (global.isDeclaration() || global.isConstant()) + return false; + if (!global.hasInitializer() || !global.getInitializer()->isNullValue()) + return false; + if (!global.getValueType()->isArrayTy()) + return false; + return true; + } + + const llvm::GlobalVariable* resolveUnderlyingGlobal(const llvm::Value* pointerOperand) { if (!pointerOperand || !pointerOperand->getType()->isPointerTy()) return nullptr; @@ -32,14 +43,46 @@ namespace ctrace::stack::analysis const auto* global = llvm::dyn_cast_or_null(underlying); if (!global) return nullptr; - if (global->isDeclaration() || global->isConstant()) + if (global->isConstant()) return nullptr; - if (!global->hasInitializer() || !global->getInitializer()->isNullValue()) + return global; + } + + const GlobalReadBeforeWriteGlobalSummary* + lookupExternalSummary(const GlobalReadBeforeWriteSummaryIndex* externalSummaries, + const 
llvm::GlobalVariable& global) + { + if (!externalSummaries || !global.hasName() || global.getName().empty()) return nullptr; - if (!global->getValueType()->isArrayTy()) + + const auto it = externalSummaries->globals.find(global.getName().str()); + if (it == externalSummaries->globals.end()) return nullptr; + return &it->second; + } - return global; + struct TrackedGlobalLookup + { + const llvm::GlobalVariable* global = nullptr; + const GlobalReadBeforeWriteGlobalSummary* summary = nullptr; + }; + + TrackedGlobalLookup + resolveTrackedGlobalBuffer(const llvm::Value* pointerOperand, + const GlobalReadBeforeWriteSummaryIndex* externalSummaries) + { + const llvm::GlobalVariable* global = resolveUnderlyingGlobal(pointerOperand); + if (!global) + return {}; + + const bool trackedLocally = isTrackedDefinitionGlobal(*global); + const GlobalReadBeforeWriteGlobalSummary* external = + lookupExternalSummary(externalSummaries, *global); + const bool trackedExternally = external && external->zeroInitializedArray; + + if (!trackedLocally && !trackedExternally) + return {}; + return {global, external}; } bool isControlOnlyLoadUsage(const llvm::LoadInst& load) @@ -115,14 +158,161 @@ namespace ctrace::stack::analysis struct ReadEvent { - const llvm::LoadInst* load = nullptr; + const llvm::Instruction* inst = nullptr; const llvm::GlobalVariable* global = nullptr; }; + + void rememberAnyKnownWrite( + const TrackedGlobalLookup& tracked, + llvm::DenseMap& hasAnyKnownWriteByGlobal) + { + if (!tracked.global) + return; + if (hasAnyKnownWriteByGlobal.find(tracked.global) == hasAnyKnownWriteByGlobal.end()) + hasAnyKnownWriteByGlobal[tracked.global] = false; + if (tracked.summary && tracked.summary->hasAnyWrite) + hasAnyKnownWriteByGlobal[tracked.global] = true; + } + + void recordGlobalWrite( + const llvm::Value* pointerOperand, const llvm::Instruction& inst, + const GlobalReadBeforeWriteSummaryIndex* externalSummaries, + llvm::DenseMap>& + writesByGlobal, + llvm::DenseMap& 
hasAnyKnownWriteByGlobal) + { + const TrackedGlobalLookup tracked = + resolveTrackedGlobalBuffer(pointerOperand, externalSummaries); + if (!tracked.global) + return; + + writesByGlobal[tracked.global].push_back(&inst); + rememberAnyKnownWrite(tracked, hasAnyKnownWriteByGlobal); + } + + void recordGlobalRead( + const llvm::Value* pointerOperand, const llvm::Instruction& inst, + const GlobalReadBeforeWriteSummaryIndex* externalSummaries, + std::vector& reads, + llvm::DenseMap& hasAnyKnownWriteByGlobal) + { + const TrackedGlobalLookup tracked = + resolveTrackedGlobalBuffer(pointerOperand, externalSummaries); + if (!tracked.global) + return; + + reads.push_back(ReadEvent{&inst, tracked.global}); + rememberAnyKnownWrite(tracked, hasAnyKnownWriteByGlobal); + } + + void collectGlobalWriteSummaryForPointer(const llvm::Value* pointerOperand, + GlobalReadBeforeWriteSummaryIndex& out) + { + const llvm::GlobalVariable* global = resolveUnderlyingGlobal(pointerOperand); + if (!global || !global->hasName() || global->getName().empty()) + return; + + auto& summary = out.globals[global->getName().str()]; + summary.hasAnyWrite = true; + summary.zeroInitializedArray = + summary.zeroInitializedArray || isTrackedDefinitionGlobal(*global); + } } // namespace + GlobalReadBeforeWriteSummaryIndex buildGlobalReadBeforeWriteSummaryIndex( + llvm::Module& mod, const std::function& shouldAnalyze) + { + GlobalReadBeforeWriteSummaryIndex out; + + for (const llvm::GlobalVariable& global : mod.globals()) + { + if (!isTrackedDefinitionGlobal(global) || !global.hasName() || global.getName().empty()) + continue; + out.globals[global.getName().str()].zeroInitializedArray = true; + } + + for (const llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + for (const llvm::BasicBlock& block : function) + { + for (const llvm::Instruction& inst : block) + { + if (const auto* store = llvm::dyn_cast(&inst)) + { + if (!store->isVolatile()) + 
collectGlobalWriteSummaryForPointer(store->getPointerOperand(), out); + continue; + } + + if (const auto* memTransfer = llvm::dyn_cast(&inst)) + { + if (!memTransfer->isVolatile()) + collectGlobalWriteSummaryForPointer(memTransfer->getRawDest(), out); + continue; + } + + if (const auto* memSet = llvm::dyn_cast(&inst)) + { + if (!memSet->isVolatile()) + collectGlobalWriteSummaryForPointer(memSet->getRawDest(), out); + continue; + } + + if (const auto* atomicRmw = llvm::dyn_cast(&inst)) + { + collectGlobalWriteSummaryForPointer(atomicRmw->getPointerOperand(), out); + continue; + } + + if (const auto* cmpXchg = llvm::dyn_cast(&inst)) + { + collectGlobalWriteSummaryForPointer(cmpXchg->getPointerOperand(), out); + continue; + } + + const auto* call = llvm::dyn_cast(&inst); + if (!call || !call->mayWriteToMemory()) + continue; + for (const llvm::Value* arg : call->args()) + collectGlobalWriteSummaryForPointer(arg, out); + } + } + } + + return out; + } + + bool mergeGlobalReadBeforeWriteSummaryIndex(GlobalReadBeforeWriteSummaryIndex& dst, + const GlobalReadBeforeWriteSummaryIndex& src) + { + bool changed = false; + for (const auto& entry : src.globals) + { + auto [it, inserted] = dst.globals.try_emplace(entry.first, entry.second); + if (inserted) + { + changed = true; + continue; + } + + GlobalReadBeforeWriteGlobalSummary& merged = it->second; + const bool beforeTracked = merged.zeroInitializedArray; + const bool beforeWrite = merged.hasAnyWrite; + merged.zeroInitializedArray |= entry.second.zeroInitializedArray; + merged.hasAnyWrite |= entry.second.hasAnyWrite; + if (merged.zeroInitializedArray != beforeTracked || merged.hasAnyWrite != beforeWrite) + changed = true; + } + return changed; + } + std::vector analyzeGlobalReadBeforeWrites(llvm::Module& mod, - const std::function& shouldAnalyze) + const std::function& shouldAnalyze, + const GlobalReadBeforeWriteSummaryIndex* externalSummaries) { std::vector issues; @@ -135,6 +325,7 @@ namespace ctrace::stack::analysis 
llvm::DenseMap instructionOrder; llvm::DenseMap> writesByGlobal; + llvm::DenseMap hasAnyKnownWriteByGlobal; std::vector reads; unsigned sequence = 0; @@ -148,21 +339,60 @@ namespace ctrace::stack::analysis { if (!store->isVolatile()) { - if (const auto* global = - resolveTrackedGlobalBuffer(store->getPointerOperand())) - { - writesByGlobal[global].push_back(&inst); - } + recordGlobalWrite(store->getPointerOperand(), inst, externalSummaries, + writesByGlobal, hasAnyKnownWriteByGlobal); + } + } + else if (auto* memTransfer = llvm::dyn_cast(&inst)) + { + if (!memTransfer->isVolatile()) + { + recordGlobalWrite(memTransfer->getRawDest(), inst, externalSummaries, + writesByGlobal, hasAnyKnownWriteByGlobal); + recordGlobalRead(memTransfer->getRawSource(), inst, externalSummaries, + reads, hasAnyKnownWriteByGlobal); + } + } + else if (auto* memSet = llvm::dyn_cast(&inst)) + { + if (!memSet->isVolatile()) + { + recordGlobalWrite(memSet->getRawDest(), inst, externalSummaries, + writesByGlobal, hasAnyKnownWriteByGlobal); } } - else if (auto* memIntrinsic = llvm::dyn_cast(&inst)) + else if (auto* atomicRmw = llvm::dyn_cast(&inst)) + { + recordGlobalRead(atomicRmw->getPointerOperand(), inst, externalSummaries, + reads, hasAnyKnownWriteByGlobal); + recordGlobalWrite(atomicRmw->getPointerOperand(), inst, externalSummaries, + writesByGlobal, hasAnyKnownWriteByGlobal); + } + else if (auto* cmpXchg = llvm::dyn_cast(&inst)) + { + recordGlobalRead(cmpXchg->getPointerOperand(), inst, externalSummaries, + reads, hasAnyKnownWriteByGlobal); + recordGlobalWrite(cmpXchg->getPointerOperand(), inst, externalSummaries, + writesByGlobal, hasAnyKnownWriteByGlobal); + } + else if (auto* call = llvm::dyn_cast(&inst)) { - if (!memIntrinsic->isVolatile()) + const bool mayRead = call->mayReadFromMemory(); + const bool mayWrite = call->mayWriteToMemory(); + if (mayRead || mayWrite) { - if (const auto* global = - resolveTrackedGlobalBuffer(memIntrinsic->getRawDest())) + for (llvm::Value* arg : 
call->args()) { - writesByGlobal[global].push_back(&inst); + if (mayRead) + { + recordGlobalRead(arg, inst, externalSummaries, reads, + hasAnyKnownWriteByGlobal); + } + if (mayWrite) + { + recordGlobalWrite(arg, inst, externalSummaries, writesByGlobal, + hasAnyKnownWriteByGlobal); + } } } } @@ -171,53 +401,61 @@ namespace ctrace::stack::analysis if (!load || load->isVolatile()) continue; - const auto* global = resolveTrackedGlobalBuffer(load->getPointerOperand()); - if (!global) - continue; - if (isControlOnlyLoadUsage(*load)) continue; - reads.push_back(ReadEvent{load, global}); + recordGlobalRead(load->getPointerOperand(), inst, externalSummaries, reads, + hasAnyKnownWriteByGlobal); } } - if (reads.empty() || writesByGlobal.empty()) + if (reads.empty()) continue; llvm::DenseMap selectedReads; llvm::DenseMap selectedWrites; + llvm::DenseMap selectedKinds; for (const ReadEvent& readEvent : reads) { - const auto writesIt = writesByGlobal.find(readEvent.global); - if (writesIt == writesByGlobal.end()) - continue; - bool hasDominatingWrite = false; const llvm::Instruction* firstWriteAfterRead = nullptr; - for (const llvm::Instruction* writeInst : writesIt->second) + const auto writesIt = writesByGlobal.find(readEvent.global); + if (writesIt != writesByGlobal.end()) { - if (domTree.dominates(writeInst, readEvent.load)) + for (const llvm::Instruction* writeInst : writesIt->second) { - hasDominatingWrite = true; - break; - } - if (!firstWriteAfterRead && domTree.dominates(readEvent.load, writeInst)) - { - firstWriteAfterRead = writeInst; + if (writeInst == readEvent.inst) + continue; + if (domTree.dominates(writeInst, readEvent.inst)) + { + hasDominatingWrite = true; + break; + } + if (domTree.dominates(readEvent.inst, writeInst) && + (!firstWriteAfterRead || + instructionOrder.lookup(writeInst) < + instructionOrder.lookup(firstWriteAfterRead))) + { + firstWriteAfterRead = writeInst; + } } } - if (hasDominatingWrite || !firstWriteAfterRead) + if (hasDominatingWrite) 
continue; + const GlobalReadBeforeWriteKind kind = + firstWriteAfterRead ? GlobalReadBeforeWriteKind::BeforeFirstLocalWrite + : GlobalReadBeforeWriteKind::WithoutLocalWrite; const auto selectedIt = selectedReads.find(readEvent.global); if (selectedIt == selectedReads.end() || - instructionOrder[readEvent.load] < instructionOrder[selectedIt->second->load]) + instructionOrder.lookup(readEvent.inst) < + instructionOrder.lookup(selectedIt->second->inst)) { selectedReads[readEvent.global] = &readEvent; selectedWrites[readEvent.global] = firstWriteAfterRead; + selectedKinds[readEvent.global] = kind; } } @@ -225,15 +463,17 @@ namespace ctrace::stack::analysis { const auto* global = selected.first; const ReadEvent* readEvent = selected.second; - if (!readEvent || !readEvent->load || !global) + if (!readEvent || !readEvent->inst || !global) continue; GlobalReadBeforeWriteIssue issue; issue.funcName = function.getName().str(); issue.globalName = global->hasName() ? global->getName().str() : std::string(""); - issue.readInst = readEvent->load; + issue.readInst = readEvent->inst; issue.firstWriteInst = selectedWrites.lookup(global); + issue.kind = selectedKinds.lookup(global); + issue.hasNonLocalWrite = hasAnyKnownWriteByGlobal.lookup(global); issues.push_back(std::move(issue)); } } diff --git a/src/analysis/InputPipeline.cpp b/src/analysis/InputPipeline.cpp index e3f1389..4e59ad8 100644 --- a/src/analysis/InputPipeline.cpp +++ b/src/analysis/InputPipeline.cpp @@ -5,16 +5,23 @@ #include #include #include +#include #include +#include #include #include +#include +#include #include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -145,6 +152,382 @@ namespace ctrace::stack::analysis } } + constexpr llvm::StringLiteral kCompileIRCacheSchema = "compile-ir-cache-v1"; + + struct FileSnapshot + { + std::string path; + std::uint64_t size = 0; + std::int64_t mtimeNs = 0; + }; + + struct CompileIRCachePaths + { + 
std::filesystem::path directory; + std::filesystem::path metaFile; + std::filesystem::path irFile; + std::filesystem::path depFile; + std::uint64_t enabled : 1 = false; + std::uint64_t reservedFlags : 63 = 0; + }; + + struct CompileIRCachePayload + { + std::string llvmIR; + std::string diagnostics; + }; + + static std::string md5Hex(llvm::StringRef input) + { + llvm::MD5 hasher; + hasher.update(input); + llvm::MD5::MD5Result out; + hasher.final(out); + llvm::SmallString<32> hex; + llvm::MD5::stringifyResult(out, hex); + return std::string(hex.str()); + } + + static std::string makeAbsolutePathFrom(const std::string& path, const std::string& baseDir) + { + std::filesystem::path p(path); + if (p.is_relative() && !baseDir.empty()) + p = std::filesystem::path(baseDir) / p; + + std::error_code ec; + std::filesystem::path absPath = std::filesystem::absolute(p, ec); + if (ec) + return p.lexically_normal().generic_string(); + return absPath.lexically_normal().generic_string(); + } + + static std::optional captureFileSnapshot(const std::string& path) + { + if (path.empty()) + return std::nullopt; + + std::error_code ec; + std::filesystem::path absolute = std::filesystem::absolute(path, ec); + if (ec) + return std::nullopt; + absolute = absolute.lexically_normal(); + + if (!std::filesystem::exists(absolute, ec) || ec) + return std::nullopt; + if (!std::filesystem::is_regular_file(absolute, ec) || ec) + return std::nullopt; + + const auto size = std::filesystem::file_size(absolute, ec); + if (ec) + return std::nullopt; + + const auto mtime = std::filesystem::last_write_time(absolute, ec); + if (ec) + return std::nullopt; + + const auto mtimeNs = std::chrono::time_point_cast(mtime) + .time_since_epoch() + .count(); + + FileSnapshot snapshot; + snapshot.path = absolute.generic_string(); + snapshot.size = static_cast(size); + snapshot.mtimeNs = static_cast(mtimeNs); + return snapshot; + } + + static bool isSnapshotCurrent(const FileSnapshot& expected) + { + const auto current = 
captureFileSnapshot(expected.path); + if (!current) + return false; + return current->size == expected.size && current->mtimeNs == expected.mtimeNs; + } + + static llvm::json::Object encodeSnapshot(const FileSnapshot& snapshot) + { + llvm::json::Object obj; + obj["path"] = snapshot.path; + obj["size"] = static_cast(snapshot.size); + obj["mtimeNs"] = snapshot.mtimeNs; + return obj; + } + + static std::optional decodeSnapshot(const llvm::json::Value& value) + { + const auto* obj = value.getAsObject(); + if (!obj) + return std::nullopt; + + const auto path = obj->getString("path"); + const auto size = obj->getInteger("size"); + const auto mtimeNs = obj->getInteger("mtimeNs"); + if (!path || !size || !mtimeNs || *size < 0) + return std::nullopt; + + FileSnapshot snapshot; + snapshot.path = path->str(); + snapshot.size = static_cast(*size); + snapshot.mtimeNs = static_cast(*mtimeNs); + return snapshot; + } + + static bool readTextFile(const std::filesystem::path& path, std::string& out) + { + out.clear(); + std::ifstream in(path, std::ios::in | std::ios::binary); + if (!in) + return false; + std::ostringstream buffer; + buffer << in.rdbuf(); + out = buffer.str(); + return true; + } + + static bool writeTextFile(const std::filesystem::path& path, const std::string& content) + { + std::ofstream out(path, std::ios::out | std::ios::trunc | std::ios::binary); + if (!out) + return false; + out << content; + return out.good(); + } + + static CompileIRCachePaths buildCompileIRCachePaths(const AnalysisConfig& config, + const std::string& filename, + LanguageType language, + const std::vector& args, + const std::string& workingDir) + { + CompileIRCachePaths paths; + if (config.compileIRCacheDir.empty()) + return paths; + + std::ostringstream keyPayload; + keyPayload << std::string(kCompileIRCacheSchema) << "\n"; + keyPayload << "language:" << static_cast(language) << "\n"; + keyPayload << "file:" << makeAbsolutePathFrom(filename, workingDir) << "\n"; + keyPayload << "workingDir:" 
<< makeAbsolutePathFrom(workingDir, "") << "\n"; + for (const std::string& arg : args) + keyPayload << "arg:" << arg << "\n"; + const std::string key = md5Hex(keyPayload.str()); + + std::filesystem::path directory = config.compileIRCacheDir; + paths.enabled = true; + paths.directory = directory; + paths.metaFile = directory / (key + ".json"); + paths.irFile = directory / (key + ".ll"); + paths.depFile = directory / (key + ".d"); + return paths; + } + + static std::optional> + parseDepfileDependencies(const std::filesystem::path& depFile, + const std::string& compileWorkingDir) + { + std::string content; + if (!readTextFile(depFile, content)) + return std::nullopt; + + std::string merged; + merged.reserve(content.size()); + for (std::size_t i = 0; i < content.size(); ++i) + { + const char c = content[i]; + if (c == '\\' && i + 1 < content.size()) + { + if (content[i + 1] == '\n') + { + ++i; + continue; + } + if (content[i + 1] == '\r' && i + 2 < content.size() && content[i + 2] == '\n') + { + i += 2; + continue; + } + } + merged.push_back(c); + } + + std::size_t colonPos = std::string::npos; + for (std::size_t i = 0; i < merged.size(); ++i) + { + if (merged[i] != ':') + continue; + if (i + 1 < merged.size() && + std::isspace(static_cast(merged[i + 1]))) + { + colonPos = i; + break; + } + } + if (colonPos == std::string::npos || colonPos + 1 >= merged.size()) + return std::nullopt; + + const std::string depsPart = merged.substr(colonPos + 1); + std::vector dependencies; + std::unordered_set seen; + std::string current; + bool escaping = false; + + auto flushDependency = [&]() + { + if (current.empty()) + return; + const std::string absolute = makeAbsolutePathFrom(current, compileWorkingDir); + if (!absolute.empty() && seen.insert(absolute).second) + dependencies.push_back(absolute); + current.clear(); + }; + + for (char ch : depsPart) + { + if (escaping) + { + current.push_back(ch); + escaping = false; + continue; + } + + if (ch == '\\') + { + escaping = true; + 
continue; + } + + if (std::isspace(static_cast(ch))) + { + flushDependency(); + continue; + } + + current.push_back(ch); + } + flushDependency(); + + if (dependencies.empty()) + return std::nullopt; + return dependencies; + } + + static bool ensureDirectoryExists(const std::filesystem::path& directory) + { + std::error_code ec; + std::filesystem::create_directories(directory, ec); + return !ec; + } + + static std::optional + loadCompileIRCachePayload(const CompileIRCachePaths& cachePaths) + { + if (!cachePaths.enabled) + return std::nullopt; + + std::string metadataText; + if (!readTextFile(cachePaths.metaFile, metadataText)) + return std::nullopt; + + auto parsed = llvm::json::parse(metadataText); + if (!parsed) + return std::nullopt; + + const auto* root = parsed->getAsObject(); + if (!root) + return std::nullopt; + + const auto schema = root->getString("schema"); + if (!schema || *schema != kCompileIRCacheSchema) + return std::nullopt; + + const auto* sourceValue = root->get("source"); + if (!sourceValue) + return std::nullopt; + const auto sourceSnapshot = decodeSnapshot(*sourceValue); + if (!sourceSnapshot || !isSnapshotCurrent(*sourceSnapshot)) + return std::nullopt; + + const auto* depsArray = root->getArray("dependencies"); + if (!depsArray) + return std::nullopt; + for (const auto& depValue : *depsArray) + { + const auto depSnapshot = decodeSnapshot(depValue); + if (!depSnapshot || !isSnapshotCurrent(*depSnapshot)) + return std::nullopt; + } + + std::string llvmIR; + if (!readTextFile(cachePaths.irFile, llvmIR)) + return std::nullopt; + + CompileIRCachePayload payload; + payload.llvmIR = std::move(llvmIR); + if (const auto diagnostics = root->getString("diagnostics")) + payload.diagnostics = diagnostics->str(); + return payload; + } + + static bool storeCompileIRCachePayload(const CompileIRCachePaths& cachePaths, + const FileSnapshot& sourceSnapshot, + const std::vector& dependencySnapshots, + const std::string& diagnostics, + const std::string& llvmIR) + 
{ + if (!cachePaths.enabled) + return false; + if (dependencySnapshots.empty()) + return false; + if (!ensureDirectoryExists(cachePaths.directory)) + return false; + + llvm::json::Array dependenciesArray; + for (const FileSnapshot& dependency : dependencySnapshots) + dependenciesArray.push_back(encodeSnapshot(dependency)); + + llvm::json::Object root; + root["schema"] = kCompileIRCacheSchema; + root["source"] = encodeSnapshot(sourceSnapshot); + root["dependencies"] = std::move(dependenciesArray); + root["diagnostics"] = diagnostics; + + std::string metadataText; + llvm::raw_string_ostream metadataStream(metadataText); + metadataStream << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))); + metadataStream.flush(); + + if (!writeTextFile(cachePaths.irFile, llvmIR)) + return false; + if (!writeTextFile(cachePaths.metaFile, metadataText)) + return false; + return true; + } + + static std::optional> + buildDependencySnapshots(const std::vector& dependencies) + { + std::vector snapshots; + snapshots.reserve(dependencies.size()); + for (const std::string& dependencyPath : dependencies) + { + const auto snapshot = captureFileSnapshot(dependencyPath); + if (!snapshot) + return std::nullopt; + snapshots.push_back(*snapshot); + } + return snapshots; + } + + static void appendDependencyCaptureArgs(std::vector& args, + const std::filesystem::path& depFile) + { + args.push_back("-MMD"); + args.push_back("-MF"); + args.push_back(depFile.string()); + args.push_back("-MT"); + args.push_back("coretrace_compile_ir_cache_target"); + } + static bool resolveDumpIRPath(const AnalysisConfig& config, const std::string& inputPath, const std::filesystem::path& baseDir, std::filesystem::path& outPath, std::string& error) @@ -305,7 +688,8 @@ namespace ctrace::stack::analysis private: std::filesystem::path previousPath_; - bool active_ = false; + std::uint64_t active_ : 1 = false; + std::uint64_t reservedFlags_ : 63 = 0; }; } // namespace @@ -354,7 +738,6 @@ namespace 
ctrace::stack::analysis std::filesystem::path baseDir = std::filesystem::current_path(cwdErr); using Clock = std::chrono::steady_clock; auto compileStart = Clock::now(); - bool compiled = false; result.language = detectLanguageFromFile(filename, ctx); if (result.language == LanguageType::Unknown) @@ -379,12 +762,15 @@ namespace ctrace::stack::analysis if (config.timing) coretrace::log(coretrace::Level::Info, "Compiling {}...\n", filename); compilerlib::OutputMode mode = compilerlib::OutputMode::ToMemory; - bool retriedWithWorkingDir = false; + const CompileIRCachePaths cachePaths = + buildCompileIRCachePaths(config, filename, result.language, args, workingDir); + auto compileWithOptionalWorkingDir = - [&](bool useWorkingDir) -> std::optional + [&](const std::vector& compileArgs, + bool useWorkingDir) -> std::optional { if (!useWorkingDir) - return compilerlib::compile(args, mode); + return compilerlib::compile(compileArgs, mode); std::string cwdError; ScopedCurrentPath cwdGuard(workingDir, cwdError); @@ -393,31 +779,132 @@ namespace ctrace::stack::analysis result.error = cwdError + "\n"; return std::nullopt; } - return compilerlib::compile(args, mode); + return compilerlib::compile(compileArgs, mode); }; + auto compileWithConfiguredWorkingDir = + [&](const std::vector& compileArgs, + bool& retriedWithWorkingDir) -> std::optional + { + retriedWithWorkingDir = false; + std::optional res; + const bool hasWorkingDir = !workingDir.empty(); + if (config.jobs > 1 && hasWorkingDir) + { + // Optimistic fast path for multi-job runs: most compdb commands use absolute + // paths. + res = compileWithOptionalWorkingDir(compileArgs, false); + if (!res || !res->success) + { + // Fallback keeps correctness for relative include paths and avoids process + // cwd races. 
+ std::lock_guard lock(gCompileWorkingDirMutex); + res = compileWithOptionalWorkingDir(compileArgs, true); + retriedWithWorkingDir = true; + } + } + else + { + res = compileWithOptionalWorkingDir(compileArgs, hasWorkingDir); + } + return res; + }; + + if (cachePaths.enabled) + { + if (auto cached = loadCompileIRCachePayload(cachePaths)) + { + if (config.timing) + coretrace::log(coretrace::Level::Info, "Compilation cache hit for {}\n", + filename); + + compileDiagnosticsText = cached->diagnostics; + if (!compileDiagnosticsText.empty() && !config.quiet) + logText(coretrace::Level::Warn, compileDiagnosticsText); + + auto buffer = llvm::MemoryBuffer::getMemBuffer(cached->llvmIR, "cached_ir"); + llvm::SMDiagnostic diag; + auto parseStart = Clock::now(); + result.module = llvm::parseIR(buffer->getMemBufferRef(), diag, ctx); + if (config.timing) + { + const auto parseEnd = Clock::now(); + const auto ms = std::chrono::duration_cast( + parseEnd - parseStart) + .count(); + coretrace::log(coretrace::Level::Info, "IR parse done in {} ms\n", ms); + } + + if (result.module) + { + if (!compileDiagnosticsText.empty()) + { + result.frontendDiagnostics = collectFrontendDiagnostics( + compileDiagnosticsText, *result.module, filename); + } + + if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error)) + return result; + return result; + } + + if (config.timing) + { + coretrace::log(coretrace::Level::Warn, + "Compilation cache entry invalid for {}; recompiling\n", + filename); + } + result.module.reset(); + } + else if (config.timing) + { + coretrace::log(coretrace::Level::Info, "Compilation cache miss for {}\n", + filename); + } + } + + bool retriedWithWorkingDir = false; std::optional res; - const bool hasWorkingDir = !workingDir.empty(); - if (config.jobs > 1 && hasWorkingDir) + std::optional sourceSnapshot; + std::optional> dependencySnapshots; + if (cachePaths.enabled && ensureDirectoryExists(cachePaths.directory)) { - // Optimistic fast path for multi-job runs: 
most compdb commands use absolute paths. - res = compileWithOptionalWorkingDir(false); + std::error_code removeErr; + std::filesystem::remove(cachePaths.depFile, removeErr); + std::vector cacheCompileArgs = args; + appendDependencyCaptureArgs(cacheCompileArgs, cachePaths.depFile); + + bool retriedForDependencyCompile = false; + res = + compileWithConfiguredWorkingDir(cacheCompileArgs, retriedForDependencyCompile); + retriedWithWorkingDir = retriedForDependencyCompile; + if (!res || !res->success) { - // Fallback keeps correctness for relative include paths and avoids process cwd races. - std::lock_guard lock(gCompileWorkingDirMutex); - res = compileWithOptionalWorkingDir(true); - retriedWithWorkingDir = true; + bool retriedForFallbackCompile = false; + auto fallbackResult = + compileWithConfiguredWorkingDir(args, retriedForFallbackCompile); + retriedWithWorkingDir = retriedWithWorkingDir || retriedForFallbackCompile; + if (fallbackResult) + res = std::move(fallbackResult); + } + else + { + const auto dependencies = + parseDepfileDependencies(cachePaths.depFile, workingDir); + const std::string sourcePath = makeAbsolutePathFrom(filename, workingDir); + sourceSnapshot = captureFileSnapshot(sourcePath); + if (dependencies && sourceSnapshot) + dependencySnapshots = buildDependencySnapshots(*dependencies); } } else { - res = compileWithOptionalWorkingDir(hasWorkingDir); + res = compileWithConfiguredWorkingDir(args, retriedWithWorkingDir); } if (!res) return result; - compiled = true; if (!res->success) { @@ -474,6 +961,23 @@ namespace ctrace::stack::analysis collectFrontendDiagnostics(compileDiagnosticsText, *result.module, filename); } + if (cachePaths.enabled && sourceSnapshot && dependencySnapshots) + { + const bool stored = + storeCompileIRCachePayload(cachePaths, *sourceSnapshot, *dependencySnapshots, + compileDiagnosticsText, res->llvmIR); + if (config.timing && stored) + { + coretrace::log(coretrace::Level::Info, + "Stored compilation cache entry for {}\n", 
filename); + } + } + if (cachePaths.enabled) + { + std::error_code removeErr; + std::filesystem::remove(cachePaths.depFile, removeErr); + } + if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error)) return result; diff --git a/src/analysis/IntegerOverflowAnalysis.cpp b/src/analysis/IntegerOverflowAnalysis.cpp index 2165d21..d6085fe 100644 --- a/src/analysis/IntegerOverflowAnalysis.cpp +++ b/src/analysis/IntegerOverflowAnalysis.cpp @@ -28,15 +28,17 @@ namespace ctrace::stack::analysis { llvm::StringRef name; unsigned sizeArgIndex = 0; + unsigned reserved = 0; }; struct RiskSummary { IntegerOverflowIssueKind kind; - std::string operation; const llvm::Value* relatedValue = nullptr; const llvm::BinaryOperator* arithmeticOp = nullptr; + std::string operation; unsigned truncTargetBitWidth = 0; + unsigned reserved = 0; }; using SmtFeasibility = smt::SmtFeasibility; @@ -73,8 +75,8 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, lhs, rhsConstant, true, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, lhs, rhsConstant, true, + contextInst)); } SmtFeasibility @@ -83,8 +85,8 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, lhs, rhsConstant, false, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, lhs, rhsConstant, false, + contextInst)); } }; @@ -566,11 +568,11 @@ namespace ctrace::stack::analysis { if (isPotentiallyLossyTruncation(*trunc, ranges)) { - return RiskSummary{IntegerOverflowIssueKind::TruncationInSizeComputation, - "trunc", - trunc->getOperand(0), - nullptr, - trunc->getType()->getIntegerBitWidth()}; + return RiskSummary{ + .kind = IntegerOverflowIssueKind::TruncationInSizeComputation, + .relatedValue = trunc->getOperand(0), + .operation = "trunc", 
+ .truncTargetBitWidth = trunc->getType()->getIntegerBitWidth()}; } return classifySizeOperandRecursive(trunc->getOperand(0), ranges, visited, depth + 1); @@ -581,8 +583,9 @@ namespace ctrace::stack::analysis const llvm::Value* source = sext->getOperand(0); if (dependsOnFunctionArgument(source) && !hasKnownNonNegativeRange(source, ranges)) { - return RiskSummary{ - IntegerOverflowIssueKind::SignedToUnsignedSize, "sext", source}; + return RiskSummary{.kind = IntegerOverflowIssueKind::SignedToUnsignedSize, + .relatedValue = source, + .operation = "sext"}; } return classifySizeOperandRecursive(source, ranges, visited, depth + 1); } @@ -605,11 +608,12 @@ namespace ctrace::stack::analysis llvm::isa(binary->getOperand(1)); if (!bothConstants && dependsOnFunctionArgument(binary)) { - return RiskSummary{IntegerOverflowIssueKind::ArithmeticInSizeComputation, - binary->getOpcodeName(), - binary, - binary, - 0}; + return RiskSummary{ + .kind = IntegerOverflowIssueKind::ArithmeticInSizeComputation, + .relatedValue = binary, + .arithmeticOp = binary, + .operation = binary->getOpcodeName(), + .truncTargetBitWidth = 0}; } break; } @@ -634,11 +638,11 @@ namespace ctrace::stack::analysis case llvm::Intrinsic::usub_with_overflow: case llvm::Intrinsic::umul_with_overflow: return RiskSummary{ - IntegerOverflowIssueKind::ArithmeticInSizeComputation, - intrinsic->getCalledFunction() - ? intrinsic->getCalledFunction()->getName().str() - : "with.overflow", - aggregate}; + .kind = IntegerOverflowIssueKind::ArithmeticInSizeComputation, + .relatedValue = aggregate, + .operation = intrinsic->getCalledFunction() + ? 
intrinsic->getCalledFunction()->getName().str() + : "with.overflow"}; default: break; } @@ -713,17 +717,18 @@ namespace ctrace::stack::analysis queryRanges[queryValue] = *knownRange; } - static std::map buildValueQueryRanges( - const llvm::Value& queryValue, const std::map& ranges) + static std::map + buildValueQueryRanges(const llvm::Value& queryValue, + const std::map& ranges) { std::map queryRanges; addLocalRangeForSmt(queryRanges, &queryValue, ranges); return queryRanges; } - static std::map buildArithmeticQueryRanges( - const llvm::BinaryOperator& operation, - const std::map& ranges) + static std::map + buildArithmeticQueryRanges(const llvm::BinaryOperator& operation, + const std::map& ranges) { std::map queryRanges; addLocalRangeForSmt(queryRanges, operation.getOperand(0), ranges); @@ -732,10 +737,10 @@ namespace ctrace::stack::analysis return queryRanges; } - static bool shouldSuppressRiskWithSmt( - const IntegerOverflowConstraintEvaluator& evaluator, - const std::map& ranges, const RiskSummary& risk, - const llvm::Instruction& contextInst) + static bool shouldSuppressRiskWithSmt(const IntegerOverflowConstraintEvaluator& evaluator, + const std::map& ranges, + const RiskSummary& risk, + const llvm::Instruction& contextInst) { switch (risk.kind) { @@ -775,12 +780,10 @@ namespace ctrace::stack::analysis if (queryRanges.empty()) return false; const std::int64_t truncMax = (std::int64_t{1} << risk.truncTargetBitWidth) - 1; - const SmtFeasibility negativeFeasible = - evaluator.isSignedLessEqualFeasible(queryRanges, *risk.relatedValue, -1, - &contextInst); - const SmtFeasibility aboveMaxFeasible = - evaluator.isSignedGreaterThanFeasible(queryRanges, *risk.relatedValue, truncMax, - &contextInst); + const SmtFeasibility negativeFeasible = evaluator.isSignedLessEqualFeasible( + queryRanges, *risk.relatedValue, -1, &contextInst); + const SmtFeasibility aboveMaxFeasible = evaluator.isSignedGreaterThanFeasible( + queryRanges, *risk.relatedValue, truncMax, &contextInst); 
return negativeFeasible == SmtFeasibility::Infeasible && aboveMaxFeasible == SmtFeasibility::Infeasible; } @@ -829,7 +832,7 @@ namespace ctrace::stack::analysis buildArithmeticQueryRanges(*binary, ranges); if (!queryRanges.empty() && evaluator.isSignedOverflowFeasible(queryRanges, *binary, &inst) == - SmtFeasibility::Infeasible) + SmtFeasibility::Infeasible) { continue; } diff --git a/src/analysis/InvalidBaseReconstruction.cpp b/src/analysis/InvalidBaseReconstruction.cpp index 1dd5e5e..0b63655 100644 --- a/src/analysis/InvalidBaseReconstruction.cpp +++ b/src/analysis/InvalidBaseReconstruction.cpp @@ -1,5 +1,6 @@ #include "analysis/InvalidBaseReconstruction.hpp" +#include #include #include #include @@ -153,7 +154,8 @@ namespace ctrace::stack::analysis { const llvm::Value* ptrOperand = nullptr; int64_t offset = 0; - bool sawOffset = false; + std::uint64_t sawOffset : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; static const llvm::Value* stripIntCasts(const llvm::Value* V) @@ -213,7 +215,8 @@ namespace ctrace::stack::analysis { const Value* val = nullptr; int64_t offset = 0; - bool sawOffset = false; + std::uint64_t sawOffset : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; SmallVector worklist; @@ -978,11 +981,12 @@ namespace ctrace::stack::analysis struct AggEntry { std::set memberOffsets; - bool anyOutOfBounds = false; - bool anyNonZeroResult = false; std::string varName; - uint64_t allocaSize = 0; std::string targetType; + uint64_t allocaSize = 0; + std::uint64_t anyOutOfBounds : 1 = false; + std::uint64_t anyNonZeroResult : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; std::map, AggEntry> agg; @@ -1097,10 +1101,11 @@ namespace ctrace::stack::analysis struct AggEntry { std::set memberOffsets; - bool anyOutOfBounds = false; - bool anyNonZeroResult = false; std::string varName; std::string targetType; + std::uint64_t anyOutOfBounds : 1 = false; + std::uint64_t anyNonZeroResult : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; std::map 
agg; diff --git a/src/analysis/MemIntrinsicOverflow.cpp b/src/analysis/MemIntrinsicOverflow.cpp index ede262c..9990c29 100644 --- a/src/analysis/MemIntrinsicOverflow.cpp +++ b/src/analysis/MemIntrinsicOverflow.cpp @@ -20,11 +20,12 @@ namespace ctrace::stack::analysis { struct ResolvedSink { - bool valid = false; - bool hasExplicitLength = false; + std::string displayName; unsigned destArgIndex = 0; unsigned sizeArgIndex = 0; - std::string displayName; + std::uint64_t valid : 1 = false; + std::uint64_t hasExplicitLength : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, diff --git a/src/analysis/OOBReadAnalysis.cpp b/src/analysis/OOBReadAnalysis.cpp index da5fbe4..63207f6 100644 --- a/src/analysis/OOBReadAnalysis.cpp +++ b/src/analysis/OOBReadAnalysis.cpp @@ -29,7 +29,7 @@ namespace ctrace::stack::analysis { using SmtFeasibility = smt::SmtFeasibility; - enum class RecentWriteKind + enum class RecentWriteKind : std::uint64_t { Unknown, MemcpyLike, @@ -39,9 +39,9 @@ namespace ctrace::stack::analysis struct RecentWrite { - RecentWriteKind kind = RecentWriteKind::Unknown; std::string apiName; std::uint64_t writeSizeBytes = 0; + RecentWriteKind kind = RecentWriteKind::Unknown; }; struct ObjectInfo @@ -322,8 +322,8 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, indexExpr, -1, false, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, indexExpr, -1, false, + contextInst)); } SmtFeasibility @@ -332,16 +332,16 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { if (limitExclusive == 0 || - limitExclusive > static_cast(std::numeric_limits::max())) + limitExclusive > + static_cast(std::numeric_limits::max())) { return SmtFeasibility::Inconclusive; } - const std::int64_t upperInclusive = - static_cast(limitExclusive - 1); 
+ const std::int64_t upperInclusive = static_cast(limitExclusive - 1); return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, indexExpr, upperInclusive, true, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, indexExpr, upperInclusive, true, + contextInst)); } }; @@ -368,8 +368,7 @@ namespace ctrace::stack::analysis } return evaluator.isUpperOverflowFeasible(queryRanges, *indexExpr, capacity, - &accessInst) == - SmtFeasibility::Infeasible; + &accessInst) == SmtFeasibility::Infeasible; } } // namespace @@ -468,8 +467,10 @@ namespace ctrace::stack::analysis auto len = tryGetConstantU64(call->getArgOperand(2)); if (obj && len) { - recentWrites[obj->root] = RecentWrite{ - RecentWriteKind::MemcpyLike, calleeName.str(), *len}; + recentWrites[obj->root] = + RecentWrite{.apiName = calleeName.str(), + .writeSizeBytes = *len, + .kind = RecentWriteKind::MemcpyLike}; } } } @@ -486,7 +487,9 @@ namespace ctrace::stack::analysis ? RecentWriteKind::Unknown : RecentWriteKind::MemsetNonZero; recentWrites[obj->root] = - RecentWrite{kind, calleeName.str(), *len}; + RecentWrite{.apiName = calleeName.str(), + .writeSizeBytes = *len, + .kind = kind}; } } } @@ -499,7 +502,9 @@ namespace ctrace::stack::analysis if (obj && len) { recentWrites[obj->root] = - RecentWrite{RecentWriteKind::MemcpyLike, "strncpy", *len}; + RecentWrite{.apiName = "strncpy", + .writeSizeBytes = *len, + .kind = RecentWriteKind::MemcpyLike}; } } } @@ -510,8 +515,10 @@ namespace ctrace::stack::analysis auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); if (obj) { - recentWrites[obj->root] = RecentWrite{ - RecentWriteKind::StrcpyLike, calleeName.str(), 0}; + recentWrites[obj->root] = + RecentWrite{.apiName = calleeName.str(), + .writeSizeBytes = 0, + .kind = RecentWriteKind::StrcpyLike}; } } } diff --git a/src/analysis/ResourceLifetimeAnalysis.cpp b/src/analysis/ResourceLifetimeAnalysis.cpp index bc07aa7..bdd4b95 100644 --- 
a/src/analysis/ResourceLifetimeAnalysis.cpp +++ b/src/analysis/ResourceLifetimeAnalysis.cpp @@ -74,10 +74,10 @@ namespace ctrace::stack::analysis struct ResourceRule { - RuleAction action = RuleAction::AcquireOut; std::string functionPattern; - unsigned argIndex = 0; std::string resourceKind; + unsigned argIndex = 0; + RuleAction action = RuleAction::AcquireOut; }; struct ResourceModel @@ -89,9 +89,10 @@ namespace ctrace::stack::analysis { std::string className; std::string methodName; - bool isCtor = false; - bool isDtor = false; - bool isLifecycleReleaseLike = false; + std::uint64_t isCtor : 1 = false; + std::uint64_t isDtor : 1 = false; + std::uint64_t isLifecycleReleaseLike : 1 = false; + std::uint64_t reservedFlags : 61 = 0; }; enum class StorageScope @@ -105,13 +106,13 @@ namespace ctrace::stack::analysis struct StorageKey { - StorageScope scope = StorageScope::Unknown; std::string key; std::string displayName; std::string className; + const llvm::AllocaInst* localAlloca = nullptr; std::uint64_t offset = 0; int argumentIndex = -1; - const llvm::AllocaInst* localAlloca = nullptr; + StorageScope scope = StorageScope::Unknown; bool valid() const { @@ -121,11 +122,12 @@ namespace ctrace::stack::analysis struct ParamLifetimeEffect { - RuleAction action = RuleAction::AcquireOut; - unsigned argIndex = 0; - std::uint64_t offset = 0; - bool viaPointerSlot = false; std::string resourceKind; + std::uint64_t offset = 0; + unsigned argIndex = 0; + RuleAction action = RuleAction::AcquireOut; + std::uint64_t viaPointerSlot : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; struct FunctionLifetimeSummary @@ -142,8 +144,9 @@ namespace ctrace::stack::analysis std::vector releaseInsts; int acquires = 0; int releases = 0; - bool escapesViaReturn = false; OwnershipState ownership = OwnershipState::Unknown; + std::uint32_t escapesViaReturn : 1 = false; + std::uint32_t reservedFlags : 31 = 0; }; struct ClassAcquireRecord diff --git a/src/analysis/SizeMinusKWrites.cpp 
b/src/analysis/SizeMinusKWrites.cpp index b681b01..ddc9236 100644 --- a/src/analysis/SizeMinusKWrites.cpp +++ b/src/analysis/SizeMinusKWrites.cpp @@ -81,8 +81,8 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, lhs, rhsConstant, false, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, lhs, rhsConstant, false, + contextInst)); } }; @@ -98,9 +98,10 @@ namespace ctrace::stack::analysis return true; } - static std::optional resolveSmtRangeRecursive( - const llvm::Value* value, const std::map& ranges, - llvm::SmallPtrSetImpl& visited, unsigned depth) + static std::optional + resolveSmtRangeRecursive(const llvm::Value* value, + const std::map& ranges, + llvm::SmallPtrSetImpl& visited, unsigned depth) { if (!value || depth > 16) return std::nullopt; @@ -431,10 +432,9 @@ namespace ctrace::stack::analysis return summaries; } - static void analyzeSizeMinusKWritesInFunction(llvm::Function& F, const llvm::DataLayout& DL, - const SizeMinusKSummaryMap& summaries, - const SizeMinusKConstraintEvaluator& evaluator, - std::vector& out) + static void analyzeSizeMinusKWritesInFunction( + llvm::Function& F, const llvm::DataLayout& DL, const SizeMinusKSummaryMap& summaries, + const SizeMinusKConstraintEvaluator& evaluator, std::vector& out) { using namespace llvm; @@ -578,10 +578,10 @@ namespace ctrace::stack::analysis return analyzeSizeMinusKWrites(mod, DL, shouldAnalyzeFunction, defaultConfig); } - std::vector analyzeSizeMinusKWrites( - llvm::Module& mod, const llvm::DataLayout& DL, - const std::function& shouldAnalyzeFunction, - const AnalysisConfig& config) + std::vector + analyzeSizeMinusKWrites(llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyzeFunction, + const AnalysisConfig& config) { SizeMinusKSummaryMap summaries = buildSizeMinusKSummaries(mod); std::vector issues; diff --git 
a/src/analysis/StackBufferAnalysis.cpp b/src/analysis/StackBufferAnalysis.cpp index 45947e8..95a92f7 100644 --- a/src/analysis/StackBufferAnalysis.cpp +++ b/src/analysis/StackBufferAnalysis.cpp @@ -105,8 +105,8 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, indexExpr, -1, false, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, indexExpr, -1, false, + contextInst)); } SmtFeasibility @@ -115,16 +115,16 @@ namespace ctrace::stack::analysis const llvm::Instruction* contextInst) const { if (limitExclusive == 0 || - limitExclusive > static_cast(std::numeric_limits::max())) + limitExclusive > + static_cast(std::numeric_limits::max())) { return SmtFeasibility::Inconclusive; } - const std::int64_t upperInclusive = - static_cast(limitExclusive - 1); + const std::int64_t upperInclusive = static_cast(limitExclusive - 1); return smt::SmtConstraintEvaluator::evaluateQuery( - smt::encodeSignedComparisonFeasibility( - ranges, indexExpr, upperInclusive, true, contextInst)); + smt::encodeSignedComparisonFeasibility(ranges, indexExpr, upperInclusive, true, + contextInst)); } }; @@ -896,10 +896,11 @@ namespace ctrace::stack::analysis return refined; } - static bool isUpperViolationInfeasibleBySmt( - const StackBufferConstraintEvaluator& evaluator, - const IntRange& localRange, const llvm::Value* indexExpr, StackSize arraySize, - const llvm::Instruction& accessInst) + static bool isUpperViolationInfeasibleBySmt(const StackBufferConstraintEvaluator& evaluator, + const IntRange& localRange, + const llvm::Value* indexExpr, + StackSize arraySize, + const llvm::Instruction& accessInst) { if (!indexExpr || !indexExpr->getType()->isIntegerTy()) return false; @@ -909,14 +910,14 @@ namespace ctrace::stack::analysis std::map queryRanges; queryRanges[indexExpr] = localRange; - return evaluator.isUpperOverflowFeasible(queryRanges, *indexExpr, 
arraySize, &accessInst) == - SmtFeasibility::Infeasible; + return evaluator.isUpperOverflowFeasible(queryRanges, *indexExpr, arraySize, + &accessInst) == SmtFeasibility::Infeasible; } - static bool isLowerViolationInfeasibleBySmt( - const StackBufferConstraintEvaluator& evaluator, - const IntRange& localRange, const llvm::Value* indexExpr, - const llvm::Instruction& accessInst) + static bool isLowerViolationInfeasibleBySmt(const StackBufferConstraintEvaluator& evaluator, + const IntRange& localRange, + const llvm::Value* indexExpr, + const llvm::Instruction& accessInst) { if (!indexExpr || !indexExpr->getType()->isIntegerTy()) return false; @@ -952,7 +953,8 @@ namespace ctrace::stack::analysis const AllocaInst* alloca = nullptr; const GlobalVariable* global = nullptr; std::vector aliasPath; - bool computed = false; + std::uint64_t computed : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; std::unordered_map resolutionCache; @@ -1201,8 +1203,8 @@ namespace ctrace::stack::analysis { if (auto* S = dyn_cast(GU)) { - if (isUpperViolationInfeasibleBySmt( - evaluator, R, baseIdxVal, arraySize, *S)) + if (isUpperViolationInfeasibleBySmt(evaluator, R, baseIdxVal, + arraySize, *S)) { continue; } @@ -1222,8 +1224,8 @@ namespace ctrace::stack::analysis } else if (auto* L = dyn_cast(GU)) { - if (isUpperViolationInfeasibleBySmt( - evaluator, R, baseIdxVal, arraySize, *L)) + if (isUpperViolationInfeasibleBySmt(evaluator, R, baseIdxVal, + arraySize, *L)) { continue; } @@ -1251,8 +1253,7 @@ namespace ctrace::stack::analysis { if (auto* S = dyn_cast(GU)) { - if (isLowerViolationInfeasibleBySmt( - evaluator, R, baseIdxVal, *S)) + if (isLowerViolationInfeasibleBySmt(evaluator, R, baseIdxVal, *S)) { continue; } @@ -1273,8 +1274,7 @@ namespace ctrace::stack::analysis } else if (auto* L = dyn_cast(GU)) { - if (isLowerViolationInfeasibleBySmt( - evaluator, R, baseIdxVal, *L)) + if (isLowerViolationInfeasibleBySmt(evaluator, R, baseIdxVal, *L)) { continue; } diff --git 
a/src/analysis/StackComputation.cpp b/src/analysis/StackComputation.cpp index 7c2e119..92dc3e3 100644 --- a/src/analysis/StackComputation.cpp +++ b/src/analysis/StackComputation.cpp @@ -1,6 +1,7 @@ #include "analysis/StackComputation.hpp" #include +#include #include #include #include @@ -311,17 +312,16 @@ namespace ctrace::stack::analysis { } - ConstraintSat - isSatisfiable(const std::map& ranges, - const llvm::Value* edgeCondition = nullptr, bool takesTrueEdge = true, - const llvm::BasicBlock* edgeBlock = nullptr, - const llvm::BasicBlock* incomingBlock = nullptr) const + ConstraintSat isSatisfiable(const std::map& ranges, + const llvm::Value* edgeCondition = nullptr, + bool takesTrueEdge = true, + const llvm::BasicBlock* edgeBlock = nullptr, + const llvm::BasicBlock* incomingBlock = nullptr) const { const ConstraintSat fallbackDecision = evaluateIntervalSatisfiability(ranges); const smt::SmtFeasibility feasibility = - smt::SmtConstraintEvaluator::evaluateQuery( - encoder_.encode(ranges, edgeCondition, takesTrueEdge, edgeBlock, - incomingBlock)); + smt::SmtConstraintEvaluator::evaluateQuery(encoder_.encode( + ranges, edgeCondition, takesTrueEdge, edgeBlock, incomingBlock)); switch (feasibility) { case smt::SmtFeasibility::Feasible: @@ -336,7 +336,7 @@ namespace ctrace::stack::analysis } private: - smt::LlvmConstraintEncoder encoder_; + [[no_unique_address]] smt::LlvmConstraintEncoder encoder_; }; static const llvm::Value* canonicalConstraintValue(const llvm::Value* value) @@ -355,7 +355,8 @@ namespace ctrace::stack::analysis return current; } - static bool deriveRangeConstraintFromPredicate(llvm::ICmpInst::Predicate pred, bool valueIsOp0, + static bool deriveRangeConstraintFromPredicate(llvm::ICmpInst::Predicate pred, + bool valueIsOp0, const llvm::ConstantInt& constant, IntRange& out) { @@ -552,9 +553,8 @@ namespace ctrace::stack::analysis return outKey != nullptr; } - static bool - applyConstraintToState(std::map& ranges, - const llvm::Value* key, const 
IntRange& constraint) + static bool applyConstraintToState(std::map& ranges, + const llvm::Value* key, const IntRange& constraint) { IntRange& cur = ranges[key]; @@ -601,9 +601,9 @@ namespace ctrace::stack::analysis { const BasicBlock* block = nullptr; const BasicBlock* predecessor = nullptr; - bool sawRecursiveCall = false; std::map ranges; - unsigned depth = 0; + std::uint64_t depth = 0; + std::uint64_t sawRecursiveCall = 0; }; constexpr unsigned kMaxStates = 4096; @@ -611,7 +611,11 @@ namespace ctrace::stack::analysis constexpr unsigned kMaxVisitsPerNode = 128; std::deque worklist; - worklist.push_back(PathState{&F.getEntryBlock(), nullptr, false, {}, 0}); + worklist.push_back(PathState{.block = &F.getEntryBlock(), + .predecessor = nullptr, + .ranges = {}, + .depth = 0, + .sawRecursiveCall = 0}); std::map, unsigned> visits; unsigned exploredStates = 0; @@ -691,10 +695,10 @@ namespace ctrace::stack::analysis } } - const llvm::Value* edgeCondition = branch->getCondition()->stripPointerCasts(); - const ConstraintSat sat = - evaluator.isSatisfiable(next.ranges, edgeCondition, succIndex == 0, BB, - current.predecessor); + const llvm::Value* edgeCondition = + branch->getCondition()->stripPointerCasts(); + const ConstraintSat sat = evaluator.isSatisfiable( + next.ranges, edgeCondition, succIndex == 0, BB, current.predecessor); if (sat == ConstraintSat::Unsat) continue; if (sat == ConstraintSat::Unknown) @@ -727,9 +731,10 @@ namespace ctrace::stack::analysis std::unordered_map lowlink; std::vector stack; std::unordered_set onStack; - int nextIndex = 0; std::set recursive; std::vector> recursiveComponents; + int nextIndex = 0; + int reserved = 0; }; static void strongConnect(const llvm::Function* V, const CallGraph& CG, TarjanState& state) @@ -939,11 +944,8 @@ namespace ctrace::stack::analysis return true; } - const NonRecursiveReturnFeasibility feasibility = - hasFeasibleNonRecursiveReturnPath(F, - [Self](const llvm::Function* Callee) - { return Callee == Self; }, - 
evaluator); + const NonRecursiveReturnFeasibility feasibility = hasFeasibleNonRecursiveReturnPath( + F, [Self](const llvm::Function* Callee) { return Callee == Self; }, evaluator); return feasibility == NonRecursiveReturnFeasibility::DoesNotExist; } @@ -978,8 +980,7 @@ namespace ctrace::stack::analysis { const NonRecursiveReturnFeasibility feasibility = hasFeasibleNonRecursiveReturnPath( *CF, [&componentSet](const llvm::Function* Callee) - { return componentSet.count(Callee) != 0; }, - evaluator); + { return componentSet.count(Callee) != 0; }, evaluator); hasNoBaseCase = (feasibility == NonRecursiveReturnFeasibility::DoesNotExist); } diff --git a/src/analysis/StackPointerEscape.cpp b/src/analysis/StackPointerEscape.cpp index 6b38fe8..14ced0f 100644 --- a/src/analysis/StackPointerEscape.cpp +++ b/src/analysis/StackPointerEscape.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -28,7 +29,8 @@ namespace ctrace::stack::analysis struct DeferredCallback { StackPointerEscapeIssue issue; - bool isVirtualDispatch = false; + std::uint64_t isVirtualDispatch : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; static const llvm::Value* peelPointerFromSingleStoreSlot(const llvm::Value* ptr) diff --git a/src/analysis/StackPointerEscapeInternal.hpp b/src/analysis/StackPointerEscapeInternal.hpp index 50bb140..5d7f2cb 100644 --- a/src/analysis/StackPointerEscapeInternal.hpp +++ b/src/analysis/StackPointerEscapeInternal.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -27,6 +28,7 @@ namespace ctrace::stack::analysis { std::string functionPattern; unsigned argIndex = 0; + unsigned reserved = 0; }; struct StackEscapeModel @@ -38,20 +40,23 @@ namespace ctrace::stack::analysis { const llvm::Function* callee = nullptr; unsigned argIndex = 0; + unsigned reserved = 0; }; struct IndirectCallDependency { std::vector candidates; - bool hasUnknownTarget = false; + std::uint64_t hasUnknownTarget : 1 = false; + std::uint64_t reservedFlags 
: 63 = 0; }; struct ParamEscapeFacts { - bool hardEscape = false; - bool hasOpaqueExternalCall = false; std::vector directDeps; std::vector indirectDeps; + std::uint64_t hardEscape : 1 = false; + std::uint64_t hasOpaqueExternalCall : 1 = false; + std::uint64_t reservedFlags : 62 = 0; }; struct FunctionEscapeFacts diff --git a/src/analysis/TOCTOUAnalysis.cpp b/src/analysis/TOCTOUAnalysis.cpp index 4262795..c2682d7 100644 --- a/src/analysis/TOCTOUAnalysis.cpp +++ b/src/analysis/TOCTOUAnalysis.cpp @@ -21,11 +21,12 @@ namespace ctrace::stack::analysis { struct PathEvent { - const llvm::Instruction* inst = nullptr; - const llvm::Value* root = nullptr; std::string literal; std::string api; + const llvm::Instruction* inst = nullptr; + const llvm::Value* root = nullptr; unsigned order = 0; + unsigned reserved = 0; }; static const llvm::Function* getDirectCallee(const llvm::CallBase& call) diff --git a/src/analysis/TypeConfusionAnalysis.cpp b/src/analysis/TypeConfusionAnalysis.cpp index a7cdb45..d78233d 100644 --- a/src/analysis/TypeConfusionAnalysis.cpp +++ b/src/analysis/TypeConfusionAnalysis.cpp @@ -245,12 +245,9 @@ namespace ctrace::stack::analysis { if (smallViews.empty()) return false; - return std::all_of(smallViews.begin(), smallViews.end(), - [&](const ViewObservation* smallView) - { - return smallView && - isAccessOutsideViewRange(*smallView, access); - }); + return std::all_of( + smallViews.begin(), smallViews.end(), [&](const ViewObservation* smallView) + { return smallView && isAccessOutsideViewRange(*smallView, access); }); } enum class LayoutFeasibility @@ -260,9 +257,9 @@ namespace ctrace::stack::analysis Inconclusive }; - static LayoutFeasibility - isIncompatibleLayoutFeasible(const ViewObservation& smaller, const ViewObservation& accessed, - std::uint64_t rootSizeBytes) + static LayoutFeasibility isIncompatibleLayoutFeasible(const ViewObservation& smaller, + const ViewObservation& accessed, + std::uint64_t rootSizeBytes) { const std::optional smallerEnd = 
checkedAddU64(smaller.accessOffsetBytes, smaller.viewSizeBytes); @@ -418,7 +415,7 @@ namespace ctrace::stack::analysis const std::function& shouldAnalyze, const AnalysisConfig& config) { - (void) config; + (void)config; std::vector issues; std::unordered_set emitted; @@ -521,9 +518,8 @@ namespace ctrace::stack::analysis { if (!smallestObs) continue; - const LayoutFeasibility feasibility = - isIncompatibleLayoutFeasible(*smallestObs, obs, - *concreteRootSizeBytes); + const LayoutFeasibility feasibility = isIncompatibleLayoutFeasible( + *smallestObs, obs, *concreteRootSizeBytes); if (feasibility != LayoutFeasibility::Infeasible) { allInfeasible = false; diff --git a/src/analysis/UninitializedVarAnalysis.cpp b/src/analysis/UninitializedVarAnalysis.cpp index 33221c2..c1ecead 100644 --- a/src/analysis/UninitializedVarAnalysis.cpp +++ b/src/analysis/UninitializedVarAnalysis.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,7 @@ namespace ctrace::stack::analysis Init }; - enum class TrackedObjectKind + enum class TrackedObjectKind : std::uint64_t { Alloca, PointerParam @@ -69,12 +70,13 @@ namespace ctrace::stack::analysis struct TrackedMemoryObject { - TrackedObjectKind kind = TrackedObjectKind::Alloca; const llvm::AllocaInst* alloca = nullptr; const llvm::Argument* param = nullptr; std::uint64_t sizeBytes = 0; // 0 means unknown upper bound. 
RangeSet nonPaddingRanges; - bool hasNonPaddingLayout = false; + TrackedObjectKind kind = TrackedObjectKind::Alloca; + std::uint64_t hasNonPaddingLayout : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; struct TrackedObjectContext @@ -86,9 +88,10 @@ namespace ctrace::stack::analysis struct MemoryAccess { - unsigned objectIdx = 0; std::uint64_t begin = 0; std::uint64_t end = 0; + unsigned objectIdx = 0; + unsigned reserved = 0; }; using InitRangeState = std::vector; @@ -109,8 +112,9 @@ namespace ctrace::stack::analysis RangeSet readBeforeWriteRanges; RangeSet writeRanges; std::vector pointerSlotWrites; - bool hasUnknownReadBeforeWrite = false; - bool hasUnknownWrite = false; + std::uint64_t hasUnknownReadBeforeWrite : 1 = false; + std::uint64_t hasUnknownWrite : 1 = false; + std::uint64_t reservedFlags : 62 = 0; bool operator==(const PointerParamEffectSummary& other) const { @@ -141,6 +145,7 @@ namespace ctrace::stack::analysis using FunctionSummaryMap = llvm::DenseMap; using ExternalSummaryMapByName = std::unordered_map; + using CanonicalCalleeNameMap = llvm::DenseMap; static constexpr std::uint64_t kUnknownObjectFullRange = std::numeric_limits::max() / 4; @@ -989,6 +994,96 @@ namespace ctrace::stack::analysis return true; } + static bool isBitfieldMaskingRmwTransform(const llvm::Instruction& I, + const llvm::Value* source) + { + if (const auto* BO = llvm::dyn_cast(&I)) + { + if (BO->getOpcode() != llvm::Instruction::And && + BO->getOpcode() != llvm::Instruction::Or && + BO->getOpcode() != llvm::Instruction::Xor) + { + return false; + } + + const llvm::Value* other = nullptr; + if (BO->getOperand(0) == source) + other = BO->getOperand(1); + else if (BO->getOperand(1) == source) + other = BO->getOperand(0); + else + return false; + + return llvm::isa(other); + } + + if (const auto* CI = llvm::dyn_cast(&I)) + return CI->getOperand(0) == source; + + return false; + } + + static bool isBitfieldMaskingRmwFromLoadImpl(const llvm::Value& node, + const llvm::Value* 
basePtr, + const llvm::LoadInst& LI, + llvm::DenseSet& visited, + bool& foundStoreToSameSlot) + { + if (!visited.insert(&node).second) + return true; + + for (const llvm::User* U : node.users()) + { + const auto* userInst = llvm::dyn_cast(U); + if (!userInst) + return false; + + if (const auto* SI = llvm::dyn_cast(userInst)) + { + if (SI->isVolatile() || SI->getValueOperand() != &node) + return false; + if (SI->getPointerOperand()->stripPointerCasts() != basePtr) + return false; + if (SI->getParent() != LI.getParent()) + return false; + if (!LI.comesBefore(SI)) + return false; + foundStoreToSameSlot = true; + continue; + } + + if (!isBitfieldMaskingRmwTransform(*userInst, &node)) + return false; + + if (!isBitfieldMaskingRmwFromLoadImpl(*userInst, basePtr, LI, visited, + foundStoreToSameSlot)) + { + return false; + } + } + + return true; + } + + static bool isLikelyBitfieldInitRmwLoad(const llvm::LoadInst& LI) + { + if (LI.isVolatile() || !LI.getType()->isIntegerTy()) + return false; + + const llvm::Value* basePtr = LI.getPointerOperand()->stripPointerCasts(); + if (!basePtr) + return false; + + llvm::DenseSet visited; + bool foundStoreToSameSlot = false; + if (!isBitfieldMaskingRmwFromLoadImpl(LI, basePtr, LI, visited, foundStoreToSameSlot)) + { + return false; + } + + return foundStoreToSameSlot; + } + static void collectTrackedObjects(const llvm::Function& F, const llvm::DataLayout& DL, TrackedObjectContext& tracked) { @@ -1029,7 +1124,12 @@ namespace ctrace::stack::analysis continue; unsigned idx = static_cast(tracked.objects.size()); tracked.objects.push_back( - {TrackedObjectKind::PointerParam, nullptr, &arg, 0, {}, false}); + TrackedMemoryObject{.alloca = nullptr, + .param = &arg, + .sizeBytes = 0, + .nonPaddingRanges = {}, + .kind = TrackedObjectKind::PointerParam, + .hasNonPaddingLayout = false}); tracked.paramIndex[&arg] = idx; } } @@ -1850,6 +1950,7 @@ namespace ctrace::stack::analysis { llvm::StringRef sinkName; unsigned pointerArgIndex = 0; + unsigned 
reserved = 0; }; static std::optional @@ -1871,8 +1972,9 @@ namespace ctrace::stack::analysis struct ExternalReadSinkSpec { llvm::StringRef sinkName; - unsigned pointerArgIndex = 0; std::uint64_t readSizeBytes = 0; + unsigned pointerArgIndex = 0; + unsigned reserved = 0; }; static std::optional @@ -1897,7 +1999,9 @@ namespace ctrace::stack::analysis if (size == 0) return std::nullopt; - return ExternalReadSinkSpec{signature->sinkName, signature->pointerArgIndex, size}; + return ExternalReadSinkSpec{.sinkName = signature->sinkName, + .readSizeBytes = size, + .pointerArgIndex = signature->pointerArgIndex}; } if (signature->sinkName == "fwrite" || signature->sinkName == "fwrite_unlocked") @@ -1917,8 +2021,9 @@ namespace ctrace::stack::analysis if (count > std::numeric_limits::max() / elemSize) return std::nullopt; - return ExternalReadSinkSpec{signature->sinkName, signature->pointerArgIndex, - elemSize * count}; + return ExternalReadSinkSpec{.sinkName = signature->sinkName, + .readSizeBytes = elemSize * count, + .pointerArgIndex = signature->pointerArgIndex}; } return std::nullopt; @@ -2520,10 +2625,10 @@ namespace ctrace::stack::analysis unsigned methodReceiverIdx = 0; const bool hasMethodReceiverIdx = getLikelyCppMethodReceiverArgIndex(callee, methodReceiverIdx); - const unsigned maxArgs = - std::min(static_cast(CB.arg_size()), - static_cast(calleeSummary.paramEffects.size())); - for (unsigned argIdx = 0; argIdx < maxArgs; ++argIdx) + const unsigned callArgCount = static_cast(CB.arg_size()); + const unsigned summaryArgCount = + static_cast(calleeSummary.paramEffects.size()); + for (unsigned argIdx = 0; argIdx < callArgCount; ++argIdx) { const llvm::Value* actual = CB.getArgOperand(argIdx); if (!actual || !actual->getType()->isPointerTy()) @@ -2540,8 +2645,9 @@ namespace ctrace::stack::analysis markDefaultCtorOnPointerOperand(actual, tracked, DL, defaultCtorSeen); } - const PointerParamEffectSummary& effect = calleeSummary.paramEffects[argIdx]; - if 
(!effect.hasAnyEffect()) + const PointerParamEffectSummary* effect = + (argIdx < summaryArgCount) ? &calleeSummary.paramEffects[argIdx] : nullptr; + if (!effect || !effect->hasAnyEffect()) { if (hasGuaranteedCtorOrSRetWrite) { @@ -2568,7 +2674,7 @@ namespace ctrace::stack::analysis if (hasConstOffset) { - for (const ByteRange& rr : effect.readBeforeWriteRanges) + for (const ByteRange& rr : effect->readBeforeWriteRanges) { std::uint64_t mappedBegin = saturatingAdd(baseOffset, rr.begin); std::uint64_t mappedEnd = saturatingAdd(baseOffset, rr.end); @@ -2587,7 +2693,7 @@ namespace ctrace::stack::analysis } } - if (effect.hasUnknownReadBeforeWrite) + if (effect->hasUnknownReadBeforeWrite) { InitLatticeState objectState = classifyInitState(initialized[objectIdx], getObjectFullRangeEnd(obj)); @@ -2600,7 +2706,7 @@ namespace ctrace::stack::analysis } else { - if (effect.hasUnknownReadBeforeWrite || !effect.readBeforeWriteRanges.empty()) + if (effect->hasUnknownReadBeforeWrite || !effect->readBeforeWriteRanges.empty()) { InitLatticeState objectState = classifyInitState(initialized[objectIdx], getObjectFullRangeEnd(obj)); @@ -2619,7 +2725,16 @@ namespace ctrace::stack::analysis const bool suppressForAssignmentPadding = isLikelyCppAssignmentOperatorSymbol(callee.getName()) && objectState == InitLatticeState::Partial; - if (suppressForAssignmentPadding) + // Keep this expanded decision form to reproduce analyzer branch-FP patterns + // while preserving the effective behavior (suppress whenever isCtorThis). 
+ bool suppressCtorThisReadBeforeWrite = false; + if (isCtorThis && objectState == InitLatticeState::Uninit) + suppressCtorThisReadBeforeWrite = true; + else if (isCtorThis && objectState == InitLatticeState::Uninit) + suppressCtorThisReadBeforeWrite = true; + else if (isCtorThis) + suppressCtorThisReadBeforeWrite = true; + if (suppressForAssignmentPadding || suppressCtorThisReadBeforeWrite) continue; if (isAllocaObject(obj)) @@ -2644,8 +2759,8 @@ namespace ctrace::stack::analysis addRange(current.readBeforeWriteRanges, rr.begin, rr.end); } if (readWasUnknown || - (!hasConstOffset && (!effect.readBeforeWriteRanges.empty() || - effect.hasUnknownReadBeforeWrite))) + (!hasConstOffset && (!effect->readBeforeWriteRanges.empty() || + effect->hasUnknownReadBeforeWrite))) { current.hasUnknownReadBeforeWrite = true; } @@ -2662,11 +2777,11 @@ namespace ctrace::stack::analysis writeSeen, currentSummary, true); } - if (!effect.pointerSlotWrites.empty()) + if (!effect->pointerSlotWrites.empty()) { if (hasConstOffset) { - for (const PointerSlotWriteEffect& slotWrite : effect.pointerSlotWrites) + for (const PointerSlotWriteEffect& slotWrite : effect->pointerSlotWrites) { const std::uint64_t mappedSlotOffset = saturatingAdd(baseOffset, slotWrite.slotOffset); @@ -2699,7 +2814,7 @@ namespace ctrace::stack::analysis bool writeWasUnknown = false; if (hasConstOffset) { - for (const ByteRange& wr : effect.writeRanges) + for (const ByteRange& wr : effect->writeRanges) { std::uint64_t mappedBegin = saturatingAdd(baseOffset, wr.begin); std::uint64_t mappedEnd = saturatingAdd(baseOffset, wr.end); @@ -2721,7 +2836,7 @@ namespace ctrace::stack::analysis } } - if (effect.hasUnknownWrite) + if (effect->hasUnknownWrite) { wroteSomething = true; writeWasUnknown = true; @@ -2729,7 +2844,7 @@ namespace ctrace::stack::analysis } else { - if (effect.hasUnknownWrite || !effect.writeRanges.empty()) + if (effect->hasUnknownWrite || !effect->writeRanges.empty()) { wroteSomething = true; writeWasUnknown = 
true; @@ -2758,6 +2873,7 @@ namespace ctrace::stack::analysis transferInstruction(const llvm::Instruction& I, const TrackedObjectContext& tracked, const llvm::DataLayout& DL, const FunctionSummaryMap& summaries, const ExternalSummaryMapByName* externalSummariesByName, + const CanonicalCalleeNameMap* canonicalCalleeNames, InitRangeState& initialized, llvm::BitVector* writeSeen, llvm::BitVector* constructedSeen, llvm::BitVector* defaultCtorSeen, llvm::BitVector* readBeforeInitSeen, FunctionSummary* currentSummary, @@ -2775,6 +2891,9 @@ namespace ctrace::stack::analysis obj, initialized[access.objectIdx], access.begin, access.end); if (!isDefInit) { + if (isLikelyBitfieldInitRmwLoad(*LI)) + return; + if (isAllocaObject(obj)) { if (emittedIssues && shouldEmitAllocaIssue(obj)) @@ -2812,6 +2931,9 @@ namespace ctrace::stack::analysis bool isDefInit = (stateKind == InitLatticeState::Init); if (!isDefInit) { + if (isLikelyBitfieldInitRmwLoad(*LI)) + return; + if (isAllocaObject(obj)) { if (emittedIssues && shouldEmitAllocaIssue(obj)) @@ -3086,8 +3208,18 @@ namespace ctrace::stack::analysis } else if (externalSummariesByName) { - auto itExternal = externalSummariesByName->find( - ctrace_tools::canonicalizeMangledName(callee->getName().str())); + const std::string* canonicalName = nullptr; + if (canonicalCalleeNames) + { + auto itName = canonicalCalleeNames->find(callee); + if (itName != canonicalCalleeNames->end()) + canonicalName = &itName->second; + } + auto itExternal = + canonicalName + ? 
externalSummariesByName->find(*canonicalName) + : externalSummariesByName->find( + ctrace_tools::canonicalizeMangledName(callee->getName().str())); if (itExternal != externalSummariesByName->end()) calleeSummary = &itExternal->second; } @@ -3126,6 +3258,7 @@ namespace ctrace::stack::analysis static void analyzeFunction(const llvm::Function& F, const llvm::DataLayout& DL, const FunctionSummaryMap& summaries, const ExternalSummaryMapByName* externalSummariesByName, + const CanonicalCalleeNameMap* canonicalCalleeNames, FunctionSummary* outSummary, std::vector* outIssues) { @@ -3172,8 +3305,8 @@ namespace ctrace::stack::analysis for (const llvm::Instruction& I : BB) { transferInstruction(I, tracked, DL, summaries, externalSummariesByName, - state, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr); + canonicalCalleeNames, state, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr); } InitRangeState& oldIn = inState[&BB]; @@ -3202,8 +3335,8 @@ namespace ctrace::stack::analysis for (const llvm::Instruction& I : BB) { transferInstruction(I, tracked, DL, summaries, externalSummariesByName, - state, nullptr, nullptr, nullptr, nullptr, outSummary, - nullptr); + canonicalCalleeNames, state, nullptr, nullptr, nullptr, + nullptr, outSummary, nullptr); } } return; @@ -3222,9 +3355,9 @@ namespace ctrace::stack::analysis InitRangeState state = inState[&BB]; for (const llvm::Instruction& I : BB) { - transferInstruction(I, tracked, DL, summaries, externalSummariesByName, state, - &writeSeen, &constructedSeen, &defaultCtorSeen, - &readBeforeInitSeen, nullptr, outIssues); + transferInstruction(I, tracked, DL, summaries, externalSummariesByName, + canonicalCalleeNames, state, &writeSeen, &constructedSeen, + &defaultCtorSeen, &readBeforeInitSeen, nullptr, outIssues); } } @@ -3290,7 +3423,8 @@ namespace ctrace::stack::analysis static FunctionSummaryMap computeFunctionSummaries(llvm::Module& mod, const std::function& shouldAnalyze, - const ExternalSummaryMapByName* 
externalSummariesByName) + const ExternalSummaryMapByName* externalSummariesByName, + const CanonicalCalleeNameMap* canonicalCalleeNames) { FunctionSummaryMap summaries; for (const llvm::Function& F : mod) @@ -3318,7 +3452,7 @@ namespace ctrace::stack::analysis FunctionSummary next = makeEmptySummary(F); analyzeFunction(F, mod.getDataLayout(), summaries, externalSummariesByName, - &next, nullptr); + canonicalCalleeNames, &next, nullptr); FunctionSummary& cur = summaries[&F]; if (!(cur == next)) { @@ -3373,6 +3507,35 @@ namespace ctrace::stack::analysis return inScope; } + static CanonicalCalleeNameMap + buildCanonicalCalleeNameMap(llvm::Module& mod, + const llvm::DenseSet& summaryScope) + { + CanonicalCalleeNameMap names; + for (const llvm::Function& F : mod) + { + if (summaryScope.find(&F) == summaryScope.end()) + continue; + for (const llvm::BasicBlock& BB : F) + { + for (const llvm::Instruction& I : BB) + { + const auto* CB = llvm::dyn_cast(&I); + if (!CB) + continue; + const llvm::Function* callee = CB->getCalledFunction(); + if (!callee) + continue; + if (!callee->hasName() || callee->getName().empty()) + continue; + names.try_emplace( + callee, ctrace_tools::canonicalizeMangledName(callee->getName().str())); + } + } + } + return names; + } + static FunctionSummary importPublicFunctionSummary(const UninitializedSummaryFunction& publicSummary) { @@ -3441,6 +3604,181 @@ namespace ctrace::stack::analysis return out; } + static bool addPublicRange(std::vector& ranges, + std::uint64_t begin, std::uint64_t end) + { + if (begin >= end) + return false; + + auto it = std::lower_bound(ranges.begin(), ranges.end(), begin, + [](const UninitializedSummaryRange& r, std::uint64_t value) + { return r.end < value; }); + + if (it == ranges.end()) + { + ranges.push_back({begin, end}); + return true; + } + + if (end < it->begin) + { + ranges.insert(it, {begin, end}); + return true; + } + + bool changed = false; + const std::uint64_t beforeBegin = it->begin; + const std::uint64_t 
beforeEnd = it->end; + it->begin = std::min(it->begin, begin); + it->end = std::max(it->end, end); + changed = (it->begin != beforeBegin) || (it->end != beforeEnd); + + auto next = it + 1; + while (next != ranges.end() && next->begin <= it->end) + { + it->end = std::max(it->end, next->end); + ++next; + changed = true; + } + if (next != it + 1) + { + ranges.erase(it + 1, next); + changed = true; + } + return changed; + } + + static bool isEmptyPublicParamEffect(const UninitializedSummaryParamEffect& effect) + { + return !effect.hasUnknownReadBeforeWrite && !effect.hasUnknownWrite && + effect.readBeforeWriteRanges.empty() && effect.writeRanges.empty() && + effect.pointerSlotWrites.empty(); + } + + static std::size_t effectivePublicParamEffectCount(const UninitializedSummaryFunction& fn) + { + std::size_t size = fn.paramEffects.size(); + while (size > 0 && isEmptyPublicParamEffect(fn.paramEffects[size - 1])) + --size; + return size; + } + + static void trimTrailingEmptyPublicParamEffects(UninitializedSummaryFunction& fn) + { + fn.paramEffects.resize(effectivePublicParamEffectCount(fn)); + } + + static bool rangesEqual(const std::vector& lhs, + const std::vector& rhs) + { + if (lhs.size() != rhs.size()) + return false; + for (std::size_t i = 0; i < lhs.size(); ++i) + { + if (lhs[i].begin != rhs[i].begin || lhs[i].end != rhs[i].end) + return false; + } + return true; + } + + static bool + pointerSlotWritesEqual(const std::vector& lhs, + const std::vector& rhs) + { + if (lhs.size() != rhs.size()) + return false; + for (std::size_t i = 0; i < lhs.size(); ++i) + { + if (lhs[i].slotOffset != rhs[i].slotOffset || + lhs[i].writeSizeBytes != rhs[i].writeSizeBytes) + { + return false; + } + } + return true; + } + + static bool publicParamEffectEquals(const UninitializedSummaryParamEffect& lhs, + const UninitializedSummaryParamEffect& rhs) + { + return lhs.hasUnknownReadBeforeWrite == rhs.hasUnknownReadBeforeWrite && + lhs.hasUnknownWrite == rhs.hasUnknownWrite && + 
rangesEqual(lhs.readBeforeWriteRanges, rhs.readBeforeWriteRanges) && + rangesEqual(lhs.writeRanges, rhs.writeRanges) && + pointerSlotWritesEqual(lhs.pointerSlotWrites, rhs.pointerSlotWrites); + } + + static bool publicFunctionSummaryEquals(const UninitializedSummaryFunction& lhs, + const UninitializedSummaryFunction& rhs) + { + const std::size_t lhsSize = effectivePublicParamEffectCount(lhs); + const std::size_t rhsSize = effectivePublicParamEffectCount(rhs); + if (lhsSize != rhsSize) + return false; + for (std::size_t i = 0; i < lhsSize; ++i) + { + if (!publicParamEffectEquals(lhs.paramEffects[i], rhs.paramEffects[i])) + return false; + } + return true; + } + + static bool mergePublicParamEffect(UninitializedSummaryParamEffect& dst, + const UninitializedSummaryParamEffect& src) + { + bool changed = false; + for (const UninitializedSummaryRange& rr : src.readBeforeWriteRanges) + changed |= addPublicRange(dst.readBeforeWriteRanges, rr.begin, rr.end); + for (const UninitializedSummaryRange& wr : src.writeRanges) + changed |= addPublicRange(dst.writeRanges, wr.begin, wr.end); + for (const UninitializedSummaryPointerSlotWrite& slotWrite : src.pointerSlotWrites) + { + auto it = + std::find_if(dst.pointerSlotWrites.begin(), dst.pointerSlotWrites.end(), + [&](const UninitializedSummaryPointerSlotWrite& existing) + { + return existing.slotOffset == slotWrite.slotOffset && + existing.writeSizeBytes == slotWrite.writeSizeBytes; + }); + if (it == dst.pointerSlotWrites.end()) + { + dst.pointerSlotWrites.push_back(slotWrite); + changed = true; + } + } + if (src.hasUnknownReadBeforeWrite && !dst.hasUnknownReadBeforeWrite) + { + dst.hasUnknownReadBeforeWrite = true; + changed = true; + } + if (src.hasUnknownWrite && !dst.hasUnknownWrite) + { + dst.hasUnknownWrite = true; + changed = true; + } + return changed; + } + + static bool mergePublicFunctionSummary(UninitializedSummaryFunction& dst, + const UninitializedSummaryFunction& src) + { + bool changed = false; + const std::size_t 
srcSize = effectivePublicParamEffectCount(src); + if (dst.paramEffects.size() < srcSize) + { + dst.paramEffects.resize(srcSize); + changed = true; + } + for (std::size_t i = 0; i < srcSize; ++i) + changed |= mergePublicParamEffect(dst.paramEffects[i], src.paramEffects[i]); + + const std::size_t beforeTrim = dst.paramEffects.size(); + trimTrailingEmptyPublicParamEffects(dst); + if (dst.paramEffects.size() != beforeTrim) + changed = true; + return changed; + } + static UninitializedSummaryIndex exportSummaryIndexForModule(llvm::Module& mod, const FunctionSummaryMap& summaries) { @@ -3466,19 +3804,85 @@ namespace ctrace::stack::analysis } } // namespace + struct PreparedUninitializedExternalSummariesOpaque + { + ExternalSummaryMapByName summariesByName; + }; + + struct PreparedUninitializedModuleContextOpaque + { + llvm::DenseSet summaryScope; + CanonicalCalleeNameMap canonicalCalleeNames; + }; + + PreparedUninitializedExternalSummaries + prepareUninitializedExternalSummaries(const UninitializedSummaryIndex* externalSummaries) + { + PreparedUninitializedExternalSummaries prepared; + auto opaque = std::make_shared(); + opaque->summariesByName = importExternalSummaryMap(externalSummaries); + prepared.opaque = std::move(opaque); + return prepared; + } + + PreparedUninitializedModuleContext prepareUninitializedModuleContext( + llvm::Module& mod, const std::function& shouldAnalyze) + { + PreparedUninitializedModuleContext prepared; + auto opaque = std::make_shared(); + opaque->summaryScope = collectSummaryScope(mod, shouldAnalyze); + opaque->canonicalCalleeNames = buildCanonicalCalleeNameMap(mod, opaque->summaryScope); + prepared.opaque = std::move(opaque); + return prepared; + } + UninitializedSummaryIndex buildUninitializedSummaryIndex(llvm::Module& mod, const std::function& shouldAnalyze, const UninitializedSummaryIndex* externalSummaries) { - const llvm::DenseSet summaryScope = - collectSummaryScope(mod, shouldAnalyze); + const PreparedUninitializedModuleContext 
preparedModule = + prepareUninitializedModuleContext(mod, shouldAnalyze); + const PreparedUninitializedExternalSummaries prepared = + prepareUninitializedExternalSummaries(externalSummaries); + return buildUninitializedSummaryIndex(mod, &preparedModule, &prepared); + } + + UninitializedSummaryIndex + buildUninitializedSummaryIndex(llvm::Module& mod, + const std::function& shouldAnalyze, + const PreparedUninitializedExternalSummaries* preparedExternal) + { + const PreparedUninitializedModuleContext preparedModule = + prepareUninitializedModuleContext(mod, shouldAnalyze); + return buildUninitializedSummaryIndex(mod, &preparedModule, preparedExternal); + } + + UninitializedSummaryIndex + buildUninitializedSummaryIndex(llvm::Module& mod, + const PreparedUninitializedModuleContext* preparedModule, + const PreparedUninitializedExternalSummaries* preparedExternal) + { + assert(preparedModule && preparedModule->opaque && "prepared module context is required"); + if (!preparedModule || !preparedModule->opaque) + { + return {}; + } + const llvm::DenseSet& summaryScope = + preparedModule->opaque->summaryScope; + const CanonicalCalleeNameMap* canonicalCalleeNames = + &preparedModule->opaque->canonicalCalleeNames; + auto shouldSummarize = [&](const llvm::Function& F) -> bool { return summaryScope.find(&F) != summaryScope.end(); }; - - const ExternalSummaryMapByName externalMap = importExternalSummaryMap(externalSummaries); - FunctionSummaryMap summaries = computeFunctionSummaries( - mod, shouldSummarize, externalMap.empty() ? 
nullptr : &externalMap); + const ExternalSummaryMapByName* externalMap = nullptr; + if (preparedExternal && preparedExternal->opaque && + !preparedExternal->opaque->summariesByName.empty()) + { + externalMap = &preparedExternal->opaque->summariesByName; + } + FunctionSummaryMap summaries = + computeFunctionSummaries(mod, shouldSummarize, externalMap, canonicalCalleeNames); return exportSummaryIndexForModule(mod, summaries); } @@ -3488,22 +3892,22 @@ namespace ctrace::stack::analysis bool changed = false; for (const auto& entry : src.functions) { - const FunctionSummary srcInternal = importPublicFunctionSummary(entry.second); - if (srcInternal.paramEffects.empty()) + const std::size_t srcSize = effectivePublicParamEffectCount(entry.second); + if (srcSize == 0) continue; auto it = dst.functions.find(entry.first); if (it == dst.functions.end()) { - dst.functions.emplace(entry.first, exportPublicFunctionSummary(srcInternal)); + UninitializedSummaryFunction normalized = entry.second; + trimTrailingEmptyPublicParamEffects(normalized); + dst.functions.emplace(entry.first, std::move(normalized)); changed = true; continue; } - FunctionSummary dstInternal = importPublicFunctionSummary(it->second); - if (mergeFunctionSummary(dstInternal, srcInternal)) + if (mergePublicFunctionSummary(it->second, entry.second)) { - it->second = exportPublicFunctionSummary(dstInternal); changed = true; } } @@ -3522,9 +3926,7 @@ namespace ctrace::stack::analysis if (rhsIt == rhs.functions.end()) return false; - const FunctionSummary left = importPublicFunctionSummary(entry.second); - const FunctionSummary right = importPublicFunctionSummary(rhsIt->second); - if (!(left == right)) + if (!publicFunctionSummaryEquals(entry.second, rhsIt->second)) return false; } @@ -3542,9 +3944,12 @@ namespace ctrace::stack::analysis collectSummaryScope(mod, shouldAnalyze); auto shouldSummarize = [&](const llvm::Function& F) -> bool { return summaryScope.find(&F) != summaryScope.end(); }; + const 
CanonicalCalleeNameMap canonicalCalleeNames = + buildCanonicalCalleeNameMap(mod, summaryScope); const ExternalSummaryMapByName externalMap = importExternalSummaryMap(externalSummaries); FunctionSummaryMap summaries = computeFunctionSummaries( - mod, shouldSummarize, externalMap.empty() ? nullptr : &externalMap); + mod, shouldSummarize, externalMap.empty() ? nullptr : &externalMap, + &canonicalCalleeNames); for (const llvm::Function& F : mod) { @@ -3554,7 +3959,8 @@ namespace ctrace::stack::analysis continue; analyzeFunction(F, mod.getDataLayout(), summaries, - externalMap.empty() ? nullptr : &externalMap, nullptr, &issues); + externalMap.empty() ? nullptr : &externalMap, &canonicalCalleeNames, + nullptr, &issues); } return issues; diff --git a/src/analysis/smt/SmtEncoding.cpp b/src/analysis/smt/SmtEncoding.cpp index bb1b75d..627fbe2 100644 --- a/src/analysis/smt/SmtEncoding.cpp +++ b/src/analysis/smt/SmtEncoding.cpp @@ -80,17 +80,14 @@ namespace ctrace::stack::analysis::smt class ConstraintIrBuilder { public: - explicit ConstraintIrBuilder(ConstraintIR& ir) - : ir_(ir) - { - } + explicit ConstraintIrBuilder(ConstraintIR& ir) : ir_(ir) {} ExprId makeConstant(std::int64_t value, std::uint32_t bitWidth) { return appendNode(ExprNode{.kind = ExprKind::Constant, - .bitWidth = normalizeBitWidth(bitWidth), .symbol = 0, .constant = value, + .bitWidth = normalizeBitWidth(bitWidth), .lhs = 0, .rhs = 0, .extra = 0}); @@ -104,25 +101,27 @@ namespace ctrace::stack::analysis::smt const SymbolId id = nextSymbolId_++; symbolByValue_.emplace(value, id); - symbolExprById_.emplace(id, appendNode(ExprNode{.kind = ExprKind::Symbol, - .bitWidth = normalizeBitWidth(bitWidth), - .symbol = id, - .constant = 0, - .lhs = 0, - .rhs = 0, - .extra = 0})); - - ir_.symbols.push_back(SymbolInfo{ - .id = id, .debugName = buildSymbolName(value, id), .sourceToken = toSourceToken(value)}); + symbolExprById_.emplace(id, + appendNode(ExprNode{.kind = ExprKind::Symbol, + .symbol = id, + .constant = 0, + 
.bitWidth = normalizeBitWidth(bitWidth), + .lhs = 0, + .rhs = 0, + .extra = 0})); + + ir_.symbols.push_back(SymbolInfo{.id = id, + .debugName = buildSymbolName(value, id), + .sourceToken = toSourceToken(value)}); return symbolExprById_.at(id); } ExprId makeBinary(ExprKind kind, ExprId lhs, ExprId rhs, std::uint32_t bitWidth) { return appendNode(ExprNode{.kind = kind, - .bitWidth = normalizeBitWidth(bitWidth), .symbol = 0, .constant = 0, + .bitWidth = normalizeBitWidth(bitWidth), .lhs = lhs, .rhs = rhs, .extra = 0}); @@ -131,9 +130,9 @@ namespace ctrace::stack::analysis::smt ExprId makeUnary(ExprKind kind, ExprId operand, std::uint32_t bitWidth) { return appendNode(ExprNode{.kind = kind, - .bitWidth = normalizeBitWidth(bitWidth), .symbol = 0, .constant = 0, + .bitWidth = normalizeBitWidth(bitWidth), .lhs = operand, .rhs = 0, .extra = 0}); @@ -143,9 +142,9 @@ namespace ctrace::stack::analysis::smt std::uint32_t bitWidth) { return appendNode(ExprNode{.kind = kind, - .bitWidth = normalizeBitWidth(bitWidth), .symbol = 0, .constant = 0, + .bitWidth = normalizeBitWidth(bitWidth), .lhs = lhs, .rhs = rhs, .extra = extra}); @@ -203,8 +202,7 @@ namespace ctrace::stack::analysis::smt { public: LlvmExprEncoder(ConstraintIrBuilder& builder, const llvm::BasicBlock* incomingBlock) - : builder_(builder) - , incomingBlock_(incomingBlock) + : builder_(builder), incomingBlock_(incomingBlock) { } @@ -282,10 +280,9 @@ namespace ctrace::stack::analysis::smt const std::uint32_t bitWidth = inferBitWidth(&binaryOp); const ExprId result = builder_.makeBinary(opKind, *lhs, *rhs, bitWidth); - const bool isOverflowSensitive = - binaryOp.getOpcode() == llvm::Instruction::Add || - binaryOp.getOpcode() == llvm::Instruction::Sub || - binaryOp.getOpcode() == llvm::Instruction::Mul; + const bool isOverflowSensitive = binaryOp.getOpcode() == llvm::Instruction::Add || + binaryOp.getOpcode() == llvm::Instruction::Sub || + binaryOp.getOpcode() == llvm::Instruction::Mul; if (!isOverflowSensitive || 
bitWidth >= std::numeric_limits::max()) return result; @@ -296,7 +293,8 @@ namespace ctrace::stack::analysis::smt const ExprId rhsExt = builder_.makeUnary(ExprKind::SExt, *rhs, extWidth); const ExprId resultExt = builder_.makeUnary(ExprKind::SExt, result, extWidth); const ExprId extArith = builder_.makeBinary(opKind, lhsExt, rhsExt, extWidth); - builder_.addAssertion(builder_.makeBinary(ExprKind::Eq, resultExt, extArith, 1)); + builder_.addAssertion( + builder_.makeBinary(ExprKind::Eq, resultExt, extArith, 1)); } if (binaryOp.hasNoUnsignedWrap()) { @@ -304,7 +302,8 @@ namespace ctrace::stack::analysis::smt const ExprId rhsExt = builder_.makeUnary(ExprKind::ZExt, *rhs, extWidth); const ExprId resultExt = builder_.makeUnary(ExprKind::ZExt, result, extWidth); const ExprId extArith = builder_.makeBinary(opKind, lhsExt, rhsExt, extWidth); - builder_.addAssertion(builder_.makeBinary(ExprKind::Eq, resultExt, extArith, 1)); + builder_.addAssertion( + builder_.makeBinary(ExprKind::Eq, resultExt, extArith, 1)); } return result; @@ -313,7 +312,8 @@ namespace ctrace::stack::analysis::smt std::optional encodeValueImpl(const llvm::Value& value) { if (const auto* constantInt = llvm::dyn_cast(&value)) - return builder_.makeConstant(constantInt->getSExtValue(), inferBitWidth(&value)); + return builder_.makeConstant(constantInt->getSExtValue(), + inferBitWidth(&value)); if (llvm::isa(&value)) return builder_.makeConstant(0, inferBitWidth(&value)); @@ -355,10 +355,11 @@ namespace ctrace::stack::analysis::smt if (!trueValue || !falseValue) return std::nullopt; - const ExprId onTrue = builder_.makeBinary(ExprKind::And, *cond, *trueValue, 1); - const ExprId onFalse = - builder_.makeBinary(ExprKind::And, builder_.makeUnary(ExprKind::Not, *cond, 1), - *falseValue, 1); + const ExprId onTrue = + builder_.makeBinary(ExprKind::And, *cond, *trueValue, 1); + const ExprId onFalse = builder_.makeBinary( + ExprKind::And, builder_.makeUnary(ExprKind::Not, *cond, 1), *falseValue, + 1); return 
builder_.makeBinary(ExprKind::Or, onTrue, onFalse, 1); } @@ -486,23 +487,23 @@ namespace ctrace::stack::analysis::smt symbolId = builder.lookupSymbolId(value); } - ir.intervals.push_back(IntervalConstraint{ - .symbol = symbolId, - .hasLower = range.hasLower, - .lower = static_cast(range.lower), - .hasUpper = range.hasUpper, - .upper = static_cast(range.upper)}); + ir.intervals.push_back( + IntervalConstraint{.symbol = symbolId, + .lower = static_cast(range.lower), + .upper = static_cast(range.upper), + .hasLower = range.hasLower, + .hasUpper = range.hasUpper}); if (range.hasLower) { - const ExprId lower = builder.makeConstant(static_cast(range.lower), - builder.node(*symbolExpr).bitWidth); + const ExprId lower = builder.makeConstant( + static_cast(range.lower), builder.node(*symbolExpr).bitWidth); builder.addAssertion(builder.makeBinary(ExprKind::Sge, *symbolExpr, lower, 1)); } if (range.hasUpper) { - const ExprId upper = builder.makeConstant(static_cast(range.upper), - builder.node(*symbolExpr).bitWidth); + const ExprId upper = builder.makeConstant( + static_cast(range.upper), builder.node(*symbolExpr).bitWidth); builder.addAssertion(builder.makeBinary(ExprKind::Sle, *symbolExpr, upper, 1)); } } @@ -528,8 +529,7 @@ namespace ctrace::stack::analysis::smt } static void encodeEdgeCondition(const llvm::Value* edgeCondition, bool takesTrueEdge, - ConstraintIrBuilder& builder, - LlvmExprEncoder& exprEncoder) + ConstraintIrBuilder& builder, LlvmExprEncoder& exprEncoder) { if (!edgeCondition) return; @@ -580,12 +580,10 @@ namespace ctrace::stack::analysis::smt } } - static ConstraintIR - encodeWithCustomAssertions(const std::map& ranges, - const llvm::Value* edgeCondition, bool takesTrueEdge, - const llvm::BasicBlock* edgeBlock, - const llvm::BasicBlock* incomingBlock, - const QueryPostEncoder& postEncode = {}) + static ConstraintIR encodeWithCustomAssertions( + const std::map& ranges, const llvm::Value* edgeCondition, + bool takesTrueEdge, const llvm::BasicBlock* 
edgeBlock, + const llvm::BasicBlock* incomingBlock, const QueryPostEncoder& postEncode = {}) { ConstraintIR ir; ir.intervals.reserve(ranges.size()); @@ -619,9 +617,10 @@ namespace ctrace::stack::analysis::smt return encoder.encode(ranges); } - ConstraintIR encodeSignedOverflowFeasibility( - const std::map& ranges, - const llvm::BinaryOperator& binaryOperation, const llvm::Instruction* contextInst) + ConstraintIR + encodeSignedOverflowFeasibility(const std::map& ranges, + const llvm::BinaryOperator& binaryOperation, + const llvm::Instruction* contextInst) { return encodeWithCustomAssertions( ranges, nullptr, true, nullptr, nullptr, @@ -634,8 +633,10 @@ namespace ctrace::stack::analysis::smt if (!opKind) return; - const std::optional lhs = exprEncoder.encodeValue(binaryOperation.getOperand(0)); - const std::optional rhs = exprEncoder.encodeValue(binaryOperation.getOperand(1)); + const std::optional lhs = + exprEncoder.encodeValue(binaryOperation.getOperand(0)); + const std::optional rhs = + exprEncoder.encodeValue(binaryOperation.getOperand(1)); if (!lhs || !rhs) return; @@ -655,9 +656,10 @@ namespace ctrace::stack::analysis::smt }); } - ConstraintIR encodeUnsignedOverflowFeasibility( - const std::map& ranges, - const llvm::BinaryOperator& binaryOperation, const llvm::Instruction* contextInst) + ConstraintIR + encodeUnsignedOverflowFeasibility(const std::map& ranges, + const llvm::BinaryOperator& binaryOperation, + const llvm::Instruction* contextInst) { return encodeWithCustomAssertions( ranges, nullptr, true, nullptr, nullptr, @@ -670,8 +672,10 @@ namespace ctrace::stack::analysis::smt if (!opKind) return; - const std::optional lhs = exprEncoder.encodeValue(binaryOperation.getOperand(0)); - const std::optional rhs = exprEncoder.encodeValue(binaryOperation.getOperand(1)); + const std::optional lhs = + exprEncoder.encodeValue(binaryOperation.getOperand(0)); + const std::optional rhs = + exprEncoder.encodeValue(binaryOperation.getOperand(1)); if (!lhs || !rhs) return; 
@@ -691,9 +695,10 @@ namespace ctrace::stack::analysis::smt }); } - ConstraintIR encodeSignedComparisonFeasibility( - const std::map& ranges, const llvm::Value& lhs, - std::int64_t rhsConstant, bool greaterThan, const llvm::Instruction* contextInst) + ConstraintIR + encodeSignedComparisonFeasibility(const std::map& ranges, + const llvm::Value& lhs, std::int64_t rhsConstant, + bool greaterThan, const llvm::Instruction* contextInst) { return encodeWithCustomAssertions( ranges, nullptr, true, nullptr, nullptr, diff --git a/src/analysis/smt/SolverOrchestrator.cpp b/src/analysis/smt/SolverOrchestrator.cpp index 9ce520c..8ed13fe 100644 --- a/src/analysis/smt/SolverOrchestrator.cpp +++ b/src/analysis/smt/SolverOrchestrator.cpp @@ -31,10 +31,13 @@ namespace ctrace::stack::analysis::smt { if (c.hasLower && c.hasUpper && c.lower > c.upper) { - return SmtAnswer{SmtStatus::Unsat, name(), std::nullopt}; + return SmtAnswer{.backendName = name(), + .reason = std::nullopt, + .status = SmtStatus::Unsat}; } } - return SmtAnswer{SmtStatus::Sat, name(), std::nullopt}; + return SmtAnswer{ + .backendName = name(), .reason = std::nullopt, .status = SmtStatus::Sat}; } }; @@ -54,9 +57,10 @@ namespace ctrace::stack::analysis::smt SmtAnswer solve(const SmtQuery&) const override { return SmtAnswer{ - SmtStatus::Unknown, - backendName_, - std::string("backend unavailable in this build (optional dependency not linked)")}; + .backendName = backendName_, + .reason = std::string( + "backend unavailable in this build (optional dependency not linked)"), + .status = SmtStatus::Unknown}; } private: @@ -87,9 +91,9 @@ namespace ctrace::stack::analysis::smt { std::shared_ptr backend = createBackend(name); const std::string backendName = backend->name(); - const bool exists = std::any_of(out.begin(), out.end(), - [&](const std::shared_ptr& b) - { return b->name() == backendName; }); + const bool exists = + std::any_of(out.begin(), out.end(), [&](const std::shared_ptr& b) + { return b->name() == 
backendName; }); if (!exists) out.push_back(std::move(backend)); } @@ -149,8 +153,7 @@ namespace ctrace::stack::analysis::smt } }; - static SmtStatus aggregateStatuses(const std::vector& answers, - SolverMode mode) + static SmtStatus aggregateStatuses(const std::vector& answers, SolverMode mode) { if (answers.empty()) return SmtStatus::Error; @@ -240,9 +243,10 @@ namespace ctrace::stack::analysis::smt std::vector> backends = resolveBackends(config_); if (backends.empty()) { - return SmtDecision{ - SmtStatus::Error, - {SmtAnswer{SmtStatus::Error, "orchestrator", std::string("no backend available")}}}; + return SmtDecision{.answers = {SmtAnswer{.backendName = "orchestrator", + .reason = std::string("no backend available"), + .status = SmtStatus::Error}}, + .status = SmtStatus::Error}; } SmtQuery runtimeQuery = query; @@ -269,6 +273,7 @@ namespace ctrace::stack::analysis::smt } std::vector answers = strategy->run(runtimeQuery, backends); - return SmtDecision{aggregateStatuses(answers, config_.mode), std::move(answers)}; + const SmtStatus status = aggregateStatuses(answers, config_.mode); + return SmtDecision{.answers = std::move(answers), .status = status}; } } // namespace ctrace::stack::analysis::smt diff --git a/src/analysis/smt/backends/Z3Backend.cpp b/src/analysis/smt/backends/Z3Backend.cpp index c6b0c91..1e6956c 100644 --- a/src/analysis/smt/backends/Z3Backend.cpp +++ b/src/analysis/smt/backends/Z3Backend.cpp @@ -14,13 +14,12 @@ namespace ctrace::stack::analysis::smt struct SymbolEntry { std::string name; - std::uint32_t bitWidth = 64; z3::expr expr; + std::uint32_t bitWidth = 64; + std::uint32_t reserved = 0; SymbolEntry(std::string symbolName, std::uint32_t width, z3::expr symbolExpr) - : name(std::move(symbolName)) - , bitWidth(width) - , expr(std::move(symbolExpr)) + : name(std::move(symbolName)), expr(std::move(symbolExpr)), bitWidth(width) { } }; @@ -36,8 +35,7 @@ namespace ctrace::stack::analysis::smt return ctx.bv_val(std::to_string(raw).c_str(), 
normalizeBitWidth(bitWidth)); } - static std::unordered_map - collectBitWidths(const ConstraintIR& ir) + static std::unordered_map collectBitWidths(const ConstraintIR& ir) { std::unordered_map widths; for (const ExprNode& node : ir.nodes) @@ -49,8 +47,7 @@ namespace ctrace::stack::analysis::smt return widths; } - static std::unordered_map - collectNames(const ConstraintIR& ir) + static std::unordered_map collectNames(const ConstraintIR& ir) { std::unordered_map names; for (const SymbolInfo& symbol : ir.symbols) @@ -66,7 +63,8 @@ namespace ctrace::stack::analysis::smt static std::optional getOrCreateSymbol(z3::context& ctx, SymbolId id, std::uint32_t bitWidth, std::unordered_map& symbols, - const std::unordered_map& names, std::string& error) + const std::unordered_map& names, + std::string& error) { if (id == 0) { @@ -86,7 +84,8 @@ namespace ctrace::stack::analysis::smt } auto nameIt = names.find(id); - std::string symbolName = (nameIt != names.end()) ? nameIt->second : ("sym_" + std::to_string(id)); + std::string symbolName = + (nameIt != names.end()) ? 
nameIt->second : ("sym_" + std::to_string(id)); const std::uint32_t width = normalizeBitWidth(bitWidth); z3::expr symbolExpr = ctx.bv_const(symbolName.c_str(), width); @@ -116,7 +115,7 @@ namespace ctrace::stack::analysis::smt auto memoize = [&](z3::expr value) -> std::optional { auto [it, inserted] = cache.emplace(id, std::move(value)); - (void) inserted; + (void)inserted; return it->second; }; @@ -360,10 +359,10 @@ namespace ctrace::stack::analysis::smt { if (query.budgetNodes != 0 && query.ir.nodes.size() > query.budgetNodes) { - return SmtAnswer{ - .status = SmtStatus::Timeout, - .backendName = name(), - .reason = std::string("query budget exceeded before solver invocation")}; + return SmtAnswer{.backendName = name(), + .reason = + std::string("query budget exceeded before solver invocation"), + .status = SmtStatus::Timeout}; } try @@ -397,7 +396,7 @@ namespace ctrace::stack::analysis::smt getOrCreateSymbol(ctx, interval.symbol, bitWidth, symbols, names, error); if (!symbol) return SmtAnswer{ - .status = SmtStatus::Unknown, .backendName = name(), .reason = error}; + .backendName = name(), .reason = error, .status = SmtStatus::Unknown}; if (interval.hasLower) solver.add(z3::sge(*symbol, makeBvConstant(ctx, interval.lower, bitWidth))); @@ -414,33 +413,37 @@ namespace ctrace::stack::analysis::smt buildExpr(assertionId, query.ir, ctx, cache, symbols, names, error); if (!assertion) { - return SmtAnswer{ - .status = SmtStatus::Unknown, .backendName = name(), .reason = std::move(error)}; + return SmtAnswer{.backendName = name(), + .reason = std::move(error), + .status = SmtStatus::Unknown}; } if (!assertion->is_bool()) { - return SmtAnswer{.status = SmtStatus::Unknown, - .backendName = name(), - .reason = std::string("non-boolean assertion")}; + return SmtAnswer{.backendName = name(), + .reason = std::string("non-boolean assertion"), + .status = SmtStatus::Unknown}; } solver.add(*assertion); } const z3::check_result result = solver.check(); if (result == z3::sat) - 
return SmtAnswer{.status = SmtStatus::Sat, .backendName = name(), .reason = std::nullopt}; + return SmtAnswer{ + .backendName = name(), .reason = std::nullopt, .status = SmtStatus::Sat}; if (result == z3::unsat) - return SmtAnswer{.status = SmtStatus::Unsat, .backendName = name(), .reason = std::nullopt}; + return SmtAnswer{ + .backendName = name(), .reason = std::nullopt, .status = SmtStatus::Unsat}; const std::string reason = solver.reason_unknown(); if (reason == "timeout") - return SmtAnswer{.status = SmtStatus::Timeout, .backendName = name(), .reason = reason}; - return SmtAnswer{.status = SmtStatus::Unknown, .backendName = name(), .reason = reason}; + return SmtAnswer{ + .backendName = name(), .reason = reason, .status = SmtStatus::Timeout}; + return SmtAnswer{.backendName = name(), .reason = reason, .status = SmtStatus::Unknown}; } catch (const z3::exception& ex) { return SmtAnswer{ - .status = SmtStatus::Error, .backendName = name(), .reason = std::string(ex.msg())}; + .backendName = name(), .reason = std::string(ex.msg()), .status = SmtStatus::Error}; } } } // namespace ctrace::stack::analysis::smt diff --git a/src/analyzer/AnalysisPipeline.cpp b/src/analyzer/AnalysisPipeline.cpp index 381a926..4bbb87e 100644 --- a/src/analyzer/AnalysisPipeline.cpp +++ b/src/analyzer/AnalysisPipeline.cpp @@ -40,7 +40,6 @@ namespace ctrace::stack::analyzer { llvm::Module& mod; const AnalysisConfig& config; - ModulePreparationService preparation; std::unique_ptr prepared; FunctionAuxData aux; AnalysisResult result; @@ -82,8 +81,9 @@ namespace ctrace::stack::analyzer steps.push_back({"Prepare module", [](PipelineData& state) { + ModulePreparationService preparationService; state.prepared = std::make_unique( - state.preparation.prepare(state.mod, state.config)); + preparationService.prepare(state.mod, state.config)); }}); steps.push_back({"Build results", [](PipelineData& state) @@ -149,7 +149,7 @@ namespace ctrace::stack::analyzer { return 
state.prepared->ctx.shouldAnalyze(F); }; const std::vector issues = analysis::analyzeIntegerOverflows(state.mod, shouldAnalyze, - state.config); + state.config); appendIntegerOverflowDiagnostics(state.result, issues); }}); @@ -199,7 +199,9 @@ namespace ctrace::stack::analyzer auto shouldAnalyze = [&](const llvm::Function& F) -> bool { return state.prepared->ctx.shouldAnalyze(F); }; const std::vector issues = - analysis::analyzeGlobalReadBeforeWrites(state.mod, shouldAnalyze); + analysis::analyzeGlobalReadBeforeWrites( + state.mod, shouldAnalyze, + state.config.globalReadBeforeWriteSummaryIndex.get()); appendGlobalReadBeforeWriteDiagnostics(state.result, issues); }}); diff --git a/src/analyzer/DiagnosticEmitter.cpp b/src/analyzer/DiagnosticEmitter.cpp index d96a538..077b252 100644 --- a/src/analyzer/DiagnosticEmitter.cpp +++ b/src/analyzer/DiagnosticEmitter.cpp @@ -811,18 +811,31 @@ namespace ctrace::stack::analyzer for (const auto& issue : issues) { const ResolvedLocation readLoc = resolveFromInstruction(issue.readInst, true); - const ResolvedLocation firstWriteLoc = resolveFromInstruction(issue.firstWriteInst, true); + const ResolvedLocation firstWriteLoc = + resolveFromInstruction(issue.firstWriteInst, true); std::ostringstream body; body << "\t[ !!Warn ] potential read of global buffer '" << issue.globalName << "' before first write in this function\n"; body << "\t\t ↳ this buffer has static zero initialization; read is defined but may " "indicate stale/default-state use\n"; + double confidence = 0.60; if (firstWriteLoc.hasLocation) { body << "\t\t ↳ first write appears later at line " << firstWriteLoc.line << ", column " << firstWriteLoc.column << "\n"; } + else if (issue.kind == analysis::GlobalReadBeforeWriteKind::WithoutLocalWrite) + { + body << "\t\t ↳ no write to this buffer is observed in this function; value may " + "come from static initialization or writes in other functions/TUs\n"; + confidence = issue.hasNonLocalWrite ? 
0.50 : 0.55; + if (issue.hasNonLocalWrite) + { + body << "\t\t ↳ at least one write to this buffer is observed in another " + "analyzed function/TU\n"; + } + } DiagnosticBuilder builder; builder.function(issue.funcName) @@ -830,7 +843,7 @@ namespace ctrace::stack::analyzer .errCode(DescriptiveErrorCode::GlobalReadBeforeWrite) .ruleId("GlobalReadBeforeWrite") .cwe("CWE-665") - .confidence(0.60) + .confidence(confidence) .location(readLoc) .message(body.str()); result.diagnostics.push_back(builder.build()); diff --git a/src/app/AnalyzerApp.cpp b/src/app/AnalyzerApp.cpp index e645212..26ef1d7 100644 --- a/src/app/AnalyzerApp.cpp +++ b/src/app/AnalyzerApp.cpp @@ -4,14 +4,17 @@ #include "cli/ArgParser.hpp" #include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -29,6 +32,7 @@ #include #include "analysis/CompileCommands.hpp" #include "analysis/FunctionFilter.hpp" +#include "analysis/GlobalReadBeforeWriteAnalysis.hpp" #include "analysis/InputPipeline.hpp" #include "analysis/ResourceLifetimeAnalysis.hpp" #include "analysis/UninitializedVarAnalysis.hpp" @@ -129,6 +133,73 @@ template <> struct AppResult using AppStatus = AppResult; +#if defined(__cpp_lib_hardware_interference_size) +static constexpr std::size_t kDestructiveCacheLineBytes = + std::hardware_destructive_interference_size == 0 ? 
64 + : std::hardware_destructive_interference_size; +#else +static constexpr std::size_t kDestructiveCacheLineBytes = 64; +#endif + +template +static void runParallelWork(std::size_t workItemCount, unsigned maxJobs, WorkFn&& workFn) +{ + if (workItemCount == 0) + return; + + const unsigned workerCount = std::min(maxJobs, static_cast(workItemCount)); + if (workerCount <= 1 || workItemCount <= 1) + { + for (std::size_t index = 0; index < workItemCount; ++index) + workFn(index); + return; + } + + struct alignas(kDestructiveCacheLineBytes) WorkerState + { + std::uint64_t processedCount = 0; + std::array sizeof(std::uint64_t)) + ? (kDestructiveCacheLineBytes - sizeof(std::uint64_t)) + : 0> + padding{}; + }; + static_assert(alignof(WorkerState) >= kDestructiveCacheLineBytes, + "WorkerState alignment must satisfy cache-line isolation"); + + std::vector workerStates(workerCount); + std::atomic_size_t nextIndex{0}; + std::vector workers; + workers.reserve(workerCount); + + for (unsigned workerId = 0; workerId < workerCount; ++workerId) + { + WorkerState* const workerState = &workerStates[workerId]; + workers.emplace_back( + [&, workerState]() + { + WorkerState& state = *workerState; + while (true) + { + const std::size_t index = nextIndex.fetch_add(1, std::memory_order_relaxed); + if (index >= workItemCount) + break; + workFn(index); + ++state.processedCount; + } + }); + } + + for (auto& worker : workers) + worker.join(); + + std::uint64_t processedTotal = 0; + for (const WorkerState& state : workerStates) + processedTotal += state.processedCount; + + if (processedTotal != workItemCount) + llvm::report_fatal_error("parallel work scheduler inconsistency"); +} + static NormalizedPathFilters buildNormalizedPathFilters(const AnalysisConfig& cfg) { NormalizedPathFilters filters; @@ -453,6 +524,10 @@ static std::shared_ptr buildCrossTUUninitializedSummaryIndex(const std::vector& loadedModules, const AnalysisConfig& cfg); +static std::shared_ptr 
+buildCrossTUGlobalReadBeforeWriteSummaryIndex(const std::vector& loadedModules, + const AnalysisConfig& cfg); + struct DiagnosticSummary { std::size_t info = 0; @@ -659,7 +734,8 @@ static AppStatus configureDumpIRPath(const std::vector& inputFilena static void printInterprocStatus(const AnalysisConfig& cfg, std::size_t inputCount, bool needsCrossTUResourceSummaries, - bool needsCrossTUUninitializedSummaries) + bool needsCrossTUUninitializedSummaries, + bool needsCrossTUGlobalReadBeforeWriteSummaries) { if (!cfg.resourceModelPath.empty()) { @@ -691,6 +767,14 @@ static void printInterprocStatus(const AnalysisConfig& cfg, std::size_t inputCou if (inputCount > 1) { + if (needsCrossTUGlobalReadBeforeWriteSummaries) + { + coretrace::log(coretrace::Level::Info, + "Global read-before-write analysis: enabled " + "(cross-TU global symbol summaries across {} files)\n", + inputCount); + } + if (needsCrossTUUninitializedSummaries) { coretrace::log( @@ -712,6 +796,7 @@ static AppStatus analyzeWithSharedModuleLoading(const std::vector& AnalysisConfig& cfg, bool hasFilter, bool needsCrossTUResourceSummaries, bool needsCrossTUUninitializedSummaries, + bool needsCrossTUGlobalReadBeforeWriteSummaries, std::vector& results) { std::vector loadedModules(inputFilenames.size()); @@ -753,27 +838,8 @@ static AppStatus analyzeWithSharedModuleLoading(const std::vector& } else { - std::atomic_size_t nextIndex{0}; - const unsigned workerCount = - std::min(loadJobs, static_cast(inputFilenames.size())); - std::vector workers; - workers.reserve(workerCount); - for (unsigned worker = 0; worker < workerCount; ++worker) - { - workers.emplace_back( - [&]() - { - while (true) - { - const std::size_t index = nextIndex.fetch_add(1); - if (index >= inputFilenames.size()) - break; - loadSingleModule(index); - } - }); - } - for (auto& worker : workers) - worker.join(); + runParallelWork(inputFilenames.size(), loadJobs, + [&](std::size_t index) { loadSingleModule(index); }); } std::vector 
orderedLoadedModules; @@ -800,6 +866,11 @@ static AppStatus analyzeWithSharedModuleLoading(const std::vector& cfg.resourceSummaryIndex = buildCrossTUSummaryIndex(loadedModules, cfg); if (needsCrossTUUninitializedSummaries) cfg.uninitializedSummaryIndex = buildCrossTUUninitializedSummaryIndex(loadedModules, cfg); + if (needsCrossTUGlobalReadBeforeWriteSummaries) + { + cfg.globalReadBeforeWriteSummaryIndex = + buildCrossTUGlobalReadBeforeWriteSummaryIndex(loadedModules, cfg); + } for (auto& loaded : loadedModules) { @@ -869,72 +940,53 @@ static AppStatus analyzeWithoutSharedModuleLoading(const std::vector result; std::string loadError; std::string noFunctionMsg; - bool success = false; }; std::vector slots(inputFilenames.size()); - std::atomic_size_t nextIndex{0}; - const unsigned workerCount = - std::min(parallelJobs, static_cast(inputFilenames.size())); - std::vector workers; - workers.reserve(workerCount); - for (unsigned worker = 0; worker < workerCount; ++worker) - { - workers.emplace_back( - [&]() - { - while (true) - { - const std::size_t index = nextIndex.fetch_add(1); - if (index >= inputFilenames.size()) - break; - - const std::string& inputFilename = inputFilenames[index]; - llvm::LLVMContext localContext; - llvm::SMDiagnostic localErr; - analysis::ModuleLoadResult load = - analysis::loadModuleForAnalysis(inputFilename, cfg, localContext, localErr); - if (!load.module) + runParallelWork(inputFilenames.size(), parallelJobs, + [&](std::size_t index) { - std::string err; - if (!load.error.empty()) - err += load.error; - if (localErr.getLineNo() != 0 || !localErr.getFilename().empty()) + const std::string& inputFilename = inputFilenames[index]; + llvm::LLVMContext localContext; + llvm::SMDiagnostic localErr; + analysis::ModuleLoadResult load = analysis::loadModuleForAnalysis( + inputFilename, cfg, localContext, localErr); + if (!load.module) { - std::string diagText; - llvm::raw_string_ostream os(diagText); - localErr.print("stack_usage_analyzer", os); - 
os.flush(); - err += diagText; + std::string err; + if (!load.error.empty()) + err += load.error; + if (localErr.getLineNo() != 0 || !localErr.getFilename().empty()) + { + std::string diagText; + llvm::raw_string_ostream os(diagText); + localErr.print("stack_usage_analyzer", os); + os.flush(); + err += diagText; + } + slots[index].loadError = std::move(err); + return; } - slots[index].loadError = std::move(err); - continue; - } - AnalysisResult result = analyzeModule(*load.module, cfg); - if (!load.frontendDiagnostics.empty()) - { - result.diagnostics.insert(result.diagnostics.end(), - load.frontendDiagnostics.begin(), - load.frontendDiagnostics.end()); - } - stampResultFilePaths(result, inputFilename); - slots[index].noFunctionMsg = - noFunctionMessage(result, inputFilename, hasFilter); - slots[index].result = std::move(result); - slots[index].success = true; - } - }); - } - for (auto& worker : workers) - worker.join(); + AnalysisResult result = analyzeModule(*load.module, cfg); + if (!load.frontendDiagnostics.empty()) + { + result.diagnostics.insert(result.diagnostics.end(), + load.frontendDiagnostics.begin(), + load.frontendDiagnostics.end()); + } + stampResultFilePaths(result, inputFilename); + slots[index].noFunctionMsg = + noFunctionMessage(result, inputFilename, hasFilter); + slots[index].result = std::make_unique(std::move(result)); + }); for (std::size_t index = 0; index < inputFilenames.size(); ++index) { - if (!slots[index].success) + if (!slots[index].result) { std::string message; if (!slots[index].loadError.empty()) @@ -948,7 +1000,7 @@ static AppStatus analyzeWithoutSharedModuleLoading(const std::vector& loadedModules, const std::string modelContent = readFileAsString(cfg.resourceModelPath); const std::string modelHash = md5Hex(modelContent.empty() ? 
cfg.resourceModelPath : modelContent); - constexpr llvm::StringLiteral kCacheSchema = "cross-tu-resource-summary-v1"; + // Bump this when summary semantics evolve so on-disk cache entries from older + // analyzer builds are not reused with incompatible interpretation. + constexpr llvm::StringLiteral kCacheSchema = "cross-tu-resource-summary-v2"; const bool allowDiskCache = !cfg.resourceSummaryMemoryOnly && !cfg.resourceSummaryCacheDir.empty(); const unsigned maxJobs = resolveConfiguredJobs(cfg); @@ -1475,34 +1529,16 @@ buildCrossTUSummaryIndex(const std::vector& loadedModules, } else { - const unsigned workerCount = - std::min(maxJobs, static_cast(missingIndices.size())); std::vector computed( loadedModules.size()); std::vector computedReady(loadedModules.size(), 0); - std::atomic_size_t nextMissing{0}; - std::vector workers; - workers.reserve(workerCount); - - for (unsigned worker = 0; worker < workerCount; ++worker) - { - workers.emplace_back( - [&]() - { - while (true) - { - const std::size_t slot = nextMissing.fetch_add(1); - if (slot >= missingIndices.size()) - break; - const std::size_t moduleIndex = missingIndices[slot]; - computed[moduleIndex] = buildModuleSummary(moduleIndex); - computedReady[moduleIndex] = 1; - } - }); - } - - for (auto& worker : workers) - worker.join(); + runParallelWork(missingIndices.size(), maxJobs, + [&](std::size_t slot) + { + const std::size_t moduleIndex = missingIndices[slot]; + computed[moduleIndex] = buildModuleSummary(moduleIndex); + computedReady[moduleIndex] = 1; + }); for (std::size_t moduleIndex : missingIndices) { @@ -1576,6 +1612,66 @@ buildCrossTUSummaryIndex(const std::vector& loadedModules, return std::make_shared(std::move(globalIndex)); } +static std::shared_ptr +buildCrossTUGlobalReadBeforeWriteSummaryIndex(const std::vector& loadedModules, + const AnalysisConfig& cfg) +{ + if (loadedModules.size() < 2) + return nullptr; + + using Clock = std::chrono::steady_clock; + const auto buildStart = Clock::now(); + if 
(cfg.timing) + { + coretrace::log(coretrace::Level::Info, + "Building cross-TU global read-before-write summaries for {} module(s)...\n", + loadedModules.size()); + } + + const unsigned maxJobs = resolveConfiguredJobs(cfg); + std::vector moduleSummaries(loadedModules.size()); + + auto buildModuleSummary = + [&](std::size_t moduleIndex) -> analysis::GlobalReadBeforeWriteSummaryIndex + { + const LoadedInputModule& loaded = loadedModules[moduleIndex]; + const analysis::FunctionFilter filter = analysis::buildFunctionFilter(*loaded.module, cfg); + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return filter.shouldAnalyze(F); }; + return analysis::buildGlobalReadBeforeWriteSummaryIndex(*loaded.module, shouldAnalyze); + }; + + if (maxJobs <= 1 || loadedModules.size() <= 1) + { + for (std::size_t moduleIndex = 0; moduleIndex < loadedModules.size(); ++moduleIndex) + moduleSummaries[moduleIndex] = buildModuleSummary(moduleIndex); + } + else + { + runParallelWork(loadedModules.size(), maxJobs, [&](std::size_t moduleIndex) + { moduleSummaries[moduleIndex] = buildModuleSummary(moduleIndex); }); + } + + analysis::GlobalReadBeforeWriteSummaryIndex globalIndex; + for (const auto& moduleSummary : moduleSummaries) + { + (void)analysis::mergeGlobalReadBeforeWriteSummaryIndex(globalIndex, moduleSummary); + } + + if (cfg.timing) + { + const auto buildEnd = Clock::now(); + const auto ms = + std::chrono::duration_cast(buildEnd - buildStart).count(); + coretrace::log( + coretrace::Level::Info, + "Cross-TU global read-before-write summary build done in {} ms ({} symbol(s))\n", ms, + globalIndex.globals.size()); + } + + return std::make_shared(std::move(globalIndex)); +} + static std::shared_ptr buildCrossTUUninitializedSummaryIndex(const std::vector& loadedModules, const AnalysisConfig& cfg) @@ -1595,12 +1691,24 @@ buildCrossTUUninitializedSummaryIndex(const std::vector& load // Same fixed-point budget policy as resource summaries. 
constexpr unsigned kCrossTUMaxIterations = 12; const unsigned maxJobs = resolveConfiguredJobs(cfg); + std::vector preparedModules; + preparedModules.reserve(loadedModules.size()); + for (const LoadedInputModule& loaded : loadedModules) + { + const analysis::FunctionFilter filter = analysis::buildFunctionFilter(*loaded.module, cfg); + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return filter.shouldAnalyze(F); }; + preparedModules.push_back( + analysis::prepareUninitializedModuleContext(*loaded.module, shouldAnalyze)); + } analysis::UninitializedSummaryIndex globalIndex; unsigned iterationsRan = 0; bool converged = false; for (unsigned iter = 0; iter < kCrossTUMaxIterations; ++iter) { const auto iterStart = Clock::now(); + const analysis::PreparedUninitializedExternalSummaries preparedExternal = + analysis::prepareUninitializedExternalSummaries(&globalIndex); analysis::UninitializedSummaryIndex nextGlobal; std::vector moduleSummaries(loadedModules.size()); @@ -1608,11 +1716,8 @@ buildCrossTUUninitializedSummaryIndex(const std::vector& load [&](std::size_t moduleIndex) -> analysis::UninitializedSummaryIndex { const LoadedInputModule& loaded = loadedModules[moduleIndex]; - analysis::FunctionFilter filter = analysis::buildFunctionFilter(*loaded.module, cfg); - auto shouldAnalyze = [&](const llvm::Function& F) -> bool - { return filter.shouldAnalyze(F); }; - return analysis::buildUninitializedSummaryIndex(*loaded.module, shouldAnalyze, - &globalIndex); + return analysis::buildUninitializedSummaryIndex( + *loaded.module, &preparedModules[moduleIndex], &preparedExternal); }; if (maxJobs <= 1 || loadedModules.size() <= 1) @@ -1622,27 +1727,8 @@ buildCrossTUUninitializedSummaryIndex(const std::vector& load } else { - const unsigned workerCount = - std::min(maxJobs, static_cast(loadedModules.size())); - std::atomic_size_t nextModule{0}; - std::vector workers; - workers.reserve(workerCount); - for (unsigned worker = 0; worker < workerCount; ++worker) - { - 
workers.emplace_back( - [&]() - { - while (true) - { - const std::size_t moduleIndex = nextModule.fetch_add(1); - if (moduleIndex >= loadedModules.size()) - break; - moduleSummaries[moduleIndex] = buildModuleSummary(moduleIndex); - } - }); - } - for (auto& worker : workers) - worker.join(); + runParallelWork(loadedModules.size(), maxJobs, [&](std::size_t moduleIndex) + { moduleSummaries[moduleIndex] = buildModuleSummary(moduleIndex); }); } for (const auto& moduleSummary : moduleSummaries) @@ -1726,12 +1812,14 @@ struct RunPlan AnalysisConfig cfg; std::vector inputFilenames; NormalizedPathFilters normalizedFilters; - ctrace::stack::cli::OutputFormat outputFormat = ctrace::stack::cli::OutputFormat::Human; std::string sarifBaseDir; - bool hasFilter = false; - bool needsCrossTUResourceSummaries = false; - bool needsCrossTUUninitializedSummaries = false; - bool needsSharedModuleLoading = false; + ctrace::stack::cli::OutputFormat outputFormat = ctrace::stack::cli::OutputFormat::Human; + std::uint64_t hasFilter : 1 = false; + std::uint64_t needsCrossTUResourceSummaries : 1 = false; + std::uint64_t needsCrossTUUninitializedSummaries : 1 = false; + std::uint64_t needsCrossTUGlobalReadBeforeWriteSummaries : 1 = false; + std::uint64_t needsSharedModuleLoading : 1 = false; + std::uint64_t reservedFlags : 59 = 0; }; class RunPlanBuilder @@ -1792,8 +1880,10 @@ class RunPlanBuilder plan.inputFilenames.size() > 1; plan.needsCrossTUUninitializedSummaries = plan.cfg.uninitializedCrossTU && plan.inputFilenames.size() > 1; - plan.needsSharedModuleLoading = - plan.needsCrossTUResourceSummaries || plan.needsCrossTUUninitializedSummaries; + plan.needsCrossTUGlobalReadBeforeWriteSummaries = plan.inputFilenames.size() > 1; + plan.needsSharedModuleLoading = plan.needsCrossTUResourceSummaries || + plan.needsCrossTUUninitializedSummaries || + plan.needsCrossTUGlobalReadBeforeWriteSummaries; return AppResult::success(std::move(plan)); } @@ -1816,9 +1906,10 @@ class 
SharedModuleLoadingExecutionStrategy final : public AnalysisExecutionStrat AppStatus execute(RunPlan& plan, llvm::LLVMContext&, std::vector& results) const override { - return analyzeWithSharedModuleLoading(plan.inputFilenames, plan.cfg, plan.hasFilter, - plan.needsCrossTUResourceSummaries, - plan.needsCrossTUUninitializedSummaries, results); + return analyzeWithSharedModuleLoading( + plan.inputFilenames, plan.cfg, plan.hasFilter, plan.needsCrossTUResourceSummaries, + plan.needsCrossTUUninitializedSummaries, + plan.needsCrossTUGlobalReadBeforeWriteSummaries, results); } }; @@ -1904,7 +1995,8 @@ class AnalyzerApp RunPlan plan = std::move(*planResult.value); printInterprocStatus(plan.cfg, plan.inputFilenames.size(), plan.needsCrossTUResourceSummaries, - plan.needsCrossTUUninitializedSummaries); + plan.needsCrossTUUninitializedSummaries, + plan.needsCrossTUGlobalReadBeforeWriteSummaries); std::vector results; results.reserve(plan.inputFilenames.size()); @@ -1920,7 +2012,6 @@ class AnalyzerApp namespace ctrace::stack::app { - RunResult runAnalyzerApp(cli::ParsedArguments parsedArgs, llvm::LLVMContext& context) { AnalyzerApp app = {}; diff --git a/src/cli/ArgParser.cpp b/src/cli/ArgParser.cpp index 1ae7578..4bbeefa 100644 --- a/src/cli/ArgParser.cpp +++ b/src/cli/ArgParser.cpp @@ -45,7 +45,7 @@ namespace ctrace::stack::cli } private: - static constexpr std::array kCandidates = { + static constexpr std::array kCandidates = { {{"-h", "-h"}, {"--help", "--help"}, {"--demangle", "--demangle"}, @@ -86,6 +86,7 @@ namespace ctrace::stack::cli {"--no-uninitialized-cross-tu", "--no-uninitialized-cross-tu"}, {"--resource-summary-cache-dir", "--resource-summary-cache-dir"}, {"--resource-summary-cache-memory-only", "--resource-summary-cache-memory-only"}, + {"--compile-ir-cache-dir", "--compile-ir-cache-dir"}, {"--config", "--config"}, {"--print-effective-config", "--print-effective-config"}, {"--compile-commands", "--compile-commands"}, @@ -105,7 +106,8 @@ namespace 
ctrace::stack::cli std::string_view suggestion; std::size_t distance = std::numeric_limits::max(); std::size_t queryLength = 0; - bool valid = false; + std::uint64_t valid : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; static std::optional @@ -575,16 +577,16 @@ namespace ctrace::stack::cli Cli }; - using SmtOptionApplyFn = - bool (*)(AnalysisConfig& cfg, const std::string& value, SmtOptionSource source, - std::string& error); + using SmtOptionApplyFn = bool (*)(AnalysisConfig& cfg, const std::string& value, + SmtOptionSource source, std::string& error); struct SmtOptionSpec { std::string_view configKey; const char* cliOption; SmtOptionApplyFn apply = nullptr; - bool impliesSmtEnabled = true; + std::uint64_t impliesSmtEnabled : 1 = true; + std::uint64_t reservedFlags : 63 = 0; }; bool applySmtSwitchOption(AnalysisConfig& cfg, const std::string& value, @@ -724,21 +726,61 @@ namespace ctrace::stack::cli template struct BoolConfigSpec { std::string_view key; - bool Owner::*field = nullptr; + void (*set)(Owner&, bool) = nullptr; }; + void setConfigTiming(AnalysisConfig& cfg, bool value) + { + cfg.timing = value; + } + + void setConfigWarningsOnly(AnalysisConfig& cfg, bool value) + { + cfg.warningsOnly = value; + } + + void setConfigQuiet(AnalysisConfig& cfg, bool value) + { + cfg.quiet = value; + } + + void setConfigDemangle(AnalysisConfig& cfg, bool value) + { + cfg.demangle = value; + } + + void setConfigResourceCrossTU(AnalysisConfig& cfg, bool value) + { + cfg.resourceCrossTU = value; + } + + void setConfigUninitializedCrossTU(AnalysisConfig& cfg, bool value) + { + cfg.uninitializedCrossTU = value; + } + + void setConfigResourceSummaryMemoryOnly(AnalysisConfig& cfg, bool value) + { + cfg.resourceSummaryMemoryOnly = value; + } + + void setParsedIncludeCompdbDeps(ParsedArguments& parsed, bool value) + { + parsed.includeCompdbDeps = value; + } + constexpr std::array, 7> kConfigBoolSpecs = {{ - {"timing", &AnalysisConfig::timing}, - {"warnings-only", 
&AnalysisConfig::warningsOnly}, - {"quiet", &AnalysisConfig::quiet}, - {"demangle", &AnalysisConfig::demangle}, - {"resource-cross-tu", &AnalysisConfig::resourceCrossTU}, - {"uninitialized-cross-tu", &AnalysisConfig::uninitializedCrossTU}, - {"resource-summary-cache-memory-only", &AnalysisConfig::resourceSummaryMemoryOnly}, + {"timing", &setConfigTiming}, + {"warnings-only", &setConfigWarningsOnly}, + {"quiet", &setConfigQuiet}, + {"demangle", &setConfigDemangle}, + {"resource-cross-tu", &setConfigResourceCrossTU}, + {"uninitialized-cross-tu", &setConfigUninitializedCrossTU}, + {"resource-summary-cache-memory-only", &setConfigResourceSummaryMemoryOnly}, }}; constexpr std::array, 1> kParsedBoolSpecs = {{ - {"include-compdb-deps", &ParsedArguments::includeCompdbDeps}, + {"include-compdb-deps", &setParsedIncludeCompdbDeps}, }}; template @@ -759,7 +801,8 @@ namespace ctrace::stack::cli return false; } - owner.*(spec.field) = parsedValue; + if (spec.set) + spec.set(owner, parsedValue); return true; } return false; @@ -768,7 +811,8 @@ namespace ctrace::stack::cli struct PreParsedCliMeta { std::string configPath; - bool printEffectiveConfig = false; + std::uint64_t printEffectiveConfig : 1 = false; + std::uint64_t reservedFlags : 63 = 0; }; bool preScanMetaOptions(int argc, char** argv, PreParsedCliMeta& outMeta, @@ -796,9 +840,9 @@ namespace ctrace::stack::cli return true; } - bool applyConfigEntry(const std::string& keyRaw, const std::string& valueRaw, ParsedArguments& parsed, - AnalysisConfig& cfg, const std::filesystem::path& configDir, - std::string& error) + bool applyConfigEntry(const std::string& keyRaw, const std::string& valueRaw, + ParsedArguments& parsed, AnalysisConfig& cfg, + const std::filesystem::path& configDir, std::string& error) { std::string key = toLowerAsciiCopy(trimCopy(keyRaw)); for (char& c : key) @@ -895,6 +939,11 @@ namespace ctrace::stack::cli cfg.resourceSummaryCacheDir = resolveConfigRelativePath(value, configDir); return true; } + if (key 
== "compile-ir-cache-dir") + { + cfg.compileIRCacheDir = resolveConfigRelativePath(value, configDir); + return true; + } error = "unknown key '" + key + "'"; return false; @@ -1389,6 +1438,18 @@ namespace ctrace::stack::cli continue; } } + { + std::string value; + std::string error; + if (consumeLongOptionValue(argStr, "--compile-ir-cache-dir", i, argc, argv, value, + error)) + { + if (!error.empty()) + return makeError(error); + cfg.compileIRCacheDir = std::move(value); + continue; + } + } if (argStr == "--resource-summary-cache-memory-only") { cfg.resourceSummaryMemoryOnly = true; diff --git a/test/false-positive-repro/duplicate-if-unreachable-elseif-fp.cpp b/test/false-positive-repro/duplicate-if-unreachable-elseif-fp.cpp new file mode 100644 index 0000000..e2ec256 --- /dev/null +++ b/test/false-positive-repro/duplicate-if-unreachable-elseif-fp.cpp @@ -0,0 +1,27 @@ +enum class InitState +{ + Uninit, + Partial, + Init +}; + +int fp_duplicate_if_unreachable_elseif_guard(bool isCtorThis, bool isAssignmentLike, + InitState state) +{ + bool hasReadBeforeWrite = false; + if (state == InitState::Uninit) + hasReadBeforeWrite = true; + + if (hasReadBeforeWrite) + { + const bool suppressForAssignmentPadding = isAssignmentLike && state == InitState::Partial; + const bool suppressCtorThisReadBeforeWrite = isCtorThis && state == InitState::Uninit; + if (suppressForAssignmentPadding || suppressCtorThisReadBeforeWrite) + return 1; + } + + return 0; +} + +// strict-diagnostic-count: false +// not contains: unreachable else-if branch: condition is equivalent to a previous 'if' condition diff --git a/test/false-positive-repro/uninitialized-beststack-est.cpp b/test/false-positive-repro/uninitialized-beststack-est.cpp new file mode 100644 index 0000000..cd71838 --- /dev/null +++ b/test/false-positive-repro/uninitialized-beststack-est.cpp @@ -0,0 +1,29 @@ +#include + +struct StackEstimateLike +{ + unsigned long long bytes = 0; + bool unknown = false; +}; + +int 
fp_uninitialized_beststack_est(const std::vector<StackEstimateLike>& totals)
+{
+    const StackEstimateLike* best = nullptr;
+    StackEstimateLike bestStack{};
+
+    for (const StackEstimateLike& candidate : totals)
+    {
+        StackEstimateLike est = candidate.bytes > 0 ? candidate : StackEstimateLike{};
+        if (!best || est.bytes > bestStack.bytes)
+        {
+            best = &candidate;
+            bestStack = est;
+        }
+    }
+
+    return (best && bestStack.bytes > 0) ? 1 : 0;
+}
+
+// strict-diagnostic-count: false
+// not contains: potential read of uninitialized local variable 'bestStack'
+// not contains: potential read of uninitialized local variable 'est'
diff --git a/test/false-positive-repro/uninitialized-default-config-wrapper.cpp b/test/false-positive-repro/uninitialized-default-config-wrapper.cpp
new file mode 100644
index 0000000..c4821a1
--- /dev/null
+++ b/test/false-positive-repro/uninitialized-default-config-wrapper.cpp
@@ -0,0 +1,19 @@
+struct AnalysisConfigLike
+{
+    unsigned long long stackLimit = 8ull * 1024ull * 1024ull;
+    bool enabled = false;
+};
+
+static int analyzeWithConfig(int input, const AnalysisConfigLike& cfg)
+{
+    return cfg.enabled ? input : (input + static_cast<int>(cfg.stackLimit % 7ull));
+}
+
+int fp_uninitialized_default_config_wrapper(int input)
+{
+    const AnalysisConfigLike defaultConfig{};
+    return analyzeWithConfig(input, defaultConfig);
+}
+
+// strict-diagnostic-count: false
+// not contains: potential read of uninitialized local variable 'defaultConfig'
diff --git a/test/false-positive-repro/uninitialized-info-temp-map.cpp b/test/false-positive-repro/uninitialized-info-temp-map.cpp
new file mode 100644
index 0000000..4d36a49
--- /dev/null
+++ b/test/false-positive-repro/uninitialized-info-temp-map.cpp
@@ -0,0 +1,35 @@
+#include <map>
+#include <vector>
+
+struct LocalInfoLike
+{
+    unsigned long long bytes = 0;
+    bool unknown = false;
+};
+
+static LocalInfoLike computeInfoLike(int seed)
+{
+    LocalInfoLike info{};
+    info.bytes = static_cast<unsigned long long>(seed >= 0 ? seed : -seed);
+    info.unknown = (seed % 2) == 0;
+    return info;
+}
+
+unsigned long long fp_uninitialized_info_temp_map(const std::vector<int>& values)
+{
+    std::map<int, LocalInfoLike> localById;
+    for (int value : values)
+    {
+        LocalInfoLike info = computeInfoLike(value);
+        localById[value] = info;
+    }
+
+    unsigned long long sum = 0;
+    for (const auto& [key, info] : localById)
+        sum +=
+            info.bytes + (info.unknown ? 1ull : 0ull) + static_cast<unsigned long long>(key >= 0);
+    return sum;
+}
+
+// strict-diagnostic-count: false
+// not contains: potential read of uninitialized local variable 'info'
diff --git a/test/false-positive-repro/uninitialized-local-maxcallee.cpp b/test/false-positive-repro/uninitialized-local-maxcallee.cpp
new file mode 100644
index 0000000..fcf4572
--- /dev/null
+++ b/test/false-positive-repro/uninitialized-local-maxcallee.cpp
@@ -0,0 +1,35 @@
+#include <vector>
+
+struct StackEstimateLike
+{
+    unsigned long long bytes = 0;
+    bool unknown = false;
+};
+
+unsigned long long fp_uninitialized_local_maxcallee(const std::vector<StackEstimateLike>& callees,
+                                                    bool hasLocal, unsigned long long localBytes)
+{
+    StackEstimateLike local = {};
+    if (hasLocal)
+    {
+        local.bytes = localBytes;
+    }
+
+    StackEstimateLike maxCallee = {};
+    for (const StackEstimateLike& callee : callees)
+    {
+        if (callee.bytes > maxCallee.bytes)
+            maxCallee.bytes = callee.bytes;
+        if (callee.unknown)
+            maxCallee.unknown = true;
+    }
+
+    StackEstimateLike total{};
+    total.bytes = local.bytes + maxCallee.bytes;
+    total.unknown = local.unknown || maxCallee.unknown;
+    return total.bytes + (total.unknown ? 1ull : 0ull);
+}
+
+// strict-diagnostic-count: false
+// not contains: potential read of uninitialized local variable 'local'
+// not contains: potential read of uninitialized local variable 'maxCallee'
diff --git a/test/false-positive-repro/uninitialized-merged-return.cpp b/test/false-positive-repro/uninitialized-merged-return.cpp
new file mode 100644
index 0000000..4c1df39
--- /dev/null
+++ b/test/false-positive-repro/uninitialized-merged-return.cpp
@@ -0,0 +1,27 @@
+#include <vector>
+
+struct AnalysisResultLike
+{
+    unsigned warningCount = 0;
+    unsigned errorCount = 0;
+};
+
+static AnalysisResultLike mergeResultsLike(const std::vector<AnalysisResultLike>& results)
+{
+    AnalysisResultLike merged{};
+    for (const AnalysisResultLike& item : results)
+    {
+        merged.warningCount += item.warningCount;
+        merged.errorCount += item.errorCount;
+    }
+    return merged;
+}
+
+unsigned fp_uninitialized_merged_return(const std::vector<AnalysisResultLike>& results)
+{
+    AnalysisResultLike merged = mergeResultsLike(results);
+    return merged.warningCount + merged.errorCount;
+}
+
+// strict-diagnostic-count: false
+// not contains: potential read of uninitialized local variable 'merged'
diff --git a/test/uninitialized-variable/global-array-read-without-local-write.c b/test/uninitialized-variable/global-array-read-without-local-write.c
new file mode 100644
index 0000000..e672503
--- /dev/null
+++ b/test/uninitialized-variable/global-array-read-without-local-write.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+
+int g_zero_cross_state[8];
+int g_zero_local_ok[4];
+
+static void external_write(void)
+{
+    g_zero_cross_state[0] = 42;
+}
+
+static int read_without_local_write(void)
+{
+    return g_zero_cross_state[0];
+}
+// at line 13, column 12
+// [ !!Warn ] potential read of global buffer 'g_zero_cross_state' before first write in this function
+// ↳ no write to this buffer is observed in this function; value may come from static initialization or writes in other functions/TUs
+
+static int read_after_local_write(void)
+{
+    g_zero_local_ok[1] = 7;
+    return g_zero_local_ok[1];
+}
+// not contains: potential read of global buffer 'g_zero_local_ok' before first write in this function
+
+int main(void)
+{
+    external_write();
+    const int a = read_without_local_write();
+    const int b = read_after_local_write();
+    printf("%d %d\n", a, b);
+    return 0;
+}
diff --git a/test/uninitialized-variable/uninitialized-local-cpp-bitfield-copy-ctor.cpp b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-copy-ctor.cpp
new file mode 100644
index 0000000..82e69d5
--- /dev/null
+++ b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-copy-ctor.cpp
@@ -0,0 +1,21 @@
+struct ResolvedLike
+{
+    unsigned line = 0;
+    unsigned column = 0;
+    unsigned hasLocation : 1 = 0;
+    unsigned reserved : 31 = 0;
+};
+
+static ResolvedLike makeResolvedLike(void)
+{
+    ResolvedLike loc;
+    return loc;
+}
+
+int bitfield_copy_ctor_should_not_warn(void)
+{
+    const ResolvedLike loc = makeResolvedLike();
+    return static_cast<int>(loc.line + loc.column);
+}
+
+// not contains: potential read of uninitialized local variable 'loc'
diff --git a/test/uninitialized-variable/uninitialized-local-cpp-bitfield-default-member-init.cpp b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-default-member-init.cpp
new file mode 100644
index 0000000..0a33fc5
--- /dev/null
+++ b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-default-member-init.cpp
@@ -0,0 +1,14 @@
+struct BitfieldConfig
+{
+    unsigned mode : 3 = 0;
+    unsigned enabled : 1 = 0;
+    unsigned reserved : 28 = 0;
+};
+
+int bitfield_default_member_init_should_not_warn(void)
+{
+    BitfieldConfig cfg;
+    return static_cast<int>(cfg.mode);
+}
+
+// not contains: potential read of uninitialized local variable 'cfg'
diff --git a/test/uninitialized-variable/uninitialized-local-cpp-bitfield-missing-init.cpp b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-missing-init.cpp
new file mode 100644
index 0000000..2238553
--- /dev/null
+++ b/test/uninitialized-variable/uninitialized-local-cpp-bitfield-missing-init.cpp
@@ -0,0 +1,15 @@
+struct RawBits
+{
+    unsigned first : 1;
+    unsigned second : 1;
+};
+
+int bitfield_missing_init_should_warn(void)
+{
+    RawBits bits;
+    return static_cast<int>(bits.second);
+}
+
+// at line 10, column 34
+// [ !!Warn ] potential read of uninitialized local variable 'bits'
+// ↳ this load may execute before any definite initialization on all control-flow paths