From 8727f7d9287da250e1aa286e0a4492de9c55139a Mon Sep 17 00:00:00 2001 From: dutchpsycho <178704185+dutchpsycho@users.noreply.github.com> Date: Mon, 11 May 2026 17:41:51 +1000 Subject: [PATCH 1/4] feat(cli): integrate reconstruction and syscall analysis --- resx/src/analysis/disasm.rs | 423 ++++-- resx/src/analysis/discovery.rs | 417 ++++++ resx/src/analysis/follow/output.rs | 26 +- resx/src/analysis/follow/scan.rs | 276 +++- resx/src/analysis/follow/trace.rs | 60 +- resx/src/analysis/indirect.rs | 167 +++ resx/src/analysis/ir.rs | 357 +++++ resx/src/analysis/mod.rs | 5 + resx/src/analysis/reconstruct.rs | 2001 ++++++++++++++++++++++++++ resx/src/analysis/recursive_cfg.rs | 312 ++++ resx/src/analysis/symbols.rs | 71 +- resx/src/analysis/thunk.rs | 106 +- resx/src/cli/help.rs | 70 + resx/src/cli/router.rs | 14 +- resx/src/commands/dump/callmap.rs | 197 ++- resx/src/commands/dump/json.rs | 184 ++- resx/src/commands/dump/mod.rs | 128 +- resx/src/commands/dump/style.rs | 4 + resx/src/commands/mod.rs | 2 + resx/src/commands/peinfo/detect.rs | 97 +- resx/src/commands/peinfo/mod.rs | 10 +- resx/src/commands/peinfo/model.rs | 67 +- resx/src/commands/reconstruct_cfg.rs | 96 ++ resx/src/commands/scan.rs | 576 ++++++++ resx/src/core/config.rs | 45 +- resx/src/core/priority.rs | 34 + resx/src/core/search.rs | 69 +- resx/src/formats/pe/metadata.rs | 435 +++++- resx/src/formats/pe/mod.rs | 3 +- resx/src/formats/pe/types.rs | 68 + 30 files changed, 6060 insertions(+), 260 deletions(-) create mode 100644 resx/src/analysis/discovery.rs create mode 100644 resx/src/analysis/indirect.rs create mode 100644 resx/src/analysis/ir.rs create mode 100644 resx/src/analysis/reconstruct.rs create mode 100644 resx/src/analysis/recursive_cfg.rs create mode 100644 resx/src/commands/reconstruct_cfg.rs create mode 100644 resx/src/commands/scan.rs diff --git a/resx/src/analysis/disasm.rs b/resx/src/analysis/disasm.rs index b8993c0..8e6621f 100644 --- a/resx/src/analysis/disasm.rs +++ 
b/resx/src/analysis/disasm.rs @@ -127,8 +127,30 @@ pub fn disassemble_at( )); } - let mut chunk = &raw[file_off..]; - if cfg.max_bytes > 0 && chunk.len() > cfg.max_bytes { + let runtime_function = crate::formats::pe::read_runtime_function(pe, raw, start_rva); + let runtime_size = runtime_function + .as_ref() + .and_then(|func| func.end_rva.checked_sub(start_rva)) + .map(|size| size as usize) + .filter(|&size| size > 0); + + let section_limit = pe + .rva_to_section(start_rva) + .and_then(|section| { + section + .virtual_address + .saturating_add(section.virtual_size.max(section.raw_size)) + .checked_sub(start_rva) + }) + .map(|size| size as usize) + .unwrap_or_else(|| raw.len().saturating_sub(file_off)); + + let decode_len = runtime_size + .unwrap_or(section_limit) + .min(section_limit) + .min(raw.len().saturating_sub(file_off)); + let mut chunk = &raw[file_off..file_off + decode_len]; + if runtime_size.is_none() && cfg.max_bytes > 0 && chunk.len() > cfg.max_bytes { chunk = &chunk[..cfg.max_bytes]; } @@ -165,7 +187,7 @@ pub fn disassemble_at( let mut padding_after_ret = 0usize; while pos < chunk.len() { - if cfg.max_insns > 0 && insns.len() >= cfg.max_insns { + if runtime_size.is_none() && cfg.max_insns > 0 && insns.len() >= cfg.max_insns { break; } @@ -178,6 +200,9 @@ pub fn disassemble_at( decoder.decode_out(&mut iced); let i_len = iced.len(); + if i_len == 0 { + break; + } let i_bytes: Vec<u8> = chunk[pos..pos + i_len.min(chunk.len() - pos)].to_vec(); let pc = ip + pos as u64; let m = iced.mnemonic(); @@ -237,6 +262,14 @@ pub fn disassemble_at( } } + if cfg.hostile { + if let Some(note) = suspicious_flow_note(&iced) { + if !comment_parts.iter().any(|p| p == &note) { + comment_parts.push(note); + } + } + } + let comment = comment_parts.join(" | "); let is_int3 = i_bytes.len() == 1 && i_bytes[0] == 0xCC; let is_all_pad = i_bytes.iter().all(|&b| b == 0xCC || b == 0x90 || b == 0x00); @@ -259,23 +292,25 @@ pub fn disassemble_at( insns.push(insn); - if is_ret(m) { + if
runtime_size.is_none() && is_ret(m) { last_ret_idx = Some(insns.len() - 1); padding_after_ret = 0; - } else if let Some(ret_idx) = last_ret_idx { - if is_all_pad || m == Mnemonic::Nop || is_int3 { - padding_after_ret += i_len; - if padding_after_ret >= 3 { - insns.truncate(ret_idx + 1); - break; + } else if runtime_size.is_none() { + if let Some(ret_idx) = last_ret_idx { + if is_all_pad || m == Mnemonic::Nop || is_int3 { + padding_after_ret += i_len; + if padding_after_ret >= 3 { + insns.truncate(ret_idx + 1); + break; + } + } else { + last_ret_idx = None; + padding_after_ret = 0; } - } else { - last_ret_idx = None; - padding_after_ret = 0; } } - if last_ret_idx.is_none() && is_int3 { + if runtime_size.is_none() && last_ret_idx.is_none() && is_int3 { break; } @@ -800,101 +835,298 @@ fn register_short_name(reg: Register) -> String { format!("{:?}", reg.full_register()).to_lowercase() } -/// Describes how an indirect register's value originated (backward dataflow, up to 16 insns). +/// Describes how an indirect register's value originated. struct RegSource { label: String, dll: String, is_import: bool, - /// Human-readable description of how the register was loaded, e.g. "rax ← IAT [rip+0x1234]". + /// Human-readable description of how the register was loaded. method: String, target_rva: u32, iat_slot_rva: u32, } -/// Scan backwards from `call_idx` looking for the most recent def of `target_reg`. -/// Handles: -/// - `mov reg, [rip+rel32]` or `mov reg, [abs]` → IAT resolution attempt -/// - `add reg, other` → returns None (table-dispatch, handled upstream) -/// - Anything else → returns None -fn track_indirect_register( +/// Expression type for backward register slicing. 
+#[derive(Debug, Clone)] +enum RegExpr { + Unknown, + Imm(u64), + Va(u64), + Import { + dll: String, + func: String, + slot_rva: u32, + }, + Derived(String), +} + +fn combine_add(base: RegExpr, rhs: u64) -> RegExpr { + match base { + RegExpr::Imm(v) => RegExpr::Imm(v.wrapping_add(rhs)), + RegExpr::Va(v) => RegExpr::Va(v.wrapping_add(rhs)), + RegExpr::Import { + dll, + func, + slot_rva, + } => RegExpr::Derived(format!( + "{dll}!{func} @IAT+0x{rhs:X} [slot 0x{slot_rva:08X}]" + )), + RegExpr::Derived(s) => RegExpr::Derived(format!("({s}) + 0x{rhs:X}")), + RegExpr::Unknown => RegExpr::Unknown, + } +} + +fn combine_sub(base: RegExpr, rhs: u64) -> RegExpr { + match base { + RegExpr::Imm(v) => RegExpr::Imm(v.wrapping_sub(rhs)), + RegExpr::Va(v) => RegExpr::Va(v.wrapping_sub(rhs)), + RegExpr::Import { + dll, + func, + slot_rva, + } => RegExpr::Derived(format!( + "{dll}!{func} @IAT-0x{rhs:X} [slot 0x{slot_rva:08X}]" + )), + RegExpr::Derived(s) => RegExpr::Derived(format!("({s}) - 0x{rhs:X}")), + RegExpr::Unknown => RegExpr::Unknown, + } +} + +fn expr_to_regsource(expr: RegExpr, reg: Register) -> RegSource { + let reg_name = register_short_name(reg); + match expr { + RegExpr::Import { + dll, + func, + slot_rva, + } => RegSource { + label: func, + dll, + is_import: true, + method: format!("{reg_name} ← IAT slot 0x{slot_rva:08X}"), + target_rva: 0, + iat_slot_rva: slot_rva, + }, + RegExpr::Va(va) => RegSource { + label: format!("0x{va:016X}"), + dll: String::new(), + is_import: false, + method: format!("{reg_name} ← VA 0x{va:016X}"), + target_rva: va as u32, + iat_slot_rva: 0, + }, + RegExpr::Imm(v) => RegSource { + label: format!("0x{v:016X}"), + dll: String::new(), + is_import: false, + method: format!("{reg_name} ← imm 0x{v:016X}"), + target_rva: 0, + iat_slot_rva: 0, + }, + RegExpr::Derived(s) => RegSource { + label: format!("[{s}]"), + dll: String::new(), + is_import: false, + method: format!("{reg_name} ← {s}"), + target_rva: 0, + iat_slot_rva: 0, + }, + RegExpr::Unknown 
=> RegSource { + label: format!("[via {reg_name}]"), + dll: String::new(), + is_import: false, + method: format!("unresolved register {reg_name}"), + target_rva: 0, + iat_slot_rva: 0, + }, + } +} + +fn resolve_reg_expr( insns: &[Instruction], - call_idx: usize, - target_reg: Register, + until_idx: usize, + reg: Register, image_base: u64, pe: &PeFile, raw: &[u8], -) -> Option { + depth: usize, +) -> RegExpr { use iced_x86::Mnemonic; - let full_target = target_reg.full_register(); - for insn in insns[..call_idx].iter().rev().take(16) { + if depth > 8 { + return RegExpr::Unknown; + } + + let full_reg = reg.full_register(); + + // Scan at most 64 instructions backwards from until_idx. + // Use enumerate to get the absolute index directly — avoids the O(N) + // linear position() search that made this catastrophically slow on large images. + let scan_start = until_idx.saturating_sub(64); + for (rel, insn) in insns[scan_start..until_idx].iter().enumerate().rev() { if insn.iced.op_count() == 0 || insn.iced.op0_kind() != OpKind::Register { continue; } + let dst = insn.iced.op0_register().full_register(); - if dst != full_target { + if dst != full_reg { continue; } + let insn_pos = scan_start + rel; + return match insn.iced.mnemonic() { - Mnemonic::Mov if insn.iced.op1_kind() == OpKind::Memory => { - let slot_va = if insn.iced.memory_base() == Register::RIP - || insn.iced.memory_base() == Register::EIP - { - insn.iced.ip_rel_memory_address() + Mnemonic::Mov => match insn.iced.op1_kind() { + OpKind::Register => { + let src = insn.iced.op1_register().full_register(); + resolve_reg_expr(insns, insn_pos, src, image_base, pe, raw, depth + 1) + } + OpKind::Immediate8 => RegExpr::Imm(insn.iced.immediate8() as u64), + OpKind::Immediate16 => RegExpr::Imm(insn.iced.immediate16() as u64), + OpKind::Immediate32 | OpKind::Immediate32to64 => { + RegExpr::Imm(insn.iced.immediate32() as u64) + } + OpKind::Immediate64 => RegExpr::Imm(insn.iced.immediate64()), + OpKind::Memory => { + let 
slot_va = + if matches!(insn.iced.memory_base(), Register::RIP | Register::EIP) { + insn.iced.ip_rel_memory_address() + } else if insn.iced.memory_base() == Register::None + && insn.iced.memory_index() == Register::None + { + insn.iced.memory_displacement64() + } else { + 0 + }; + + if slot_va >= image_base { + let slot_rva = (slot_va - image_base) as u32; + if let Some((dll, func)) = + crate::formats::pe::resolve_iat_slot(pe, raw, slot_rva) + { + RegExpr::Import { + dll, + func, + slot_rva, + } + } else { + RegExpr::Va(slot_va) + } + } else { + RegExpr::Unknown + } + } + _ => RegExpr::Unknown, + }, + + Mnemonic::Lea => { + if matches!(insn.iced.memory_base(), Register::RIP | Register::EIP) { + RegExpr::Va(insn.iced.ip_rel_memory_address()) } else if insn.iced.memory_base() == Register::None && insn.iced.memory_index() == Register::None { - insn.iced.memory_displacement64() + RegExpr::Va(insn.iced.memory_displacement64()) } else { - // indexed or based memory — too complex to follow here - return None; - }; + RegExpr::Derived(format!("lea {}", insn.operands)) + } + } - let load_desc = if insn.iced.memory_base() == Register::RIP - || insn.iced.memory_base() == Register::EIP - { - format!("[rip+0x{:X}]", insn.iced.memory_displacement64()) - } else { - format!("[0x{:X}]", slot_va) - }; - let reg_name = register_short_name(target_reg); + Mnemonic::Add => { + let base = + resolve_reg_expr(insns, insn_pos, full_reg, image_base, pe, raw, depth + 1); + match insn.iced.op1_kind() { + OpKind::Immediate8 => combine_add(base, insn.iced.immediate8() as u64), + OpKind::Immediate16 => combine_add(base, insn.iced.immediate16() as u64), + OpKind::Immediate32 | OpKind::Immediate32to64 => { + combine_add(base, insn.iced.immediate32() as u64) + } + OpKind::Immediate64 => combine_add(base, insn.iced.immediate64()), + _ => RegExpr::Derived(format!( + "{} + {}", + register_short_name(full_reg), + insn.operands + )), + } + } - if slot_va != 0 && slot_va >= image_base { - let slot_rva = 
(slot_va - image_base) as u32; - if let Some((dll, func)) = - crate::formats::pe::resolve_iat_slot(pe, raw, slot_rva) - { - return Some(RegSource { - label: func, - dll, - is_import: true, - method: format!("{reg_name} ← {load_desc} (IAT)"), - target_rva: 0, - iat_slot_rva: slot_rva, - }); + Mnemonic::Sub => { + let base = + resolve_reg_expr(insns, insn_pos, full_reg, image_base, pe, raw, depth + 1); + match insn.iced.op1_kind() { + OpKind::Immediate8 => combine_sub(base, insn.iced.immediate8() as u64), + OpKind::Immediate16 => combine_sub(base, insn.iced.immediate16() as u64), + OpKind::Immediate32 | OpKind::Immediate32to64 => { + combine_sub(base, insn.iced.immediate32() as u64) } + OpKind::Immediate64 => combine_sub(base, insn.iced.immediate64()), + _ => RegExpr::Derived(format!( + "{} - {}", + register_short_name(full_reg), + insn.operands + )), } - // Memory load but not a recognised IAT slot. - Some(RegSource { - label: format!("[{reg_name} ← {load_desc}]"), - dll: String::new(), - is_import: false, - method: format!("{reg_name} ← {load_desc} (ptr)"), - target_rva: 0, - iat_slot_rva: 0, - }) } - // ADD modifying the target register is the tail of a table-dispatch sequence - // (e.g. `add r9, rcx`). The switch-dispatch path handles those; bail here. - Mnemonic::Add => None, - // LEA usually loads a table base, not a call target. - Mnemonic::Lea => None, - // Any other def of the register — stop. 
- _ => None, + + Mnemonic::Xor + if insn.iced.op1_kind() == OpKind::Register + && insn.iced.op1_register().full_register() == full_reg => + { + RegExpr::Imm(0) + } + + Mnemonic::Rol + | Mnemonic::Ror + | Mnemonic::Shl + | Mnemonic::Shr + | Mnemonic::Sar + | Mnemonic::And + | Mnemonic::Or => RegExpr::Derived(format!( + "{} {}", + insn.mnemonic.to_lowercase(), + insn.operands + )), + + _ => RegExpr::Unknown, }; } + RegExpr::Unknown +} + +fn track_indirect_register( + insns: &[Instruction], + call_idx: usize, + target_reg: Register, + image_base: u64, + pe: &PeFile, + raw: &[u8], +) -> Option<RegSource> { + Some(expr_to_regsource( + resolve_reg_expr(insns, call_idx, target_reg, image_base, pe, raw, 0), + target_reg, + )) +} + +/// Flag suspicious indirect control-flow for annotation in the disasm listing. +fn suspicious_flow_note(instr: &iced_x86::Instruction) -> Option<String> { + use iced_x86::Mnemonic; + let m = instr.mnemonic(); + + if matches!(m, Mnemonic::Call | Mnemonic::Jmp) && instr.op0_kind() == OpKind::Register { + return Some(format!( + "indirect {} via {}", + format!("{:?}", m).to_lowercase(), + format!("{:?}", instr.op0_register().full_register()).to_lowercase() + )); + } + + if matches!( + m, + Mnemonic::Rol | Mnemonic::Ror | Mnemonic::Shl | Mnemonic::Shr | Mnemonic::Sar + ) { + return Some("bit-mix / pointer-transform candidate".to_owned()); + } + None } @@ -912,6 +1144,7 @@ pub fn collect_api_calls( raw: &[u8], symbol_index: &SymbolIndex, image_base: u64, + hostile: bool, ) -> Vec<ApiCall> { let mut results = Vec::new(); @@ -1009,20 +1242,31 @@ pub fn collect_api_calls( let reg = insn.iced.op0_register(); let reg_name = register_short_name(reg); - if let Some(src) = track_indirect_register(insns, idx, reg, image_base, pe, raw) { - results.push(ApiCall { - rva: insn.rva, - kind, - target_rva: src.target_rva, - label: src.label, - dll: src.dll, - is_import: src.is_import, - is_indirect: true, - indirect_method: Some(src.method), - switch_cases: Vec::new(), - }); - } else if
insn.is_call { - // Unresolved CALL via register — always emit so the call site is visible. + let src = track_indirect_register(insns, idx, reg, image_base, pe, raw); + let is_unresolved = src + .as_ref() + .map(|s| !s.is_import && s.target_rva == 0 && s.iat_slot_rva == 0) + .unwrap_or(true); + + if let Some(src) = src { + // Emit resolved or partially-resolved result. + // For unresolved JMPs, only emit when hostile (they are handled by + // switch-dispatch otherwise). + if insn.is_call || hostile || !is_unresolved { + results.push(ApiCall { + rva: insn.rva, + kind, + target_rva: src.target_rva, + label: src.label, + dll: src.dll, + is_import: src.is_import, + is_indirect: true, + indirect_method: Some(src.method), + switch_cases: Vec::new(), + }); + } + } else if insn.is_call || hostile { + // Fallback: completely unresolvable register. results.push(ApiCall { rva: insn.rva, kind, @@ -1035,8 +1279,7 @@ pub fn collect_api_calls( switch_cases: Vec::new(), }); } - // Unresolved JMP via register: skip here — the switch-dispatch path in - // the caller resolves and merges those targets separately. + // Non-hostile unresolved JMP via register: left for the switch-dispatch path. 
} } diff --git a/resx/src/analysis/discovery.rs b/resx/src/analysis/discovery.rs new file mode 100644 index 0000000..4be9c4f --- /dev/null +++ b/resx/src/analysis/discovery.rs @@ -0,0 +1,417 @@ +use std::collections::{BTreeMap, BTreeSet, VecDeque}; + +use iced_x86::{Decoder, DecoderOptions, Mnemonic, OpKind}; +use serde::Serialize; + +use crate::analysis::symbols::{display_symbol_name, SymbolIndex}; +use crate::core::config::Config; +use crate::formats::pdb::PdbSymbol; +use crate::formats::pe::{ + read_data_summary, read_runtime_functions, Export, PeFile, PeRuntimeFunctionInfo, + PeStartupRoutine, +}; + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FunctionDiscoveryReport { + pub stats: FunctionDiscoveryStats, + pub functions: Vec, + pub notes: Vec, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FunctionDiscoveryStats { + pub total: usize, + pub exports: usize, + pub pdb: usize, + pub pdata: usize, + pub startup: usize, + pub call_targets: usize, + pub data_pointers: usize, + pub prologues: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct DiscoveredFunction { + pub rva: String, + pub va: String, + pub name: String, + pub size: String, + pub source: String, + pub kind: String, + pub section: String, + pub prototype: String, + pub confidence: u8, + pub reason: String, + pub flags: Vec, +} + +#[derive(Debug, Clone)] +struct FunctionSeed { + rva: u32, + name: String, + size: u64, + source: String, + kind: String, + prototype: String, + confidence: u8, + reason: String, + flags: BTreeSet, +} + +pub fn discover_functions( + raw: &[u8], + pe: &PeFile, + exports: &[Export], + symbol_index: &SymbolIndex, + pdb_symbols: &[PdbSymbol], + startup_routines: &[PeStartupRoutine], + cfg: &Config, +) -> FunctionDiscoveryReport { + let mut map: BTreeMap = BTreeMap::new(); + let mut stats = FunctionDiscoveryStats::default(); + + for export in exports.iter().filter(|e| executable_rva(pe, e.rva)) { + stats.exports += 1; + merge_seed( + &mut map, + 
FunctionSeed { + rva: export.rva, + name: display_symbol_name(&export.name), + size: 0, + source: "export".to_owned(), + kind: "export".to_owned(), + prototype: String::new(), + confidence: 95, + reason: format!("EAT ordinal {}", export.ordinal), + flags: BTreeSet::new(), + }, + ); + } + + for sym in pdb_symbols + .iter() + .filter(|s| s.rva != 0 && s.kind == "function" && executable_rva(pe, s.rva)) + { + stats.pdb += 1; + merge_seed( + &mut map, + FunctionSeed { + rva: sym.rva, + name: display_symbol_name(&sym.name), + size: sym.size, + source: "pdb".to_owned(), + kind: "function".to_owned(), + prototype: sym.type_name.clone(), + confidence: 98, + reason: "PDB function symbol".to_owned(), + flags: BTreeSet::new(), + }, + ); + } + + for runtime in read_runtime_functions(pe, raw) { + if !executable_rva(pe, runtime.begin_rva) { + continue; + } + stats.pdata += 1; + let mut flags = BTreeSet::new(); + flags.insert("unwind".to_owned()); + if runtime.exception_handler_rva != 0 { + flags.insert("exception-handler".to_owned()); + } + if runtime.chained_parent.is_some() { + flags.insert("chained-unwind".to_owned()); + } + merge_seed( + &mut map, + FunctionSeed { + rva: runtime.begin_rva, + name: best_name(symbol_index, pe, runtime.begin_rva), + size: runtime.end_rva.saturating_sub(runtime.begin_rva) as u64, + source: ".pdata".to_owned(), + kind: "runtime-function".to_owned(), + prototype: String::new(), + confidence: 94, + reason: format!( + ".pdata range 0x{:08X}..0x{:08X}", + runtime.begin_rva, runtime.end_rva + ), + flags, + }, + ); + } + + for startup in startup_routines + .iter() + .filter(|s| executable_rva(pe, s.rva)) + { + stats.startup += 1; + let mut flags = BTreeSet::new(); + flags.insert("startup".to_owned()); + merge_seed( + &mut map, + FunctionSeed { + rva: startup.rva, + name: best_name(symbol_index, pe, startup.rva), + size: 0, + source: startup.source.clone(), + kind: startup.kind.clone(), + prototype: String::new(), + confidence: 90, + reason: 
startup.note.clone(), + flags, + }, + ); + } + + for target_rva in collect_direct_targets(raw, pe, cfg.max_total.max(256)) { + stats.call_targets += 1; + merge_seed( + &mut map, + FunctionSeed { + rva: target_rva, + name: best_name(symbol_index, pe, target_rva), + size: 0, + source: "direct-target".to_owned(), + kind: "call-target".to_owned(), + prototype: String::new(), + confidence: 74, + reason: "direct CALL/JMP target in executable code".to_owned(), + flags: BTreeSet::new(), + }, + ); + } + + let data = read_data_summary(pe, raw); + for pointer in data.pointers.iter().filter(|p| p.kind == "code") { + stats.data_pointers += 1; + let mut flags = BTreeSet::new(); + flags.insert("address-taken".to_owned()); + merge_seed( + &mut map, + FunctionSeed { + rva: pointer.target_rva, + name: best_name(symbol_index, pe, pointer.target_rva), + size: 0, + source: pointer.section_name.clone(), + kind: "code-pointer".to_owned(), + prototype: String::new(), + confidence: 62, + reason: format!("code pointer at {}", hex32(pointer.rva)), + flags, + }, + ); + } + + for prologue in find_prologue_candidates(raw, pe, 256) { + stats.prologues += 1; + merge_seed( + &mut map, + FunctionSeed { + rva: prologue, + name: best_name(symbol_index, pe, prologue), + size: 0, + source: "prologue-scan".to_owned(), + kind: "prologue".to_owned(), + prototype: String::new(), + confidence: 45, + reason: "common function prologue byte pattern".to_owned(), + flags: BTreeSet::new(), + }, + ); + } + + let mut functions = map + .into_values() + .map(|seed| { + let section = pe + .rva_to_section(seed.rva) + .map(|section| section.name.clone()) + .unwrap_or_default(); + DiscoveredFunction { + rva: hex32(seed.rva), + va: hex64(pe.image_base + seed.rva as u64), + name: seed.name, + size: if seed.size == 0 { + String::new() + } else { + format!("0x{:X}", seed.size) + }, + source: seed.source, + kind: seed.kind, + section, + prototype: seed.prototype, + confidence: seed.confidence, + reason: seed.reason, + flags: 
seed.flags.into_iter().collect(), + } + }) + .collect::<Vec<_>>(); + functions.sort_by(|a, b| a.rva.cmp(&b.rva)); + stats.total = functions.len(); + + let mut notes = Vec::new(); + if stats.pdb == 0 { + notes.push("PDB function symbols unavailable or disabled".to_owned()); + } + if stats.pdata == 0 && pe.arch == 64 { + notes.push("no x64 .pdata runtime functions were recovered".to_owned()); + } + if stats.prologues > 0 { + notes.push( + "prologue hits are low-confidence hints and may include false positives".to_owned(), + ); + } + + FunctionDiscoveryReport { + stats, + functions, + notes, + } +} + +fn merge_seed(map: &mut BTreeMap<u32, FunctionSeed>, seed: FunctionSeed) { + map.entry(seed.rva) + .and_modify(|existing| { + if seed.confidence > existing.confidence { + let mut merged_flags = existing.flags.clone(); + merged_flags.extend(seed.flags.iter().cloned()); + *existing = seed.clone(); + existing.flags.extend(merged_flags); + } else { + existing.flags.extend(seed.flags.iter().cloned()); + if !seed.source.is_empty() && !existing.flags.contains(&seed.source) { + existing.flags.insert(format!("also:{}", seed.source)); + } + if existing.prototype.is_empty() && !seed.prototype.is_empty() { + existing.prototype = seed.prototype.clone(); + } + if existing.size == 0 && seed.size > 0 { + existing.size = seed.size; + } + } + }) + .or_insert(seed); +} + +fn collect_direct_targets(raw: &[u8], pe: &PeFile, max_targets: usize) -> BTreeSet<u32> { + let mut out = BTreeSet::new(); + let mut queue = VecDeque::new(); + queue.push_back(pe.entry_point); + + for section in pe.sections.iter().filter(|section| section.is_executable()) { + queue.push_back(section.virtual_address); + } + + let mut visited = BTreeSet::new(); + while let Some(start_rva) = queue.pop_front() { + if out.len() >= max_targets || !visited.insert(start_rva) || !executable_rva(pe, start_rva) + { + continue; + } + let Some(file_off) = pe.rva_to_offset(start_rva) else { + continue; + }; + let section_limit = pe + .rva_to_section(start_rva) +
.map(|section| { + section + .virtual_address + .saturating_add(section.virtual_size.max(section.raw_size)) + .saturating_sub(start_rva) as usize + }) + .unwrap_or(4096); + let chunk_len = section_limit + .min(16 * 1024) + .min(raw.len().saturating_sub(file_off)); + let mut decoder = Decoder::with_ip( + pe.arch, + &raw[file_off..file_off + chunk_len], + pe.image_base + start_rva as u64, + DecoderOptions::NONE, + ); + let mut insn = iced_x86::Instruction::default(); + let mut decoded = 0usize; + while decoder.can_decode() && decoded < 4096 && out.len() < max_targets { + decoder.decode_out(&mut insn); + if insn.is_invalid() || insn.len() == 0 { + break; + } + decoded += 1; + if matches!(insn.mnemonic(), Mnemonic::Call | Mnemonic::Jmp) { + if let Some(target_rva) = branch_target_rva(pe, &insn) { + if out.insert(target_rva) && visited.len() < max_targets { + queue.push_back(target_rva); + } + } + } + } + } + out +} + +fn find_prologue_candidates(raw: &[u8], pe: &PeFile, limit: usize) -> BTreeSet { + let mut out = BTreeSet::new(); + for section in pe.sections.iter().filter(|section| section.is_executable()) { + let start = section.raw_offset as usize; + let end = start + .saturating_add(section.raw_size as usize) + .min(raw.len()); + let mut off = start; + while off + 4 <= end && out.len() < limit { + let bytes = &raw[off..end.min(off + 8)]; + let is_x64_frame = bytes.starts_with(&[0x40, 0x53]) + || bytes.starts_with(&[0x48, 0x89, 0x5C]) + || bytes.starts_with(&[0x48, 0x83, 0xEC]) + || bytes.starts_with(&[0x55, 0x48, 0x8B, 0xEC]); + let is_x86_frame = + bytes.starts_with(&[0x55, 0x8B, 0xEC]) || bytes.starts_with(&[0x53, 0x56, 0x57]); + if is_x64_frame || is_x86_frame { + let rva = section.virtual_address + (off - start) as u32; + out.insert(rva); + } + off += 1; + } + } + out +} + +fn branch_target_rva(pe: &PeFile, instr: &iced_x86::Instruction) -> Option { + match instr.op0_kind() { + OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => { + let rva = 
pe.va_to_rva(instr.near_branch_target())?; + executable_rva(pe, rva).then_some(rva) + } + _ => None, + } +} + +fn executable_rva(pe: &PeFile, rva: u32) -> bool { + rva != 0 + && pe + .rva_to_section(rva) + .is_some_and(|section| section.is_executable()) +} + +fn best_name(symbol_index: &SymbolIndex, pe: &PeFile, rva: u32) -> String { + let va = pe.image_base + rva as u64; + if let Some(hit) = symbol_index.lookup(va) { + if hit.displacement == 0 { + return hit.symbol.name; + } + } + format!("sub_{:08X}", rva) +} + +fn hex32(value: u32) -> String { + format!("0x{:08X}", value) +} + +fn hex64(value: u64) -> String { + format!("0x{:016X}", value) +} + +#[allow(dead_code)] +fn runtime_size(runtime: &PeRuntimeFunctionInfo) -> u64 { + runtime.end_rva.saturating_sub(runtime.begin_rva) as u64 +} diff --git a/resx/src/analysis/follow/output.rs b/resx/src/analysis/follow/output.rs index 57bd381..2d51bde 100644 --- a/resx/src/analysis/follow/output.rs +++ b/resx/src/analysis/follow/output.rs @@ -45,6 +45,9 @@ pub fn print_call_tree( .collect(); extra.push_str(&c.dim(&format!(" call@[{}]", site_strs.join(", ")))); } + if let Some(ref via) = node.via_wrapper { + extra.push_str(&c.dim(&format!(" [via wrapper: {}]", via))); + } if node.depth == 0 { writeln!(w, "{}{} {}", func_label, extra, dll_label).ok(); @@ -86,15 +89,21 @@ pub fn print_call_flat(w: &mut dyn Write, node: &CallNode, cfg: &FollowScanConfi } else { String::new() }; + let wrapper = if let Some(ref via) = n.via_wrapper { + c.dim(&format!(" [via wrapper: {}]", via)) + } else { + String::new() + }; writeln!( w, - "{} {} {}!{}{}{}", + "{} {} {}!{}{}{}{}", indent, c.dim(arrow), c.cyan(&n.func.dll), c.b_yellow(n.func.display()), rva, - site + site, + wrapper ) .ok(); for child in &n.callers { @@ -119,12 +128,18 @@ pub fn print_call_list(w: &mut dyn Write, node: &CallNode, cfg: &FollowScanConfi } else { String::new() }; + let wrapper = if let Some(ref via) = n.via_wrapper { + c.dim(&format!(" [via wrapper: {}]", via)) + } 
else { + String::new() + }; writeln!( w, - " {}!{}{}", + " {}!{}{}{}", c.cyan(&n.func.dll), c.b_yellow(n.func.display()), - rva + rva, + wrapper ) .ok(); } @@ -145,6 +160,8 @@ pub struct NodeJson { #[serde(skip_serializing_if = "is_false")] pub internal: bool, pub depth: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub via_wrapper: Option, #[serde(skip_serializing_if = "Vec::is_empty")] pub call_sites: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] @@ -171,6 +188,7 @@ pub fn node_to_json(node: &CallNode) -> NodeJson { }, internal: node.func.is_internal, depth: node.depth, + via_wrapper: node.via_wrapper.clone(), truncated: node.truncated, call_sites: node .sites diff --git a/resx/src/analysis/follow/scan.rs b/resx/src/analysis/follow/scan.rs index fed3076..6e8dfbe 100644 --- a/resx/src/analysis/follow/scan.rs +++ b/resx/src/analysis/follow/scan.rs @@ -6,8 +6,10 @@ use std::time::UNIX_EPOCH; use serde::{Deserialize, Serialize}; +use crate::analysis::thunk::{follow_jmp_thunk, ThunkResolution}; use crate::core::config::Config; use crate::core::priority::{default_priority_dirs, matcher_from_lists}; +use crate::core::search::image_name_candidates; use crate::formats::pe::{ attribute_to_func, parse_pe, read_cstr, read_exports, read_u32, read_u64, }; @@ -39,6 +41,7 @@ pub struct FollowScanConfig { pub show_site: bool, pub quiet: bool, pub reload: bool, + pub hostile: bool, } impl FollowScanConfig { @@ -69,6 +72,7 @@ impl FollowScanConfig { show_site: cfg.show_site, quiet: cfg.quiet, reload: cfg.reload, + hostile: cfg.hostile, } } } @@ -83,6 +87,29 @@ pub struct CallSite { pub struct Caller { pub func: crate::analysis::follow::trace::FuncRef, pub sites: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub via_wrapper: Option, +} + +/// A named export that is purely a single-JMP stub (thunk) wrapping another target. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WrapperEntry { + /// Export name of the wrapper function in this image. + pub name: String, + /// RVA of the wrapper function in this image. + pub rva: u32, + /// What the wrapper ultimately resolves to. + pub resolves_to: WrapperTarget, +} + +/// The target a wrapper export resolves to. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "kind")] +pub enum WrapperTarget { + /// Resolves to another function inside the same image. + Direct { target_rva: u32 }, + /// Resolves to an IAT import (dll_base is lowercase, no extension). + Import { dll_base: String, func: String }, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -98,6 +125,9 @@ pub struct ReverseCallIndex { pub dll_path_str: String, pub direct: std::collections::HashMap>, pub imports: std::collections::HashMap>, + /// Exports in this image that are single-JMP wrappers around another target. + #[serde(default)] + pub wrappers: Vec, } pub struct ScanImage { @@ -258,6 +288,7 @@ fn push_indexed_site( let entry = bucket.entry(owner_rva).or_insert_with(|| Caller { func, sites: Vec::new(), + via_wrapper: None, }); entry.sites.push(site); } @@ -269,7 +300,7 @@ fn direct_target_rva(pe: &crate::formats::pe::PeFile, target_va: u64) -> Option< Some(rva) } -fn build_reverse_index(data: &ScanImageData, meta: &SourceMeta) -> ReverseCallIndex { +fn build_reverse_index(data: &ScanImageData, meta: &SourceMeta, hostile: bool) -> ReverseCallIndex { let mut direct: std::collections::HashMap> = std::collections::HashMap::new(); let mut imports: std::collections::HashMap> = @@ -291,60 +322,154 @@ fn build_reverse_index(data: &ScanImageData, meta: &SourceMeta) -> ReverseCallIn let sec = &data.raw[start..end]; let sec_va_base = data.pe.image_base + s.virtual_address as u64; - for i in 0..sec.len().saturating_sub(5) { - let b0 = sec[i]; - let b1 = sec.get(i + 1).copied().unwrap_or(0); - if (b0 == 0xFF && (b1 == 0x15 || b1 == 0x25)) && i + 6 <= sec.len() { 
- let slot_va = if data.pe.arch == 64 { - let rel32 = i32::from_le_bytes(sec[i + 2..i + 6].try_into().unwrap()); + if hostile { + use iced_x86::{Decoder, DecoderOptions, Mnemonic, OpKind, Register}; + let mut decoder = + Decoder::with_ip(data.pe.arch, sec, sec_va_base, DecoderOptions::NONE); + let mut instr = iced_x86::Instruction::default(); + + while decoder.can_decode() { + decoder.decode_out(&mut instr); + let len = instr.len(); + if len == 0 { + break; + } + + let site_rva = (instr.ip().wrapping_sub(data.pe.image_base)) as u32; + let m = instr.mnemonic(); + let is_call = m == Mnemonic::Call; + let is_jmp = m == Mnemonic::Jmp; + if !is_call && !is_jmp { + continue; + } + + match instr.op0_kind() { + OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => { + let target_va = instr.near_branch_target(); + if let Some(target_rva) = direct_target_rva(&data.pe, target_va) { + push_indexed_site( + direct.entry(target_rva).or_default(), + CallSite { + rva: site_rva, + pattern: if is_call { + "CALL rel".to_owned() + } else { + "JMP rel".to_owned() + }, + }, + data, + ); + } + } + + OpKind::Memory => { + let slot_va = + if matches!(instr.memory_base(), Register::RIP | Register::EIP) { + instr.ip_rel_memory_address() + } else if instr.memory_base() == Register::None + && instr.memory_index() == Register::None + { + instr.memory_displacement64() + } else { + 0 + }; + + if let Some((dll_base, func_name)) = import_slots.get(&slot_va) { + push_indexed_site( + imports + .entry(import_lookup_key(dll_base, func_name)) + .or_default(), + CallSite { + rva: site_rva, + pattern: if is_call { + "CALL [IAT]".to_owned() + } else { + "JMP [IAT]".to_owned() + }, + }, + data, + ); + } + } + + // Register-indirect: keep visible by site, even if unresolved. 
+ OpKind::Register => { + let reg = + format!("{:?}", instr.op0_register().full_register()).to_lowercase(); + push_indexed_site( + direct.entry(site_rva).or_default(), + CallSite { + rva: site_rva, + pattern: if is_call { + format!("CALL {reg}") + } else { + format!("JMP {reg}") + }, + }, + data, + ); + } + + _ => {} + } + } + } else { + // Fast raw-byte scan for the common E8/E9 and FF 15/25 patterns. + for i in 0..sec.len().saturating_sub(5) { + let b0 = sec[i]; + let b1 = sec.get(i + 1).copied().unwrap_or(0); + if (b0 == 0xFF && (b1 == 0x15 || b1 == 0x25)) && i + 6 <= sec.len() { + let slot_va = if data.pe.arch == 64 { + let rel32 = i32::from_le_bytes(sec[i + 2..i + 6].try_into().unwrap()); + let instr_va = sec_va_base + i as u64; + (instr_va as i64 + 6 + rel32 as i64) as u64 + } else { + u32::from_le_bytes(sec[i + 2..i + 6].try_into().unwrap()) as u64 + }; + if let Some((dll_base, func_name)) = import_slots.get(&slot_va) { + push_indexed_site( + imports + .entry(import_lookup_key(dll_base, func_name)) + .or_default(), + CallSite { + rva: s.virtual_address + i as u32, + pattern: if b1 == 0x15 { + "CALL [IAT]" + } else { + "JMP [IAT]" + } + .to_owned(), + }, + data, + ); + } + continue; + } + + if (b0 == 0xE8 || b0 == 0xE9) && i + 5 <= sec.len() { + let rel32 = i32::from_le_bytes(sec[i + 1..i + 5].try_into().unwrap()); let instr_va = sec_va_base + i as u64; - (instr_va as i64 + 6 + rel32 as i64) as u64 - } else { - u32::from_le_bytes(sec[i + 2..i + 6].try_into().unwrap()) as u64 - }; - if let Some((dll_base, func_name)) = import_slots.get(&slot_va) { + let target_va = (instr_va as i64 + 5 + rel32 as i64) as u64; + let Some(target_rva) = direct_target_rva(&data.pe, target_va) else { + continue; + }; push_indexed_site( - imports - .entry(import_lookup_key(dll_base, func_name)) - .or_default(), + direct.entry(target_rva).or_default(), CallSite { rva: s.virtual_address + i as u32, - pattern: if b1 == 0x15 { - "CALL [IAT]" + pattern: if b0 == 0xE8 { + "CALL rel32" } 
else { - "JMP [IAT]" + "JMP rel32 (tail)" } .to_owned(), }, data, ); } - continue; - } - - if (b0 == 0xE8 || b0 == 0xE9) && i + 5 <= sec.len() { - let rel32 = i32::from_le_bytes(sec[i + 1..i + 5].try_into().unwrap()); - let instr_va = sec_va_base + i as u64; - let target_va = (instr_va as i64 + 5 + rel32 as i64) as u64; - let Some(target_rva) = direct_target_rva(&data.pe, target_va) else { - continue; - }; - push_indexed_site( - direct.entry(target_rva).or_default(), - CallSite { - rva: s.virtual_address + i as u32, - pattern: if b0 == 0xE8 { - "CALL rel32" - } else { - "JMP rel32 (tail)" - } - .to_owned(), - }, - data, - ); } } - } + } // end for s in &data.pe.sections let direct = direct .into_iter() @@ -364,6 +489,56 @@ fn build_reverse_index(data: &ScanImageData, meta: &SourceMeta) -> ReverseCallIn }) .collect(); + // Wrapper detection: scan every named export for a single-JMP stub. + // Always-on — not gated on `hostile` — because detecting callers via wrappers + // is a fundamental capability independent of aggressive tracing. + let mut wrappers: Vec = Vec::new(); + for export in &data.exports { + if export.name.is_empty() || export.rva == 0 { + continue; + } + let Some(res) = follow_jmp_thunk(&data.raw, &data.pe, export.rva) else { + continue; + }; + let target = match res { + ThunkResolution::Iat { dll, func, .. } => WrapperTarget::Import { + dll_base: normalize_dll_base(&dll), + func, + }, + ThunkResolution::Chain { + ref final_target, .. + } => match final_target.as_ref() { + ThunkResolution::Iat { dll, func, .. 
} => WrapperTarget::Import { + dll_base: normalize_dll_base(dll), + func: func.clone(), + }, + ThunkResolution::Direct { target_rva } => { + if *target_rva != export.rva { + WrapperTarget::Direct { + target_rva: *target_rva, + } + } else { + continue; + } + } + _ => continue, + }, + ThunkResolution::Direct { target_rva } => { + if target_rva != export.rva { + WrapperTarget::Direct { target_rva } + } else { + continue; + } + } + ThunkResolution::IatUnresolved { .. } => continue, + }; + wrappers.push(WrapperEntry { + name: export.name.clone(), + rva: export.rva, + resolves_to: target, + }); + } + ReverseCallIndex { source_path: meta.path.clone(), file_len: meta.file_len, @@ -376,6 +551,7 @@ fn build_reverse_index(data: &ScanImageData, meta: &SourceMeta) -> ReverseCallIn dll_path_str: data.dll_path_str.clone(), direct, imports, + wrappers, } } @@ -398,11 +574,7 @@ pub fn find_target_dll(name: &str, cfg: &FollowScanConfig) -> Result = cfg.extra_paths.iter().map(PathBuf::from).collect(); if !cfg.no_cwd { @@ -422,9 +594,11 @@ pub fn find_target_dll(name: &str, cfg: &FollowScanConfig) -> Result Option, pub callers: Vec, pub truncated: bool, + /// Set when this node reached its target via a wrapper/thunk export. + /// Format: "dll_name!ExportName" + pub via_wrapper: Option, } pub struct TraceCtx<'a> { @@ -181,6 +184,48 @@ impl GlobalCallGraph { for callers in imports.values_mut() { dedup_callers(callers); } + + // Wrapper expansion: for each export in any scanned image that is a JMP-thunk + // wrapper around another target, inject the wrapper's own callers as indirect + // callers of the final target, annotated with `via_wrapper`. 
+ for index in &loaded { + for wrapper in &index.wrappers { + let wrapper_dt = DirectTarget { + dll_base: index.dll_base_lower.clone(), + rva: wrapper.rva, + }; + let wrapper_callers = match direct.get(&wrapper_dt) { + Some(c) if !c.is_empty() => c.clone(), + _ => continue, + }; + let via = format!("{}!{}", index.dll_name, wrapper.name); + let expanded: Vec = wrapper_callers + .into_iter() + .map(|mut c| { + c.via_wrapper = Some(via.clone()); + c + }) + .collect(); + match &wrapper.resolves_to { + WrapperTarget::Import { dll_base, func } => { + imports + .entry(import_lookup_key(dll_base, func)) + .or_default() + .extend(expanded); + } + WrapperTarget::Direct { target_rva } => { + direct + .entry(DirectTarget { + dll_base: index.dll_base_lower.clone(), + rva: *target_rva, + }) + .or_default() + .extend(expanded); + } + } + } + } + stage.finish(); Self { @@ -218,10 +263,17 @@ impl GlobalCallGraph { fn dedup_callers(callers: &mut Vec) { let mut merged: std::collections::BTreeMap = std::collections::BTreeMap::new(); for caller in callers.drain(..) { - let key = caller.func.key().to_owned(); + // Include via_wrapper in the key so a function that calls both directly + // and via a wrapper is preserved as two distinct entries. 
+ let key = format!( + "{}|{}", + caller.func.key(), + caller.via_wrapper.as_deref().unwrap_or("") + ); let entry = merged.entry(key).or_insert_with(|| Caller { func: caller.func.clone(), sites: Vec::new(), + via_wrapper: caller.via_wrapper.clone(), }); entry.sites.extend(caller.sites); } @@ -246,6 +298,7 @@ struct FlatEntry { depth: usize, truncated: bool, children: Vec, + via_wrapper: Option, } pub fn build_call_tree( @@ -260,6 +313,7 @@ pub fn build_call_tree( depth: 0, truncated: false, children: Vec::new(), + via_wrapper: None, }]; let mut frontier: Vec<(FuncRef, usize)> = vec![(root, 0)]; @@ -317,6 +371,7 @@ pub fn build_call_tree( depth: depth + 1, truncated: !is_new, children: Vec::new(), + via_wrapper: caller.via_wrapper, }); flat[*parent_idx].children.push(new_idx); if is_new { @@ -349,6 +404,7 @@ pub fn build_call_tree( depth: e.depth, sites: e.sites.clone(), truncated: e.truncated, + via_wrapper: e.via_wrapper.clone(), callers: e .children .iter() diff --git a/resx/src/analysis/indirect.rs b/resx/src/analysis/indirect.rs new file mode 100644 index 0000000..ec4c9a0 --- /dev/null +++ b/resx/src/analysis/indirect.rs @@ -0,0 +1,167 @@ +use std::collections::BTreeSet; + +use serde::Serialize; + +use crate::analysis::disasm::ApiCall; +use crate::formats::pe::{ImportDll, PeDataSummary, PeFile, PeLoadConfigInfo}; + +#[derive(Debug, Clone, Default, Serialize)] +pub struct IndirectFlowReport { + pub edges: Vec, + pub tables: Vec, + pub mitigations: Vec, + pub notes: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct IndirectEdge { + pub site_rva: String, + pub kind: String, + pub target_rva: String, + pub target: String, + pub source: String, + pub confidence: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct IndirectTable { + pub rva: String, + pub kind: String, + pub entries: Vec, + pub confidence: String, +} + +pub fn analyze_indirect_flow( + pe: &PeFile, + imports: &[ImportDll], + data: &PeDataSummary, + api_calls: &[ApiCall], + load_config: 
Option<&PeLoadConfigInfo>, +) -> IndirectFlowReport { + let mut report = IndirectFlowReport::default(); + let mut seen_edges = BTreeSet::new(); + + for call in api_calls.iter().filter(|call| call.is_indirect) { + let key = (call.rva, call.target_rva, call.label.clone()); + if !seen_edges.insert(key) { + continue; + } + let target = if call.dll.is_empty() { + call.label.clone() + } else { + format!("{}!{}", call.dll, call.label) + }; + report.edges.push(IndirectEdge { + site_rva: hex32(call.rva), + kind: call.kind.clone(), + target_rva: if call.target_rva == 0 { + String::new() + } else { + hex32(call.target_rva) + }, + target, + source: call + .indirect_method + .clone() + .unwrap_or_else(|| "indirect operand".to_owned()), + confidence: if call.target_rva == 0 && !call.is_import { + "low" + } else { + "high" + } + .to_owned(), + }); + } + + for vtable in &data.vtables { + report.tables.push(IndirectTable { + rva: hex32(vtable.rva), + kind: "vtable".to_owned(), + entries: vtable.entries.iter().map(|rva| hex32(*rva)).collect(), + confidence: "medium".to_owned(), + }); + } + + for pointer in data.pointers.iter().filter(|ptr| ptr.kind == "code") { + let key = (pointer.rva, pointer.target_rva, "data-pointer".to_owned()); + if !seen_edges.insert(key) { + continue; + } + report.edges.push(IndirectEdge { + site_rva: hex32(pointer.rva), + kind: "data-pointer".to_owned(), + target_rva: hex32(pointer.target_rva), + target: format!("sub_{:08X}", pointer.target_rva), + source: pointer.section_name.clone(), + confidence: "medium".to_owned(), + }); + } + + for dll in imports { + for entry in &dll.entries { + report.edges.push(IndirectEdge { + site_rva: hex32(entry.slot_rva), + kind: "iat-slot".to_owned(), + target_rva: String::new(), + target: if entry.by_ord { + format!("{}!#{}", dll.dll, entry.ordinal) + } else { + format!("{}!{}", dll.dll, entry.name) + }, + source: ".idata".to_owned(), + confidence: "high".to_owned(), + }); + } + } + + if let Some(load) = load_config { + if 
load.guard_cf_function_count > 0 || load.guard_flags != 0 { + report.mitigations.push(format!( + "CFG guard flags=0x{:X} functions={}", + load.guard_flags, load.guard_cf_function_count + )); + } + if load.guard_eh_continuation_count > 0 { + report.mitigations.push(format!( + "GuardEH continuations={}", + load.guard_eh_continuation_count + )); + } + if load.guard_xfg_check_function_pointer != 0 { + report.mitigations.push(format!( + "XFG check pointer=0x{:X}", + load.guard_xfg_check_function_pointer + )); + } + } + + report.edges.sort_by(|a, b| { + a.site_rva + .cmp(&b.site_rva) + .then_with(|| a.kind.cmp(&b.kind)) + .then_with(|| a.target.cmp(&b.target)) + }); + report + .edges + .dedup_by(|a, b| a.site_rva == b.site_rva && a.kind == b.kind && a.target == b.target); + report.tables.sort_by(|a, b| a.rva.cmp(&b.rva)); + if report.tables.is_empty() && report.edges.is_empty() { + report + .notes + .push("no indirect tables or indirect call sites were recovered".to_owned()); + } + if !pe + .sections + .iter() + .any(|section| section.name.eq_ignore_ascii_case(".pdata")) + { + report + .notes + .push("no .pdata section; indirect recovery relies on local decode bounds".to_owned()); + } + report +} + +fn hex32(value: u32) -> String { + format!("0x{:08X}", value) +} diff --git a/resx/src/analysis/ir.rs b/resx/src/analysis/ir.rs new file mode 100644 index 0000000..09baca4 --- /dev/null +++ b/resx/src/analysis/ir.rs @@ -0,0 +1,357 @@ +use iced_x86::{Mnemonic, OpKind, Register}; +use serde::Serialize; + +use crate::analysis::disasm::{is_jcc, is_ret, Instruction}; +use crate::analysis::symbols::SymbolIndex; + +#[derive(Debug, Clone, Serialize)] +pub struct IrOp { + pub rva: String, + pub op: String, + pub dst: String, + pub src: Vec, + pub ty: String, + pub detail: String, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct TypedIrSummary { + pub prototype: String, + pub param_hints: Vec, + pub stack_slots: Vec, + pub memory_refs: Vec, + pub ops: Vec, +} + 
+#[derive(Debug, Clone, Serialize)] +pub struct TypeHint { + pub name: String, + pub ty: String, + pub source: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct StackSlot { + pub offset: String, + pub access: String, + pub width: u32, +} + +#[derive(Debug, Clone, Serialize)] +pub struct MemoryRef { + pub rva: String, + pub access: String, + pub base: String, + pub index: String, + pub displacement: String, + pub resolved: String, +} + +pub fn summarize_typed_ir( + insns: &[Instruction], + image_base: u64, + symbols: Option<&SymbolIndex>, + prototype: &str, +) -> TypedIrSummary { + let mut summary = TypedIrSummary { + prototype: prototype.to_owned(), + param_hints: parse_param_hints(prototype), + stack_slots: Vec::new(), + memory_refs: Vec::new(), + ops: Vec::new(), + }; + + for insn in insns { + if let Some(slot) = stack_slot(insn) { + if !summary + .stack_slots + .iter() + .any(|existing| existing.offset == slot.offset && existing.access == slot.access) + { + summary.stack_slots.push(slot); + } + } + if let Some(memory) = memory_ref(insn, image_base, symbols) { + summary.memory_refs.push(memory); + } + summary.ops.push(ir_op(insn, image_base, symbols)); + } + + summary.stack_slots.sort_by(|a, b| a.offset.cmp(&b.offset)); + summary.memory_refs.sort_by(|a, b| a.rva.cmp(&b.rva)); + summary +} + +fn ir_op(insn: &Instruction, image_base: u64, symbols: Option<&SymbolIndex>) -> IrOp { + let iced = &insn.iced; + let op = match iced.mnemonic() { + Mnemonic::Mov | Mnemonic::Movsx | Mnemonic::Movsxd | Mnemonic::Movzx => "assign", + Mnemonic::Lea => "address", + Mnemonic::Add | Mnemonic::Sub | Mnemonic::Imul | Mnemonic::Mul => "arith", + Mnemonic::And | Mnemonic::Or | Mnemonic::Xor | Mnemonic::Not | Mnemonic::Neg => "bit", + Mnemonic::Cmp | Mnemonic::Test => "compare", + Mnemonic::Call => "call", + Mnemonic::Jmp => "jump", + m if is_jcc(m) => "branch", + m if is_ret(m) => "return", + Mnemonic::Push | Mnemonic::Pop => "stack", + Mnemonic::Syscall | 
Mnemonic::Sysenter | Mnemonic::Int => "syscall", + _ => "other", + } + .to_owned(); + + let dst = if iced.op_count() > 0 { + operand_text(iced, 0, image_base, symbols) + } else { + String::new() + }; + let src = (1..iced.op_count()) + .map(|idx| operand_text(iced, idx, image_base, symbols)) + .collect::>(); + let ty = infer_type(iced); + let detail = if insn.comment.is_empty() { + insn.text.clone() + } else { + format!("{} ; {}", insn.text, insn.comment) + }; + + IrOp { + rva: hex32(insn.rva), + op, + dst, + src, + ty, + detail, + } +} + +fn operand_text( + instr: &iced_x86::Instruction, + op_idx: u32, + image_base: u64, + symbols: Option<&SymbolIndex>, +) -> String { + match instr.op_kind(op_idx) { + OpKind::Register => reg_name(instr.op_register(op_idx)), + OpKind::Memory => memory_expr(instr, image_base, symbols), + OpKind::Immediate8 => format!("0x{:X}", instr.immediate8()), + OpKind::Immediate16 => format!("0x{:X}", instr.immediate16()), + OpKind::Immediate32 | OpKind::Immediate32to64 => format!("0x{:X}", instr.immediate32()), + OpKind::Immediate64 => format!("0x{:X}", instr.immediate64()), + OpKind::NearBranch16 | OpKind::NearBranch32 | OpKind::NearBranch64 => { + let target = instr.near_branch_target(); + symbols + .and_then(|idx| idx.describe(target)) + .unwrap_or_else(|| format!("0x{:X}", target)) + } + _ => "?".to_owned(), + } +} + +fn memory_expr( + instr: &iced_x86::Instruction, + image_base: u64, + symbols: Option<&SymbolIndex>, +) -> String { + if let Some(addr) = absolute_memory_address(instr) { + if let Some(desc) = symbols.and_then(|idx| idx.describe(addr)) { + return format!("*({})", desc); + } + if addr >= image_base { + return format!("*(image+0x{:X})", addr - image_base); + } + return format!("*(0x{:X})", addr); + } + + let base = reg_name(instr.memory_base()); + let index = reg_name(instr.memory_index()); + let scale = instr.memory_index_scale(); + let disp = instr.memory_displacement64() as i64; + let mut parts = Vec::new(); + if 
!base.is_empty() { + parts.push(base); + } + if !index.is_empty() { + parts.push(if scale > 1 { + format!("{index}*{scale}") + } else { + index + }); + } + if disp > 0 { + parts.push(format!("0x{:X}", disp)); + } else if disp < 0 { + parts.push(format!("-0x{:X}", (-disp) as u64)); + } + if parts.is_empty() { + "*(?)".to_owned() + } else { + format!("*({})", parts.join("+")) + } +} + +fn stack_slot(insn: &Instruction) -> Option { + let instr = &insn.iced; + if instr.memory_base().full_register() != Register::RSP + && instr.memory_base().full_register() != Register::RBP + && instr.memory_base().full_register() != Register::ESP + && instr.memory_base().full_register() != Register::EBP + { + return None; + } + let access = if instr.op0_kind() == OpKind::Memory { + "write" + } else { + "read" + }; + Some(StackSlot { + offset: format!( + "{}+0x{:X}", + reg_name(instr.memory_base()), + instr.memory_displacement64() + ), + access: access.to_owned(), + width: instr.memory_size().size() as u32, + }) +} + +fn memory_ref( + insn: &Instruction, + image_base: u64, + symbols: Option<&SymbolIndex>, +) -> Option { + let instr = &insn.iced; + if instr.op_count() == 0 || !uses_memory(instr) { + return None; + } + let access = if instr.op0_kind() == OpKind::Memory { + "write" + } else { + "read" + }; + let resolved = absolute_memory_address(instr) + .and_then(|addr| { + symbols.and_then(|idx| idx.describe(addr)).or_else(|| { + (addr >= image_base).then(|| format!("image+0x{:X}", addr - image_base)) + }) + }) + .unwrap_or_default(); + + Some(MemoryRef { + rva: hex32(insn.rva), + access: access.to_owned(), + base: reg_name(instr.memory_base()), + index: reg_name(instr.memory_index()), + displacement: format!("0x{:X}", instr.memory_displacement64()), + resolved, + }) +} + +fn uses_memory(instr: &iced_x86::Instruction) -> bool { + (0..instr.op_count()).any(|idx| instr.op_kind(idx) == OpKind::Memory) +} + +fn infer_type(instr: &iced_x86::Instruction) -> String { + let width = if 
uses_memory(instr) { + instr.memory_size().size() as u32 * 8 + } else if instr.op_count() > 0 && instr.op0_kind() == OpKind::Register { + register_width(instr.op0_register()) + } else { + 0 + }; + if width == 0 { + String::new() + } else { + format!("u{}", width) + } +} + +fn register_width(reg: Register) -> u32 { + match reg.full_register() { + Register::RAX + | Register::RBX + | Register::RCX + | Register::RDX + | Register::RSI + | Register::RDI + | Register::RSP + | Register::RBP + | Register::R8 + | Register::R9 + | Register::R10 + | Register::R11 + | Register::R12 + | Register::R13 + | Register::R14 + | Register::R15 => 64, + Register::EAX + | Register::EBX + | Register::ECX + | Register::EDX + | Register::ESI + | Register::EDI + | Register::ESP + | Register::EBP + | Register::R8D + | Register::R9D + | Register::R10D + | Register::R11D + | Register::R12D + | Register::R13D + | Register::R14D + | Register::R15D => 32, + _ => 0, + } +} + +fn absolute_memory_address(instr: &iced_x86::Instruction) -> Option { + if matches!(instr.memory_base(), Register::RIP | Register::EIP) { + return Some(instr.ip_rel_memory_address()); + } + if instr.memory_base() == Register::None && instr.memory_index() == Register::None { + let disp = instr.memory_displacement64(); + if disp != 0 { + return Some(disp); + } + } + None +} + +fn parse_param_hints(prototype: &str) -> Vec { + let Some(open) = prototype.rfind('(') else { + return Vec::new(); + }; + let Some(close) = prototype.rfind(')') else { + return Vec::new(); + }; + if close <= open { + return Vec::new(); + } + prototype[open + 1..close] + .split(',') + .enumerate() + .filter_map(|(idx, raw)| { + let ty = raw.trim(); + if ty.is_empty() || ty.eq_ignore_ascii_case("void") { + return None; + } + Some(TypeHint { + name: format!("param_{}", idx + 1), + ty: ty.to_owned(), + source: "pdb-prototype".to_owned(), + }) + }) + .collect() +} + +fn reg_name(reg: Register) -> String { + if reg == Register::None { + String::new() + } else { + 
format!("{:?}", reg.full_register()).to_ascii_lowercase() + } +} + +fn hex32(value: u32) -> String { + format!("0x{:08X}", value) +} diff --git a/resx/src/analysis/mod.rs b/resx/src/analysis/mod.rs index 4913871..7ed6e54 100644 --- a/resx/src/analysis/mod.rs +++ b/resx/src/analysis/mod.rs @@ -1,10 +1,15 @@ pub mod cfgview; pub mod disasm; +pub mod discovery; pub mod edr; pub mod explain; pub mod follow; +pub mod indirect; pub mod intelli; +pub mod ir; pub mod recomp; +pub mod reconstruct; +pub mod recursive_cfg; pub mod symbols; pub mod thunk; pub mod yara; diff --git a/resx/src/analysis/reconstruct.rs b/resx/src/analysis/reconstruct.rs new file mode 100644 index 0000000..992812d --- /dev/null +++ b/resx/src/analysis/reconstruct.rs @@ -0,0 +1,2001 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::Write as _; + +use iced_x86::{Mnemonic, OpKind, Register}; +use serde::Serialize; + +use crate::analysis::disasm::{collect_api_calls, disassemble_at, is_ret, ApiCall, Instruction}; +use crate::analysis::discovery::{discover_functions, FunctionDiscoveryReport}; +use crate::analysis::symbols::SymbolIndex; +use crate::core::color::Colors; +use crate::core::config::Config; +use crate::formats::pdb::PdbSymbol; +use crate::formats::pe::{ + read_runtime_function, read_u32, read_u64, Export, PeFile, PeStartupRoutine, +}; + +#[derive(Debug, Serialize)] +pub struct ReconstructReport { + pub image: String, + pub path: String, + pub arch: String, + pub image_base: String, + pub entry_point: String, + pub pdb: PdbInfo, + pub function_discovery: FunctionDiscoveryReport, + pub roots: Vec, + pub stats: ReconstructStats, + pub notes: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct PdbInfo { + pub enabled: bool, + pub loaded: bool, + pub symbol_count: usize, + pub function_count: usize, + pub sized_function_count: usize, + pub status: String, + pub error: String, +} + +impl PdbInfo { + pub fn disabled() -> Self { + Self { + enabled: false, + loaded: false, + 
symbol_count: 0, + function_count: 0, + sized_function_count: 0, + status: "disabled".to_owned(), + error: String::new(), + } + } + + pub fn loaded(symbols: &[PdbSymbol]) -> Self { + let function_count = symbols.iter().filter(|sym| sym.kind == "function").count(); + let sized_function_count = symbols + .iter() + .filter(|sym| sym.kind == "function" && sym.size > 0) + .count(); + Self { + enabled: true, + loaded: true, + symbol_count: symbols.len(), + function_count, + sized_function_count, + status: "loaded".to_owned(), + error: String::new(), + } + } + + pub fn unavailable(error: String) -> Self { + Self { + enabled: true, + loaded: false, + symbol_count: 0, + function_count: 0, + sized_function_count: 0, + status: "unavailable".to_owned(), + error, + } + } +} + +#[derive(Debug, Default, Serialize)] +pub struct ReconstructStats { + pub roots: usize, + pub functions_expanded: usize, + pub call_edges: usize, + pub import_edges: usize, + pub indirect_edges: usize, + pub thread_edges: usize, + pub workpool_edges: usize, + pub thread_api_edges: usize, + pub exception_edges: usize, + pub cycle_edges: usize, + pub truncated_edges: usize, + pub decode_errors: usize, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FlowFunction { + pub name: String, + pub kind: String, + pub rva: String, + pub va: String, + pub section: String, + pub symbol_source: String, + pub symbol_category: String, + pub symbol_size: String, + pub prototype: String, + pub decode_bound: String, + pub thread_lane: usize, + pub note: String, + pub status: String, + pub edges: Vec, + pub returns: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FlowEdge { + pub site_rva: String, + pub kind: String, + pub target: String, + pub target_rva: String, + pub target_va: String, + pub target_source: String, + pub target_category: String, + pub thread_lane: usize, + pub tags: Vec, + pub detail: String, + pub relation: String, + pub child: Option>, +} + +#[derive(Debug, Clone, Copy)] +struct 
CallbackSpec { + relation: &'static str, + tag: &'static str, + arg_index: usize, +} + +#[derive(Debug, Clone)] +struct PdbFunction { + size: u64, + type_name: String, +} + +struct TraceContext<'a> { + raw: &'a [u8], + pe: &'a PeFile, + exports: &'a [Export], + symbol_index: &'a SymbolIndex, + pdb_functions: HashMap, + arch: u32, + image_base: u64, + cfg: &'a Config, + expanded: HashSet, + stats: ReconstructStats, + max_depth: usize, + max_total: usize, + next_lane: usize, +} + +#[allow(clippy::too_many_arguments)] +pub fn reconstruct_image( + image: &str, + path: &str, + raw: &[u8], + pe: &PeFile, + exports: &[Export], + symbol_index: &SymbolIndex, + pdb_symbols: &[PdbSymbol], + pdb: PdbInfo, + startup_routines: &[PeStartupRoutine], + arch: u32, + cfg: &Config, +) -> ReconstructReport { + let pdb_functions = build_pdb_function_index(pdb_symbols); + let mut ctx = TraceContext { + raw, + pe, + exports, + symbol_index, + pdb_functions, + arch, + image_base: pe.image_base, + cfg, + expanded: HashSet::new(), + stats: ReconstructStats::default(), + max_depth: cfg.depth.max(1), + max_total: cfg.max_total.max(1), + next_lane: 1, + }; + + let roots = if startup_routines.is_empty() { + vec![PeStartupRoutine { + kind: "PE Entry Point".to_owned(), + source: "AddressOfEntryPoint".to_owned(), + rva: pe.entry_point, + va: pe.image_base + pe.entry_point as u64, + section_name: pe + .rva_to_section(pe.entry_point) + .map(|section| section.name.clone()) + .unwrap_or_default(), + note: "loader transfers control here after image initialization".to_owned(), + }] + } else { + startup_routines.to_vec() + }; + + let mut traced_roots = Vec::new(); + for root in roots { + if !is_executable_rva(pe, root.rva) { + continue; + } + let mut path_stack = HashSet::new(); + traced_roots.push(ctx.trace_function( + root.rva, + root_name(&root, symbol_index, pe.image_base), + root.kind, + root.source, + root.note, + 0, + 0, + &mut path_stack, + )); + } + + ctx.stats.roots = traced_roots.len(); + let 
notes = vec![ + "static best-effort reconstruction; runtime dispatch, data-dependent branches, and dynamically generated code may be incomplete".to_owned(), + "thread/workpool callback edges are shown only when the callback argument resolves to executable code in the same image".to_owned(), + "exception edges use x64 unwind handler RVAs when present; language-specific scope tables are not fully expanded".to_owned(), + ]; + + ReconstructReport { + image: image.to_owned(), + path: path.to_owned(), + arch: format!("x{}", arch), + image_base: hex64(pe.image_base), + entry_point: hex32(pe.entry_point), + pdb, + function_discovery: discover_functions( + raw, + pe, + exports, + symbol_index, + pdb_symbols, + startup_routines, + cfg, + ), + roots: traced_roots, + stats: ctx.stats, + notes, + } +} + +impl<'a> TraceContext<'a> { + #[allow(clippy::too_many_arguments)] + fn trace_function( + &mut self, + rva: u32, + name: String, + kind: String, + source: String, + note: String, + depth: usize, + lane: usize, + path_stack: &mut HashSet, + ) -> FlowFunction { + let sym = self.symbol_meta(rva); + let decode_bound = self.decode_bound_label(rva, sym.as_ref()); + let symbol_source = sym + .as_ref() + .map(|s| s.source.clone()) + .unwrap_or_else(|| "synthetic".to_owned()); + let symbol_category = classify_function_symbol(&name, &symbol_source, sym.as_ref()); + let section = self + .pe + .rva_to_section(rva) + .map(|section| section.name.clone()) + .unwrap_or_default(); + let mut node = FlowFunction { + name, + kind, + rva: hex32(rva), + va: hex64(self.image_base + rva as u64), + section, + symbol_source, + symbol_category, + symbol_size: sym + .as_ref() + .and_then(|s| (s.size > 0).then(|| format!("0x{:X}", s.size))) + .unwrap_or_default(), + prototype: sym + .as_ref() + .map(|s| s.prototype.clone()) + .unwrap_or_default(), + decode_bound, + thread_lane: lane, + note: join_detail(&[source, note]), + status: "expanded".to_owned(), + edges: Vec::new(), + returns: Vec::new(), + }; + + 
if !path_stack.insert(rva) { + node.status = "cycle".to_owned(); + self.stats.cycle_edges += 1; + return node; + } + + if depth >= self.max_depth { + node.status = format!("truncated: max depth {}", self.max_depth); + self.stats.truncated_edges += 1; + path_stack.remove(&rva); + return node; + } + + if self.expanded.len() >= self.max_total { + node.status = format!("truncated: max total {}", self.max_total); + self.stats.truncated_edges += 1; + path_stack.remove(&rva); + return node; + } + + if !self.expanded.insert(rva) { + node.status = "already expanded elsewhere".to_owned(); + path_stack.remove(&rva); + return node; + } + self.stats.functions_expanded += 1; + + let Some(file_off) = self.pe.rva_to_offset(rva) else { + node.status = "decode error: RVA is not mapped to a file offset".to_owned(); + self.stats.decode_errors += 1; + path_stack.remove(&rva); + return node; + }; + + let mut decode_cfg = self.cfg.clone(); + if let Some(size) = sym.as_ref().and_then(|s| (s.size > 0).then_some(s.size)) { + let size = size.min(usize::MAX as u64) as usize; + if size > 0 { + decode_cfg.max_bytes = if decode_cfg.max_bytes == 0 { + size + } else { + decode_cfg.max_bytes.min(size) + }; + } + } + + let insns = match disassemble_at( + self.raw, + self.pe, + file_off, + rva, + self.arch, + self.image_base, + self.exports, + Some(self.symbol_index), + &decode_cfg, + ) { + Ok(insns) => insns, + Err(err) => { + node.status = format!("decode error: {}", err); + self.stats.decode_errors += 1; + path_stack.remove(&rva); + return node; + } + }; + + node.returns = insns + .iter() + .filter(|insn| is_ret(insn.iced.mnemonic())) + .map(|insn| hex32(insn.rva)) + .collect(); + + let mut calls = collect_api_calls( + &insns, + self.pe, + self.raw, + self.symbol_index, + self.image_base, + true, + ); + calls.sort_by_key(|call| call.rva); + + for call in calls { + node.edges + .push(self.edge_from_call(&insns, &call, depth, lane, path_stack)); + } + + if let Some(runtime) = 
read_runtime_function(self.pe, self.raw, rva) { + if runtime.exception_handler_rva != 0 + && runtime.exception_handler_rva != rva + && is_executable_rva(self.pe, runtime.exception_handler_rva) + { + self.stats.exception_edges += 1; + let handler_rva = runtime.exception_handler_rva; + let handler_name = + best_symbol_name(self.symbol_index, self.image_base, handler_rva); + let handler_meta = self.symbol_meta(handler_rva); + let handler_source = handler_meta + .as_ref() + .map(|s| s.source.clone()) + .unwrap_or_else(|| "synthetic".to_owned()); + let handler_category = + classify_function_symbol(&handler_name, &handler_source, handler_meta.as_ref()); + let child = if path_stack.contains(&handler_rva) { + None + } else { + Some(Box::new(self.trace_function( + handler_rva, + handler_name.clone(), + "Exception Handler".to_owned(), + ".pdata unwind".to_owned(), + format!( + "handler for runtime function 0x{:08X}..0x{:08X}", + runtime.begin_rva, runtime.end_rva + ), + depth + 1, + lane, + path_stack, + ))) + }; + node.edges.push(FlowEdge { + site_rva: hex32(runtime.begin_rva), + kind: "exception".to_owned(), + target: handler_name, + target_rva: hex32(handler_rva), + target_va: hex64(self.image_base + handler_rva as u64), + target_source: handler_source, + target_category: handler_category, + thread_lane: lane, + tags: vec!["try-except".to_owned(), "unwind".to_owned()], + detail: format!( + "UNWIND_INFO 0x{:08X}, flags 0x{:X}", + runtime.unwind_info_rva, runtime.unwind_flags + ), + relation: "exception-handler".to_owned(), + child, + }); + } + } + + path_stack.remove(&rva); + node + } + + fn edge_from_call( + &mut self, + insns: &[Instruction], + call: &ApiCall, + depth: usize, + lane: usize, + path_stack: &mut HashSet, + ) -> FlowEdge { + self.stats.call_edges += 1; + + let mut tags = Vec::new(); + if call.is_import { + self.stats.import_edges += 1; + tags.push("import".to_owned()); + } + if call.is_indirect { + self.stats.indirect_edges += 1; + 
tags.push("indirect".to_owned()); + } + if call.kind.eq_ignore_ascii_case("jmp") { + tags.push("tail-jump".to_owned()); + } + if is_terminator_api(&call.label) { + tags.push("program-end".to_owned()); + } + if let Some(intent) = thread_api_intent(&call.label) { + self.stats.thread_api_edges += 1; + tags.push("thread-api".to_owned()); + tags.push(intent.to_owned()); + } + + let target_rva = executable_target_rva(self.pe, call.target_rva); + let target_meta = target_rva.and_then(|rva| self.symbol_meta(rva)); + let target_name = call_target_name(call); + let target_source = target_meta + .as_ref() + .map(|meta| meta.source.clone()) + .unwrap_or_else(|| { + if call.is_import { + "import".to_owned() + } else { + "unknown".to_owned() + } + }); + let target_category = + classify_edge_target(&target_name, &target_source, target_meta.as_ref()); + let mut relation = "callee".to_owned(); + let mut child = None; + let mut detail_parts = Vec::new(); + if let Some(method) = &call.indirect_method { + detail_parts.push(method.clone()); + } + if !call.switch_cases.is_empty() { + detail_parts.push(format!("switch cases: {:?}", call.switch_cases)); + } + if let Some(intent) = describe_thread_intent(call, insns, self.pe, self.raw) { + detail_parts.push(intent); + } + + let mut edge_lane = lane; + if let Some(spec) = callback_spec(&call.label) { + tags.push(spec.tag.to_owned()); + relation = spec.relation.to_owned(); + if spec.tag == "thread-spawn" { + self.stats.thread_edges += 1; + } else { + self.stats.workpool_edges += 1; + } + + match recover_callback_target(insns, call.rva, spec.arg_index, self.pe, self.raw) { + Some((callback_rva, method)) => { + detail_parts.push(format!( + "{} callback arg{} via {}", + spec.relation, spec.arg_index, method + )); + let callback_name = + best_symbol_name(self.symbol_index, self.image_base, callback_rva); + edge_lane = self.allocate_lane(); + child = Some(Box::new(self.trace_function( + callback_rva, + callback_name, + 
relation_title(spec.relation), + format!("{} @ {}", call_target_name(call), hex32(call.rva)), + format!("callback recovered from {}", call_target_name(call)), + depth + 1, + edge_lane, + path_stack, + ))); + } + None => { + detail_parts.push(format!( + "{} callback arg{} unresolved", + spec.relation, spec.arg_index + )); + } + } + } else if let Some(target_rva) = target_rva { + let target_name = best_symbol_name(self.symbol_index, self.image_base, target_rva); + if path_stack.contains(&target_rva) { + tags.push("cycle".to_owned()); + self.stats.cycle_edges += 1; + } else { + child = Some(Box::new(self.trace_function( + target_rva, + target_name, + if call.kind.eq_ignore_ascii_case("jmp") { + "Tail Call".to_owned() + } else { + "Function".to_owned() + }, + format!("{} @ {}", call.kind, hex32(call.rva)), + String::new(), + depth + 1, + lane, + path_stack, + ))); + } + } + + FlowEdge { + site_rva: hex32(call.rva), + kind: call.kind.clone(), + target: target_name, + target_rva: target_rva + .map(hex32) + .unwrap_or_else(|| hex32(call.target_rva)), + target_va: target_rva + .map(|rva| hex64(self.image_base + rva as u64)) + .unwrap_or_default(), + target_source, + target_category, + thread_lane: edge_lane, + tags, + detail: detail_parts.join("; "), + relation, + child, + } + } + + fn allocate_lane(&mut self) -> usize { + let lane = self.next_lane; + self.next_lane += 1; + lane + } + + fn symbol_meta(&self, rva: u32) -> Option { + if let Some(func) = self.pdb_functions.get(&rva) { + return Some(SymbolMeta { + source: "pdb".to_owned(), + size: func.size, + prototype: func.type_name.clone(), + }); + } + + let va = self.image_base + rva as u64; + let hit = self.symbol_index.lookup(va)?; + if hit.displacement != 0 { + return None; + } + let source = if hit.symbol.size > 0 || !hit.symbol.type_name.is_empty() { + "pdb" + } else if self + .exports + .iter() + .any(|export| export.rva == rva && !export.name.is_empty()) + { + "export" + } else { + "symbol" + }; + Some(SymbolMeta { 
+ source: source.to_owned(), + size: hit.symbol.size, + prototype: hit.symbol.type_name, + }) + } + + fn decode_bound_label(&self, rva: u32, sym: Option<&SymbolMeta>) -> String { + if let Some(runtime) = read_runtime_function(self.pe, self.raw, rva) { + return format!( + ".pdata 0x{:08X}..0x{:08X}", + runtime.begin_rva, runtime.end_rva + ); + } + if let Some(sym) = sym.filter(|sym| sym.source == "pdb" && sym.size > 0) { + return format!("pdb-size 0x{:X}", sym.size); + } + "section/max-bytes".to_owned() + } +} + +#[derive(Debug, Clone)] +struct SymbolMeta { + source: String, + size: u64, + prototype: String, +} + +#[derive(Debug, Clone)] +struct RenderFilters { + thread: String, + api: String, +} + +impl RenderFilters { + fn from_config(cfg: &Config) -> Self { + Self { + thread: cfg.reconstruct_thread_filter.trim().to_ascii_lowercase(), + api: cfg.reconstruct_api_filter.trim().to_ascii_lowercase(), + } + } + + fn active(&self) -> bool { + !self.thread.is_empty() || !self.api.is_empty() + } + + fn function_visible(&self, func: &FlowFunction) -> bool { + if !self.active() { + return true; + } + let thread_ok = self.thread.is_empty() + || self.function_matches_thread(func) + || func + .edges + .iter() + .any(|edge| self.edge_matches_thread_tree(edge)); + let api_ok = self.api.is_empty() + || function_text(func).contains(&self.api) + || func + .edges + .iter() + .any(|edge| self.edge_matches_api_tree(edge)); + thread_ok && api_ok + } + + fn edge_visible(&self, edge: &FlowEdge) -> bool { + if !self.active() { + return true; + } + let thread_ok = self.thread.is_empty() || self.edge_matches_thread_tree(edge); + let api_ok = self.api.is_empty() || self.edge_matches_api_tree(edge); + thread_ok && api_ok + } + + fn function_matches_thread(&self, func: &FlowFunction) -> bool { + if self.thread.is_empty() { + return true; + } + match self.thread.as_str() { + "all" => true, + "spawned" => func.thread_lane != 0, + "api" => false, + needle => func.thread_lane != 0 && 
function_text(func).contains(needle), + } + } + + fn edge_matches_thread_tree(&self, edge: &FlowEdge) -> bool { + if self.thread.is_empty() || self.thread == "all" { + return true; + } + let direct = match self.thread.as_str() { + "spawned" => { + edge.thread_lane != 0 || has_tag(edge, "thread-spawn") || has_tag(edge, "workpool") + } + "api" => has_tag(edge, "thread-api"), + needle => { + (edge.thread_lane != 0 || edge.tags.iter().any(|tag| tag.contains("thread"))) + && edge_text(edge).contains(needle) + } + }; + direct + || edge + .child + .as_ref() + .is_some_and(|child| self.function_visible(child)) + } + + fn edge_matches_api_tree(&self, edge: &FlowEdge) -> bool { + if self.api.is_empty() { + return true; + } + edge_text(edge).contains(&self.api) + || edge + .child + .as_ref() + .is_some_and(|child| self.function_visible(child)) + } +} + +fn function_text(func: &FlowFunction) -> String { + format!( + "{} {} {} {} {} {} {}", + func.name, + func.kind, + func.rva, + func.symbol_source, + func.symbol_category, + func.prototype, + func.note + ) + .to_ascii_lowercase() +} + +fn edge_text(edge: &FlowEdge) -> String { + format!( + "{} {} {} {} {} {} {} {}", + edge.kind, + edge.target, + edge.target_rva, + edge.target_source, + edge.target_category, + edge.tags.join(" "), + edge.detail, + edge.relation + ) + .to_ascii_lowercase() +} + +fn has_tag(edge: &FlowEdge, wanted: &str) -> bool { + edge.tags.iter().any(|tag| tag == wanted) +} + +pub fn render_ascii(report: &ReconstructReport, c: &Colors, cfg: &Config) -> String { + let filters = RenderFilters::from_config(cfg); + let mut out = String::new(); + let _ = writeln!( + out, + "{}", + c.bold(&c.b_blue(&format!("Reconstructed CFG: {}", report.image))) + ); + let _ = writeln!( + out, + " {} {} {} {} {} {}", + c.dim("arch:"), + c.b_white(&report.arch), + c.dim("image_base:"), + c.cyan(&report.image_base), + c.dim("entry:"), + c.green(&report.entry_point) + ); + let _ = writeln!(out, " {} {}", c.dim("path:"), report.path); 
+ let _ = writeln!( + out, + " {} {} {} {} {} {}", + c.dim("pdb:"), + color_pdb_status(&report.pdb, c), + c.dim("symbols:"), + c.b_white(&report.pdb.symbol_count.to_string()), + c.dim("functions:"), + c.b_white(&format!( + "{} / {} sized", + report.pdb.function_count, report.pdb.sized_function_count + )) + ); + let _ = writeln!(out, " {} {}", c.dim("legend:"), render_symbol_legend(c)); + let _ = writeln!(out); + + if report.roots.is_empty() { + let _ = writeln!(out, "{}", c.dim("(no executable startup roots found)")); + } else { + let visible_roots = report + .roots + .iter() + .enumerate() + .filter(|(_, root)| filters.function_visible(root)) + .collect::>(); + if visible_roots.is_empty() { + let _ = writeln!(out, "{}", c.dim("(no paths matched filters)")); + } + for (pos, (idx, root)) in visible_roots.iter().enumerate() { + render_root(&mut out, root, idx + 1, report.roots.len(), c, &filters); + if pos + 1 < visible_roots.len() { + let _ = writeln!(out); + } + } + } + + let _ = writeln!(out); + let _ = writeln!(out, "{}", c.bold(&c.b_cyan("Summary"))); + let _ = writeln!( + out, + " roots={} functions={} calls={} imports={} indirect={} threads={} workpools={} exceptions={} cycles={} truncated={} decode_errors={}", + report.stats.roots, + report.stats.functions_expanded, + report.stats.call_edges, + report.stats.import_edges, + report.stats.indirect_edges, + report.stats.thread_edges, + report.stats.workpool_edges, + report.stats.exception_edges, + report.stats.cycle_edges, + report.stats.truncated_edges, + report.stats.decode_errors, + ); + if !report.notes.is_empty() { + let _ = writeln!(out, "{}", c.dim("Notes:")); + for note in &report.notes { + let _ = writeln!(out, " - {}", c.dim(note)); + } + } + if filters.active() { + let _ = writeln!(out, "{}", c.dim("Filters:")); + if !filters.thread.is_empty() { + let _ = writeln!(out, " - thread-filter: {}", c.b_mag(&filters.thread)); + } + if !filters.api.is_empty() { + let _ = writeln!(out, " - api-filter: {}", 
c.b_yellow(&filters.api)); + } + } + + out +} + +fn render_root( + out: &mut String, + root: &FlowFunction, + index: usize, + total: usize, + c: &Colors, + filters: &RenderFilters, +) { + let _ = writeln!( + out, + "{} {}", + c.bold(&c.b_yellow(&format!("Root {}/{}:", index, total))), + format_function_header(root, c) + ); + render_function_body(out, root, "", c, filters); +} + +fn render_function_body( + out: &mut String, + func: &FlowFunction, + prefix: &str, + c: &Colors, + filters: &RenderFilters, +) { + if func.status != "expanded" { + let _ = writeln!( + out, + "{}`-- {}", + prefix, + c.dim(&format!("{} [{}]", func.status, func.rva)) + ); + return; + } + + let visible_edges = func + .edges + .iter() + .filter(|edge| filters.edge_visible(edge)) + .collect::>(); + let return_count = if func.returns.is_empty() || filters.active() { + 0 + } else { + 1 + }; + let total = visible_edges.len() + return_count; + if total == 0 { + let text = if filters.active() { + "no matching calls recovered" + } else { + "no calls recovered" + }; + let _ = writeln!(out, "{}`-- {}", prefix, c.dim(text)); + return; + } + + for (idx, edge) in visible_edges.iter().enumerate() { + let is_last = idx + 1 == total; + render_edge(out, edge, prefix, is_last, c, filters); + } + + if return_count > 0 { + render_return(out, &func.returns, prefix, true, c); + } +} + +fn render_edge( + out: &mut String, + edge: &FlowEdge, + prefix: &str, + is_last: bool, + c: &Colors, + filters: &RenderFilters, +) { + let branch = if is_last { "`--" } else { "|--" }; + let next_prefix = format!("{}{}", prefix, if is_last { " " } else { "| " }); + let mut display_tags = edge.tags.clone(); + if !edge.target_category.is_empty() + && !display_tags.iter().any(|tag| tag == &edge.target_category) + { + display_tags.push(edge.target_category.clone()); + } + let tags = if display_tags.is_empty() { + String::new() + } else { + format!(" [{}]", display_tags.join(", ")) + }; + let detail = if edge.detail.is_empty() { + 
String::new() + } else { + format!(" ; {}", edge.detail) + }; + let target = color_target_name(edge, c); + let branch = color_branch(branch, edge.thread_lane, &edge.tags, c); + + let _ = writeln!( + out, + "{}{} {} {} -> {}{}{}", + prefix, + branch, + c.cyan(&edge.site_rva), + c.bold(&edge.kind), + target, + c.dim(&tags), + c.dim(&detail) + ); + + if let Some(child) = edge.child.as_ref() { + if edge.relation == "callee" { + render_function_body(out, child, &next_prefix, c, filters); + } else { + let _ = writeln!( + out, + "{}`-- {} -> {}", + next_prefix, + c.bold(&edge.relation), + format_function_header(child, c) + ); + render_function_body(out, child, &format!("{} ", next_prefix), c, filters); + } + } +} + +fn render_return(out: &mut String, returns: &[String], prefix: &str, is_last: bool, c: &Colors) { + let branch = if is_last { "`--" } else { "|--" }; + let rendered = if returns.len() <= 4 { + returns.join(", ") + } else { + format!( + "{}, {}, ... ({} sites)", + returns[0], + returns[1], + returns.len() + ) + }; + let _ = writeln!( + out, + "{}{} {} {}", + prefix, + branch, + c.bold("return/program-end"), + c.dim(&rendered) + ); +} + +fn format_function_header(func: &FlowFunction, c: &Colors) -> String { + let section = if func.section.is_empty() { + String::new() + } else { + format!(" {}", c.dim(&format!("[{}]", func.section))) + }; + let note = if func.note.is_empty() { + String::new() + } else { + format!(" {}", c.dim(&format!("({})", func.note))) + }; + let mut meta = Vec::new(); + if !func.symbol_source.is_empty() { + meta.push(func.symbol_source.clone()); + } + if !func.symbol_category.is_empty() && func.symbol_category != func.symbol_source { + meta.push(func.symbol_category.clone()); + } + if !func.symbol_size.is_empty() { + meta.push(format!("size {}", func.symbol_size)); + } + if !func.decode_bound.is_empty() { + meta.push(format!("bound {}", func.decode_bound)); + } + if func.thread_lane != 0 { + meta.push(format!("thread lane {}", 
func.thread_lane)); + } + let meta = if meta.is_empty() { + String::new() + } else { + format!(" {}", c.dim(&format!("<{}>", meta.join(", ")))) + }; + let prototype = if func.prototype.is_empty() { + String::new() + } else { + format!(" {}", c.dim(&func.prototype)) + }; + format!( + "{} {} {}{}{}{}{}", + color_function_name( + &func.name, + &func.symbol_source, + &func.symbol_category, + func.thread_lane, + c, + ), + c.dim(&func.rva), + c.dim(&func.kind), + section, + meta, + prototype, + note + ) +} + +fn color_pdb_status(pdb: &PdbInfo, c: &Colors) -> String { + if !pdb.enabled { + c.dim("disabled") + } else if pdb.loaded { + c.green("loaded") + } else { + c.yellow(&format!("unavailable ({})", pdb.error)) + } +} + +fn render_symbol_legend(c: &Colors) -> String { + [ + c.b_mag("internal-pdb"), + c.b_cyan("internal-export"), + c.b_yellow("internal/c++"), + c.yellow("internal/crt"), + c.b_red("nt-api"), + c.cyan("microsoft-api"), + c.b_yellow("cpp-runtime"), + c.yellow("crt-runtime"), + c.green("external-import"), + ] + .join(" ") +} + +fn color_function_name( + name: &str, + source: &str, + category: &str, + lane: usize, + c: &Colors, +) -> String { + if lane != 0 { + return lane_color(lane, name, c); + } + match category { + "internal-pdb" => c.b_mag(name), + "internal-cpp" => c.b_yellow(name), + "internal-crt" => c.yellow(name), + "internal-export" => c.b_cyan(name), + _ if source == "pdb" => c.b_mag(name), + _ if source == "export" => c.b_cyan(name), + _ if name.starts_with("sub_") => c.cyan(name), + _ => c.b_white(name), + } +} + +fn color_target_name(edge: &FlowEdge, c: &Colors) -> String { + let name = if edge.target_rva != "0x00000000" { + format!("{} {}", edge.target, edge.target_rva) + } else { + edge.target.clone() + }; + + if edge + .tags + .iter() + .any(|tag| tag == "thread-spawn" || tag == "thread-api") + { + return c.b_mag(&name); + } + if edge.tags.iter().any(|tag| tag == "workpool") { + return c.magenta(&name); + } + match 
/// Returns `true` when the trailing component of `name` (the text after the
/// last `!` or `:`) starts with `Nt`, `Zw` or `Rtl`.
fn is_nt_like_name(name: &str) -> bool {
    // Both separators are single-byte ASCII, so `sep + 1` is a char boundary.
    let bare = match name.rfind(|ch: char| ch == '!' || ch == ':') {
        Some(sep) => &name[sep + 1..],
        None => name,
    };
    ["Nt", "Zw", "Rtl"]
        .iter()
        .any(|prefix| bare.starts_with(*prefix))
}
/// Returns `true` when `dll` names a Microsoft-provided module.
///
/// `dll` is compared as given; callers are expected to pass a lower-cased
/// module name. A name qualifies when it is an API-set contract
/// (`api-ms-win-*` or `ext-ms-*`) or one of the well-known system DLLs
/// listed below.
///
/// Only the full `api-ms-win-` prefix is accepted. A bare `win-` prefix is
/// not treated as Microsoft, because third-party modules use it too
/// (for example `win-capture.dll`).
fn is_microsoft_dll(dll: &str) -> bool {
    dll.starts_with("api-ms-win-")
        || dll.starts_with("ext-ms-")
        || matches!(
            dll,
            "ntdll.dll"
                | "kernel32.dll"
                | "kernelbase.dll"
                | "user32.dll"
                | "gdi32.dll"
                | "advapi32.dll"
                | "sechost.dll"
                | "rpcrt4.dll"
                | "shell32.dll"
                | "ole32.dll"
                | "oleaut32.dll"
                | "combase.dll"
                | "ws2_32.dll"
                | "bcrypt.dll"
                | "crypt32.dll"
                | "wintrust.dll"
                | "winhttp.dll"
                | "wininet.dll"
                | "urlmon.dll"
                | "shlwapi.dll"
                | "version.dll"
                | "dbghelp.dll"
                | "psapi.dll"
                | "iphlpapi.dll"
                | "dnsapi.dll"
                | "netapi32.dll"
                | "wtsapi32.dll"
                | "mswsock.dll"
                | "imm32.dll"
                | "setupapi.dll"
                | "cfgmgr32.dll"
                | "powrprof.dll"
                | "mpr.dll"
                | "userenv.dll"
                | "dwmapi.dll"
                | "uxtheme.dll"
                | "propsys.dll"
                | "profapi.dll"
                | "normaliz.dll"
        )
}
/// Heuristic: does `text` look like a C++ symbol or prototype?
///
/// Matches text that starts with `??` or `?`, or that contains a scope
/// operator, `operator `, `class `, `struct `, or one of a few well-known
/// namespace prefixes.
fn is_cpp_symbol(text: &str) -> bool {
    const LEADING: [&str; 2] = ["??", "?"];
    const EMBEDDED: [&str; 8] = [
        "::",
        "operator ",
        "class ",
        "struct ",
        "std::",
        "ATL::",
        "wil::",
        "Microsoft::",
    ];

    LEADING.iter().any(|lead| text.starts_with(*lead))
        || EMBEDDED.iter().any(|marker| text.contains(*marker))
}
}, + "createremotethread" => CallbackSpec { + relation: "thread-start", + tag: "thread-spawn", + arg_index: 4, + }, + "queueuserworkitem" + | "rtlqueueworkitem" + | "createthreadpoolwork" + | "trysubmitthreadpoolcallback" + | "createthreadpooltimer" + | "createthreadpoolwait" => CallbackSpec { + relation: "work-callback", + tag: "workpool", + arg_index: 1, + }, + "tpallocwork" | "tpalloctimer" | "tpallocwait" => CallbackSpec { + relation: "work-callback", + tag: "workpool", + arg_index: 2, + }, + "registerwaitforsingleobject" => CallbackSpec { + relation: "work-callback", + tag: "workpool", + arg_index: 3, + }, + _ => return None, + }; + Some(spec) +} + +fn thread_api_intent(name: &str) -> Option<&'static str> { + match normalize_api_name(name).as_str() { + "switchtothread" | "ntyieldexecution" | "zwyieldexecution" => Some("thread-yield"), + "openthread" | "ntopenthread" | "zwopenthread" => Some("thread-open"), + "getthreadcontext" + | "wow64getthreadcontext" + | "ntgetcontextthread" + | "zwgetcontextthread" => Some("thread-context-read"), + "setthreadcontext" + | "wow64setthreadcontext" + | "ntsetcontextthread" + | "zwsetcontextthread" => Some("thread-context-write"), + "suspendthread" | "ntsuspendthread" | "zwsuspendthread" => Some("thread-suspend"), + "resumethread" | "ntresumethread" | "zwresumethread" => Some("thread-resume"), + "queuethreadapc" | "ntqueueapcthread" | "zwqueueapcthread" => Some("thread-apc"), + "getthreadid" | "getcurrentthreadid" | "teb" => Some("thread-id"), + "getcurrentthread" | "duplicatehandle" => Some("thread-handle"), + _ => None, + } +} + +fn describe_thread_intent( + call: &ApiCall, + insns: &[Instruction], + pe: &PeFile, + raw: &[u8], +) -> Option { + let intent = thread_api_intent(&call.label)?; + let name = normalize_api_name(&call.label); + let mut detail = format!("thread intent: {}", intent); + + if matches!( + name.as_str(), + "switchtothread" | "ntyieldexecution" | "zwyieldexecution" + ) { + detail.push_str(" (current thread 
yields; no target thread handle)"); + return Some(detail); + } + + let interesting_arg = match name.as_str() { + "openthread" | "ntopenthread" | "zwopenthread" => Some(3), + "getthreadcontext" + | "wow64getthreadcontext" + | "ntgetcontextthread" + | "zwgetcontextthread" + | "setthreadcontext" + | "wow64setthreadcontext" + | "ntsetcontextthread" + | "zwsetcontextthread" + | "suspendthread" + | "ntsuspendthread" + | "zwsuspendthread" + | "resumethread" + | "ntresumethread" + | "zwresumethread" + | "queuethreadapc" + | "ntqueueapcthread" + | "zwqueueapcthread" => Some(1), + _ => None, + }; + + if let Some(arg) = interesting_arg { + if let Some(value) = recover_immediate_arg(insns, call.rva, arg, pe, raw) { + detail.push_str(&format!("; arg{}=0x{:X}", arg, value)); + } else { + detail.push_str(&format!("; arg{} unresolved", arg)); + } + } + Some(detail) +} + +fn recover_immediate_arg( + insns: &[Instruction], + call_rva: u32, + arg_index: usize, + pe: &PeFile, + raw: &[u8], +) -> Option { + let call_idx = insns.iter().position(|insn| insn.rva == call_rva)?; + if pe.arch == 64 { + let reg = x64_arg_register(arg_index)?; + return resolve_immediate_register_before(insns, call_idx, reg, pe, raw, 0); + } + recover_x86_stack_immediate_arg(insns, call_idx, arg_index, pe, raw) +} + +fn recover_x86_stack_immediate_arg( + insns: &[Instruction], + call_idx: usize, + arg_index: usize, + pe: &PeFile, + raw: &[u8], +) -> Option { + let mut seen_args = 0usize; + for (idx, insn) in insns[..call_idx].iter().enumerate().rev().take(48) { + if insn.iced.mnemonic() != Mnemonic::Push || insn.iced.op_count() == 0 { + continue; + } + seen_args += 1; + if seen_args != arg_index { + continue; + } + if let Some(value) = immediate_operand_value(&insn.iced, 0) { + return Some(value); + } + if insn.iced.op0_kind() == OpKind::Register { + return resolve_immediate_register_before( + insns, + idx, + insn.iced.op0_register().full_register(), + pe, + raw, + 0, + ); + } + break; + } + None +} + +fn 
/// Recovers the code target passed as stack argument `arg_index` to the call
/// at `insns[call_idx]`.
///
/// Scans backwards from the call over at most 48 instructions, counting
/// `push` instructions that have an operand. The `arg_index`-th push found
/// (counting from the one nearest the call) is taken to be the argument.
/// NOTE(review): this presumably relies on arguments being pushed
/// right-to-left with no unrelated pushes in between — confirm.
///
/// Returns the executable RVA together with a short description of how it
/// was recovered, or `None` if the push could not be resolved.
fn resolve_x86_stack_arg(
    insns: &[Instruction],
    call_idx: usize,
    arg_index: usize,
    pe: &PeFile,
    raw: &[u8],
) -> Option<(u32, String)> {
    let mut seen_args = 0usize;
    // `enumerate()` runs before `rev()`, so `idx` stays an absolute index into `insns`.
    for (idx, insn) in insns[..call_idx].iter().enumerate().rev().take(48) {
        if insn.iced.mnemonic() != Mnemonic::Push || insn.iced.op_count() == 0 {
            continue;
        }
        seen_args += 1;
        if seen_args != arg_index {
            continue;
        }
        // The operand is not dereferenced here (`deref_memory` is false).
        if let Some(rva) = code_target_from_operand(&insn.iced, 0, pe, raw, false) {
            return Some((rva, "stack push immediate".to_owned()));
        }
        if insn.iced.op0_kind() == OpKind::Register {
            // Pushed from a register: chase the register's definition further back.
            let reg = insn.iced.op0_register().full_register();
            return resolve_register_before(insns, idx, reg, pe, raw, 0).map(|(rva, method)| {
                (rva, format!("stack push {} {}", register_name(reg), method))
            });
        }
        // The matching push exists but is neither resolvable form; stop looking.
        break;
    }
    None
}
if iced.op1_kind() == OpKind::Register { + let src = iced.op1_register().full_register(); + return resolve_register_before(insns, absolute_idx, src, pe, raw, depth + 1) + .map(|(rva, method)| { + (rva, format!("<- {} {}", register_name(src), method)) + }); + } + + let deref_memory = iced.mnemonic() == Mnemonic::Mov; + if let Some(rva) = code_target_from_operand(iced, 1, pe, raw, deref_memory) { + let method = if iced.mnemonic() == Mnemonic::Lea { + "loaded address".to_owned() + } else if deref_memory && iced.op1_kind() == OpKind::Memory { + "loaded function pointer".to_owned() + } else { + "loaded immediate".to_owned() + }; + return Some((rva, method)); + } + return None; + } + Mnemonic::Xor + if iced.op1_kind() == OpKind::Register + && iced.op1_register().full_register() == wanted => + { + return None; + } + _ => return None, + } + } + + None +} + +fn resolve_immediate_register_before( + insns: &[Instruction], + before_idx: usize, + reg: Register, + pe: &PeFile, + raw: &[u8], + depth: usize, +) -> Option { + if depth > 6 { + return None; + } + let wanted = reg.full_register(); + let scan_start = before_idx.saturating_sub(64); + + for (idx, insn) in insns[scan_start..before_idx].iter().enumerate().rev() { + let absolute_idx = scan_start + idx; + let iced = &insn.iced; + if iced.op_count() == 0 || iced.op0_kind() != OpKind::Register { + continue; + } + if iced.op0_register().full_register() != wanted { + continue; + } + + match iced.mnemonic() { + Mnemonic::Mov | Mnemonic::Lea => { + if let Some(value) = immediate_operand_value(iced, 1) { + return Some(value); + } + if iced.op1_kind() == OpKind::Register { + return resolve_immediate_register_before( + insns, + absolute_idx, + iced.op1_register().full_register(), + pe, + raw, + depth + 1, + ); + } + if iced.op1_kind() == OpKind::Memory { + let addr = memory_address(iced)?; + if let Some(value) = read_pointer_value(pe, raw, addr) { + return Some(value); + } + } + return None; + } + Mnemonic::Xor + if iced.op1_kind() 
== OpKind::Register + && iced.op1_register().full_register() == wanted => + { + return Some(0); + } + _ => return None, + } + } + + None +} + +fn code_target_from_operand( + instr: &iced_x86::Instruction, + op_index: u32, + pe: &PeFile, + raw: &[u8], + deref_memory: bool, +) -> Option { + let kind = instr.op_kind(op_index); + match kind { + OpKind::Immediate8 => code_rva_from_value(pe, instr.immediate8() as u64), + OpKind::Immediate16 => code_rva_from_value(pe, instr.immediate16() as u64), + OpKind::Immediate32 | OpKind::Immediate32to64 => { + code_rva_from_value(pe, instr.immediate32() as u64) + } + OpKind::Immediate64 => code_rva_from_value(pe, instr.immediate64()), + OpKind::Memory => { + let addr = memory_address(instr)?; + if deref_memory { + read_pointer_target(pe, raw, addr) + } else { + code_rva_from_value(pe, addr) + } + } + _ => None, + } +} + +fn immediate_operand_value(instr: &iced_x86::Instruction, op_index: u32) -> Option { + match instr.op_kind(op_index) { + OpKind::Immediate8 => Some(instr.immediate8() as u64), + OpKind::Immediate16 => Some(instr.immediate16() as u64), + OpKind::Immediate32 | OpKind::Immediate32to64 => Some(instr.immediate32() as u64), + OpKind::Immediate64 => Some(instr.immediate64()), + _ => None, + } +} + +fn memory_address(instr: &iced_x86::Instruction) -> Option { + if matches!(instr.memory_base(), Register::RIP | Register::EIP) { + Some(instr.ip_rel_memory_address()) + } else if instr.memory_base() == Register::None && instr.memory_index() == Register::None { + let value = instr.memory_displacement64(); + (value != 0).then_some(value) + } else { + None + } +} + +fn read_pointer_target(pe: &PeFile, raw: &[u8], address: u64) -> Option { + let slot_rva = pe + .va_to_rva(address) + .or_else(|| code_rva_from_value(pe, address))?; + let off = pe.rva_to_offset(slot_rva)?; + let value = read_pointer_value_at_offset(pe, raw, off); + code_rva_from_value(pe, value) +} + +fn read_pointer_value(pe: &PeFile, raw: &[u8], address: u64) -> 
Option { + let slot_rva = pe.va_to_rva(address).or_else(|| { + u32::try_from(address) + .ok() + .filter(|rva| pe.rva_to_section(*rva).is_some()) + })?; + let off = pe.rva_to_offset(slot_rva)?; + Some(read_pointer_value_at_offset(pe, raw, off)) +} + +fn read_pointer_value_at_offset(pe: &PeFile, raw: &[u8], off: usize) -> u64 { + if pe.arch == 64 { + read_u64(raw, off) + } else { + read_u32(raw, off) as u64 + } +} + +fn code_rva_from_value(pe: &PeFile, value: u64) -> Option { + pe.va_to_rva(value) + .or_else(|| { + u32::try_from(value) + .ok() + .filter(|rva| pe.rva_to_section(*rva).is_some()) + }) + .filter(|rva| is_executable_rva(pe, *rva)) +} + +fn executable_target_rva(pe: &PeFile, rva: u32) -> Option { + (rva != 0 && is_executable_rva(pe, rva)).then_some(rva) +} + +fn is_executable_rva(pe: &PeFile, rva: u32) -> bool { + pe.rva_to_section(rva) + .is_some_and(|section| section.is_executable()) +} + +fn x64_arg_register(arg_index: usize) -> Option { + match arg_index { + 1 => Some(Register::RCX), + 2 => Some(Register::RDX), + 3 => Some(Register::R8), + 4 => Some(Register::R9), + _ => None, + } +} + +fn relation_title(relation: &str) -> String { + match relation { + "thread-start" => "Thread Start".to_owned(), + "work-callback" => "Workpool Callback".to_owned(), + _ => "Callback".to_owned(), + } +} + +fn root_name(root: &PeStartupRoutine, symbol_index: &SymbolIndex, image_base: u64) -> String { + let name = best_symbol_name(symbol_index, image_base, root.rva); + if name.starts_with("sub_") { + format!("{} {}", root.kind, name) + } else { + name + } +} + +fn best_symbol_name(symbol_index: &SymbolIndex, image_base: u64, rva: u32) -> String { + let va = image_base + rva as u64; + if let Some(hit) = symbol_index.lookup(va) { + if hit.displacement == 0 { + return hit.symbol.name; + } + if hit.displacement <= 0x20 { + return format!("{}+0x{:X}", hit.symbol.name, hit.displacement); + } + } + format!("sub_{:08X}", rva) +} + +fn call_target_name(call: &ApiCall) -> String { + 
/// Concatenates the non-blank entries of `parts` with `"; "` between them.
/// Entries are copied verbatim; only the blank check looks at trimmed text.
fn join_detail(parts: &[String]) -> String {
    let mut joined = String::new();
    for part in parts {
        if part.trim().is_empty() {
            continue;
        }
        if !joined.is_empty() {
            joined.push_str("; ");
        }
        joined.push_str(part);
    }
    joined
}

/// Reduces an API reference to a comparable key: everything up to the last
/// `!` or `:` is dropped, leading underscores and trailing `A`/`W` characters
/// are removed, and the remainder is ASCII-lowercased.
fn normalize_api_name(name: &str) -> String {
    let unscoped = match name.rfind(|c: char| c == '!' || c == ':') {
        // Both separators are single-byte, so `pos + 1` is a valid boundary.
        Some(pos) => &name[pos + 1..],
        None => name,
    };
    let without_prefix = unscoped.trim_start_matches('_');
    let bare = without_prefix.trim_end_matches(|c: char| c == 'A' || c == 'W');
    bare.to_ascii_lowercase()
}

/// True when the normalized name is one of the process/thread termination
/// routines listed below.
fn is_terminator_api(name: &str) -> bool {
    const TERMINATORS: &[&str] = &[
        "exitprocess",
        "rtlexituserprocess",
        "terminateprocess",
        "ntterminateprocess",
        "zwterminateprocess",
        "exitthread",
        "rtlexituserthread",
        "ntterminatethread",
        "zwterminatethread",
        "terminatethread",
        "exit",
        "quick_exit",
        "abort",
    ];
    let key = normalize_api_name(name);
    TERMINATORS.contains(&key.as_str())
}

/// Formats a 32-bit value as `0x` followed by eight uppercase hex digits.
fn hex32(value: u32) -> String {
    format!("{:#010X}", value)
}

/// Formats a 64-bit value as `0x` followed by sixteen uppercase hex digits.
fn hex64(value: u64) -> String {
    format!("{:#018X}", value)
}
"work-callback"); + assert_eq!(work.arg_index, 1); + } + + #[test] + fn normalize_api_name_strips_scope_and_ansi_suffix() { + assert_eq!(normalize_api_name("KERNEL32!CreateThread"), "createthread"); + assert_eq!(normalize_api_name("USER32!MessageBoxW"), "messagebox"); + } + + #[test] + fn thread_api_intents_cover_context_and_yield_calls() { + assert_eq!(thread_api_intent("SwitchToThread"), Some("thread-yield")); + assert_eq!( + thread_api_intent("ntdll!NtGetContextThread"), + Some("thread-context-read") + ); + assert_eq!( + thread_api_intent("kernel32!OpenThread"), + Some("thread-open") + ); + } + + #[test] + fn symbol_categories_split_internal_and_runtime_targets() { + assert_eq!( + classify_function_symbol("RealFunction", "pdb", None), + "internal-pdb" + ); + assert_eq!( + classify_function_symbol("?Run@@YAXXZ", "pdb", None), + "internal-cpp" + ); + assert_eq!( + classify_function_symbol("_initterm", "pdb", None), + "internal-crt" + ); + assert_eq!( + classify_function_symbol("DllMain", "export", None), + "internal-export" + ); + assert_eq!( + classify_edge_target("ntdll.dll!NtOpenProcess", "import", None), + "nt-api" + ); + assert_eq!( + classify_edge_target("kernel32.dll!CreateFileW", "import", None), + "microsoft-api" + ); + assert_eq!( + classify_edge_target("ucrtbase.dll!malloc", "import", None), + "crt-runtime" + ); + assert_eq!( + classify_edge_target("ntdll.dll!wcsrchr", "import", None), + "crt-runtime" + ); + assert_eq!( + classify_edge_target("msvcp140.dll!?_Xlength_error@std@@YAXXZ", "import", None), + "cpp-runtime" + ); + assert_eq!( + classify_edge_target("plugin.dll!Run", "import", None), + "external-import" + ); + } +} diff --git a/resx/src/analysis/recursive_cfg.rs b/resx/src/analysis/recursive_cfg.rs new file mode 100644 index 0000000..01deda0 --- /dev/null +++ b/resx/src/analysis/recursive_cfg.rs @@ -0,0 +1,312 @@ +use std::collections::{BTreeMap, BTreeSet, VecDeque}; + +use iced_x86::Mnemonic; +use serde::Serialize; + +use 
crate::analysis::disasm::{disassemble_at, is_ret, Instruction}; +use crate::analysis::ir::{summarize_typed_ir, IrOp}; +use crate::analysis::symbols::SymbolIndex; +use crate::core::config::Config; +use crate::formats::pe::{read_runtime_function, Export, PeFile}; + +#[derive(Debug, Clone, Default, Serialize)] +pub struct RecursiveCfg { + pub entry_rva: String, + pub function_end_rva: String, + pub confidence: String, + pub blocks: Vec, + pub edges: Vec, + pub discovered_targets: Vec, + pub unresolved_indirect: Vec, + pub ir: Vec, + pub notes: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct RecursiveBlock { + pub id: String, + pub start_rva: String, + pub end_rva: String, + pub insn_count: usize, + pub confidence: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct RecursiveEdge { + pub from: String, + pub to: String, + pub kind: String, + pub confidence: String, + pub detail: String, +} + +pub struct RecursiveCfgRequest<'a> { + pub raw: &'a [u8], + pub pe: &'a PeFile, + pub start_rva: u32, + pub arch: u32, + pub image_base: u64, + pub exports: &'a [Export], + pub symbols: Option<&'a SymbolIndex>, + pub cfg: &'a Config, + pub prototype: &'a str, +} + +pub fn recover_recursive_cfg(request: RecursiveCfgRequest<'_>) -> RecursiveCfg { + let RecursiveCfgRequest { + raw, + pe, + start_rva, + arch, + image_base, + exports, + symbols, + cfg, + prototype, + } = request; + let runtime = read_runtime_function(pe, raw, start_rva); + let function_end_rva = runtime + .as_ref() + .map(|r| r.end_rva) + .unwrap_or_else(|| section_end(pe, start_rva).unwrap_or(start_rva.saturating_add(0x1000))); + let decode_limit = function_end_rva.saturating_sub(start_rva).max(1); + let mut queue = VecDeque::from([start_rva]); + let mut seen = BTreeSet::new(); + let mut block_map: BTreeMap> = BTreeMap::new(); + let mut edges = Vec::new(); + let mut discovered_targets = BTreeSet::new(); + let mut unresolved_indirect = BTreeSet::new(); + let mut notes = Vec::new(); + let mut all_insns 
= Vec::new(); + + while let Some(block_start) = queue.pop_front() { + if !seen.insert(block_start) || seen.len() > cfg.max_total.max(64) { + continue; + } + if !same_function_range(start_rva, function_end_rva, block_start) { + discovered_targets.insert(block_start); + continue; + } + let Some(file_off) = pe.rva_to_offset(block_start) else { + notes.push(format!( + "block {} is not mapped to file bytes", + hex32(block_start) + )); + continue; + }; + + let mut local_cfg = cfg.clone(); + local_cfg.max_bytes = decode_limit.min(cfg.max_bytes.max(512) as u32) as usize; + local_cfg.max_insns = cfg.max_insns.clamp(64, 2048); + let Ok(linear) = disassemble_at( + raw, + pe, + file_off, + block_start, + arch, + image_base, + exports, + symbols, + &local_cfg, + ) else { + notes.push(format!("failed to decode block {}", hex32(block_start))); + continue; + }; + + let mut block = Vec::new(); + for insn in linear { + if !same_function_range(start_rva, function_end_rva, insn.rva) { + break; + } + let stop = terminates_block(&insn); + block.push(insn); + if stop { + break; + } + } + if block.is_empty() { + continue; + } + + let last = block.last().cloned().unwrap(); + let from = block_id(block_start); + for edge in block_edges(&block, image_base) { + match edge.target_rva { + Some(target) if same_function_range(start_rva, function_end_rva, target) => { + queue.push_back(target); + discovered_targets.insert(target); + edges.push(RecursiveEdge { + from: from.clone(), + to: block_id(target), + kind: edge.kind, + confidence: edge.confidence, + detail: edge.detail, + }); + } + Some(target) => { + discovered_targets.insert(target); + edges.push(RecursiveEdge { + from: from.clone(), + to: format!("sub_{:08X}", target), + kind: edge.kind, + confidence: "medium".to_owned(), + detail: format!("outside current function: {}", edge.detail), + }); + } + None => { + if edge.kind == "indirect" { + unresolved_indirect.insert(hex32(last.rva)); + } + edges.push(RecursiveEdge { + from: from.clone(), + 
to: String::new(), + kind: edge.kind, + confidence: edge.confidence, + detail: edge.detail, + }); + } + } + } + all_insns.extend(block.iter().cloned()); + block_map.insert(block_start, block); + } + + let blocks = block_map + .iter() + .map(|(start, insns)| { + let end = insns.last().map(|i| i.rva).unwrap_or(*start); + RecursiveBlock { + id: block_id(*start), + start_rva: hex32(*start), + end_rva: hex32(end), + insn_count: insns.len(), + confidence: if runtime.is_some() { "high" } else { "medium" }.to_owned(), + } + }) + .collect::>(); + let ir = summarize_typed_ir(&all_insns, image_base, symbols, prototype).ops; + if runtime.is_none() { + notes.push( + "recursive CFG used section/max-byte bounds because .pdata did not bound this function" + .to_owned(), + ); + } + if !unresolved_indirect.is_empty() { + notes.push( + "one or more indirect branches require data-flow or runtime state to resolve" + .to_owned(), + ); + } + + RecursiveCfg { + entry_rva: hex32(start_rva), + function_end_rva: hex32(function_end_rva), + confidence: if runtime.is_some() { "high" } else { "medium" }.to_owned(), + blocks, + edges, + discovered_targets: discovered_targets.into_iter().map(hex32).collect(), + unresolved_indirect: unresolved_indirect.into_iter().collect(), + ir, + notes, + } +} + +#[derive(Debug)] +struct EdgeCandidate { + target_rva: Option, + kind: String, + confidence: String, + detail: String, +} + +fn block_edges(block: &[Instruction], image_base: u64) -> Vec { + let Some(last) = block.last() else { + return Vec::new(); + }; + let mut edges = Vec::new(); + if last.is_jcc { + if last.call_target != 0 { + edges.push(EdgeCandidate { + target_rva: Some(last.call_target.wrapping_sub(image_base) as u32), + kind: "taken".to_owned(), + confidence: "high".to_owned(), + detail: last.mnemonic.clone(), + }); + } + edges.push(EdgeCandidate { + target_rva: next_rva(last), + kind: "fallthrough".to_owned(), + confidence: "high".to_owned(), + detail: "conditional fallthrough".to_owned(), + 
/// True when `rva` lies in the half-open interval `[start, end)`.
fn same_function_range(start: u32, end: u32, rva: u32) -> bool {
    (start..end).contains(&rva)
}

/// Stable identifier for the block starting at `rva`: `block_` plus eight
/// uppercase hex digits.
fn block_id(rva: u32) -> String {
    let mut id = String::from("block_");
    id.push_str(&format!("{:08X}", rva));
    id
}

/// Formats a 32-bit value as `0x` followed by eight uppercase hex digits.
fn hex32(value: u32) -> String {
    format!("{:#010X}", value)
}
sym) = self.inner.ordered.range(..=address).next_back()?; - let displacement = address.saturating_sub(sym.va); - let within = if sym.size > 0 { - displacement < sym.size - } else { - displacement <= 0x100 + let prev = self.inner.ordered.range(..=address).next_back()?; + let next = self.inner.ordered.range(address..).next(); + + let prev_sym = prev.1; + let prev_disp = address.saturating_sub(prev_sym.va); + + // Strong match if the symbol has a real size and we're inside it. + if prev_sym.size > 0 && prev_disp < prev_sym.size { + return Some(SymbolMatch { + symbol: prev_sym.clone(), + displacement: prev_disp, + }); + } + + // Unknown-size symbols are dangerous. Be much stricter. + // Allow only a tiny near-window, and only if there isn't a competing next symbol + // that is equally or more plausible. + let near_window = match prev_sym.kind.as_str() { + "function" => 0x20, + "data" => 0x10, + _ => 0x08, }; - if !within { + if prev_disp > near_window { return None; } + if let Some((next_va, _next_sym)) = next { + let next_gap = next_va.saturating_sub(address); + if next_gap <= prev_disp { + return None; + } + } + Some(SymbolMatch { - symbol: sym.clone(), - displacement, + symbol: prev_sym.clone(), + displacement: prev_disp, }) } } -fn insert_symbol(index: &mut SymbolIndexInner, sym: ResolvedSymbol) { - let keep_existing = index - .exact - .get(&sym.va) - .map(|old| score(old) >= score(&sym)) - .unwrap_or(false); - if !keep_existing { - index.exact.insert(sym.va, sym.clone()); - index.ordered.insert(sym.va, sym); - } -} - fn score(sym: &ResolvedSymbol) -> u32 { let mut score = 0; + if sym.kind == "function" { + score += 6; + } if sym.kind == "data" { score += 4; } - if sym.kind == "function" { - score += 3; + if sym.size > 0 { + score += 4; } if !sym.type_name.is_empty() { score += 2; @@ -160,3 +173,15 @@ fn score(sym: &ResolvedSymbol) -> u32 { } score } + +fn insert_symbol(index: &mut SymbolIndexInner, sym: ResolvedSymbol) { + let keep_existing = index + .exact + 
.get(&sym.va) + .map(|old| score(old) >= score(&sym)) + .unwrap_or(false); + if !keep_existing { + index.exact.insert(sym.va, sym.clone()); + index.ordered.insert(sym.va, sym); + } +} diff --git a/resx/src/analysis/thunk.rs b/resx/src/analysis/thunk.rs index c66b9a8..db0a5e7 100644 --- a/resx/src/analysis/thunk.rs +++ b/resx/src/analysis/thunk.rs @@ -15,6 +15,10 @@ pub enum ThunkResolution { Direct { target_rva: u32, }, + Chain { + hops: Vec, + final_target: Box, + }, } impl ThunkResolution { @@ -24,37 +28,66 @@ impl ThunkResolution { dll, func, slot_rva, - } => format!( - "IAT thunk → {}!{} [slot RVA 0x{:08X}]", - dll, func, slot_rva - ), - ThunkResolution::IatUnresolved { slot_rva } => format!( - "IAT thunk @ slot RVA 0x{:08X} (import not resolved)", - slot_rva - ), + } => { + format!( + "IAT thunk → {}!{} [slot RVA 0x{:08X}]", + dll, func, slot_rva + ) + } + ThunkResolution::IatUnresolved { slot_rva } => { + format!( + "IAT thunk @ slot RVA 0x{:08X} (import not resolved)", + slot_rva + ) + } ThunkResolution::Direct { target_rva } => { format!("JMP rel32 → RVA 0x{:08X}", target_rva) } + ThunkResolution::Chain { hops, final_target } => { + format!( + "thunk chain [{}] → {}", + hops.iter() + .map(|r| format!("0x{:08X}", r)) + .collect::>() + .join(" → "), + final_target.desc() + ) + } } } pub fn iat_dll(&self) -> Option<&str> { - if let ThunkResolution::Iat { dll, .. } = self { - Some(dll) - } else { - None + match self { + ThunkResolution::Iat { dll, .. } => Some(dll), + ThunkResolution::Chain { final_target, .. } => final_target.iat_dll(), + _ => None, } } + pub fn iat_func(&self) -> Option<&str> { - if let ThunkResolution::Iat { func, .. } = self { - Some(func) - } else { - None + match self { + ThunkResolution::Iat { func, .. } => Some(func), + ThunkResolution::Chain { final_target, .. 
} => final_target.iat_func(), + _ => None, } } } pub fn follow_jmp_thunk(raw: &[u8], pe: &PeFile, start_rva: u32) -> Option { + follow_jmp_thunk_inner(raw, pe, start_rva, 0, &mut Vec::new()) +} + +fn follow_jmp_thunk_inner( + raw: &[u8], + pe: &PeFile, + start_rva: u32, + depth: usize, + hops: &mut Vec, +) -> Option { + if depth >= 8 || hops.contains(&start_rva) { + return None; + } + let off = pe.rva_to_offset(start_rva)?; if off >= raw.len() { return None; @@ -74,13 +107,26 @@ pub fn follow_jmp_thunk(raw: &[u8], pe: &PeFile, start_rva: u32) -> Option { let target_va = instr.near_branch_target(); - let target_rva = (target_va.wrapping_sub(pe.image_base)) as u32; - Some(ThunkResolution::Direct { target_rva }) + let target_rva = target_va.wrapping_sub(pe.image_base) as u32; + + // Follow chained JMP stubs inside the image. + if pe.rva_to_offset(target_rva).is_some() { + if let Some(next) = follow_jmp_thunk_inner(raw, pe, target_rva, depth + 1, hops) { + ThunkResolution::Chain { + hops: hops.clone(), + final_target: Box::new(next), + } + } else { + ThunkResolution::Direct { target_rva } + } + } else { + ThunkResolution::Direct { target_rva } + } } OpKind::Memory => { @@ -93,13 +139,10 @@ pub fn follow_jmp_thunk(raw: &[u8], pe: &PeFile, start_rva: u32) -> Option Option Some(ThunkResolution::Iat { + Some((dll, func)) => ThunkResolution::Iat { dll, func, slot_rva, - }), - None => Some(ThunkResolution::IatUnresolved { slot_rva }), + }, + None => ThunkResolution::IatUnresolved { slot_rva }, } } - _ => None, - } + _ => return None, + }; + + Some(resolved) } diff --git a/resx/src/cli/help.rs b/resx/src/cli/help.rs index 8d99a03..c48b642 100644 --- a/resx/src/cli/help.rs +++ b/resx/src/cli/help.rs @@ -33,6 +33,7 @@ USAGE resx cfg [options] resx cfg --at [options] resx cfg --ordinal [options] + resx reconstruct-cfg [flow options] resx intelli [function] [options] resx types [query] [options] @@ -50,6 +51,7 @@ USAGE resx locate-sym [options] resx explain [--prefix|--api] 
[options] + resx scan [--jsonl] [scan options] resx yara [options] resx update [options] resx help @@ -57,6 +59,8 @@ USAGE COMMANDS dump Disassemble or reconstruct one target by name, RVA, or ordinal. cfg Show a control-flow graph view for one target by name, RVA, or ordinal. + reconstruct-cfg + Rebuild a best-effort startup-to-exit flow waterfall for one image. intelli Run heuristic triage over a target image or function. types Browse PDB-backed type names and symbol references. peinfo Show PE metadata, version resources, signer info, and headers. @@ -70,6 +74,7 @@ COMMANDS locate Show export-backed matches in the priority list. locate-sym Show export/symbol-backed matches in the priority list. explain Explain a prefix or API-style symbol name from the built-in glossary. + scan Inventory EXE/DLL/SYS files and rank fuzz target candidates. yara Scan a PE image with one or more YARA rules. update Pull the latest version from the current git remote/branch. help Show this help text. @@ -83,11 +88,18 @@ DUMP / INTELLI OPTIONS --unsafe-map-image allow mapping an on-disk image into RESX for checks that need memory bytes --hookchk show static entry-hook / thunk indicators --intelli run heuristic triage + --hostile aggressive tracing: recursive register backward-slice, + decoder-driven reverse-index, indirect-JMP emission, + suspicion annotations in disasm output --xrefs show incoming intra-image CALL/JMP references to the target --strings show referenced string literals --funcs show API call map: every CALL/JMP with its resolved target --funcs-depth recursively trace internal subs N levels deep (implies --funcs) --cfg text show a colour-coded basic control-flow graph + --reconstruct-cfg reconstruct startup/TLS flow as an ASCII waterfall + --thread-filter filter reconstruct-cfg to thread paths/APIs + values: all, spawned, api, or text + --api-filter filter reconstruct-cfg to matching API/function paths --explain explain the current dump target name with prefix/body glossary 
hints --prefix force explain-mode prefix interpretation --api force explain-mode API/symbol interpretation @@ -119,6 +131,13 @@ FOLLOW OPTIONS --max-dll-size max image size --workers parallel workers +SCAN OPTIONS + --jsonl emit one JSON object per image + --extensions comma-separated extensions, default exe,dll,sys + --max-files cap files scanned + --max-file-mb skip images above this size + --max-candidates cap fuzz candidates per image + GLOBAL OPTIONS --arch --path @@ -148,12 +167,14 @@ EXAMPLES resx pechk .\sample.dll resx dump ntoskrnl.exe NtQuerySystemInformation --cfg text resx cfg ntdll.dll --at 0x161F40 + resx reconstruct-cfg suspicious.dll --depth 6 --max-total 300 resx callers ntdll.dll NtOpenProcess --depth 2 --format flat resx callers ntdll.dll NtOpenProcess --include-dir C:\Work\Drivers resx callers ntoskrnl.exe PsOpenProcess --include-dir C:\Windows\System32\drivers --scope-file *.sys resx priority resx locate NtOpenProcess --include-dir C:\Work\Drivers resx locate-sym NtOpenProcess --include-image .\mydriver.sys + resx scan C:\Windows\System32\drivers --jsonl --max-files 200 resx explain Nt resx explain NtQuerySystemInformation resx dump ntoskrnl.exe NtQuerySystemInformation --explain @@ -172,6 +193,7 @@ pub fn example_topic<'a>(raw_args: &'a [String], cli: &'a Cli) -> &'a str { const KNOWN: &[&str] = &[ "dump", "cfg", + "reconstruct-cfg", "intelli", "peinfo", "sections", @@ -184,6 +206,7 @@ pub fn example_topic<'a>(raw_args: &'a [String], cli: &'a Cli) -> &'a str { "locate", "locate-sym", "explain", + "scan", "yara", "edrchk", "follow", @@ -229,6 +252,10 @@ pub fn preprocess_args(raw_args: &[String]) -> Vec { rewritten.push("--cfg".to_string()); rewritten.push("text".to_string()); } + "reconstruct-cfg" => { + rewritten.extend(raw_args.iter().skip(2).cloned()); + rewritten.push("--reconstruct-cfg".to_string()); + } "intelli" => { rewritten.extend(raw_args.iter().skip(2).cloned()); rewritten.push("--intelli".to_string()); @@ -277,6 +304,16 @@ pub fn 
preprocess_args(raw_args: &[String]) -> Vec { rewritten.push("--explain".to_string()); rewritten.extend(raw_args.iter().skip(2).cloned()); } + "scan" => { + rewritten.push("--resx-scan".to_string()); + if let Some(root) = raw_args.get(2) { + rewritten.push("--scan-root".to_string()); + rewritten.push(root.clone()); + rewritten.extend(raw_args.iter().skip(3).cloned()); + } else { + rewritten.extend(raw_args.iter().skip(2).cloned()); + } + } "yara" => { if raw_args.len() >= 4 { rewritten.push(raw_args[2].clone()); @@ -341,6 +378,25 @@ CFG EXAMPLES resx cfg ntoskrnl.exe NtQuerySystemInformation resx cfg ntdll.dll --at 0x161F40 resx cfg user32.dll --ordinal 650 +"# + } + "reconstruct-cfg" => { + r#" +RECONSTRUCT-CFG EXAMPLES + resx reconstruct-cfg suspicious.dll + resx suspicious.dll --reconstruct-cfg --depth 8 --max-total 500 + resx reconstruct-cfg suspicious.dll --thread-filter spawned + resx reconstruct-cfg suspicious.dll --thread-filter api --api-filter GetThreadContext + resx reconstruct-cfg .\sample.exe --json + +NOTES + Starts at PE entry/TLS/startup handoff candidates, follows intra-image CALL/JMP + targets, marks imports and unresolved indirect calls, and follows statically + recovered thread/workpool callback arguments when they point back into the image. + PDB symbols are used when available for names, prototype text, and size-backed + decode bounds. Internal PDB/export functions, Nt APIs, Microsoft DLL imports, + CRT/C++ runtime calls, and external DLL imports are tagged separately. + Use --thread-filter and --api-filter for non-interactive focus. "# } "peinfo" => { @@ -385,6 +441,18 @@ YARA EXAMPLES NOTES Accepts one or more rule files through the `yara` shorthand command or `--yara`. 
+"# + } + "scan" => { + r#" +SCAN EXAMPLES + resx scan C:\Windows\System32\drivers --jsonl --max-files 200 + resx scan .\samples --extensions exe,dll,sys --max-candidates 16 + resx scan .\samples --max-file-mb 100 --json + +NOTES + Inventories PE images and ranks fuzz-target candidates using image kind, + risk imports, exports, startup paths, section anomalies, and symbol names. "# } "follow" | "callers" => { @@ -446,7 +514,9 @@ GENERAL EXAMPLES resx dump ntdll.dll NtCreateFile resx intelli suspicious.dll resx dump ntoskrnl.exe NtQuerySystemInformation --cfg text + resx reconstruct-cfg suspicious.dll --depth 6 resx callers ntdll.dll NtOpenProcess --depth 2 + resx scan C:\Windows\System32\drivers --jsonl --max-files 200 resx locate-sym NtOpenProcess resx update "# diff --git a/resx/src/cli/router.rs b/resx/src/cli/router.rs index e05db21..e01ef81 100644 --- a/resx/src/cli/router.rs +++ b/resx/src/cli/router.rs @@ -23,6 +23,14 @@ pub fn dispatch( return commands::update::run(cfg, w, c); } + if cli.resx_scan { + return commands::scan::run(cli, w); + } + + if cfg.reconstruct_cfg { + return commands::reconstruct_cfg::run(&dll_arg, cfg, w, c); + } + if cfg.explain { let term = if !func_arg.is_empty() { &func_arg @@ -85,12 +93,12 @@ pub fn dispatch( } if dll_arg.is_empty() { return Err( - "Specify a command such as dump, cfg, intelli, types, peinfo, sections, eat, iat, syms, pechk, priority, callers, locate, locate-sym, yara, update, or help".to_owned(), + "Specify a command such as dump, cfg, reconstruct-cfg, intelli, types, peinfo, sections, eat, iat, syms, pechk, priority, callers, locate, locate-sym, scan, yara, update, or help".to_owned(), ); } Err( - "Incomplete command. Use `resx dump `, `resx callers `, `resx locate `, `resx priority`, `resx update`, or `resx help`".to_owned(), + "Incomplete command. 
Use `resx dump `, `resx reconstruct-cfg `, `resx callers `, `resx scan `, `resx locate `, `resx priority`, `resx update`, or `resx help`".to_owned(), ) } @@ -111,6 +119,7 @@ fn is_locate_mode(cfg: &Config, dll_arg: &str, func_arg: &str) -> bool { && !cfg.pechk && !cfg.hookchk && !cfg.intelli + && !cfg.reconstruct_cfg && !cfg.explain && cfg.cfg_view.is_empty() && cfg.yara.is_empty()) @@ -126,6 +135,7 @@ fn should_dump(cfg: &Config, func_arg: &str) -> bool { || cfg.pechk || cfg.hookchk || cfg.intelli + || cfg.reconstruct_cfg || !cfg.cfg_view.is_empty() || !cfg.yara.is_empty() } diff --git a/resx/src/commands/dump/callmap.rs b/resx/src/commands/dump/callmap.rs index 25c8e1d..836dd9f 100644 --- a/resx/src/commands/dump/callmap.rs +++ b/resx/src/commands/dump/callmap.rs @@ -14,12 +14,22 @@ use super::style::{color_kind, color_target, is_nt_api, short_dll_name}; use super::switchfmt::{format_case_values, format_class_value}; use super::{RecoveredSwitchDispatch, RecoveredSwitchTarget}; +const NT_KERNEL_IMAGES: &[&str] = &[ + "ntoskrnl.exe", + "ntkrnlmp.exe", + "ntkrnlpa.exe", + "ntkrpamp.exe", +]; +const WIN32K_KERNEL_IMAGES: &[&str] = &["win32kbase.sys", "win32kfull.sys", "win32k.sys"]; +const NO_KERNEL_IMAGES: &[&str] = &[]; + #[allow(clippy::too_many_arguments)] pub(super) fn print_api_calls( w: &mut dyn Write, calls: &[ApiCall], insns: &[Instruction], func_name: &str, + source_image_name: &str, c: &Colors, raw: &[u8], pe: &crate::formats::pe::PeFile, @@ -30,7 +40,7 @@ pub(super) fn print_api_calls( cfg: &Config, root_rva: u32, ) { - let synthetic_syscall = synthetic_syscall_call(insns, func_name); + let synthetic_syscall = synthetic_syscall_call(insns, func_name, source_image_name); let display_calls: Vec = if let Some(call) = synthetic_syscall { let mut merged = calls.to_vec(); if !merged.iter().any(|existing| { @@ -92,6 +102,7 @@ pub(super) fn render_api_call_tree( calls: &[ApiCall], insns: &[Instruction], func_name: &str, + source_image_name: &str, raw: &[u8], pe: 
&crate::formats::pe::PeFile, symbol_index: &crate::analysis::symbols::SymbolIndex, @@ -101,7 +112,7 @@ pub(super) fn render_api_call_tree( cfg: &Config, root_rva: u32, ) -> String { - let synthetic_syscall = synthetic_syscall_call(insns, func_name); + let synthetic_syscall = synthetic_syscall_call(insns, func_name, source_image_name); let display_calls: Vec = if let Some(call) = synthetic_syscall { let mut merged = calls.to_vec(); if !merged.iter().any(|existing| { @@ -151,7 +162,11 @@ pub(super) struct SyscallCallDetails { pub service_number: Option, } -fn synthetic_syscall_call(insns: &[Instruction], func_name: &str) -> Option { +fn synthetic_syscall_call( + insns: &[Instruction], + func_name: &str, + source_image_name: &str, +) -> Option { if !is_nt_api(func_name) { return None; } @@ -166,7 +181,7 @@ fn synthetic_syscall_call(insns: &[Instruction], func_name: &str) -> Option Option Option { - synthetic_syscall_call(insns, func_name) + synthetic_syscall_call(insns, func_name, source_image_name) } fn detect_syscall_number_from_insns(insns: &[Instruction]) -> Option { @@ -406,6 +422,7 @@ fn print_calls_recursive( image.raw, image.symbol_index, image.image_base, + cfg.hostile, ); if !sub_calls.is_empty() { let child_prefix = @@ -447,6 +464,7 @@ fn print_calls_recursive( &target.image.raw, &target.image.symbol_index, target.image.image_base, + cfg.hostile, ); if !sub_calls.is_empty() { let child_prefix = @@ -562,6 +580,7 @@ fn write_calls_recursive_text( image.raw, image.symbol_index, image.image_base, + cfg.hostile, ); if !sub_calls.is_empty() { let child_prefix = @@ -601,6 +620,7 @@ fn write_calls_recursive_text( &target.image.raw, &target.image.symbol_index, target.image.image_base, + cfg.hostile, ); if !sub_calls.is_empty() { let child_prefix = @@ -638,17 +658,12 @@ fn resolve_syscall_trace_target( if !call.is_import || !is_nt_api(&call.label) { return None; } - let dll = call.dll.to_ascii_lowercase(); - if !dll.eq("ntdll.dll") && !dll.eq("ntdll") { + let 
/// Chooses the user-mode stub DLL reported for a synthetic syscall edge:
/// `win32u.dll` when either the source image or the function name points at
/// the win32k family, `ntdll.dll` otherwise.
fn syscall_stub_provider(func_name: &str, source_image_name: &str) -> &'static str {
    let is_gui_syscall =
        is_win32k_syscall_provider(source_image_name) || is_probable_win32k_syscall_name(func_name);
    match is_gui_syscall {
        true => "win32u.dll",
        false => "ntdll.dll",
    }
}

/// True when `name` (path and extension ignored, case-insensitive) is ntdll.
fn is_native_syscall_provider(name: &str) -> bool {
    let base = normalize_image_base(name);
    base.as_str() == "ntdll"
}

/// True when `name` (path and extension ignored, case-insensitive) is one of
/// win32u, user32, gdi32 or gdi32full.
fn is_win32k_syscall_provider(name: &str) -> bool {
    const GUI_PROVIDERS: &[&str] = &["win32u", "user32", "gdi32", "gdi32full"];
    let base = normalize_image_base(name);
    GUI_PROVIDERS.contains(&base.as_str())
}

/// Lowercased file name of `name` with any directory part (either slash
/// style) and one trailing `.dll`, `.exe` or `.sys` removed.
fn normalize_image_base(name: &str) -> String {
    // Both separators are single-byte, so `pos + 1` is a valid boundary.
    let leaf_start = name
        .rfind(|c: char| c == '/' || c == '\\')
        .map_or(0, |pos| pos + 1);
    let mut base = name[leaf_start..].to_ascii_lowercase();
    for ext in &[".dll", ".exe", ".sys"] {
        if base.ends_with(*ext) {
            base.truncate(base.len() - ext.len());
            break;
        }
    }
    base
}

/// True when `name` starts (case-sensitively) with one of the prefixes below.
fn is_probable_win32k_syscall_name(name: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "NtBindComposition",
        "NtCloseComposition",
        "NtComposition",
        "NtCompositor",
        "NtConfigureInputSpace",
        "NtConfirmComposition",
        "NtCreateComposition",
        "NtCreateImplicitComposition",
        "NtDComposition",
        "NtDesktop",
        "NtDuplicateComposition",
        "NtDxgk",
        "NtEnableOneCore",
        "NtFlipObject",
        "NtGdi",
        "NtHWCursor",
        "NtInputSpace",
        "NtIsOneCore",
        "NtKST",
        "NtMIT",
        "NtMapVisual",
        "NtMin",
        "NtModerncore",
        "NtNotifyPresent",
        "NtOpenComposition",
        "NtQueryComposition",
        "NtRIM",
        "NtSetComposition",
        "NtSetCursor",
        "NtSetPointer",
        "NtSetShell",
        "NtTokenManager",
        "NtUnBindComposition",
        "NtUpdateInputSink",
        "NtUser",
        "NtValidateComposition",
        "NtVisual",
    ];
    for prefix in PREFIXES {
        if name.starts_with(prefix) {
            return true;
        }
    }
    false
}
assert!(is_probable_win32k_syscall_name("NtUserGetMessage")); + assert!(is_probable_win32k_syscall_name("NtGdiCreateBitmap")); + assert!(is_probable_win32k_syscall_name( + "NtDCompositionCommitChannel" + )); + assert!(!is_probable_win32k_syscall_name("NtOpenProcess")); + } + + #[test] + fn image_base_normalization_strips_common_extensions() { + assert_eq!( + normalize_image_base(r"C:\Windows\System32\win32kfull.sys"), + "win32kfull" + ); + assert_eq!(normalize_image_base("ntoskrnl.exe"), "ntoskrnl"); + assert_eq!(normalize_image_base("win32u.dll"), "win32u"); + } +} diff --git a/resx/src/commands/dump/json.rs b/resx/src/commands/dump/json.rs index 58151fe..8331acc 100644 --- a/resx/src/commands/dump/json.rs +++ b/resx/src/commands/dump/json.rs @@ -1,10 +1,14 @@ use serde::Serialize; +use crate::analysis::discovery::FunctionDiscoveryReport; use crate::analysis::edr::EdrCheckResult; use crate::analysis::explain::ExplainResult; +use crate::analysis::indirect::IndirectFlowReport; use crate::analysis::intelli::IntelliFinding; +use crate::analysis::ir::TypedIrSummary; +use crate::analysis::recursive_cfg::RecursiveCfg; use crate::analysis::yara::YaraMatch; -use crate::formats::pe::{PeAnomaly, PeSection, PeStartupRoutine}; +use crate::formats::pe::{PeAnomaly, PeDataSummary, PeSection, PeStartupRoutine}; #[derive(Serialize)] pub(crate) struct InsnJson { @@ -68,6 +72,16 @@ pub(crate) struct FuncResult { pub(crate) xrefs: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] pub(crate) strings: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) data: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) function_discovery: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) recursive_cfg: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) typed_ir: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) indirect_flow: Option, #[serde(skip_serializing_if = "Vec::is_empty")] 
pub(crate) intelli_findings: Vec, #[serde(skip_serializing_if = "String::is_empty")] @@ -170,6 +184,65 @@ pub(crate) struct StartupRoutineJson { pub(crate) note: String, } +#[derive(Serialize)] +pub(crate) struct PeDataSummaryJson { + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) strings: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) vtables: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) pointers: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) unwind: Vec, +} + +#[derive(Serialize)] +pub(crate) struct PeDataStringJson { + pub(crate) rva: String, + pub(crate) section: String, + pub(crate) encoding: String, + pub(crate) value: String, +} + +#[derive(Serialize)] +pub(crate) struct PeVTableJson { + pub(crate) rva: String, + pub(crate) section: String, + pub(crate) entries: Vec, +} + +#[derive(Serialize)] +pub(crate) struct PeDataPointerJson { + pub(crate) rva: String, + pub(crate) target_rva: String, + pub(crate) section: String, + pub(crate) target_section: String, + pub(crate) kind: String, +} + +#[derive(Serialize)] +pub(crate) struct PeRuntimeFunctionJson { + pub(crate) begin_rva: String, + pub(crate) end_rva: String, + pub(crate) unwind_info_rva: String, + pub(crate) prolog_size: u8, + pub(crate) unwind_codes: u8, + pub(crate) flags: String, + pub(crate) stack_alloc_size: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) unwind_operations: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) saved_registers: Vec, + #[serde(skip_serializing_if = "String::is_empty")] + pub(crate) exception_handler_rva: String, + #[serde(skip_serializing_if = "String::is_empty")] + pub(crate) handler_data_rva: String, + #[serde(skip_serializing_if = "String::is_empty")] + pub(crate) chained_parent: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) epilog_scopes: Vec, +} + pub(crate) fn to_edr_json(edr: &EdrCheckResult) -> EdrJson { EdrJson { 
in_memory_available: edr.in_memory_available, @@ -225,6 +298,115 @@ pub(crate) fn to_yara_json(m: &YaraMatch) -> YaraJson { } } +pub(crate) fn to_data_summary_json(summary: &PeDataSummary) -> PeDataSummaryJson { + PeDataSummaryJson { + strings: summary + .strings + .iter() + .map(|s| PeDataStringJson { + rva: format!("0x{:08X}", s.rva), + section: s.section_name.clone(), + encoding: s.encoding.clone(), + value: s.value.clone(), + }) + .collect(), + vtables: summary + .vtables + .iter() + .map(|v| PeVTableJson { + rva: format!("0x{:08X}", v.rva), + section: v.section_name.clone(), + entries: v + .entries + .iter() + .map(|rva| format!("0x{:08X}", rva)) + .collect(), + }) + .collect(), + pointers: summary + .pointers + .iter() + .map(|p| PeDataPointerJson { + rva: format!("0x{:08X}", p.rva), + target_rva: format!("0x{:08X}", p.target_rva), + section: p.section_name.clone(), + target_section: p.target_section_name.clone(), + kind: p.kind.clone(), + }) + .collect(), + unwind: summary + .runtime_functions + .iter() + .map(|u| PeRuntimeFunctionJson { + begin_rva: format!("0x{:08X}", u.begin_rva), + end_rva: format!("0x{:08X}", u.end_rva), + unwind_info_rva: format!("0x{:08X}", u.unwind_info_rva), + prolog_size: u.prolog_size, + unwind_codes: u.unwind_code_count, + flags: format!("0x{:X}", u.unwind_flags), + stack_alloc_size: format!("0x{:X}", u.stack_alloc_size), + unwind_operations: u + .unwind_operations + .iter() + .map(|op| { + if op.stack_offset == 0 { + format!( + "{}@+0x{:X}/info{}: {}", + op.op, op.code_offset, op.info, op.description + ) + } else { + format!( + "{}@+0x{:X}/info{}: {} [stack+0x{:X}]", + op.op, op.code_offset, op.info, op.description, op.stack_offset + ) + } + }) + .collect(), + saved_registers: u + .saved_registers + .iter() + .map(|reg| { + format!( + "{} stack+0x{:X} prolog+0x{:X}", + reg.register, reg.stack_offset, reg.prolog_offset + ) + }) + .collect(), + exception_handler_rva: if u.exception_handler_rva == 0 { + String::new() + } else { + 
format!("0x{:08X}", u.exception_handler_rva) + }, + handler_data_rva: if u.handler_data_rva == 0 { + String::new() + } else { + format!("0x{:08X}", u.handler_data_rva) + }, + chained_parent: u + .chained_parent + .as_ref() + .map(|parent| { + format!( + "0x{:08X}..0x{:08X} unwind=0x{:08X}", + parent.begin_rva, parent.end_rva, parent.unwind_info_rva + ) + }) + .unwrap_or_default(), + epilog_scopes: u + .epilog_scopes + .iter() + .map(|scope| { + format!( + "0x{:X}..0x{:X} {}", + scope.start_offset, scope.end_offset, scope.source + ) + }) + .collect(), + }) + .collect(), + } +} + pub(crate) fn hex_bytes(bytes: &[u8]) -> String { bytes .iter() diff --git a/resx/src/commands/dump/mod.rs b/resx/src/commands/dump/mod.rs index c2bb3df..130f3f7 100644 --- a/resx/src/commands/dump/mod.rs +++ b/resx/src/commands/dump/mod.rs @@ -12,8 +12,8 @@ use self::callmap::{ QsiDispatcher, }; use self::json::{ - hex_bytes, to_anomaly_json, to_edr_json, to_section_json, to_startup_json, to_yara_json, - ApiCallJson, FuncResult, InsnJson, + hex_bytes, to_anomaly_json, to_data_summary_json, to_edr_json, to_section_json, + to_startup_json, to_yara_json, ApiCallJson, FuncResult, InsnJson, }; use self::switchfmt::{format_case_summary, format_target_symbol_spaced}; use crate::analysis::cfgview::{ @@ -23,10 +23,14 @@ use crate::analysis::cfgview::{ use crate::analysis::disasm::{ collect_api_calls, disassemble_at, find_string_refs, find_xrefs, ApiCall, Instruction, }; +use crate::analysis::discovery::discover_functions; use crate::analysis::edr::{check_prologue, EdrCheckResult}; use crate::analysis::explain::explain_symbol; +use crate::analysis::indirect::analyze_indirect_flow; use crate::analysis::intelli::{analyze_image, IntelliFinding}; +use crate::analysis::ir::summarize_typed_ir; use crate::analysis::recomp::recomp_c; +use crate::analysis::recursive_cfg::{recover_recursive_cfg, RecursiveCfgRequest}; use crate::analysis::symbols::SymbolIndex; use crate::analysis::thunk::{follow_jmp_thunk, 
ThunkResolution}; use crate::analysis::yara::scan_file; @@ -41,8 +45,9 @@ use crate::core::output::{ use crate::core::search::find_dll_path; use crate::formats::pdb::{load_pdb_symbol, load_pdb_symbols}; use crate::formats::pe::{ - find_iat_slots_by_name, find_startup_routines, parse_pe, read_exports, read_imports, - read_runtime_function, resolve_iat_slot, Export, PeStartupRoutine, + find_iat_slots_by_name, find_startup_routines, parse_pe, read_data_summary, read_exports, + read_imports, read_load_config, read_runtime_function, resolve_iat_slot, Export, + PeStartupRoutine, }; #[derive(Debug, Clone)] @@ -199,6 +204,20 @@ pub fn run( progress.tick("reading import table"); let import_count: usize = imports.iter().map(|dll| dll.entries.len()).sum(); let startup_routines = find_startup_routines(&pe, &raw); + let load_config = read_load_config(&pe, &raw); + let function_discovery = if cfg.json { + Some(discover_functions( + &raw, + &pe, + &exports, + &symbol_index, + &pdb_symbols, + &startup_routines, + cfg, + )) + } else { + None + }; let yara_matches = if cfg.yara.is_empty() { Vec::new() } else { @@ -274,6 +293,17 @@ pub fn run( instructions: Vec::new(), xrefs: Vec::new(), strings: Vec::new(), + data: Some(to_data_summary_json(&read_data_summary(&pe, &raw))), + function_discovery, + recursive_cfg: None, + typed_ir: None, + indirect_flow: Some(analyze_indirect_flow( + &pe, + &imports, + &read_data_summary(&pe, &raw), + &[], + load_config.as_ref(), + )), intelli_findings: metadata_intelli, recomp: String::new(), cfg: String::new(), @@ -382,6 +412,43 @@ pub fn run( .ok(); } } + ThunkResolution::Chain { + ref final_target, .. + } => { + followed_desc = res.desc(); + match final_target.as_ref() { + ThunkResolution::Iat { dll, func, .. 
} if !dll.is_empty() => { + if !cfg.quiet { + writeln!(w, "{}", c.info(&format!("Thunk chain: {}", res.desc()))) + .ok(); + } + let new_cfg = cfg.clone(); + return run(dll, func, &new_cfg, w, c); + } + ThunkResolution::Direct { + target_rva: new_rva, + } => { + target_rva = *new_rva; + file_off = pe.rva_to_offset(target_rva).ok_or_else(|| { + format!("RVA 0x{:08X}: not in any section", target_rva) + })?; + if !cfg.quiet { + writeln!( + w, + "{}", + c.info(&format!("Following chain: {}", res.desc())) + ) + .ok(); + } + } + _ => { + if !cfg.quiet { + writeln!(w, "{}", c.info(&format!("Thunk chain: {}", res.desc()))) + .ok(); + } + } + } + } _ => {} } } @@ -496,9 +563,15 @@ pub fn run( } else { Vec::new() }; + let data_summary = if cfg.json || cfg.show_strings { + Some(read_data_summary(&pe, &raw)) + } else { + None + }; let api_calls = if cfg.funcs_depth > 0 { - let mut calls = collect_api_calls(&insns, &pe, &raw, &symbol_index, image_base); + let mut calls = + collect_api_calls(&insns, &pe, &raw, &symbol_index, image_base, cfg.hostile); // Merge switch-dispatch targets. First drop any unresolved register-indirect // entry at the same JMP site (they are superseded by the resolved targets). 
@@ -521,6 +594,7 @@ pub fn run( &api_calls, &insns, &resolved_name, + &dll_name, &raw, &pe, &symbol_index, @@ -533,8 +607,44 @@ pub fn run( } else { String::new() }; - let current_syscall = synthetic_syscall_api_call(&insns, &resolved_name) + let current_syscall = synthetic_syscall_api_call(&insns, &resolved_name, &dll_name) .and_then(|call| resolve_syscall_call_details(&call, &insns, cfg)); + let prototype = symbol_index + .exact(image_base + target_rva as u64) + .map(|sym| sym.type_name) + .unwrap_or_default(); + let typed_ir_summary = if cfg.json { + Some(summarize_typed_ir( + &insns, + image_base, + Some(&symbol_index), + &prototype, + )) + } else { + None + }; + let recursive_cfg = if cfg.json || want_cfg { + Some(recover_recursive_cfg(RecursiveCfgRequest { + raw: &raw, + pe: &pe, + start_rva: target_rva, + arch, + image_base, + exports: &exports, + symbols: Some(&symbol_index), + cfg, + prototype: &prototype, + })) + } else { + None + }; + let indirect_flow = if cfg.json { + data_summary.as_ref().map(|data| { + analyze_indirect_flow(&pe, &imports, data, &api_calls, load_config.as_ref()) + }) + } else { + None + }; let intelli_findings = if want_intelli { let findings = analyze_image(&raw, &imports, Some(&insns)); @@ -678,6 +788,7 @@ pub fn run( &api_calls, &insns, &resolved_name, + &dll_name, c, &raw, &pe, @@ -816,6 +927,11 @@ pub fn run( .collect(), xrefs, strings: str_refs, + data: data_summary.as_ref().map(to_data_summary_json), + function_discovery, + recursive_cfg, + typed_ir: typed_ir_summary, + indirect_flow, intelli_findings: if only_metadata { metadata_intelli } else { diff --git a/resx/src/commands/dump/style.rs b/resx/src/commands/dump/style.rs index 14638be..4cfae12 100644 --- a/resx/src/commands/dump/style.rs +++ b/resx/src/commands/dump/style.rs @@ -35,6 +35,10 @@ pub(super) fn color_kind(kind: &str, c: &Colors) -> String { pub(super) fn short_dll_name(name: &str) -> &str { name.strip_suffix(".dll") .or_else(|| name.strip_suffix(".DLL")) + 
.or_else(|| name.strip_suffix(".exe")) + .or_else(|| name.strip_suffix(".EXE")) + .or_else(|| name.strip_suffix(".sys")) + .or_else(|| name.strip_suffix(".SYS")) .unwrap_or(name) } diff --git a/resx/src/commands/mod.rs b/resx/src/commands/mod.rs index 94c92b9..ed060e6 100644 --- a/resx/src/commands/mod.rs +++ b/resx/src/commands/mod.rs @@ -6,6 +6,8 @@ pub mod intelli; pub mod locate; pub mod peinfo; pub mod priority; +pub mod reconstruct_cfg; +pub mod scan; pub mod show_eat; pub mod show_iat; pub mod show_syms; diff --git a/resx/src/commands/peinfo/detect.rs b/resx/src/commands/peinfo/detect.rs index 56e6f4a..6d7ac82 100644 --- a/resx/src/commands/peinfo/detect.rs +++ b/resx/src/commands/peinfo/detect.rs @@ -1,6 +1,7 @@ use crate::formats::pe::{ ImportDll, PeClrInfo, PeDebugInfo, PeFile, PeLoadConfigInfo, IMAGE_GUARD_XFG_ENABLED, }; +use regex::Regex; use super::model::{BuildAssessment, Candidate}; @@ -90,10 +91,10 @@ pub fn assess_build( }) .collect::>(); - let mut langs: Vec<(&str, Candidate)> = Vec::new(); - let mut tools: Vec<(&str, Candidate)> = Vec::new(); - let mut components: Vec<(&str, Candidate)> = Vec::new(); - let mut packers: Vec<(&str, Candidate)> = Vec::new(); + let mut langs: Vec<(String, Candidate)> = Vec::new(); + let mut tools: Vec<(String, Candidate)> = Vec::new(); + let mut components: Vec<(String, Candidate)> = Vec::new(); + let mut packers: Vec<(String, Candidate)> = Vec::new(); if let Some(clr_info) = clr { push_candidate( @@ -442,7 +443,7 @@ pub fn assess_build( } } -fn apply_rust_crate_heuristics(list: &mut Vec<(&'static str, Candidate)>, strings: &[String]) { +fn apply_rust_crate_heuristics(list: &mut Vec<(String, Candidate)>, strings: &[String]) { let crate_markers: [(&str, &[&str]); 13] = [ ( "Tokio", @@ -475,10 +476,71 @@ fn apply_rust_crate_heuristics(list: &mut Vec<(&'static str, Candidate)>, string ); } } + + for crate_name in infer_rust_crates(strings).into_iter().take(16) { + let label = format!("Rust crate: {}", crate_name); + 
push_candidate( + list, + label, + 65, + format!( + "Rust crate inferred from string/symbol patterns ({})", + crate_name + ), + ); + } +} + +fn infer_rust_crates(strings: &[String]) -> Vec { + let patterns = [ + r"(?:^|[^a-z0-9_])([a-z][a-z0-9_]{1,31})::[a-z_][a-z0-9_]*", + r"registry/src/[^/\\]+/([a-z0-9_-]+)-\d+\.\d+\.\d+", + r"cargo/registry/src/[^/\\]+/([a-z0-9_-]+)-\d+\.\d+\.\d+", + ]; + let regexes = patterns + .iter() + .filter_map(|pattern| Regex::new(pattern).ok()) + .collect::>(); + let mut counts = std::collections::BTreeMap::::new(); + let deny = [ + "alloc", + "core", + "std", + "proc_macro", + "test", + "panic_unwind", + "compiler_builtins", + "windows", + ]; + + for s in strings { + for re in ®exes { + for cap in re.captures_iter(s) { + let Some(raw) = cap.get(1).map(|m| m.as_str()) else { + continue; + }; + let name = raw.replace('-', "_").to_ascii_lowercase(); + if name.len() < 2 + || deny.contains(&name.as_str()) + || name.chars().all(|ch| ch.is_ascii_digit()) + { + continue; + } + *counts.entry(name).or_default() += 1; + } + } + } + + let mut crates = counts + .into_iter() + .filter(|(_, count)| *count >= 1) + .collect::>(); + crates.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))); + crates.into_iter().map(|(name, _)| name).collect() } fn apply_component_heuristics( - list: &mut Vec<(&'static str, Candidate)>, + list: &mut Vec<(String, Candidate)>, import_dlls: &[String], strings: &[String], ) { @@ -555,7 +617,7 @@ fn apply_component_heuristics( } fn apply_packer_heuristics( - list: &mut Vec<(&'static str, Candidate)>, + list: &mut Vec<(String, Candidate)>, pe: &PeFile, sections: &[String], import_dlls: &[String], @@ -692,12 +754,13 @@ fn collect_image_strings(raw: &[u8]) -> Vec { } fn push_candidate( - list: &mut Vec<(&'static str, Candidate)>, - label: &'static str, + list: &mut Vec<(String, Candidate)>, + label: impl Into, score: i32, evidence: impl Into, ) { - if let Some((_, existing)) = list.iter_mut().find(|(name, _)| *name == 
label) { + let label = label.into(); + if let Some((_, existing)) = list.iter_mut().find(|(name, _)| name == &label) { existing.score += score; existing.evidence.push(evidence.into()); return; @@ -712,19 +775,19 @@ fn push_candidate( } fn finalize_candidates( - list: &mut Vec<(&'static str, Candidate)>, + list: &mut [(String, Candidate)], limit: usize, min_score: i32, ) -> Vec { - list.sort_by(|a, b| b.1.score.cmp(&a.1.score).then_with(|| a.0.cmp(b.0))); + list.sort_by(|a, b| b.1.score.cmp(&a.1.score).then_with(|| a.0.cmp(&b.0))); list.iter() .filter(|(_, item)| item.score >= min_score) .take(limit) - .map(|(label, _)| (*label).to_owned()) + .map(|(label, _)| label.clone()) .collect() } -fn collect_evidence(list: &[(&'static str, Candidate)], min_score: i32) -> Vec { +fn collect_evidence(list: &[(String, Candidate)], min_score: i32) -> Vec { let mut out = Vec::new(); for (label, candidate) in list { if candidate.score < min_score { @@ -758,11 +821,11 @@ fn has_section(sections: &[String], name: &str) -> bool { sections.iter().any(|section| section == &want) } -fn contains_label(list: &[(&str, Candidate)], label: &str) -> bool { +fn contains_label(list: &[(String, Candidate)], label: &str) -> bool { list.iter() - .any(|(name, item)| *name == label && item.score >= 40) + .any(|(name, item)| name == label && item.score >= 40) } -fn contains_any_label(list: &[(&str, Candidate)], labels: &[&str]) -> bool { +fn contains_any_label(list: &[(String, Candidate)], labels: &[&str]) -> bool { labels.iter().any(|label| contains_label(list, label)) } diff --git a/resx/src/commands/peinfo/mod.rs b/resx/src/commands/peinfo/mod.rs index f45cfb4..79d0a73 100644 --- a/resx/src/commands/peinfo/mod.rs +++ b/resx/src/commands/peinfo/mod.rs @@ -9,8 +9,8 @@ use crate::core::output::StageProgress; use crate::core::search::find_dll_path; use crate::formats::metadata::{query_file_metadata, FileMetadata}; use crate::formats::pe::{ - find_startup_routines, parse_pe, read_clr_info, 
read_debug_info, read_exports, read_imports, - read_load_config, ImportDll, PeDebugInfo, PeLoadConfigInfo, + find_startup_routines, parse_pe, read_clr_info, read_data_summary, read_debug_info, + read_exports, read_imports, read_load_config, ImportDll, PeDebugInfo, PeLoadConfigInfo, IMAGE_DLLCHARACTERISTICS_APPCONTAINER, IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY, IMAGE_DLLCHARACTERISTICS_GUARD_CF, IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA, IMAGE_DLLCHARACTERISTICS_NO_SEH, @@ -25,8 +25,8 @@ use crate::formats::pe::{ use self::detect::{assess_build, detect_image_kind, machine_name, subsystem_name}; use self::model::{ - debug_types, safe_seh, to_anomaly_json, to_section_json, to_startup_json, AnalysisJson, - DebugJson, MitigationsJson, NameJson, PeInfoJson, SignerJson, + debug_types, safe_seh, to_anomaly_json, to_data_summary_json, to_section_json, to_startup_json, + AnalysisJson, DebugJson, MitigationsJson, NameJson, PeInfoJson, SignerJson, }; use self::render::{blank_as_unknown, render_text, TextReport}; @@ -88,6 +88,7 @@ pub fn run(dll_arg: &str, cfg: &Config, w: &mut dyn Write, c: &Colors) -> Result ); let veh_imports = detect_veh_imports(&imports); let startup_routines = find_startup_routines(&pe, &raw); + let data_summary = read_data_summary(&pe, &raw); if cfg.json { let out = PeInfoJson { @@ -129,6 +130,7 @@ pub fn run(dll_arg: &str, cfg: &Config, w: &mut dyn Write, c: &Colors) -> Result }, debug: to_debug_json(&pe, &debug), mitigations: to_mitigations_json(&pe, load_config.as_ref(), &veh_imports), + data: to_data_summary_json(&data_summary), names: NameJson { product_name: metadata.product_name.clone(), file_description: metadata.file_description.clone(), diff --git a/resx/src/commands/peinfo/model.rs b/resx/src/commands/peinfo/model.rs index b2bf441..d8fc117 100644 --- a/resx/src/commands/peinfo/model.rs +++ b/resx/src/commands/peinfo/model.rs @@ -1,6 +1,8 @@ use serde::Serialize; -use crate::formats::pe::{PeAnomaly, 
PeDebugInfo, PeLoadConfigInfo, PeSection, PeStartupRoutine}; +use crate::formats::pe::{ + PeAnomaly, PeDataSummary, PeDebugInfo, PeLoadConfigInfo, PeSection, PeStartupRoutine, +}; #[derive(Serialize)] pub struct PeInfoJson { @@ -30,6 +32,7 @@ pub struct PeInfoJson { pub analysis: AnalysisJson, pub debug: DebugJson, pub mitigations: MitigationsJson, + pub data: DataSummaryJson, pub names: NameJson, pub signer: SignerJson, #[serde(skip_serializing_if = "Vec::is_empty")] @@ -173,6 +176,33 @@ pub struct MitigationsJson { pub veh_imports: Vec, } +#[derive(Serialize)] +pub struct DataSummaryJson { + pub string_count: usize, + pub vtable_count: usize, + pub pointer_count: usize, + pub unwind_count: usize, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub strings: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub vtables: Vec, +} + +#[derive(Serialize)] +pub struct DataStringJson { + pub rva: String, + pub section: String, + pub encoding: String, + pub value: String, +} + +#[derive(Serialize)] +pub struct VTableJson { + pub rva: String, + pub section: String, + pub entries: Vec, +} + pub struct BuildAssessment { pub platform: String, pub runtime: String, @@ -222,6 +252,41 @@ pub fn to_startup_json(entry: &PeStartupRoutine) -> StartupRoutineJson { } } +pub fn to_data_summary_json(summary: &PeDataSummary) -> DataSummaryJson { + DataSummaryJson { + string_count: summary.strings.len(), + vtable_count: summary.vtables.len(), + pointer_count: summary.pointers.len(), + unwind_count: summary.runtime_functions.len(), + strings: summary + .strings + .iter() + .take(64) + .map(|s| DataStringJson { + rva: format!("0x{:08X}", s.rva), + section: s.section_name.clone(), + encoding: s.encoding.clone(), + value: s.value.clone(), + }) + .collect(), + vtables: summary + .vtables + .iter() + .take(64) + .map(|v| VTableJson { + rva: format!("0x{:08X}", v.rva), + section: v.section_name.clone(), + entries: v + .entries + .iter() + .take(32) + .map(|rva| format!("0x{:08X}", 
rva)) + .collect(), + }) + .collect(), + } +} + pub fn debug_types(debug: &PeDebugInfo) -> Vec { debug .entries diff --git a/resx/src/commands/reconstruct_cfg.rs b/resx/src/commands/reconstruct_cfg.rs new file mode 100644 index 0000000..6b8b00f --- /dev/null +++ b/resx/src/commands/reconstruct_cfg.rs @@ -0,0 +1,96 @@ +use std::io::Write; + +use crate::analysis::reconstruct::{reconstruct_image, render_ascii, PdbInfo}; +use crate::analysis::symbols::SymbolIndex; +use crate::core::color::Colors; +use crate::core::config::Config; +use crate::core::json::versioned_object; +use crate::core::search::find_dll_path; +use crate::formats::pdb::load_pdb_symbols; +use crate::formats::pe::{find_startup_routines, parse_pe, read_exports}; + +pub fn run(dll_arg: &str, cfg: &Config, w: &mut dyn Write, c: &Colors) -> Result<(), String> { + if dll_arg.is_empty() { + return Err( + "Use `resx reconstruct-cfg ` or `resx --reconstruct-cfg`".to_owned(), + ); + } + + let dll_path = find_dll_path(dll_arg, cfg)?; + let dll_name = dll_path + .file_name() + .unwrap_or_default() + .to_string_lossy() + .to_string(); + let dll_path_str = dll_path.to_string_lossy().to_string(); + + if !cfg.quiet && !cfg.json { + writeln!( + w, + "{}", + c.info(&format!( + "Reconstructing startup flow for {}...", + dll_path.display() + )) + ) + .ok(); + } + + let raw = std::fs::read(&dll_path).map_err(|e| format!("read file: {}", e))?; + let pe = parse_pe(&raw).map_err(|e| e.0)?; + let arch = cfg.effective_arch(pe.arch); + let exports = read_exports(&pe, &raw); + let (pdb_symbols, pdb_info) = if cfg.no_pdb { + (Vec::new(), PdbInfo::disabled()) + } else { + match load_pdb_symbols( + &dll_path_str, + &cfg.sym_path, + &cfg.sym_server, + &cfg.pdb_file, + cfg.verbose, + cfg.reload, + ) { + Ok(symbols) => { + let info = PdbInfo::loaded(&symbols); + (symbols, info) + } + Err(err) => { + if cfg.verbose && !cfg.quiet && !cfg.json { + writeln!(w, "{}", c.dim(&format!("PDB symbols unavailable: {}", err))).ok(); + } + 
(Vec::new(), PdbInfo::unavailable(err)) + } + } + }; + let symbol_index = SymbolIndex::from_exports_and_pdb(&exports, &pdb_symbols, pe.image_base); + let startup_routines = find_startup_routines(&pe, &raw); + + let report = reconstruct_image( + &dll_name, + &dll_path_str, + &raw, + &pe, + &exports, + &symbol_index, + &pdb_symbols, + pdb_info, + &startup_routines, + arch, + cfg, + ); + + if cfg.json { + writeln!( + w, + "{}", + serde_json::to_string_pretty(&versioned_object("reconstruct_cfg", &report)) + .unwrap_or_default() + ) + .ok(); + } else { + write!(w, "{}", render_ascii(&report, c, cfg)).ok(); + } + + Ok(()) +} diff --git a/resx/src/commands/scan.rs b/resx/src/commands/scan.rs new file mode 100644 index 0000000..16b5b42 --- /dev/null +++ b/resx/src/commands/scan.rs @@ -0,0 +1,576 @@ +use std::collections::{BTreeSet, VecDeque}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use rayon::prelude::*; +use serde::Serialize; + +use crate::analysis::discovery::discover_functions; +use crate::analysis::indirect::analyze_indirect_flow; +use crate::analysis::symbols::display_symbol_name; +use crate::analysis::symbols::SymbolIndex; +use crate::core::config::{Cli, Config}; +use crate::formats::pe::{ + find_startup_routines, parse_pe, read_data_summary, read_debug_info, read_exports, + read_imports, read_load_config, Export, ImportDll, PeFile, +}; + +#[derive(Serialize)] +struct ScanEnvelope { + tool: &'static str, + schema_version: u32, + kind: &'static str, + root: String, + files_seen: usize, + files_reported: usize, + results: Vec, +} + +#[derive(Serialize)] +struct ImageScanReport { + path: String, + name: String, + kind: String, + arch: String, + size_bytes: u64, + entry_point: String, + exports: usize, + imports: usize, + runtime_functions: usize, + discovered_functions: usize, + function_sources: Vec, + indirect_edges: usize, + indirect_tables: usize, + risk_score: u32, + risk_imports: Vec, + candidates: Vec, + input_surfaces: Vec, + fuzz_manifest: Vec, 
+ anomalies: Vec, + pdb_name: String, +} + +#[derive(Serialize, Clone)] +struct RiskImportReport { + dll: String, + name: String, + category: String, +} + +#[derive(Serialize, Clone)] +struct FuzzCandidateReport { + name: String, + rva: String, + source: String, + score: u32, + input_surface: String, + harness_kind: String, + suggested_invocation: String, + confidence: String, + reasons: Vec, +} + +#[derive(Serialize, Clone)] +struct FunctionSourceReport { + source: String, + count: usize, +} + +#[derive(Serialize, Clone)] +struct FuzzManifestEntry { + image: String, + function: String, + rva: String, + harness_kind: String, + input_surface: String, + seed_hint: String, +} + +pub fn run(cli: &Cli, w: &mut dyn Write) -> Result<(), String> { + let root = cli + .scan_root + .as_deref() + .ok_or_else(|| "Use `resx scan [--jsonl]`".to_owned())?; + let root = PathBuf::from(root); + let extensions = parse_extensions(&cli.scan_extensions); + let max_file_bytes = cli.max_file_mb.saturating_mul(1024 * 1024); + let paths = collect_image_paths(&root, &extensions, cli.max_files, max_file_bytes)?; + let cfg = Config::from_cli(cli, false); + + let mut results: Vec = paths + .par_iter() + .filter_map(|path| scan_one(path, cli.max_candidates, &cfg).ok()) + .collect(); + results.sort_by(|a, b| { + b.risk_score + .cmp(&a.risk_score) + .then_with(|| a.path.cmp(&b.path)) + }); + + if cli.jsonl { + for result in &results { + let line = serde_json::to_string(result).map_err(|e| format!("json: {}", e))?; + writeln!(w, "{}", line).ok(); + } + return Ok(()); + } + + let envelope = ScanEnvelope { + tool: "resx", + schema_version: 1, + kind: "scan", + root: root.display().to_string(), + files_seen: paths.len(), + files_reported: results.len(), + results, + }; + let json = serde_json::to_string_pretty(&envelope).map_err(|e| format!("json: {}", e))?; + writeln!(w, "{}", json).ok(); + Ok(()) +} + +fn scan_one(path: &Path, max_candidates: usize, cfg: &Config) -> Result { + let raw = 
std::fs::read(path).map_err(|e| format!("read '{}': {}", path.display(), e))?; + let pe = parse_pe(&raw).map_err(|e| e.0)?; + let exports = read_exports(&pe, &raw); + let imports = read_imports(&pe, &raw); + let debug = read_debug_info(&pe, &raw); + let data_summary = read_data_summary(&pe, &raw); + let runtime_functions = data_summary.runtime_functions.len(); + let startup_routines = find_startup_routines(&pe, &raw); + let symbol_index = SymbolIndex::from_exports_and_pdb(&exports, &[], pe.image_base); + let function_discovery = discover_functions( + &raw, + &pe, + &exports, + &symbol_index, + &[], + &startup_routines, + cfg, + ); + let indirect = analyze_indirect_flow( + &pe, + &imports, + &data_summary, + &[], + read_load_config(&pe, &raw).as_ref(), + ); + let risk_imports = collect_risk_imports(&imports); + let candidates = select_candidates(path, &pe, &exports, &imports, max_candidates); + let input_surfaces = classify_input_surfaces(&risk_imports, &candidates); + let fuzz_manifest = candidates + .iter() + .map(|candidate| FuzzManifestEntry { + image: path.display().to_string(), + function: candidate.name.clone(), + rva: candidate.rva.clone(), + harness_kind: candidate.harness_kind.clone(), + input_surface: candidate.input_surface.clone(), + seed_hint: candidate + .reasons + .first() + .cloned() + .unwrap_or_else(|| "binary input".to_owned()), + }) + .collect::>(); + let risk_score = image_risk_score(path, &pe, &risk_imports, &candidates); + + Ok(ImageScanReport { + path: path.display().to_string(), + name: file_name(path), + kind: image_kind(path, &pe), + arch: format!("x{}", pe.arch), + size_bytes: raw.len() as u64, + entry_point: hex32(pe.entry_point), + exports: exports.len(), + imports: imports.iter().map(|dll| dll.entries.len()).sum(), + runtime_functions, + discovered_functions: function_discovery.stats.total, + function_sources: source_counts(&function_discovery), + indirect_edges: indirect.edges.len(), + indirect_tables: indirect.tables.len(), + 
risk_score, + risk_imports, + candidates, + input_surfaces, + fuzz_manifest, + anomalies: pe + .anomalies + .iter() + .map(|a| format!("{}:{}:{}", a.severity, a.kind, a.detail)) + .collect(), + pdb_name: debug + .codeview + .as_ref() + .map(|cv| cv.pdb_name.clone()) + .unwrap_or_default(), + }) +} + +fn collect_image_paths( + root: &Path, + extensions: &BTreeSet, + max_files: usize, + max_file_bytes: u64, +) -> Result, String> { + let mut out = Vec::new(); + let mut queue = VecDeque::new(); + queue.push_back(root.to_path_buf()); + + while let Some(path) = queue.pop_front() { + if out.len() >= max_files { + break; + } + let Ok(meta) = std::fs::metadata(&path) else { + continue; + }; + if meta.is_dir() { + let Ok(entries) = std::fs::read_dir(&path) else { + continue; + }; + for entry in entries.flatten() { + queue.push_back(entry.path()); + } + continue; + } + if !meta.is_file() || meta.len() > max_file_bytes { + continue; + } + let ext = path + .extension() + .and_then(|ext| ext.to_str()) + .unwrap_or_default() + .to_ascii_lowercase(); + if extensions.contains(&ext) { + out.push(path); + } + } + + Ok(out) +} + +fn select_candidates( + path: &Path, + pe: &PeFile, + exports: &[Export], + imports: &[ImportDll], + max_candidates: usize, +) -> Vec { + let mut out = Vec::new(); + let driver = is_driver(path, pe); + + if pe.entry_point != 0 { + out.push(FuzzCandidateReport { + name: "entry_point".to_owned(), + rva: hex32(pe.entry_point), + source: "entry_point".to_owned(), + score: if driver { 25 } else { 10 }, + input_surface: if driver { + "driver-startup".to_owned() + } else { + "process-startup".to_owned() + }, + harness_kind: if driver { + "driver-entry harness".to_owned() + } else { + "process harness".to_owned() + }, + suggested_invocation: "load image and invoke startup path with guarded environment" + .to_owned(), + confidence: "medium".to_owned(), + reasons: if driver { + vec!["driver startup path".to_owned()] + } else { + vec!["process/module startup 
path".to_owned()] + }, + }); + } + + for export in exports { + let (score, reasons) = score_function_name(&export.name, driver); + if score == 0 { + continue; + } + out.push(FuzzCandidateReport { + name: display_symbol_name(&export.name), + rva: hex32(export.rva), + source: "export".to_owned(), + score, + input_surface: candidate_surface(&export.name, driver), + harness_kind: candidate_harness(&export.name, driver), + suggested_invocation: candidate_invocation(&export.name, driver), + confidence: if score >= 35 { "high" } else { "medium" }.to_owned(), + reasons, + }); + } + + if out.len() <= 1 && !exports.is_empty() { + for export in exports + .iter() + .take(max_candidates.saturating_sub(out.len())) + { + out.push(FuzzCandidateReport { + name: display_symbol_name(&export.name), + rva: hex32(export.rva), + source: "export".to_owned(), + score: 1, + input_surface: candidate_surface(&export.name, driver), + harness_kind: candidate_harness(&export.name, driver), + suggested_invocation: candidate_invocation(&export.name, driver), + confidence: "low".to_owned(), + reasons: vec!["exported entry point".to_owned()], + }); + } + } + + if imports.iter().any(imports_device_io) { + for candidate in &mut out { + if candidate.source == "entry_point" { + candidate.score += 10; + candidate + .reasons + .push("image imports device I/O APIs".to_owned()); + } + } + } + + out.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.name.cmp(&b.name))); + out.truncate(max_candidates); + out +} + +fn source_counts( + discovery: &crate::analysis::discovery::FunctionDiscoveryReport, +) -> Vec { + let mut counts = std::collections::BTreeMap::new(); + for function in &discovery.functions { + *counts.entry(function.source.clone()).or_insert(0usize) += 1; + } + counts + .into_iter() + .map(|(source, count)| FunctionSourceReport { source, count }) + .collect() +} + +fn classify_input_surfaces( + risk_imports: &[RiskImportReport], + candidates: &[FuzzCandidateReport], +) -> Vec { + let mut out = 
std::collections::BTreeSet::new(); + for risk in risk_imports { + out.insert(risk.category.clone()); + } + for candidate in candidates { + out.insert(candidate.input_surface.clone()); + } + out.into_iter().collect() +} + +fn candidate_surface(name: &str, driver: bool) -> String { + let lower = name.to_ascii_lowercase(); + if lower.contains("ioctl") || lower.contains("devicecontrol") { + "ioctl".to_owned() + } else if lower.contains("parse") || lower.contains("decode") || lower.contains("deserialize") { + "structured-input".to_owned() + } else if lower.contains("packet") || lower.contains("message") || lower.contains("http") { + "network-or-message".to_owned() + } else if driver { + "kernel-entry".to_owned() + } else { + "export-call".to_owned() + } +} + +fn candidate_harness(name: &str, driver: bool) -> String { + match candidate_surface(name, driver).as_str() { + "ioctl" => "ioctl harness".to_owned(), + "structured-input" => "buffer parser harness".to_owned(), + "network-or-message" => "message corpus harness".to_owned(), + "kernel-entry" => "driver dispatch harness".to_owned(), + _ => "export harness".to_owned(), + } +} + +fn candidate_invocation(name: &str, driver: bool) -> String { + match candidate_surface(name, driver).as_str() { + "ioctl" => "open device, mutate IOCTL code and input/output buffers".to_owned(), + "structured-input" => "call export with mutable byte buffer and size arguments".to_owned(), + "network-or-message" => "feed corpus bytes as packet/message payload".to_owned(), + "kernel-entry" => "invoke dispatch routine with synthetic IRP/request context".to_owned(), + _ => format!("resolve export {name} and call with guarded fuzz arguments"), + } +} + +fn collect_risk_imports(imports: &[ImportDll]) -> Vec { + let mut out = Vec::new(); + for dll in imports { + for entry in &dll.entries { + if let Some(category) = risk_import_category(&entry.name) { + out.push(RiskImportReport { + dll: dll.dll.clone(), + name: entry.name.clone(), + category: 
category.to_owned(), + }); + } + } + } + out.sort_by(|a, b| { + a.category + .cmp(&b.category) + .then_with(|| a.dll.cmp(&b.dll)) + .then_with(|| a.name.cmp(&b.name)) + }); + out.dedup_by(|a, b| a.dll == b.dll && a.name == b.name); + out +} + +fn risk_import_category(name: &str) -> Option<&'static str> { + let lower = name.to_ascii_lowercase(); + [ + ("deviceiocontrol", "ioctl"), + ("ntdeviceiocontrolfile", "ioctl"), + ("zwdeviceiocontrolfile", "ioctl"), + ("iocreatedevice", "driver-device"), + ("iocreatesymboliclink", "driver-device"), + ("wdfdevicecreate", "driver-device"), + ("probeforread", "kernel-user-buffer"), + ("probeforwrite", "kernel-user-buffer"), + ("mmmapiospace", "kernel-memory"), + ("memcpy", "memory-copy"), + ("strcpy", "string-copy"), + ("wcscpy", "string-copy"), + ("recv", "network-input"), + ("wsarecv", "network-input"), + ("internetreadfile", "network-input"), + ("readfile", "file-input"), + ("cryptdecodeobject", "parser"), + ("cert", "parser"), + ("rtldecompressbuffer", "decompression"), + ("bcryptdecrypt", "crypto"), + ("cryptdecrypt", "crypto"), + ("regqueryvalue", "registry"), + ("zwqueryvaluekey", "registry"), + ] + .iter() + .find_map(|(needle, category)| lower.contains(needle).then_some(*category)) +} + +fn score_function_name(name: &str, driver: bool) -> (u32, Vec) { + let lower = name.to_ascii_lowercase(); + let mut score = 0u32; + let mut reasons = Vec::new(); + + for (needle, value, reason) in [ + ("devicecontrol", 35, "device I/O dispatch"), + ("ioctl", 35, "IOCTL path"), + ("dispatch", 20, "dispatch routine"), + ("irp", 20, "IRP routine"), + ("parse", 18, "parser-like name"), + ("decode", 18, "decoder-like name"), + ("deserialize", 18, "deserializer-like name"), + ("decompress", 18, "decompression path"), + ("packet", 14, "packet handling"), + ("message", 12, "message handling"), + ("tlv", 12, "structured input"), + ("asn", 12, "structured input"), + ("rpc", 12, "RPC-facing path"), + ("http", 10, "network-facing path"), + ("read", 
8, "input read path"), + ("write", 8, "input write path"), + ("copy", 8, "copy boundary"), + ] { + if lower.contains(needle) { + score += value; + reasons.push(reason.to_owned()); + } + } + + if driver { + for (needle, value, reason) in [ + ("driverentry", 25, "driver entry"), + ("adddevice", 20, "PnP add-device path"), + ("evt", 10, "WDF event callback"), + ] { + if lower.contains(needle) { + score += value; + reasons.push(reason.to_owned()); + } + } + } + + reasons.sort(); + reasons.dedup(); + (score, reasons) +} + +fn image_risk_score( + path: &Path, + pe: &PeFile, + risk_imports: &[RiskImportReport], + candidates: &[FuzzCandidateReport], +) -> u32 { + let mut score = 0u32; + if is_driver(path, pe) { + score += 25; + } + score += risk_imports.len().min(20) as u32 * 3; + score += candidates.iter().map(|c| c.score).max().unwrap_or(0); + score += pe + .sections + .iter() + .filter(|section| section.unusual_protection_reason().is_some()) + .count() as u32 + * 5; + if pe.header_corruption_detected() { + score += 10; + } + score +} + +fn parse_extensions(raw: &str) -> BTreeSet { + raw.split(',') + .map(|item| item.trim().trim_start_matches('.').to_ascii_lowercase()) + .filter(|item| !item.is_empty()) + .collect() +} + +fn image_kind(path: &Path, pe: &PeFile) -> String { + if is_driver(path, pe) { + "driver".to_owned() + } else if path + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("dll")) + { + "dll".to_owned() + } else { + "exe".to_owned() + } +} + +fn is_driver(path: &Path, pe: &PeFile) -> bool { + path.extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("sys")) + || pe.subsystem == 1 +} + +fn imports_device_io(dll: &ImportDll) -> bool { + dll.entries.iter().any(|entry| { + matches!( + risk_import_category(&entry.name), + Some("ioctl" | "driver-device") + ) + }) +} + +fn file_name(path: &Path) -> String { + path.file_name() + .unwrap_or_default() + .to_string_lossy() + .to_string() 
+} + +fn hex32(value: u32) -> String { + format!("0x{:08X}", value) +} diff --git a/resx/src/core/config.rs b/resx/src/core/config.rs index 57f9a6f..9bb8e23 100644 --- a/resx/src/core/config.rs +++ b/resx/src/core/config.rs @@ -74,6 +74,15 @@ pub struct Cli { #[arg(long = "intelli")] pub intelli: bool, + #[arg(long = "reconstruct-cfg")] + pub reconstruct_cfg: bool, + + #[arg(long = "thread-filter", default_value = "")] + pub reconstruct_thread_filter: String, + + #[arg(long = "api-filter", default_value = "")] + pub reconstruct_api_filter: String, + #[arg(long = "max-insns", default_value_t = 500)] pub max_insns: usize, @@ -174,6 +183,27 @@ pub struct Cli { #[arg(long = "yara", action = clap::ArgAction::Append, value_name = "RULE_FILE")] pub yara: Vec, + #[arg(long = "resx-scan", hide = true)] + pub resx_scan: bool, + + #[arg(long = "scan-root", hide = true)] + pub scan_root: Option, + + #[arg(long = "jsonl")] + pub jsonl: bool, + + #[arg(long = "extensions", default_value = "exe,dll,sys")] + pub scan_extensions: String, + + #[arg(long = "max-files", default_value_t = 200)] + pub max_files: usize, + + #[arg(long = "max-file-mb", default_value_t = 200)] + pub max_file_mb: u64, + + #[arg(long = "max-candidates", default_value_t = 32)] + pub max_candidates: usize, + #[arg(long = "include-dir", alias = "scan-dir", action = clap::ArgAction::Append, value_name = "DIR")] pub scan_dirs: Vec, @@ -236,6 +266,11 @@ pub struct Cli { #[arg(long = "api")] pub explain_api: bool, + + /// Enable aggressive tracing: recursive register backward-slice, decoder-driven + /// reverse-index, indirect-JMP emission, and suspicion annotations in disasm. 
+ #[arg(long = "hostile")] + pub hostile: bool, } #[derive(Debug, Clone)] @@ -268,6 +303,9 @@ pub struct Config { pub unsafe_map_image: bool, pub hookchk: bool, pub intelli: bool, + pub reconstruct_cfg: bool, + pub reconstruct_thread_filter: String, + pub reconstruct_api_filter: String, pub max_insns: usize, pub max_bytes: usize, @@ -317,6 +355,7 @@ pub struct Config { pub explain: bool, pub explain_prefix: bool, pub explain_api: bool, + pub hostile: bool, } impl Config { @@ -351,6 +390,9 @@ impl Config { unsafe_map_image: cli.unsafe_map_image, hookchk: cli.hookchk, intelli: cli.intelli, + reconstruct_cfg: cli.reconstruct_cfg, + reconstruct_thread_filter: cli.reconstruct_thread_filter.clone(), + reconstruct_api_filter: cli.reconstruct_api_filter.clone(), max_insns: cli.max_insns, max_bytes: cli.max_bytes, show_bytes: cli.show_bytes && !cli.no_bytes, @@ -361,7 +403,7 @@ impl Config { show_rva: cli.show_rva, addr_width: cli.addr_width, byte_col_width: cli.byte_col_width, - json: cli.json, + json: cli.json || cli.jsonl || cli.resx_scan, out_file: cli.out_file.clone().unwrap_or_default(), verbose: cli.verbose, quiet: cli.quiet, @@ -397,6 +439,7 @@ impl Config { explain: cli.explain, explain_prefix: cli.explain_prefix, explain_api: cli.explain_api, + hostile: cli.hostile, } } diff --git a/resx/src/core/priority.rs b/resx/src/core/priority.rs index e28b42f..da888ef 100644 --- a/resx/src/core/priority.rs +++ b/resx/src/core/priority.rs @@ -7,7 +7,11 @@ use crate::core::config::Config; pub const EXACT_SEARCH_PRIORITY: &[&str] = &[ "ntoskrnl.exe", + "win32k.sys", + "win32kbase.sys", + "win32kfull.sys", "ntdll.dll", + "win32u.dll", "kernelbase.dll", "kernel32.dll", "advapi32.dll", @@ -73,6 +77,7 @@ pub const PREFIX_SEARCH_PRIORITY: &[&str] = &[ "setup", "cfgmgr", "wdf", + "win32k", "mf", "dx", "d3d", @@ -295,3 +300,32 @@ fn dedup_dirs(dirs: Vec) -> Vec { } out } + +#[cfg(test)] +mod tests { + use super::{built_in_priority_names, matcher_from_lists}; + use std::path::Path; + 
+ #[test] + fn win32k_family_is_builtin_priority() { + let names = built_in_priority_names(); + for expected in [ + "user32.dll", + "win32u.dll", + "win32k.sys", + "win32kbase.sys", + "win32kfull.sys", + ] { + assert!( + names.iter().any(|name| name == expected), + "missing built-in priority image {expected}" + ); + } + } + + #[test] + fn win32k_prefix_is_priority_ranked() { + let matcher = matcher_from_lists(Vec::new(), vec!["win32k".to_owned()], Vec::new()); + assert!(matcher.is_priority_path(Path::new("win32kbase_rs.sys"))); + } +} diff --git a/resx/src/core/search.rs b/resx/src/core/search.rs index c826258..74e09a2 100644 --- a/resx/src/core/search.rs +++ b/resx/src/core/search.rs @@ -3,6 +3,26 @@ use std::path::{Path, PathBuf}; use crate::core::config::Config; use crate::core::priority::global_lookup_dirs; +pub fn image_name_candidates(name: &str) -> Vec { + if Path::new(name).extension().is_some() { + return vec![name.to_owned()]; + } + + let lower = name.to_ascii_lowercase(); + let extensions: &[&str] = if lower == "ntoskrnl" || lower.starts_with("ntkrnl") { + &["exe", "sys", "dll"] + } else if lower.starts_with("win32k") { + &["sys", "dll", "exe"] + } else { + &["dll", "exe", "sys"] + }; + + extensions + .iter() + .map(|ext| format!("{}.{}", name, ext)) + .collect() +} + pub fn find_dll_path(name: &str, cfg: &Config) -> Result { if name.contains('/') || name.contains('\\') { let p = PathBuf::from(name); @@ -12,18 +32,15 @@ pub fn find_dll_path(name: &str, cfg: &Config) -> Result { return Err(format!("file not found: {}", name)); } - let base = if Path::new(name).extension().is_none() { - format!("{}.dll", name) - } else { - name.to_owned() - }; - + let candidates = image_name_candidates(name); let dirs = global_lookup_dirs(cfg); for dir in &dirs { - let candidate = dir.join(&base); - if candidate.exists() { - return candidate.canonicalize().map_err(|e| e.to_string()); + for base in &candidates { + let candidate = dir.join(base); + if candidate.exists() { + 
return candidate.canonicalize().map_err(|e| e.to_string()); + } } } @@ -32,3 +49,37 @@ pub fn find_dll_path(name: &str, cfg: &Config) -> Result { name )) } + +#[cfg(test)] +mod tests { + use super::image_name_candidates; + + #[test] + fn extensionless_win32k_prefers_sys() { + assert_eq!( + image_name_candidates("win32kfull"), + vec![ + "win32kfull.sys".to_owned(), + "win32kfull.dll".to_owned(), + "win32kfull.exe".to_owned() + ] + ); + } + + #[test] + fn extensionless_ntoskrnl_prefers_exe() { + assert_eq!( + image_name_candidates("ntoskrnl"), + vec![ + "ntoskrnl.exe".to_owned(), + "ntoskrnl.sys".to_owned(), + "ntoskrnl.dll".to_owned() + ] + ); + } + + #[test] + fn extensionless_user_mode_defaults_to_dll() { + assert_eq!(image_name_candidates("user32")[0], "user32.dll"); + } +} diff --git a/resx/src/formats/pe/metadata.rs b/resx/src/formats/pe/metadata.rs index c2f84dc..c50f760 100644 --- a/resx/src/formats/pe/metadata.rs +++ b/resx/src/formats/pe/metadata.rs @@ -3,8 +3,10 @@ use super::constants::{ IMAGE_DIRECTORY_ENTRY_EXCEPTION, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, IMAGE_DIRECTORY_ENTRY_TLS, }; use super::types::{ - read_cstr, read_u16, read_u32, read_u64, PeClrInfo, PeCodeViewInfo, PeDebugEntry, PeDebugInfo, - PeFile, PeLoadConfigInfo, PeRuntimeFunctionInfo, PeStartupRoutine, PeTlsCallback, PeTlsInfo, + read_cstr, read_u16, read_u32, read_u64, PeChainedRuntimeFunction, PeClrInfo, PeCodeViewInfo, + PeDataPointer, PeDataString, PeDataSummary, PeDebugEntry, PeDebugInfo, PeEpilogScope, PeFile, + PeLoadConfigInfo, PeRuntimeFunctionInfo, PeSavedRegister, PeStartupRoutine, PeTlsCallback, + PeTlsInfo, PeUnwindOperation, PeVTable, }; use iced_x86::{Decoder, DecoderOptions, Mnemonic, OpKind, Register}; use std::collections::{BTreeSet, VecDeque}; @@ -156,6 +158,47 @@ pub fn read_runtime_function( None } +pub fn read_runtime_functions(pe: &PeFile, raw: &[u8]) -> Vec { + if pe.arch != 64 { + return Vec::new(); + } + + let (dir_rva, dir_size) = 
pe.data_dir(IMAGE_DIRECTORY_ENTRY_EXCEPTION); + if dir_rva == 0 || dir_size < 12 { + return Vec::new(); + } + let Some(mut off) = pe.rva_to_offset(dir_rva) else { + return Vec::new(); + }; + let end = off.saturating_add(dir_size as usize).min(raw.len()); + let mut out = Vec::new(); + while off + 12 <= end { + let begin_rva = read_u32(raw, off); + let end_rva = read_u32(raw, off + 4); + let unwind_info_rva = read_u32(raw, off + 8); + if begin_rva != 0 && end_rva > begin_rva { + if let Some(info) = parse_unwind_info(pe, raw, begin_rva, end_rva, unwind_info_rva) { + out.push(info); + } + } + off += 12; + } + out +} + +pub fn read_data_summary(pe: &PeFile, raw: &[u8]) -> PeDataSummary { + let runtime_functions = read_runtime_functions(pe, raw); + let strings = read_data_strings(pe, raw, 256); + let pointers = read_data_pointers(pe, raw, 512); + let vtables = read_vtables_from_pointers(pe, &pointers, 128); + PeDataSummary { + strings, + vtables, + pointers, + runtime_functions, + } +} + pub fn read_tls_info(pe: &PeFile, raw: &[u8]) -> Option { let (dir_rva, dir_size) = pe.data_dir(IMAGE_DIRECTORY_ENTRY_TLS); let min_size = if pe.arch == 64 { 40usize } else { 24usize }; @@ -181,6 +224,190 @@ pub fn read_tls_info(pe: &PeFile, raw: &[u8]) -> Option { Some(PeTlsInfo { callbacks }) } +fn read_data_strings(pe: &PeFile, raw: &[u8], limit: usize) -> Vec { + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + for section in pe + .sections + .iter() + .filter(|section| is_data_section(§ion.name)) + { + let start = section.raw_offset as usize; + let end = start + .saturating_add(section.raw_size as usize) + .min(raw.len()); + let mut off = start; + while off < end && out.len() < limit { + if let Some((value, consumed)) = read_ascii_string_at(raw, off, end) { + let rva = section.virtual_address + (off - start) as u32; + if seen.insert((rva, "ascii")) { + out.push(PeDataString { + rva, + section_name: section.name.clone(), + encoding: "ascii".to_owned(), + value, + }); + } 
+ off += consumed.max(1); + continue; + } + if let Some((value, consumed)) = read_utf16_string_at(raw, off, end) { + let rva = section.virtual_address + (off - start) as u32; + if seen.insert((rva, "utf16")) { + out.push(PeDataString { + rva, + section_name: section.name.clone(), + encoding: "utf16".to_owned(), + value, + }); + } + off += consumed.max(2); + continue; + } + off += 1; + } + } + out +} + +fn read_data_pointers(pe: &PeFile, raw: &[u8], limit: usize) -> Vec { + let ptr_width = if pe.arch == 64 { 8usize } else { 4usize }; + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + for section in pe + .sections + .iter() + .filter(|section| is_data_section(§ion.name)) + { + let start = section.raw_offset as usize; + let end = start + .saturating_add(section.raw_size as usize) + .min(raw.len()); + let mut off = start; + while off + ptr_width <= end && out.len() < limit { + let value = if ptr_width == 8 { + read_u64(raw, off) + } else { + read_u32(raw, off) as u64 + }; + let site_rva = section.virtual_address + (off - start) as u32; + if let Some(target_rva) = pe.va_to_rva(value) { + if let Some(target_section) = pe.rva_to_section(target_rva) { + if seen.insert(site_rva) { + out.push(PeDataPointer { + rva: site_rva, + target_rva, + section_name: section.name.clone(), + target_section_name: target_section.name.clone(), + kind: if target_section.is_executable() { + "code".to_owned() + } else { + "data".to_owned() + }, + }); + } + } + } + off += ptr_width; + } + } + out +} + +fn read_vtables_from_pointers( + pe: &PeFile, + pointers: &[PeDataPointer], + limit: usize, +) -> Vec { + let ptr_width = if pe.arch == 64 { 8u32 } else { 4u32 }; + let mut out = Vec::new(); + let mut idx = 0usize; + while idx < pointers.len() && out.len() < limit { + if pointers[idx].kind != "code" { + idx += 1; + continue; + } + let start = idx; + let mut entries = vec![pointers[idx].target_rva]; + idx += 1; + while idx < pointers.len() + && pointers[idx].kind == "code" + && 
pointers[idx].section_name == pointers[start].section_name + && pointers[idx].rva == pointers[idx - 1].rva.saturating_add(ptr_width) + && entries.len() < 256 + { + entries.push(pointers[idx].target_rva); + idx += 1; + } + if entries.len() >= 2 { + out.push(PeVTable { + rva: pointers[start].rva, + section_name: pointers[start].section_name.clone(), + entries, + }); + } + } + out +} + +fn is_data_section(name: &str) -> bool { + matches!( + name.to_ascii_lowercase().as_str(), + ".rdata" + | "rdata" + | ".data" + | "data" + | ".pdata" + | "pdata" + | ".xdata" + | "xdata" + | ".idata" + | "idata" + ) +} + +fn read_ascii_string_at(raw: &[u8], off: usize, end: usize) -> Option<(String, usize)> { + let mut pos = off; + while pos < end && matches!(raw[pos], 0x20..=0x7E | b'\t' | b'\r' | b'\n') { + pos += 1; + } + let len = pos.saturating_sub(off); + if len < 4 || pos >= end || raw[pos] != 0 { + return None; + } + let text = String::from_utf8_lossy(&raw[off..pos]).to_string(); + let alpha = text.bytes().filter(|b| b.is_ascii_alphabetic()).count(); + if alpha < 2 { + return None; + } + Some((text, len + 1)) +} + +fn read_utf16_string_at(raw: &[u8], off: usize, end: usize) -> Option<(String, usize)> { + let mut units = Vec::new(); + let mut pos = off; + while pos + 1 < end { + let unit = u16::from_le_bytes([raw[pos], raw[pos + 1]]); + if unit == 0 { + break; + } + if !(0x20..=0x7E).contains(&unit) && !matches!(unit, 9 | 10 | 13) { + break; + } + units.push(unit); + pos += 2; + } + if units.len() < 4 || pos + 1 >= end || raw[pos] != 0 || raw[pos + 1] != 0 { + return None; + } + let text = String::from_utf16(&units).ok()?; + let alpha = text.bytes().filter(|b| b.is_ascii_alphabetic()).count(); + if alpha < 2 { + return None; + } + Some((text, (pos + 2) - off)) +} + pub fn find_startup_routines(pe: &PeFile, raw: &[u8]) -> Vec { let mut out = Vec::new(); let mut seen = BTreeSet::new(); @@ -548,9 +775,29 @@ fn parse_unwind_info( let codes_size = (unwind_code_count as usize) * 2; 
let aligned_codes_size = (codes_size + 3) & !3; let handler_field_off = off + 4 + aligned_codes_size; - - let exception_handler_rva = if unwind_flags & 0x3 != 0 && handler_field_off + 4 <= raw.len() { - read_u32(raw, handler_field_off) + let (unwind_operations, stack_alloc_size, saved_registers) = + parse_unwind_operations(raw, off + 4, unwind_code_count); + let chained_parent = if unwind_flags & 0x4 != 0 && handler_field_off + 12 <= raw.len() { + Some(PeChainedRuntimeFunction { + begin_rva: read_u32(raw, handler_field_off), + end_rva: read_u32(raw, handler_field_off + 4), + unwind_info_rva: read_u32(raw, handler_field_off + 8), + }) + } else { + None + }; + let exception_handler_rva = + if unwind_flags & 0x3 != 0 && unwind_flags & 0x4 == 0 && handler_field_off + 4 <= raw.len() + { + read_u32(raw, handler_field_off) + } else { + 0 + }; + let handler_data_rva = if exception_handler_rva != 0 { + unwind_info_rva + .saturating_add(4) + .saturating_add(aligned_codes_size as u32) + .saturating_add(4) } else { 0 }; @@ -566,9 +813,187 @@ fn parse_unwind_info( frame_register, frame_offset, exception_handler_rva, + handler_data_rva, + stack_alloc_size, + saved_registers, + unwind_operations, + chained_parent, + epilog_scopes: infer_epilog_scopes(raw, pe, begin_rva, end_rva), }) } +fn parse_unwind_operations( + raw: &[u8], + codes_off: usize, + count: u8, +) -> (Vec, u32, Vec) { + let mut ops = Vec::new(); + let mut saved = Vec::new(); + let mut stack_alloc = 0u32; + let mut idx = 0usize; + while idx < count as usize { + let off = codes_off + idx * 2; + if off + 2 > raw.len() { + break; + } + let code_offset = raw[off]; + let b = raw[off + 1]; + let uwop = b & 0x0F; + let info = b >> 4; + let mut stack_offset = 0u32; + let mut extra_slots = 0usize; + let (name, description) = match uwop { + 0 => { + saved.push(PeSavedRegister { + register: unwind_reg_name(info).to_owned(), + stack_offset: stack_alloc, + prolog_offset: code_offset, + }); + ( + "UWOP_PUSH_NONVOL", + 
format!("push {}", unwind_reg_name(info)), + ) + } + 1 => { + if info == 0 { + let extra = read_u16(raw, off + 2) as u32 * 8; + stack_alloc = stack_alloc.saturating_add(extra); + stack_offset = extra; + extra_slots = 1; + ("UWOP_ALLOC_LARGE", format!("alloc large 0x{:X}", extra)) + } else { + let extra = read_u32(raw, off + 2); + stack_alloc = stack_alloc.saturating_add(extra); + stack_offset = extra; + extra_slots = 2; + ("UWOP_ALLOC_LARGE", format!("alloc large 0x{:X}", extra)) + } + } + 2 => { + let extra = (info as u32) * 8 + 8; + stack_alloc = stack_alloc.saturating_add(extra); + stack_offset = extra; + ("UWOP_ALLOC_SMALL", format!("alloc small 0x{:X}", extra)) + } + 3 => ("UWOP_SET_FPREG", "establish frame pointer".to_owned()), + 4 | 5 => { + let scale = if uwop == 4 { 8 } else { 1 }; + let slots = if uwop == 4 { 1 } else { 2 }; + let extra = if uwop == 4 { + read_u16(raw, off + 2) as u32 * scale + } else { + read_u32(raw, off + 2) * scale + }; + saved.push(PeSavedRegister { + register: unwind_reg_name(info).to_owned(), + stack_offset: extra, + prolog_offset: code_offset, + }); + stack_offset = extra; + extra_slots = slots; + ( + if uwop == 4 { + "UWOP_SAVE_NONVOL" + } else { + "UWOP_SAVE_NONVOL_FAR" + }, + format!("save {} at stack+0x{:X}", unwind_reg_name(info), extra), + ) + } + 8 | 9 => { + let scale = if uwop == 8 { 16 } else { 1 }; + let slots = if uwop == 8 { 1 } else { 2 }; + let extra = if uwop == 8 { + read_u16(raw, off + 2) as u32 * scale + } else { + read_u32(raw, off + 2) * scale + }; + stack_offset = extra; + extra_slots = slots; + ( + if uwop == 8 { + "UWOP_SAVE_XMM128" + } else { + "UWOP_SAVE_XMM128_FAR" + }, + format!("save xmm{} at stack+0x{:X}", info, extra), + ) + } + 10 => ( + "UWOP_PUSH_MACHFRAME", + if info == 0 { + "push machine frame".to_owned() + } else { + "push machine frame with error code".to_owned() + }, + ), + _ => ("UWOP_UNKNOWN", format!("unknown unwind op {}", uwop)), + }; + ops.push(PeUnwindOperation { + code_offset, + op: 
name.to_owned(), + info, + stack_offset, + description, + }); + idx += 1 + extra_slots; + } + (ops, stack_alloc, saved) +} + +fn infer_epilog_scopes( + raw: &[u8], + pe: &PeFile, + begin_rva: u32, + end_rva: u32, +) -> Vec { + let Some(start) = pe.rva_to_offset(begin_rva) else { + return Vec::new(); + }; + let Some(end) = pe.rva_to_offset(end_rva.saturating_sub(1)).map(|v| v + 1) else { + return Vec::new(); + }; + let end = end.min(raw.len()); + if start >= end { + return Vec::new(); + } + let window_start = end.saturating_sub(32).max(start); + let mut scopes = Vec::new(); + for (off, b) in raw.iter().enumerate().take(end).skip(window_start) { + if *b == 0xC3 || *b == 0xC2 || *b == 0xCB || *b == 0xCA { + let rva = begin_rva.saturating_add((off - start) as u32); + scopes.push(PeEpilogScope { + start_offset: rva.saturating_sub(begin_rva), + end_offset: rva.saturating_sub(begin_rva).saturating_add(1), + source: "ret-scan".to_owned(), + }); + } + } + scopes +} + +fn unwind_reg_name(reg: u8) -> &'static str { + match reg { + 0 => "rax", + 1 => "rcx", + 2 => "rdx", + 3 => "rbx", + 4 => "rsp", + 5 => "rbp", + 6 => "rsi", + 7 => "rdi", + 8 => "r8", + 9 => "r9", + 10 => "r10", + 11 => "r11", + 12 => "r12", + 13 => "r13", + 14 => "r14", + 15 => "r15", + _ => "unknown", + } +} + fn parse_codeview_info(raw: &[u8]) -> Option { if raw.len() < 24 || &raw[..4] != b"RSDS" { return None; diff --git a/resx/src/formats/pe/mod.rs b/resx/src/formats/pe/mod.rs index 9742fd3..88af360 100644 --- a/resx/src/formats/pe/mod.rs +++ b/resx/src/formats/pe/mod.rs @@ -9,7 +9,8 @@ pub use constants::*; pub use exports::{attribute_to_func, read_exports}; pub use imports::{find_iat_slots_by_name, read_imports, resolve_iat_slot}; pub use metadata::{ - find_startup_routines, read_clr_info, read_debug_info, read_load_config, read_runtime_function, + find_startup_routines, read_clr_info, read_data_summary, read_debug_info, read_load_config, + read_runtime_function, read_runtime_functions, }; pub use 
parse::parse_pe; pub use types::*; diff --git a/resx/src/formats/pe/types.rs b/resx/src/formats/pe/types.rs index b5ad8b5..5874b50 100644 --- a/resx/src/formats/pe/types.rs +++ b/resx/src/formats/pe/types.rs @@ -266,6 +266,74 @@ pub struct PeRuntimeFunctionInfo { pub frame_register: u8, pub frame_offset: u8, pub exception_handler_rva: u32, + pub handler_data_rva: u32, + pub stack_alloc_size: u32, + pub saved_registers: Vec, + pub unwind_operations: Vec, + pub chained_parent: Option, + pub epilog_scopes: Vec, +} + +#[derive(Debug, Clone, Default)] +pub struct PeDataSummary { + pub strings: Vec, + pub vtables: Vec, + pub pointers: Vec, + pub runtime_functions: Vec, +} + +#[derive(Debug, Clone)] +pub struct PeDataString { + pub rva: u32, + pub section_name: String, + pub encoding: String, + pub value: String, +} + +#[derive(Debug, Clone)] +pub struct PeVTable { + pub rva: u32, + pub section_name: String, + pub entries: Vec, +} + +#[derive(Debug, Clone)] +pub struct PeDataPointer { + pub rva: u32, + pub target_rva: u32, + pub section_name: String, + pub target_section_name: String, + pub kind: String, +} + +#[derive(Debug, Clone)] +pub struct PeUnwindOperation { + pub code_offset: u8, + pub op: String, + pub info: u8, + pub stack_offset: u32, + pub description: String, +} + +#[derive(Debug, Clone)] +pub struct PeSavedRegister { + pub register: String, + pub stack_offset: u32, + pub prolog_offset: u8, +} + +#[derive(Debug, Clone)] +pub struct PeChainedRuntimeFunction { + pub begin_rva: u32, + pub end_rva: u32, + pub unwind_info_rva: u32, +} + +#[derive(Debug, Clone)] +pub struct PeEpilogScope { + pub start_offset: u32, + pub end_offset: u32, + pub source: String, } #[derive(Debug)] From c01b1c0f5377e57bcbe16a69c5aa17aaee4aff38 Mon Sep 17 00:00:00 2001 From: dutchpsycho <178704185+dutchpsycho@users.noreply.github.com> Date: Mon, 11 May 2026 17:41:51 +1000 Subject: [PATCH 2/4] test(cli): add binary analysis fixtures --- resx-palace/.gitignore | 1 + resx-palace/README.md | 
19 +++ resx-palace/scripts/build.ps1 | 99 +++++++++++++++ resx-palace/src/resx_palace.def | 7 + resx-palace/src/resx_palace.h | 23 ++++ resx-palace/src/resx_palace_dll.c | 125 ++++++++++++++++++ resx-palace/src/resx_palace_exe.c | 50 ++++++++ resx/tests/resx_palace_samples.rs | 204 ++++++++++++++++++++++++++++++ resx/tests/system_gui_images.rs | 105 +++++++++++++++ 9 files changed, 633 insertions(+) create mode 100644 resx-palace/.gitignore create mode 100644 resx-palace/README.md create mode 100644 resx-palace/scripts/build.ps1 create mode 100644 resx-palace/src/resx_palace.def create mode 100644 resx-palace/src/resx_palace.h create mode 100644 resx-palace/src/resx_palace_dll.c create mode 100644 resx-palace/src/resx_palace_exe.c create mode 100644 resx/tests/resx_palace_samples.rs create mode 100644 resx/tests/system_gui_images.rs diff --git a/resx-palace/.gitignore b/resx-palace/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/resx-palace/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/resx-palace/README.md b/resx-palace/README.md new file mode 100644 index 0000000..4c61d3d --- /dev/null +++ b/resx-palace/README.md @@ -0,0 +1,19 @@ +# resx-palace + +Small Windows PE sample corpus for RESX integration tests. + +The corpus builds one DLL and one EXE without network access. The DLL exports +functions named to exercise parser-like, IOCTL-like, callback/thread-like, +switch/jump-table-like, and indirect-call-like RESX detection paths. + +## Build + +```powershell +.\resx-palace\scripts\build.ps1 +``` + +The script looks for `cl.exe` first, then `clang-cl.exe`. Run it from a Visual +Studio Developer PowerShell, Developer Command Prompt, or an environment where +one of those compilers is already on `PATH`. + +Outputs are written to `resx-palace\build\`. 
diff --git a/resx-palace/scripts/build.ps1 b/resx-palace/scripts/build.ps1 new file mode 100644 index 0000000..07664c9 --- /dev/null +++ b/resx-palace/scripts/build.ps1 @@ -0,0 +1,99 @@ +param( + [string]$OutDir = (Join-Path $PSScriptRoot "..\build"), + [switch]$Clean +) + +$ErrorActionPreference = "Stop" + +$root = (Resolve-Path (Join-Path $PSScriptRoot "..")).Path +$src = Join-Path $root "src" +if ([System.IO.Path]::IsPathRooted($OutDir)) { + $out = $OutDir +} else { + $out = Join-Path $root $OutDir +} + +if ($Clean -and (Test-Path $out)) { + Remove-Item -LiteralPath $out -Recurse -Force +} +New-Item -ItemType Directory -Force -Path $out | Out-Null + +function Import-VsDevCmd { + if ($env:INCLUDE -like "*Windows Kits*") { + return + } + + $candidates = @( + "C:\Program Files\Microsoft Visual Studio", + "C:\Program Files (x86)\Microsoft Visual Studio" + ) + + foreach ($candidate in $candidates) { + if (-not (Test-Path $candidate)) { + continue + } + $vsDevCmd = Get-ChildItem $candidate -Recurse -Filter VsDevCmd.bat -ErrorAction SilentlyContinue | + Select-Object -First 1 -ExpandProperty FullName + if (-not $vsDevCmd) { + continue + } + + $cmd = "`"$vsDevCmd`" -arch=amd64 -host_arch=amd64 >nul && set" + $lines = & cmd.exe /s /c $cmd + if ($LASTEXITCODE -ne 0) { + continue + } + foreach ($line in $lines) { + $parts = $line -split "=", 2 + if ($parts.Length -eq 2) { + Set-Item -Path "env:$($parts[0])" -Value $parts[1] + } + } + return + } +} + +function Find-Compiler { + $cl = Get-Command cl.exe -ErrorAction SilentlyContinue + if ($cl) { return @{ Path = $cl.Source; Kind = "cl" } } + + $clang = Get-Command clang-cl.exe -ErrorAction SilentlyContinue + if ($clang) { return @{ Path = $clang.Source; Kind = "clang-cl" } } + + throw "No supported Windows C compiler found. Run from a Visual Studio Developer shell, or put cl.exe/clang-cl.exe on PATH." 
+} + +Import-VsDevCmd +$compiler = Find-Compiler +$common = @( + "/nologo", + "/W4", + "/WX", + "/O2", + "/GS", + "/guard:cf", + "/I$src" +) + +$dllSource = Join-Path $src "resx_palace_dll.c" +$exeSource = Join-Path $src "resx_palace_exe.c" +$defFile = Join-Path $src "resx_palace.def" +$dllPath = Join-Path $out "resx_palace.dll" +$libPath = Join-Path $out "resx_palace.lib" +$exePath = Join-Path $out "resx_palace_probe.exe" +$dllObj = Join-Path $out "resx_palace_dll.obj" +$exeObj = Join-Path $out "resx_palace_exe.obj" + +& $compiler.Path @common "/LD" "/Fo:$dllObj" $dllSource "/Fe:$dllPath" "/link" "/DEF:$defFile" "/IMPLIB:$libPath" "/OUT:$dllPath" +if ($LASTEXITCODE -ne 0) { + exit $LASTEXITCODE +} + +& $compiler.Path @common "/Fo:$exeObj" $exeSource $libPath "/Fe:$exePath" "/link" "/OUT:$exePath" +if ($LASTEXITCODE -ne 0) { + exit $LASTEXITCODE +} + +Write-Host "Built:" +Write-Host " $dllPath" +Write-Host " $exePath" diff --git a/resx-palace/src/resx_palace.def b/resx-palace/src/resx_palace.def new file mode 100644 index 0000000..fa412ff --- /dev/null +++ b/resx-palace/src/resx_palace.def @@ -0,0 +1,7 @@ +LIBRARY "resx_palace" +EXPORTS + ResxParsePacket + ResxDeviceIoctlDispatch + ResxThreadCallbackEntry + ResxSwitchJumpTableDispatch + ResxIndirectCallMessage diff --git a/resx-palace/src/resx_palace.h b/resx-palace/src/resx_palace.h new file mode 100644 index 0000000..b8e011c --- /dev/null +++ b/resx-palace/src/resx_palace.h @@ -0,0 +1,23 @@ +#pragma once + +#ifdef RESX_PALACE_EXPORTS +#define RESX_PALACE_API __declspec(dllexport) +#else +#define RESX_PALACE_API __declspec(dllimport) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (__cdecl *ResxPalaceCallback)(int value); + +RESX_PALACE_API int ResxParsePacket(const unsigned char *data, unsigned int len); +RESX_PALACE_API int ResxDeviceIoctlDispatch(unsigned int code, void *buffer, unsigned int len); +RESX_PALACE_API unsigned long __stdcall ResxThreadCallbackEntry(void *ctx); +RESX_PALACE_API 
int ResxSwitchJumpTableDispatch(unsigned int opcode, int value); +RESX_PALACE_API int ResxIndirectCallMessage(ResxPalaceCallback callback, int value); + +#ifdef __cplusplus +} +#endif diff --git a/resx-palace/src/resx_palace_dll.c b/resx-palace/src/resx_palace_dll.c new file mode 100644 index 0000000..4049f64 --- /dev/null +++ b/resx-palace/src/resx_palace_dll.c @@ -0,0 +1,125 @@ +#define WIN32_LEAN_AND_MEAN +#define RESX_PALACE_EXPORTS +#include +#include "resx_palace.h" + +static volatile LONG g_resx_sink; + +static int palace_add(int value) { + return value + 17; +} + +static int palace_xor(int value) { + return value ^ 0x5A5A; +} + +static int palace_mix(int value) { + return (value * 3) - 9; +} + +RESX_PALACE_API int ResxParsePacket(const unsigned char *data, unsigned int len) { + unsigned int cursor = 0; + int score = 0; + + while (cursor + 2 <= len) { + unsigned char tag = data[cursor++]; + unsigned char size = data[cursor++]; + if (cursor + size > len) { + return -10; + } + + switch (tag) { + case 0x01: + score += size; + break; + case 0x02: + score ^= data[cursor]; + break; + case 0x10: + if (size >= 4 && data[cursor] == 'R' && data[cursor + 1] == 'E') { + score += 100; + } + break; + default: + score -= tag; + break; + } + cursor += size; + } + + InterlockedExchange(&g_resx_sink, score); + return score; +} + +RESX_PALACE_API int ResxDeviceIoctlDispatch(unsigned int code, void *buffer, unsigned int len) { + unsigned char *bytes = (unsigned char *)buffer; + + if (buffer == 0 || len == 0) { + return -1; + } + + if (code == 0x222000) { + bytes[0] ^= 0xA5; + return (int)bytes[0]; + } + if (code == 0x222004 && len >= 4) { + return ResxParsePacket(bytes, len); + } + if ((code & 3) == 3) { + return (int)(len + code); + } + return -2; +} + +RESX_PALACE_API DWORD WINAPI ResxThreadCallbackEntry(void *ctx) { + int value = ctx ? 
*(int *)ctx : 0; + InterlockedAdd(&g_resx_sink, value); + return (DWORD)(value + 1); +} + +RESX_PALACE_API int ResxSwitchJumpTableDispatch(unsigned int opcode, int value) { + switch (opcode) { + case 0: + return value + 1; + case 1: + return value - 1; + case 2: + return value * 2; + case 3: + return value / 2; + case 4: + return value ^ 0x33; + case 5: + return value | 0x100; + case 6: + return value & 0x7F; + case 7: + return value + palace_add(value); + case 8: + return palace_xor(value); + case 9: + return palace_mix(value); + default: + return -100; + } +} + +RESX_PALACE_API int ResxIndirectCallMessage(ResxPalaceCallback callback, int value) { + ResxPalaceCallback table[3]; + unsigned int index = (unsigned int)value % 3; + + table[0] = palace_add; + table[1] = palace_xor; + table[2] = callback ? callback : palace_mix; + + return table[index](value); +} + +BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved) { + (void)instance; + (void)reserved; + if (reason == DLL_PROCESS_ATTACH) { + InterlockedExchange(&g_resx_sink, 1); + } + return TRUE; +} diff --git a/resx-palace/src/resx_palace_exe.c b/resx-palace/src/resx_palace_exe.c new file mode 100644 index 0000000..0528568 --- /dev/null +++ b/resx-palace/src/resx_palace_exe.c @@ -0,0 +1,50 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include "resx_palace.h" + +static int __cdecl local_callback(int value) { + return value + 101; +} + +int main(void) { + unsigned char packet[] = { + 0x01, 0x02, 0xAA, 0x55, + 0x10, 0x04, 'R', 'E', 'S', 'X', + }; + int thread_value = 7; + DWORD thread_id = 0; + DWORD bytes_returned = 0; + HANDLE thread_handle; + HANDLE console_handle; + int result = 0; + + result += ResxParsePacket(packet, (unsigned int)sizeof(packet)); + result += ResxDeviceIoctlDispatch(0x222004, packet, (unsigned int)sizeof(packet)); + result += ResxSwitchJumpTableDispatch((unsigned int)(result & 7), result); + result += ResxIndirectCallMessage(local_callback, result); + + thread_handle = 
CreateThread( + 0, + 0, + ResxThreadCallbackEntry, + &thread_value, + 0, + &thread_id); + if (thread_handle) { + WaitForSingleObject(thread_handle, 1000); + CloseHandle(thread_handle); + } + + console_handle = GetStdHandle(STD_INPUT_HANDLE); + DeviceIoControl( + console_handle, + 0x222000, + packet, + (DWORD)sizeof(packet), + packet, + (DWORD)sizeof(packet), + &bytes_returned, + 0); + + return result == 0 ? 1 : 0; +} diff --git a/resx/tests/resx_palace_samples.rs b/resx/tests/resx_palace_samples.rs new file mode 100644 index 0000000..4909234 --- /dev/null +++ b/resx/tests/resx_palace_samples.rs @@ -0,0 +1,204 @@ +#![cfg(windows)] + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use serde_json::Value; + +const EXPORTS: &[&str] = &[ + "ResxParsePacket", + "ResxDeviceIoctlDispatch", + "ResxThreadCallbackEntry", + "ResxSwitchJumpTableDispatch", + "ResxIndirectCallMessage", +]; + +fn workspace_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("resx crate should be inside workspace") + .to_path_buf() +} + +fn corpus_root() -> PathBuf { + workspace_root().join("resx-palace") +} + +fn build_dir() -> PathBuf { + corpus_root().join("build") +} + +fn sample_path(name: &str) -> PathBuf { + build_dir().join(name) +} + +fn ensure_samples() -> Option<(PathBuf, PathBuf)> { + let dll = sample_path("resx_palace.dll"); + let exe = sample_path("resx_palace_probe.exe"); + if dll.exists() && exe.exists() { + return Some((dll, exe)); + } + + let script = corpus_root().join("scripts").join("build.ps1"); + let output = Command::new("powershell") + .args([ + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-File", + script.to_str().unwrap(), + ]) + .current_dir(workspace_root()) + .output() + .expect("failed to launch resx-palace build script"); + + if !output.status.success() { + eprintln!( + "skipping resx-palace integration test: sample build failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + return None; + } + + if dll.exists() && exe.exists() { + Some((dll, exe)) + } else { + eprintln!("skipping resx-palace integration test: build did not produce expected samples"); + None + } +} + +fn run_resx(args: &[&str]) -> String { + let output = Command::new(env!("CARGO_BIN_EXE_resx")) + .args(args) + .output() + .expect("failed to run resx"); + assert!( + output.status.success(), + "resx failed for {:?}\nstdout:\n{}\nstderr:\n{}", + args, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout).expect("resx stdout was not utf-8") +} + +fn run_json(args: &[&str]) -> Value { + serde_json::from_str(&run_resx(args)).expect("resx stdout was not json") +} + +fn array_contains_name(items: &Value, name: &str) -> bool { + items + .as_array() + .is_some_and(|values| values.iter().any(|item| item["name"] == name)) +} + +#[test] +fn resx_palace_samples_exercise_binary_analysis_commands() { + let Some((dll, exe)) = ensure_samples() else { + return; + }; + let dll = dll.to_str().unwrap(); + let exe = exe.to_str().unwrap(); + let scan_root = build_dir(); + let scan_root = scan_root.to_str().unwrap(); + + let peinfo = run_json(&["peinfo", dll, "--json", "--no-color", "--quiet"]); + assert_eq!(peinfo["schema_version"], 1); + assert_eq!(peinfo["peinfo"]["file_name"], "resx_palace.dll"); + assert!(peinfo["peinfo"]["sections"] + .as_array() + .is_some_and(|sections| !sections.is_empty())); + + let eat = run_json(&["eat", dll, "--json", "--no-color", "--quiet"]); + assert_eq!(eat["schema_version"], 1); + for expected in EXPORTS { + assert!( + array_contains_name(&eat["exports"], expected), + "missing export {expected} in {eat:#}" + ); + } + + for expected in EXPORTS { + let dump = run_json(&["dump", dll, expected, "--json", "--no-color", "--quiet"]); + assert_eq!(dump["schema_version"], 1); + assert_eq!(dump["dump"]["function"], *expected); + 
assert!(dump["dump"]["instructions"] + .as_array() + .is_some_and(|instructions| !instructions.is_empty())); + } + + let cfg = run_resx(&[ + "cfg", + dll, + "ResxSwitchJumpTableDispatch", + "--no-color", + "--quiet", + ]); + assert!(cfg.contains("CFG: resx_palace.dll!ResxSwitchJumpTableDispatch")); + assert!( + cfg.contains("block") || cfg.contains("Basic") || cfg.contains("->"), + "cfg output did not look like a graph:\n{cfg}" + ); + + let reconstruct = run_json(&[ + "reconstruct-cfg", + exe, + "--json", + "--no-color", + "--quiet", + "--no-pdb", + "--depth", + "2", + "--max-total", + "64", + ]); + assert_eq!(reconstruct["schema_version"], 1); + assert_eq!( + reconstruct["reconstruct_cfg"]["image"], + "resx_palace_probe.exe" + ); + assert!(reconstruct["reconstruct_cfg"]["roots"] + .as_array() + .is_some_and(|roots| !roots.is_empty())); + + let scan = run_json(&[ + "scan", + scan_root, + "--extensions", + "exe,dll", + "--max-files", + "8", + "--max-candidates", + "16", + "--no-color", + "--quiet", + ]); + assert_eq!(scan["schema_version"], 1); + assert_eq!(scan["kind"], "scan"); + let results = scan["results"] + .as_array() + .expect("scan results should be an array"); + assert!(results.iter().any(|item| item["name"] == "resx_palace.dll")); + assert!(results + .iter() + .any(|item| item["name"] == "resx_palace_probe.exe")); + + let dll_report = results + .iter() + .find(|item| item["name"] == "resx_palace.dll") + .expect("missing DLL scan report"); + for expected in [ + "ResxParsePacket", + "ResxDeviceIoctlDispatch", + "ResxThreadCallbackEntry", + "ResxSwitchJumpTableDispatch", + "ResxIndirectCallMessage", + ] { + assert!( + array_contains_name(&dll_report["candidates"], expected), + "missing scan candidate {expected} in {dll_report:#}" + ); + } +} diff --git a/resx/tests/system_gui_images.rs b/resx/tests/system_gui_images.rs new file mode 100644 index 0000000..e96c93d --- /dev/null +++ b/resx/tests/system_gui_images.rs @@ -0,0 +1,105 @@ +#![cfg(windows)] + +use 
std::path::PathBuf; +use std::process::Command; + +use serde_json::Value; + +fn system32() -> PathBuf { + PathBuf::from(std::env::var("SystemRoot").unwrap_or_else(|_| r"C:\Windows".to_owned())) + .join("System32") +} + +fn run_resx(args: &[&str]) -> String { + let output = Command::new(env!("CARGO_BIN_EXE_resx")) + .args(args) + .output() + .expect("failed to run resx"); + assert!( + output.status.success(), + "resx failed for {:?}\nstdout:\n{}\nstderr:\n{}", + args, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout).expect("resx stdout was not utf-8") +} + +fn run_json(args: &[&str]) -> Value { + serde_json::from_str(&run_resx(args)).expect("resx stdout was not json") +} + +#[test] +fn extensionless_lookup_supports_user_and_win32k_images() { + let images = [ + ("user32", "user32.dll"), + ("win32u", "win32u.dll"), + ("win32k", "win32k.sys"), + ("win32kbase", "win32kbase.sys"), + ("win32kfull", "win32kfull.sys"), + ]; + + for (arg, file_name) in images { + if !system32().join(file_name).is_file() { + eprintln!("skipping {file_name}: not present in System32"); + continue; + } + let peinfo = run_json(&["peinfo", arg, "--json", "--quiet", "--no-color", "--no-pdb"]); + assert_eq!(peinfo["schema_version"], 1); + assert_eq!(peinfo["peinfo"]["file_name"], file_name); + assert!(peinfo["peinfo"]["sections"] + .as_array() + .is_some_and(|sections| !sections.is_empty())); + } +} + +#[test] +fn win32u_syscall_stub_routes_to_win32k_family_when_available() { + if !system32().join("win32u.dll").is_file() { + eprintln!("skipping win32u syscall route test: win32u.dll not present"); + return; + } + + let eat = run_json(&[ + "eat", + "win32u", + "--json", + "--quiet", + "--no-color", + "--no-pdb", + ]); + let has_nt_user_get_message = eat["exports"].as_array().is_some_and(|exports| { + exports + .iter() + .any(|export| export["name"] == "NtUserGetMessage") + }); + if !has_nt_user_get_message { + 
eprintln!("skipping win32u syscall route test: NtUserGetMessage export not present"); + return; + } + + let dump = run_json(&[ + "dump", + "win32u", + "NtUserGetMessage", + "--json", + "--quiet", + "--no-color", + "--no-pdb", + "--funcs", + "--max-insns", + "24", + ]); + assert_eq!(dump["schema_version"], 1); + assert_eq!(dump["dump"]["function"], "NtUserGetMessage"); + assert_eq!( + dump["dump"]["current_syscall"]["kernel_symbol"], + "NtUserGetMessage" + ); + assert!(dump["dump"]["current_syscall"]["kernel_module"] + .as_str() + .is_some_and(|module| module.to_ascii_lowercase().starts_with("win32k"))); + assert!(dump["dump"]["api_call_tree"] + .as_str() + .is_some_and(|tree| tree.contains("win32u!NtUserGetMessage"))); +} From 96c225a5f7086d577d708305932afb241af4674d Mon Sep 17 00:00:00 2001 From: dutchpsycho <178704185+dutchpsycho@users.noreply.github.com> Date: Mon, 11 May 2026 17:41:52 +1000 Subject: [PATCH 3/4] feat(vscode): add reconstruction and scan workflows --- resx-vscode/media-src/main.ts | 412 +++++++++++++++++++++++++++++++++- resx-vscode/media/main.js | 410 ++++++++++++++++++++++++++++++++- resx-vscode/media/style.css | 140 ++++++++++++ resx-vscode/package.json | 10 + resx-vscode/src/commands.ts | 96 ++++++++ resx-vscode/src/editor.ts | 42 +++- resx-vscode/src/runner.ts | 5 + 7 files changed, 1101 insertions(+), 14 deletions(-) diff --git a/resx-vscode/media-src/main.ts b/resx-vscode/media-src/main.ts index cd1d150..37be7ca 100644 --- a/resx-vscode/media-src/main.ts +++ b/resx-vscode/media-src/main.ts @@ -108,7 +108,11 @@ vscode.postMessage({ command: 'ready' }); currentDumpDll: '', currentDumpPath: '', apiDepth: 1, + hostile: false, devLogs: [], + reconstructRequested: false, + scanRequested: false, + scanRoot: '', currentPeInfo: null, typesByName: new Map() as Map, // lowercased type name → TypeEntryJson asmMetaWidth: persistedUiState.asmMetaWidth, @@ -271,11 +275,12 @@ function resolveNavigationTarget(funcName, dll = null) { function 
dumpCacheKey(entry) { const scope = moduleScope(entry); const depth = Math.max(1, Math.min(5, Number(entry?.funcsDepth || 1) || 1)); + const hostile = st.hostile ? ':hostile' : ''; if (entry?.rva) - return `rva:${scope}:${normalizeRva(entry.rva)}:depth:${depth}`; + return `rva:${scope}:${normalizeRva(entry.rva)}:depth:${depth}${hostile}`; const dll = entry?.dll ? String(entry.dll).toLowerCase() : ''; const fn = entry?.fn ? String(entry.fn).toLowerCase() : ''; - return `fn:${scope}:${dll}!${fn}:depth:${depth}`; + return `fn:${scope}:${dll}!${fn}:depth:${depth}${hostile}`; } function startupBadgeLabel(entry) { const kind = String(entry?.kind || '').toLowerCase(); @@ -1387,6 +1392,12 @@ document.querySelectorAll('.tab').forEach(btn => { document.querySelectorAll('.panel').forEach(p => p.classList.toggle('active', p.id === `panel-${btn.dataset.tab}`)); st.activeTopTab = btn.dataset.tab; persistUiState(); + if (btn.dataset.tab === 'flow') { + ensureReconstructCfg(); + } + if (btn.dataset.tab === 'scan') { + ensureScanPanel(); + } // If a data-symbol or disasm type-link triggered a jump to the Types tab while // the panel is already rendered, find and select the right type entry now. 
if (btn.dataset.tab === 'types' && st._pendingTypeNav) { @@ -1604,6 +1615,7 @@ function _requestDump(entry, prefetch = false) { rva: normalizeRva(entry.rva), label: entry.label, funcsDepth: Math.max(1, Math.min(5, Number(entry.funcsDepth || st.apiDepth || 1) || 1)), + hostile: st.hostile, ...request, }); } @@ -1615,6 +1627,7 @@ function _requestDump(entry, prefetch = false) { dllPath: entry.dllPath || null, sourceLabel: entry.label, funcsDepth: Math.max(1, Math.min(5, Number(entry.funcsDepth || st.apiDepth || 1) || 1)), + hostile: st.hostile, ...request, }); } @@ -2028,6 +2041,12 @@ window.addEventListener('message', e => { case 'intelli': renderTriage(msg); break; + case 'reconstruct_cfg_result': + renderReconstructCfg(msg); + break; + case 'scan_result': + renderScan(msg); + break; case 'dev_log_history': st.devLogs = Array.isArray(msg.entries) ? msg.entries.slice() : []; renderDevLogs(); @@ -2085,6 +2104,355 @@ function _handleFilePicked(msg) { if (lbl) lbl.textContent = msg.path.split(/[/\\]/).pop() || msg.path; } + else if (msg.kind === 'scan_root') { + st.scanRoot = msg.path; + st.scanRequested = true; + $('panel-scan').innerHTML = '

Scanning…

'; + activateTab('scan'); + vscode.postMessage({ command: 'scan_path', path: st.scanRoot }); + } +} +function ensureReconstructCfg() { + if (st.reconstructRequested) + return; + st.reconstructRequested = true; + const panel = $('panel-flow'); + panel.innerHTML = '

Running reconstruct-cfg…

'; + vscode.postMessage({ command: 'reconstruct_cfg' }); +} +function ensureScanPanel() { + const panel = $('panel-scan'); + if (st.scanRequested) + return; + panel.innerHTML = ''; + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const meta = document.createElement('div'); + meta.className = 'analysis-toolbar-meta'; + meta.textContent = 'Scan a folder for risky images and fuzz entry candidates.'; + const runCurrent = document.createElement('button'); + runCurrent.className = 'btn-sm'; + runCurrent.textContent = 'Scan Current Folder'; + runCurrent.addEventListener('click', () => { + st.scanRequested = true; + panel.innerHTML = '

Scanning…

'; + vscode.postMessage({ command: 'scan_path' }); + }); + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Browse…'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + toolbar.append(meta, runCurrent, browse); + panel.appendChild(toolbar); +} +function getPayloadObject(data, key) { + if (!data || typeof data !== 'object') + return null; + const record = data; + if (record[key] && typeof record[key] === 'object') + return record[key]; + return record; +} +function makeTag(text, extraClass = '') { + const tag = document.createElement('span'); + tag.className = `tag ${extraClass}`.trim(); + tag.textContent = String(text || ''); + return tag; +} +function appendMiniStats(container, stats) { + const grid = document.createElement('div'); + grid.className = 'mini-stats'; + Object.entries(stats).forEach(([label, value]) => { + const item = document.createElement('div'); + item.className = 'mini-stat'; + item.innerHTML = `${esc(value)}${esc(label)}`; + grid.appendChild(item); + }); + container.appendChild(grid); +} +function renderReconstructCfg(msg) { + const panel = $('panel-flow'); + panel.innerHTML = ''; + const data = getPayloadObject(msg.data, 'reconstruct_cfg'); + if (msg.error || !data) { + panel.appendChild(errBox(msg.error || 'reconstruct-cfg returned no data')); + st.reconstructRequested = false; + return; + } + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const title = document.createElement('div'); + title.className = 'analysis-toolbar-meta'; + title.textContent = `${data.image || 'image'} ${data.arch ? `· ${data.arch}` : ''} · entry ${data.entry_point || 'unknown'}`; + const rerun = document.createElement('button'); + rerun.className = 'btn-sm'; + rerun.textContent = 'Re-run'; + rerun.addEventListener('click', () => { + panel.innerHTML = '

Running reconstruct-cfg…

'; + vscode.postMessage({ command: 'reconstruct_cfg' }); + }); + const raw = document.createElement('button'); + raw.className = 'btn-sm'; + raw.textContent = 'Open JSON'; + raw.addEventListener('click', () => vscode.postMessage({ + command: 'open_text_report', + language: 'json', + content: JSON.stringify(msg.data, null, 2), + })); + toolbar.append(title, rerun, raw); + panel.appendChild(toolbar); + appendMiniStats(panel, { + roots: data.stats?.roots ?? (data.roots || []).length, + functions: data.stats?.functions_expanded ?? 0, + calls: data.stats?.call_edges ?? 0, + imports: data.stats?.import_edges ?? 0, + indirect: data.stats?.indirect_edges ?? 0, + threads: data.stats?.thread_edges ?? 0, + workpools: data.stats?.workpool_edges ?? 0, + exceptions: data.stats?.exception_edges ?? 0, + }); + if (data.pdb) { + const pdb = _card('Function Discovery'); + pdb.body.appendChild(kvRow('PDB', data.pdb.status || (data.pdb.loaded ? 'loaded' : 'unavailable'))); + pdb.body.appendChild(kvRow('Symbols', data.pdb.symbol_count ?? 0)); + pdb.body.appendChild(kvRow('Functions', data.pdb.function_count ?? 0)); + pdb.body.appendChild(kvRow('Sized Functions', data.pdb.sized_function_count ?? 0)); + if (data.pdb.error) + pdb.body.appendChild(kvRow('PDB Error', data.pdb.error)); + panel.appendChild(pdb.card); + } + const roots = Array.isArray(data.roots) ? data.roots : []; + if (!roots.length) { + panel.appendChild(document.createRange().createContextualFragment('

No reconstructed roots.

')); + } + else { + const tree = document.createElement('div'); + tree.className = 'flow-tree'; + roots.forEach(root => tree.appendChild(renderFlowFunction(root, 0))); + panel.appendChild(tree); + } + if (Array.isArray(data.notes) && data.notes.length) { + const notes = document.createElement('div'); + notes.className = 'analysis-notes'; + data.notes.forEach(note => notes.appendChild(makeTag(note))); + panel.appendChild(notes); + } +} +function renderFlowFunction(fn, depth) { + const node = document.createElement('details'); + node.className = `flow-node depth-${Math.min(depth, 4)}`; + node.open = depth < 2; + const summary = document.createElement('summary'); + summary.className = 'flow-summary'; + const name = document.createElement('span'); + name.className = 'flow-name'; + name.textContent = fn.name || fn.rva || 'function'; + const rva = document.createElement('span'); + rva.className = 'rva flow-rva'; + rva.textContent = fn.rva || ''; + if (fn.rva) { + rva.title = 'Open function disassembly'; + rva.addEventListener('click', evt => { + evt.preventDefault(); + navigateRva(fn.rva, fn.name || fn.rva); + }); + } + summary.append(name, rva); + ['kind', 'section', 'symbol_source', 'symbol_category', 'status'].forEach(key => { + if (fn[key]) + summary.appendChild(makeTag(fn[key], `flow-tag-${key}`)); + }); + if (fn.thread_lane) + summary.appendChild(makeTag(`lane ${fn.thread_lane}`, 'flow-tag-thread')); + node.appendChild(summary); + if (fn.prototype || fn.decode_bound || fn.note || fn.returns?.length) { + const meta = document.createElement('div'); + meta.className = 'flow-meta'; + if (fn.prototype) + meta.appendChild(kvRow('Prototype', fn.prototype)); + if (fn.decode_bound) + meta.appendChild(kvRow('Decode Bound', fn.decode_bound)); + if (fn.note) + meta.appendChild(kvRow('Note', fn.note)); + if (fn.returns?.length) + meta.appendChild(kvRow('Returns', fn.returns.join(', '))); + node.appendChild(meta); + } + const edges = Array.isArray(fn.edges) ? 
fn.edges : []; + if (edges.length) { + const list = document.createElement('div'); + list.className = 'flow-edge-list'; + edges.forEach(edge => { + const row = document.createElement('div'); + row.className = 'flow-edge'; + const site = document.createElement('span'); + site.className = 'rva'; + site.textContent = edge.site_rva || ''; + if (edge.site_rva) { + site.title = 'Open edge site'; + site.addEventListener('click', () => navigateRva(edge.site_rva, edge.target || edge.site_rva)); + } + const target = document.createElement('span'); + target.className = 'flow-edge-target'; + target.textContent = edge.target || edge.target_rva || ''; + const detail = document.createElement('span'); + detail.className = 'flow-edge-detail'; + detail.textContent = [edge.kind, edge.relation, edge.detail].filter(Boolean).join(' · '); + row.append(site, target, detail); + (edge.tags || []).forEach(tag => row.appendChild(makeTag(tag, tag === 'indirect' ? 'tag-warn' : ''))); + list.appendChild(row); + if (edge.child) + list.appendChild(renderFlowFunction(edge.child, depth + 1)); + }); + node.appendChild(list); + } + return node; +} +function renderScan(msg) { + const panel = $('panel-scan'); + panel.innerHTML = ''; + const data = getPayloadObject(msg.data, 'scan'); + if (msg.error || !data) { + panel.appendChild(errBox(msg.error || 'scan returned no data')); + st.scanRequested = false; + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Choose Folder'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + panel.appendChild(browse); + return; + } + const results = Array.isArray(data.results) ? data.results : []; + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const meta = document.createElement('div'); + meta.className = 'analysis-toolbar-meta'; + meta.textContent = `${results.length} image${results.length === 1 ? 
'' : 's'} · root ${data.root || msg.root || ''}`; + const rerun = document.createElement('button'); + rerun.className = 'btn-sm'; + rerun.textContent = 'Re-run'; + rerun.addEventListener('click', () => { + panel.innerHTML = '

Scanning…

'; + vscode.postMessage({ command: 'scan_path', path: st.scanRoot || data.root || msg.root || '' }); + }); + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Browse…'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + const raw = document.createElement('button'); + raw.className = 'btn-sm'; + raw.textContent = 'Open JSON'; + raw.addEventListener('click', () => vscode.postMessage({ + command: 'open_text_report', + language: 'json', + content: JSON.stringify(msg.data, null, 2), + })); + toolbar.append(meta, rerun, browse, raw); + panel.appendChild(toolbar); + appendMiniStats(panel, { + seen: data.files_seen ?? results.length, + reported: data.files_reported ?? results.length, + candidates: results.reduce((n, item) => n + (Array.isArray(item.candidates) ? item.candidates.length : 0), 0), + risk_imports: results.reduce((n, item) => n + (Array.isArray(item.risk_imports) ? item.risk_imports.length : 0), 0), + }); + if (!results.length) { + panel.appendChild(document.createRange().createContextualFragment('

No matching PE images were reported.

')); + return; + } + const { bar, lbl } = _searchBar(panel, 'Regex search scan results…'); + lbl.textContent = `${results.length} images`; + const list = document.createElement('div'); + list.className = 'scan-results'; + panel.appendChild(list); + const rows = []; + results.forEach(item => { + const details = document.createElement('details'); + details.className = 'scan-item'; + details.open = rows.length < 8; + details.dataset.text = JSON.stringify(item).toLowerCase(); + const summary = document.createElement('summary'); + summary.className = 'scan-summary'; + summary.innerHTML = `${esc(item.risk_score ?? 0)}${esc(item.name || item.path)}${esc([item.kind, item.arch, formatBytes(item.size_bytes), item.entry_point].filter(Boolean).join(' · '))}`; + details.appendChild(summary); + const body = document.createElement('div'); + body.className = 'scan-body'; + body.appendChild(kvRow('Path', item.path || '')); + body.appendChild(kvRow('Exports', item.exports ?? 0)); + body.appendChild(kvRow('Imports', item.imports ?? 0)); + body.appendChild(kvRow('Runtime Functions', item.runtime_functions ?? 
0)); + if (item.pdb_name) + body.appendChild(kvRow('PDB', item.pdb_name)); + appendScanCandidateSection(body, item.candidates || []); + appendScanRiskImports(body, item.risk_imports || []); + if (Array.isArray(item.anomalies) && item.anomalies.length) { + const anom = document.createElement('div'); + anom.className = 'analysis-notes'; + item.anomalies.forEach(a => anom.appendChild(makeTag(a, 'tag-warn'))); + body.appendChild(anom); + } + details.appendChild(body); + list.appendChild(details); + rows.push(details); + }); + const inp = bar.querySelector('input'); + inp.addEventListener('input', () => { + const raw = inp.value.trim(); + let re = null; + let errEl = bar.querySelector('.regex-err') || (() => { const e = document.createElement('span'); e.className = 'regex-err'; bar.appendChild(e); return e; })(); + if (raw) { + try { + re = new RegExp(raw, 'i'); + inp.classList.remove('invalid'); + errEl.textContent = ''; + } + catch (ex) { + inp.classList.add('invalid'); + errEl.textContent = ex.message; + return; + } + } + else { + inp.classList.remove('invalid'); + errEl.textContent = ''; + } + let visible = 0; + rows.forEach(row => { + const show = !re || re.test(row.dataset.text || ''); + row.style.display = show ? '' : 'none'; + if (show) + visible++; + }); + lbl.textContent = re ? `${visible} / ${results.length} images` : `${results.length} images`; + }); +} +function appendScanCandidateSection(container, candidates) { + if (!candidates.length) + return; + const title = document.createElement('div'); + title.className = 'section-label'; + title.textContent = `Fuzz Candidates (${candidates.length})`; + container.appendChild(title); + candidates.slice(0, 12).forEach(candidate => { + const row = document.createElement('div'); + row.className = 'scan-candidate'; + row.innerHTML = `${esc(candidate.score ?? 
0)}${esc(candidate.rva || '')}${esc(candidate.name || '')}${esc(candidate.source || '')}`; + if (Array.isArray(candidate.reasons)) { + candidate.reasons.forEach(reason => row.appendChild(makeTag(reason))); + } + container.appendChild(row); + }); +} +function appendScanRiskImports(container, imports) { + if (!imports.length) + return; + const title = document.createElement('div'); + title.className = 'section-label'; + title.textContent = `Risk Imports (${imports.length})`; + container.appendChild(title); + const wrap = document.createElement('div'); + wrap.className = 'tag-wrap'; + imports.slice(0, 32).forEach(item => wrap.appendChild(makeTag(`${item.dll}!${item.name} · ${item.category}`, 'tag-warn'))); + container.appendChild(wrap); } function renderPeInfo(msg) { const d = unwrapObjectPayload(msg.data, 'peinfo'); @@ -2135,6 +2503,17 @@ function renderOverview(d) { [['Exports', d.export_count], ['Import DLLs', d.import_dll_count], ['Imports', d.import_count]] .forEach(([k, v]) => cnts.body.appendChild(kvRow(k, v))); grid.appendChild(cnts.card); + const dataSummary = d.data || {}; + if (dataSummary.unwind_count || dataSummary.vtable_count || dataSummary.pointer_count || d.startup_routines?.length) { + const funcs = _card('Function Discovery'); + funcs.body.appendChild(kvRow('Startup Routines', d.startup_routines?.length || 0)); + funcs.body.appendChild(kvRow('Runtime Functions', dataSummary.unwind_count || 0)); + funcs.body.appendChild(kvRow('VTables', dataSummary.vtable_count || 0)); + funcs.body.appendChild(kvRow('Pointers', dataSummary.pointer_count || 0)); + if (d.mitigations?.cfg_function_table != null) + funcs.body.appendChild(kvRow('CFG Function Table', d.mitigations.cfg_function_table ? 
'Present' : 'Absent')); + grid.appendChild(funcs.card); + } const n = d.names || {}; if (n.product_name || n.file_description) { const ver = _card('Version Info'); @@ -3083,6 +3462,27 @@ function renderDump(msg) { }); depthLabel.appendChild(depthSelect); toolbar.appendChild(depthLabel); + + const hostileLabel = document.createElement('label'); + hostileLabel.className = 'calls-hostile-label'; + hostileLabel.title = 'Enable aggressive tracing: recursive register backward-slice, decoder-driven cross-reference scan, indirect-JMP emission, suspicion annotations'; + const hostileCheck = document.createElement('input'); + hostileCheck.type = 'checkbox'; + hostileCheck.className = 'calls-hostile-check'; + hostileCheck.checked = st.hostile; + hostileCheck.addEventListener('change', () => { + st.hostile = hostileCheck.checked; + const entry = currentNavEntry(); + if (entry) { + st.dumpCache.clear(); + _showDumpLoading(entry.label); + _requestDump(entry); + } + }); + hostileLabel.appendChild(hostileCheck); + hostileLabel.appendChild(document.createTextNode(' Hostile')); + toolbar.appendChild(hostileLabel); + cp.appendChild(toolbar); const wrap = document.createElement('div'); wrap.className = 'tbl-wrap'; @@ -3698,15 +4098,15 @@ function wireAsmFlow(view, insnPane, insnBody, flowSvg, insns, apiCalls, imageNa continue; const y1 = src.offsetTop + Math.max(8, Math.floor(src.offsetHeight / 2)); const y2 = dst.offsetTop + Math.max(8, Math.floor(dst.offsetHeight / 2)); - const codeX = width - 8; - const laneX = codeX - 10 - edge.lane * 12; + const rightX = width - 8; + const laneX = rightX - 10 - edge.lane * 12; const color = edge.kind === 'call' ? 'rgba(78,201,176,.95)' : edge.kind === 'jcc' ? 
'rgba(220,220,170,.98)' : 'rgba(86,156,214,.95)'; const path = document.createElementNS('http://www.w3.org/2000/svg', 'path'); - path.setAttribute('d', `M ${codeX} ${y1} H ${laneX} V ${y2} H ${codeX - 2}`); + path.setAttribute('d', `M ${rightX} ${y1} H ${laneX} V ${y2} H ${rightX - 2}`); path.setAttribute('fill', 'none'); path.setAttribute('stroke', color); path.setAttribute('stroke-width', edge.kind === 'call' ? '1.8' : edge.kind === 'jcc' ? '1.6' : '1.4'); @@ -3715,7 +4115,7 @@ function wireAsmFlow(view, insnPane, insnBody, flowSvg, insns, apiCalls, imageNa path.setAttribute('opacity', edge.kind === 'call' ? '0.78' : '0.96'); flowSvg.appendChild(path); const arrow = document.createElementNS('http://www.w3.org/2000/svg', 'path'); - arrow.setAttribute('d', `M ${codeX - 2} ${y2} l -6 -4 v 8 z`); + arrow.setAttribute('d', `M ${rightX - 2} ${y2} l -6 -4 v 8 z`); arrow.setAttribute('fill', color); flowSvg.appendChild(arrow); } diff --git a/resx-vscode/media/main.js b/resx-vscode/media/main.js index fe08c73..863d650 100644 --- a/resx-vscode/media/main.js +++ b/resx-vscode/media/main.js @@ -100,7 +100,11 @@ const st = { currentDumpDll: '', currentDumpPath: '', apiDepth: 1, + hostile: false, devLogs: [], + reconstructRequested: false, + scanRequested: false, + scanRoot: '', currentPeInfo: null, typesByName: new Map(), asmMetaWidth: persistedUiState.asmMetaWidth, @@ -263,11 +267,12 @@ function resolveNavigationTarget(funcName, dll = null) { function dumpCacheKey(entry) { const scope = moduleScope(entry); const depth = Math.max(1, Math.min(5, Number(entry?.funcsDepth || 1) || 1)); + const hostile = st.hostile ? ':hostile' : ''; if (entry?.rva) - return `rva:${scope}:${normalizeRva(entry.rva)}:depth:${depth}`; + return `rva:${scope}:${normalizeRva(entry.rva)}:depth:${depth}${hostile}`; const dll = entry?.dll ? String(entry.dll).toLowerCase() : ''; const fn = entry?.fn ? 
String(entry.fn).toLowerCase() : ''; - return `fn:${scope}:${dll}!${fn}:depth:${depth}`; + return `fn:${scope}:${dll}!${fn}:depth:${depth}${hostile}`; } function startupBadgeLabel(entry) { const kind = String(entry?.kind || '').toLowerCase(); @@ -1387,6 +1392,12 @@ document.querySelectorAll('.tab').forEach(btn => { document.querySelectorAll('.panel').forEach(p => p.classList.toggle('active', p.id === `panel-${btn.dataset.tab}`)); st.activeTopTab = btn.dataset.tab; persistUiState(); + if (btn.dataset.tab === 'flow') { + ensureReconstructCfg(); + } + if (btn.dataset.tab === 'scan') { + ensureScanPanel(); + } if (btn.dataset.tab === 'types' && st._pendingTypeNav) { const pending = st._pendingTypeNav; st._pendingTypeNav = null; @@ -1599,6 +1610,7 @@ function _requestDump(entry, prefetch = false) { rva: normalizeRva(entry.rva), label: entry.label, funcsDepth: Math.max(1, Math.min(5, Number(entry.funcsDepth || st.apiDepth || 1) || 1)), + hostile: st.hostile, ...request, }); } @@ -1610,6 +1622,7 @@ function _requestDump(entry, prefetch = false) { dllPath: entry.dllPath || null, sourceLabel: entry.label, funcsDepth: Math.max(1, Math.min(5, Number(entry.funcsDepth || st.apiDepth || 1) || 1)), + hostile: st.hostile, ...request, }); } @@ -2029,6 +2042,12 @@ window.addEventListener('message', e => { case 'intelli': renderTriage(msg); break; + case 'reconstruct_cfg_result': + renderReconstructCfg(msg); + break; + case 'scan_result': + renderScan(msg); + break; case 'dev_log_history': st.devLogs = Array.isArray(msg.entries) ? msg.entries.slice() : []; renderDevLogs(); @@ -2086,6 +2105,355 @@ function _handleFilePicked(msg) { if (lbl) lbl.textContent = msg.path.split(/[/\\]/).pop() || msg.path; } + else if (msg.kind === 'scan_root') { + st.scanRoot = msg.path; + st.scanRequested = true; + $('panel-scan').innerHTML = '

Scanning…

'; + activateTab('scan'); + vscode.postMessage({ command: 'scan_path', path: st.scanRoot }); + } +} +function ensureReconstructCfg() { + if (st.reconstructRequested) + return; + st.reconstructRequested = true; + const panel = $('panel-flow'); + panel.innerHTML = '

Running reconstruct-cfg…

'; + vscode.postMessage({ command: 'reconstruct_cfg' }); +} +function ensureScanPanel() { + const panel = $('panel-scan'); + if (st.scanRequested) + return; + panel.innerHTML = ''; + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const meta = document.createElement('div'); + meta.className = 'analysis-toolbar-meta'; + meta.textContent = 'Scan a folder for risky images and fuzz entry candidates.'; + const runCurrent = document.createElement('button'); + runCurrent.className = 'btn-sm'; + runCurrent.textContent = 'Scan Current Folder'; + runCurrent.addEventListener('click', () => { + st.scanRequested = true; + panel.innerHTML = '

Scanning…

'; + vscode.postMessage({ command: 'scan_path' }); + }); + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Browse…'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + toolbar.append(meta, runCurrent, browse); + panel.appendChild(toolbar); +} +function getPayloadObject(data, key) { + if (!data || typeof data !== 'object') + return null; + const record = data; + if (record[key] && typeof record[key] === 'object') + return record[key]; + return record; +} +function makeTag(text, extraClass = '') { + const tag = document.createElement('span'); + tag.className = `tag ${extraClass}`.trim(); + tag.textContent = String(text || ''); + return tag; +} +function appendMiniStats(container, stats) { + const grid = document.createElement('div'); + grid.className = 'mini-stats'; + Object.entries(stats).forEach(([label, value]) => { + const item = document.createElement('div'); + item.className = 'mini-stat'; + item.innerHTML = `${esc(value)}${esc(label)}`; + grid.appendChild(item); + }); + container.appendChild(grid); +} +function renderReconstructCfg(msg) { + const panel = $('panel-flow'); + panel.innerHTML = ''; + const data = getPayloadObject(msg.data, 'reconstruct_cfg'); + if (msg.error || !data) { + panel.appendChild(errBox(msg.error || 'reconstruct-cfg returned no data')); + st.reconstructRequested = false; + return; + } + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const title = document.createElement('div'); + title.className = 'analysis-toolbar-meta'; + title.textContent = `${data.image || 'image'} ${data.arch ? `· ${data.arch}` : ''} · entry ${data.entry_point || 'unknown'}`; + const rerun = document.createElement('button'); + rerun.className = 'btn-sm'; + rerun.textContent = 'Re-run'; + rerun.addEventListener('click', () => { + panel.innerHTML = '

Running reconstruct-cfg…

'; + vscode.postMessage({ command: 'reconstruct_cfg' }); + }); + const raw = document.createElement('button'); + raw.className = 'btn-sm'; + raw.textContent = 'Open JSON'; + raw.addEventListener('click', () => vscode.postMessage({ + command: 'open_text_report', + language: 'json', + content: JSON.stringify(msg.data, null, 2), + })); + toolbar.append(title, rerun, raw); + panel.appendChild(toolbar); + appendMiniStats(panel, { + roots: data.stats?.roots ?? (data.roots || []).length, + functions: data.stats?.functions_expanded ?? 0, + calls: data.stats?.call_edges ?? 0, + imports: data.stats?.import_edges ?? 0, + indirect: data.stats?.indirect_edges ?? 0, + threads: data.stats?.thread_edges ?? 0, + workpools: data.stats?.workpool_edges ?? 0, + exceptions: data.stats?.exception_edges ?? 0, + }); + if (data.pdb) { + const pdb = _card('Function Discovery'); + pdb.body.appendChild(kvRow('PDB', data.pdb.status || (data.pdb.loaded ? 'loaded' : 'unavailable'))); + pdb.body.appendChild(kvRow('Symbols', data.pdb.symbol_count ?? 0)); + pdb.body.appendChild(kvRow('Functions', data.pdb.function_count ?? 0)); + pdb.body.appendChild(kvRow('Sized Functions', data.pdb.sized_function_count ?? 0)); + if (data.pdb.error) + pdb.body.appendChild(kvRow('PDB Error', data.pdb.error)); + panel.appendChild(pdb.card); + } + const roots = Array.isArray(data.roots) ? data.roots : []; + if (!roots.length) { + panel.appendChild(document.createRange().createContextualFragment('

No reconstructed roots.

')); + } + else { + const tree = document.createElement('div'); + tree.className = 'flow-tree'; + roots.forEach(root => tree.appendChild(renderFlowFunction(root, 0))); + panel.appendChild(tree); + } + if (Array.isArray(data.notes) && data.notes.length) { + const notes = document.createElement('div'); + notes.className = 'analysis-notes'; + data.notes.forEach(note => notes.appendChild(makeTag(note))); + panel.appendChild(notes); + } +} +function renderFlowFunction(fn, depth) { + const node = document.createElement('details'); + node.className = `flow-node depth-${Math.min(depth, 4)}`; + node.open = depth < 2; + const summary = document.createElement('summary'); + summary.className = 'flow-summary'; + const name = document.createElement('span'); + name.className = 'flow-name'; + name.textContent = fn.name || fn.rva || 'function'; + const rva = document.createElement('span'); + rva.className = 'rva flow-rva'; + rva.textContent = fn.rva || ''; + if (fn.rva) { + rva.title = 'Open function disassembly'; + rva.addEventListener('click', evt => { + evt.preventDefault(); + navigateRva(fn.rva, fn.name || fn.rva); + }); + } + summary.append(name, rva); + ['kind', 'section', 'symbol_source', 'symbol_category', 'status'].forEach(key => { + if (fn[key]) + summary.appendChild(makeTag(fn[key], `flow-tag-${key}`)); + }); + if (fn.thread_lane) + summary.appendChild(makeTag(`lane ${fn.thread_lane}`, 'flow-tag-thread')); + node.appendChild(summary); + if (fn.prototype || fn.decode_bound || fn.note || fn.returns?.length) { + const meta = document.createElement('div'); + meta.className = 'flow-meta'; + if (fn.prototype) + meta.appendChild(kvRow('Prototype', fn.prototype)); + if (fn.decode_bound) + meta.appendChild(kvRow('Decode Bound', fn.decode_bound)); + if (fn.note) + meta.appendChild(kvRow('Note', fn.note)); + if (fn.returns?.length) + meta.appendChild(kvRow('Returns', fn.returns.join(', '))); + node.appendChild(meta); + } + const edges = Array.isArray(fn.edges) ? 
fn.edges : []; + if (edges.length) { + const list = document.createElement('div'); + list.className = 'flow-edge-list'; + edges.forEach(edge => { + const row = document.createElement('div'); + row.className = 'flow-edge'; + const site = document.createElement('span'); + site.className = 'rva'; + site.textContent = edge.site_rva || ''; + if (edge.site_rva) { + site.title = 'Open edge site'; + site.addEventListener('click', () => navigateRva(edge.site_rva, edge.target || edge.site_rva)); + } + const target = document.createElement('span'); + target.className = 'flow-edge-target'; + target.textContent = edge.target || edge.target_rva || ''; + const detail = document.createElement('span'); + detail.className = 'flow-edge-detail'; + detail.textContent = [edge.kind, edge.relation, edge.detail].filter(Boolean).join(' · '); + row.append(site, target, detail); + (edge.tags || []).forEach(tag => row.appendChild(makeTag(tag, tag === 'indirect' ? 'tag-warn' : ''))); + list.appendChild(row); + if (edge.child) + list.appendChild(renderFlowFunction(edge.child, depth + 1)); + }); + node.appendChild(list); + } + return node; +} +function renderScan(msg) { + const panel = $('panel-scan'); + panel.innerHTML = ''; + const data = getPayloadObject(msg.data, 'scan'); + if (msg.error || !data) { + panel.appendChild(errBox(msg.error || 'scan returned no data')); + st.scanRequested = false; + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Choose Folder'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + panel.appendChild(browse); + return; + } + const results = Array.isArray(data.results) ? data.results : []; + const toolbar = document.createElement('div'); + toolbar.className = 'analysis-toolbar'; + const meta = document.createElement('div'); + meta.className = 'analysis-toolbar-meta'; + meta.textContent = `${results.length} image${results.length === 1 ? 
'' : 's'} · root ${data.root || msg.root || ''}`; + const rerun = document.createElement('button'); + rerun.className = 'btn-sm'; + rerun.textContent = 'Re-run'; + rerun.addEventListener('click', () => { + panel.innerHTML = '

Scanning…

'; + vscode.postMessage({ command: 'scan_path', path: st.scanRoot || data.root || msg.root || '' }); + }); + const browse = document.createElement('button'); + browse.className = 'btn-sm'; + browse.textContent = 'Browse…'; + browse.addEventListener('click', () => vscode.postMessage({ command: 'scan_browse_folder' })); + const raw = document.createElement('button'); + raw.className = 'btn-sm'; + raw.textContent = 'Open JSON'; + raw.addEventListener('click', () => vscode.postMessage({ + command: 'open_text_report', + language: 'json', + content: JSON.stringify(msg.data, null, 2), + })); + toolbar.append(meta, rerun, browse, raw); + panel.appendChild(toolbar); + appendMiniStats(panel, { + seen: data.files_seen ?? results.length, + reported: data.files_reported ?? results.length, + candidates: results.reduce((n, item) => n + (Array.isArray(item.candidates) ? item.candidates.length : 0), 0), + risk_imports: results.reduce((n, item) => n + (Array.isArray(item.risk_imports) ? item.risk_imports.length : 0), 0), + }); + if (!results.length) { + panel.appendChild(document.createRange().createContextualFragment('

No matching PE images were reported.

')); + return; + } + const { bar, lbl } = _searchBar(panel, 'Regex search scan results…'); + lbl.textContent = `${results.length} images`; + const list = document.createElement('div'); + list.className = 'scan-results'; + panel.appendChild(list); + const rows = []; + results.forEach(item => { + const details = document.createElement('details'); + details.className = 'scan-item'; + details.open = rows.length < 8; + details.dataset.text = JSON.stringify(item).toLowerCase(); + const summary = document.createElement('summary'); + summary.className = 'scan-summary'; + summary.innerHTML = `${esc(item.risk_score ?? 0)}${esc(item.name || item.path)}${esc([item.kind, item.arch, formatBytes(item.size_bytes), item.entry_point].filter(Boolean).join(' · '))}`; + details.appendChild(summary); + const body = document.createElement('div'); + body.className = 'scan-body'; + body.appendChild(kvRow('Path', item.path || '')); + body.appendChild(kvRow('Exports', item.exports ?? 0)); + body.appendChild(kvRow('Imports', item.imports ?? 0)); + body.appendChild(kvRow('Runtime Functions', item.runtime_functions ?? 
0)); + if (item.pdb_name) + body.appendChild(kvRow('PDB', item.pdb_name)); + appendScanCandidateSection(body, item.candidates || []); + appendScanRiskImports(body, item.risk_imports || []); + if (Array.isArray(item.anomalies) && item.anomalies.length) { + const anom = document.createElement('div'); + anom.className = 'analysis-notes'; + item.anomalies.forEach(a => anom.appendChild(makeTag(a, 'tag-warn'))); + body.appendChild(anom); + } + details.appendChild(body); + list.appendChild(details); + rows.push(details); + }); + const inp = bar.querySelector('input'); + inp.addEventListener('input', () => { + const raw = inp.value.trim(); + let re = null; + let errEl = bar.querySelector('.regex-err') || (() => { const e = document.createElement('span'); e.className = 'regex-err'; bar.appendChild(e); return e; })(); + if (raw) { + try { + re = new RegExp(raw, 'i'); + inp.classList.remove('invalid'); + errEl.textContent = ''; + } + catch (ex) { + inp.classList.add('invalid'); + errEl.textContent = ex.message; + return; + } + } + else { + inp.classList.remove('invalid'); + errEl.textContent = ''; + } + let visible = 0; + rows.forEach(row => { + const show = !re || re.test(row.dataset.text || ''); + row.style.display = show ? '' : 'none'; + if (show) + visible++; + }); + lbl.textContent = re ? `${visible} / ${results.length} images` : `${results.length} images`; + }); +} +function appendScanCandidateSection(container, candidates) { + if (!candidates.length) + return; + const title = document.createElement('div'); + title.className = 'section-label'; + title.textContent = `Fuzz Candidates (${candidates.length})`; + container.appendChild(title); + candidates.slice(0, 12).forEach(candidate => { + const row = document.createElement('div'); + row.className = 'scan-candidate'; + row.innerHTML = `${esc(candidate.score ?? 
0)}${esc(candidate.rva || '')}${esc(candidate.name || '')}${esc(candidate.source || '')}`; + if (Array.isArray(candidate.reasons)) { + candidate.reasons.forEach(reason => row.appendChild(makeTag(reason))); + } + container.appendChild(row); + }); +} +function appendScanRiskImports(container, imports) { + if (!imports.length) + return; + const title = document.createElement('div'); + title.className = 'section-label'; + title.textContent = `Risk Imports (${imports.length})`; + container.appendChild(title); + const wrap = document.createElement('div'); + wrap.className = 'tag-wrap'; + imports.slice(0, 32).forEach(item => wrap.appendChild(makeTag(`${item.dll}!${item.name} · ${item.category}`, 'tag-warn'))); + container.appendChild(wrap); } function renderPeInfo(msg) { const d = unwrapObjectPayload(msg.data, 'peinfo'); @@ -2135,6 +2503,17 @@ function renderOverview(d) { [['Exports', d.export_count], ['Import DLLs', d.import_dll_count], ['Imports', d.import_count]] .forEach(([k, v]) => cnts.body.appendChild(kvRow(k, v))); grid.appendChild(cnts.card); + const dataSummary = d.data || {}; + if (dataSummary.unwind_count || dataSummary.vtable_count || dataSummary.pointer_count || d.startup_routines?.length) { + const funcs = _card('Function Discovery'); + funcs.body.appendChild(kvRow('Startup Routines', d.startup_routines?.length || 0)); + funcs.body.appendChild(kvRow('Runtime Functions', dataSummary.unwind_count || 0)); + funcs.body.appendChild(kvRow('VTables', dataSummary.vtable_count || 0)); + funcs.body.appendChild(kvRow('Pointers', dataSummary.pointer_count || 0)); + if (d.mitigations?.cfg_function_table != null) + funcs.body.appendChild(kvRow('CFG Function Table', d.mitigations.cfg_function_table ? 
'Present' : 'Absent')); + grid.appendChild(funcs.card); + } const n = d.names || {}; if (n.product_name || n.file_description) { const ver = _card('Version Info'); @@ -3085,6 +3464,25 @@ function renderDump(msg) { }); depthLabel.appendChild(depthSelect); toolbar.appendChild(depthLabel); + const hostileLabel = document.createElement('label'); + hostileLabel.className = 'calls-hostile-label'; + hostileLabel.title = 'Enable aggressive tracing: recursive register backward-slice, decoder-driven cross-reference scan, indirect-JMP emission, suspicion annotations'; + const hostileCheck = document.createElement('input'); + hostileCheck.type = 'checkbox'; + hostileCheck.className = 'calls-hostile-check'; + hostileCheck.checked = st.hostile; + hostileCheck.addEventListener('change', () => { + st.hostile = hostileCheck.checked; + const entry = currentNavEntry(); + if (entry) { + st.dumpCache.clear(); + _showDumpLoading(entry.label); + _requestDump(entry); + } + }); + hostileLabel.appendChild(hostileCheck); + hostileLabel.appendChild(document.createTextNode(' Hostile')); + toolbar.appendChild(hostileLabel); cp.appendChild(toolbar); const wrap = document.createElement('div'); wrap.className = 'tbl-wrap'; @@ -3699,15 +4097,15 @@ function wireAsmFlow(view, insnPane, insnBody, flowSvg, insns, apiCalls, imageNa continue; const y1 = src.offsetTop + Math.max(8, Math.floor(src.offsetHeight / 2)); const y2 = dst.offsetTop + Math.max(8, Math.floor(dst.offsetHeight / 2)); - const codeX = width - 8; - const laneX = codeX - 10 - edge.lane * 12; + const rightX = width - 8; + const laneX = rightX - 10 - edge.lane * 12; const color = edge.kind === 'call' ? 'rgba(78,201,176,.95)' : edge.kind === 'jcc' ? 
'rgba(220,220,170,.98)' : 'rgba(86,156,214,.95)'; const path = document.createElementNS('http://www.w3.org/2000/svg', 'path'); - path.setAttribute('d', `M ${codeX} ${y1} H ${laneX} V ${y2} H ${codeX - 2}`); + path.setAttribute('d', `M ${rightX} ${y1} H ${laneX} V ${y2} H ${rightX - 2}`); path.setAttribute('fill', 'none'); path.setAttribute('stroke', color); path.setAttribute('stroke-width', edge.kind === 'call' ? '1.8' : edge.kind === 'jcc' ? '1.6' : '1.4'); @@ -3716,7 +4114,7 @@ function wireAsmFlow(view, insnPane, insnBody, flowSvg, insns, apiCalls, imageNa path.setAttribute('opacity', edge.kind === 'call' ? '0.78' : '0.96'); flowSvg.appendChild(path); const arrow = document.createElementNS('http://www.w3.org/2000/svg', 'path'); - arrow.setAttribute('d', `M ${codeX - 2} ${y2} l -6 -4 v 8 z`); + arrow.setAttribute('d', `M ${rightX - 2} ${y2} l -6 -4 v 8 z`); arrow.setAttribute('fill', color); flowSvg.appendChild(arrow); } diff --git a/resx-vscode/media/style.css b/resx-vscode/media/style.css index 9b6536d..cf85310 100644 --- a/resx-vscode/media/style.css +++ b/resx-vscode/media/style.css @@ -785,6 +785,22 @@ body.asm-resizing { font-size: 11px; font-family: var(--mono); } +.calls-hostile-label { + display: inline-flex; + align-items: center; + gap: 6px; + margin-left: 14px; + font-size: 11px; + color: var(--dim); + cursor: pointer; + user-select: none; +} +.calls-hostile-label:has(.calls-hostile-check:checked) { + color: var(--warn, #e8a44a); +} +.calls-hostile-check { + cursor: pointer; +} .api-label-cell { min-width: 260px; } @@ -1362,12 +1378,136 @@ body.asm-resizing { .section-row:hover td { background: var(--hover); } +.analysis-toolbar { + display: flex; + align-items: center; + gap: 8px; + margin: 0 0 12px; + flex-wrap: wrap; +} +.analysis-toolbar-meta { + color: var(--dim); + font-size: 12px; + margin-right: auto; +} +.mini-stats { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(92px, 1fr)); + gap: 8px; + margin: 0 0 12px; +} +.mini-stat 
{ + border: 1px solid var(--border); + background: var(--card); + border-radius: 4px; + padding: 8px; +} +.mini-stat span { + display: block; + font-family: var(--mono); + font-size: 16px; + color: var(--fg); +} +.mini-stat small { + display: block; + color: var(--dim); + font-size: 10px; + text-transform: uppercase; +} +.analysis-notes { + display: flex; + gap: 6px; + flex-wrap: wrap; + margin: 12px 0; +} +.tag-warn { + border-color: rgba(232,164,74,.45); + color: var(--warn, #e8a44a); +} +.flow-tree, +.scan-results { + display: flex; + flex-direction: column; + gap: 8px; +} +.flow-node, +.scan-item { + border: 1px solid var(--border); + background: var(--card); + border-radius: 4px; +} +.flow-node .flow-node { + margin: 6px 8px 6px 18px; +} +.flow-summary, +.scan-summary { + cursor: pointer; + padding: 8px 10px; +} +.flow-summary { + display: flex; + align-items: center; + gap: 8px; + flex-wrap: wrap; +} +.flow-name, +.scan-name, +.scan-candidate-name { + font-family: var(--mono); + color: var(--fg); +} +.flow-rva { + margin-left: auto; +} +.flow-meta { + padding: 0 10px 8px; +} +.flow-edge-list { + padding: 0 10px 10px; +} +.flow-edge, +.scan-candidate { + display: grid; + grid-template-columns: 96px minmax(160px, 1fr) minmax(220px, 2fr); + gap: 8px; + align-items: center; + padding: 5px 0; + border-top: 1px solid var(--border); +} +.flow-edge-target, +.flow-edge-detail, +.scan-meta { + color: var(--dim); + font-size: 11px; +} +.scan-summary { + display: grid; + grid-template-columns: 54px minmax(160px, 1fr) minmax(200px, 2fr); + gap: 10px; + align-items: center; +} +.scan-risk { + font-family: var(--mono); + color: var(--warn, #e8a44a); + font-weight: 700; +} +.scan-body { + padding: 0 10px 10px; +} +.scan-candidate { + grid-template-columns: 54px 96px minmax(160px, 1fr) 90px; +} @media (max-width: 980px) { .asm-view { --asm-meta-width: 220px; } .asm-shell { grid-template-columns: minmax(0, 1fr) 8px 220px; } .cfg-legend-note { margin-left: 0; width: 100%; } + 
.flow-edge, + .scan-summary, + .scan-candidate { + grid-template-columns: 1fr; + } } ::-webkit-scrollbar { width: 6px; height: 6px; } ::-webkit-scrollbar-track { background: transparent; } diff --git a/resx-vscode/package.json b/resx-vscode/package.json index ad4f2f9..bbc6ce7 100644 --- a/resx-vscode/package.json +++ b/resx-vscode/package.json @@ -98,6 +98,16 @@ "command": "resx.dump", "category": "RESX", "title": "Dump" + }, + { + "command": "resx.reconstructCfg", + "category": "RESX", + "title": "Reconstruct CFG" + }, + { + "command": "resx.scan", + "category": "RESX", + "title": "Scan Folder" } ] }, diff --git a/resx-vscode/src/commands.ts b/resx-vscode/src/commands.ts index 1a4c6f2..2be1495 100644 --- a/resx-vscode/src/commands.ts +++ b/resx-vscode/src/commands.ts @@ -89,9 +89,105 @@ export function registerResxCommands(context: vscode.ExtensionContext): vscode.D vscode.commands.registerCommand('resx.dump', async () => { await openDump(context); }), + vscode.commands.registerCommand('resx.reconstructCfg', async (uri?: vscode.Uri) => { + await openReconstructCfgReport(context, uri); + }), + vscode.commands.registerCommand('resx.scan', async (uri?: vscode.Uri) => { + await openScanReport(context, uri); + }), ]; } +async function openReconstructCfgReport(context: vscode.ExtensionContext, uri?: vscode.Uri): Promise { + const target = await pickImageUri(uri, 'Select image to reconstruct'); + if (!target) return; + + const result = await vscode.window.withProgress({ + location: vscode.ProgressLocation.Notification, + title: `RESX: reconstructing ${path.basename(target.fsPath)}`, + cancellable: false, + }, async () => runJson(context, ['reconstruct-cfg', target.fsPath], resxRunOpts())); + + if (result.error) { + void vscode.window.showErrorMessage(result.error); + return; + } + + await openJsonReport('RESX reconstruct-cfg', result.data); +} + +async function openScanReport(context: vscode.ExtensionContext, uri?: vscode.Uri): Promise { + const root = await 
pickScanRoot(uri); + if (!root) return; + + const result = await vscode.window.withProgress({ + location: vscode.ProgressLocation.Notification, + title: `RESX: scanning ${path.basename(root.fsPath) || root.fsPath}`, + cancellable: false, + }, async () => runJson(context, ['scan', root.fsPath])); + + if (result.error) { + void vscode.window.showErrorMessage(result.error); + return; + } + + await openJsonReport('RESX scan', result.data); +} + +async function pickImageUri(uri: vscode.Uri | undefined, title: string): Promise { + if (uri?.scheme === 'file' && /\.(dll|exe|sys)$/i.test(uri.fsPath)) { + return uri; + } + + const active = vscode.window.activeTextEditor?.document.uri; + if (active?.scheme === 'file' && /\.(dll|exe|sys)$/i.test(active.fsPath)) { + return active; + } + + const picked = await vscode.window.showOpenDialog({ + canSelectMany: false, + canSelectFiles: true, + canSelectFolders: false, + filters: { 'Windows binaries': ['dll', 'exe', 'sys'], 'All files': ['*'] }, + title, + }); + return picked?.[0]; +} + +async function pickScanRoot(uri?: vscode.Uri): Promise { + if (uri?.scheme === 'file') { + try { + const stat = await fs.stat(uri.fsPath); + return stat.isDirectory() ? uri : vscode.Uri.file(path.dirname(uri.fsPath)); + } catch { + return uri; + } + } + + if (vscode.workspace.workspaceFolders?.length === 1) { + return vscode.workspace.workspaceFolders[0].uri; + } + + const picked = await vscode.window.showOpenDialog({ + canSelectMany: false, + canSelectFiles: false, + canSelectFolders: true, + title: 'Select folder to scan', + }); + return picked?.[0]; +} + +async function openJsonReport(title: string, data: unknown): Promise { + const doc = await vscode.workspace.openTextDocument({ + language: 'json', + content: JSON.stringify(data ?? 
{}, null, 2), + }); + await vscode.window.showTextDocument(doc, { + preview: false, + viewColumn: vscode.ViewColumn.Beside, + }); +} + async function openLocate(context: vscode.ExtensionContext, kind: 'locate' | 'locate-sym'): Promise { const query = await pickLocateQuery(context, kind); if (!query) return; diff --git a/resx-vscode/src/editor.ts b/resx-vscode/src/editor.ts index cf9dd3f..ef3ceba 100644 --- a/resx-vscode/src/editor.ts +++ b/resx-vscode/src/editor.ts @@ -210,7 +210,7 @@ export class ResxEditorProvider implements vscode.CustomReadonlyEditorProvider { const target = msg.dllPath || msg.dll || filePath; const args = ['dump', target, msg.func, '--cfg', 'text', '--funcs-depth', String(msg.funcsDepth || 1), '--strings', '--xrefs', '--recomp']; - const result = await runDumpWithForwardFallback(this.context, args, { ...cfgOpts(), funcsDepth: msg.funcsDepth || 1 }); + const result = await runDumpWithForwardFallback(this.context, args, { ...cfgOpts(), funcsDepth: msg.funcsDepth || 1, hostile: !!msg.hostile }); send({ type: 'dump_result', func: msg.func, @@ -253,9 +253,32 @@ export class ResxEditorProvider implements vscode.CustomReadonlyEditorProvider { break; } + case 'reconstruct_cfg': { + const result = await vscode.window.withProgress({ + location: vscode.ProgressLocation.Notification, + title: `RESX: reconstructing startup flow for ${path.basename(filePath)}`, + cancellable: false, + }, async () => runJson(this.context, ['reconstruct-cfg', filePath], cfgOpts())); + send({ type: 'reconstruct_cfg_result', ...result }); + break; + } + + case 'scan_path': { + const root = typeof msg.path === 'string' && msg.path.trim() + ? 
msg.path.trim() + : path.dirname(filePath); + const result = await vscode.window.withProgress({ + location: vscode.ProgressLocation.Notification, + title: `RESX: scanning ${path.basename(root) || root}`, + cancellable: false, + }, async () => runJson(this.context, ['scan', root])); + send({ type: 'scan_result', root, ...result }); + break; + } + case 'dump_at_rva': { const target = msg.dllPath || filePath; - const result = await runDumpAtRvaWithFallback(this.context, target, msg.rva, msg.dll, { ...cfgOpts(), funcsDepth: msg.funcsDepth || 1 }); + const result = await runDumpAtRvaWithFallback(this.context, target, msg.rva, msg.dll, { ...cfgOpts(), funcsDepth: msg.funcsDepth || 1, hostile: !!msg.hostile }); send({ type: 'dump_result', func: msg.label, @@ -313,6 +336,17 @@ export class ResxEditorProvider implements vscode.CustomReadonlyEditorProvider { break; } + case 'scan_browse_folder': { + const uris = await vscode.window.showOpenDialog({ + canSelectMany: false, + canSelectFiles: false, + canSelectFolders: true, + title: 'Select folder to scan', + }); + send({ type: 'file_picked', kind: 'scan_root', path: uris?.[0]?.fsPath ?? null }); + break; + } + case 'ready': { const pending = ResxEditorProvider.pendingNavigation.get(docKey); send({ type: 'dev_log_history', entries: getRunTraceHistory() }); @@ -386,6 +420,8 @@ export class ResxEditorProvider implements vscode.CustomReadonlyEditorProvider { + + @@ -401,6 +437,8 @@ export class ResxEditorProvider implements vscode.CustomReadonlyEditorProvider {

Analyzing…

Analyzing…

Analyzing…

+

Run reconstruct-cfg to build startup flow.

+

Run scan to discover fuzz candidates.

Waiting for RESX commands…

diff --git a/resx-vscode/src/runner.ts b/resx-vscode/src/runner.ts index 62f777c..e29ac39 100644 --- a/resx-vscode/src/runner.ts +++ b/resx-vscode/src/runner.ts @@ -13,6 +13,7 @@ export interface RunOptions { symPaths?: string[]; pdbFile?: string; funcsDepth?: number; + hostile?: boolean; } export interface RunTraceEntry { @@ -302,6 +303,10 @@ export function runJson( extra.push('--pdb', opts.pdbFile); } + if (opts?.hostile) { + extra.push('--hostile'); + } + const allArgs = [...args, ...extra, '--json', '--no-color', '--quiet']; const started = Date.now(); const traceId = nextRunTraceId++; From 1beb51408c4f01e6d486d458c79ccfd219fe43f8 Mon Sep 17 00:00:00 2001 From: dutchpsycho <178704185+dutchpsycho@users.noreply.github.com> Date: Mon, 11 May 2026 17:41:52 +1000 Subject: [PATCH 4/4] docs(cli): document analysis surfaces --- COMMANDS.md | 42 ++++++++++++++- README.md | 6 +++ docs/analysis-surfaces.md | 94 ++++++++++++++++++++++++++++++++ docs/cli.md | 19 ++++++- docs/disclosure-report.md | 110 ++++++++++++++++++++++++++++++++++++++ docs/json-schemas.md | 101 ++++++++++++++++++++++++++++++++++ docs/vscode-extension.md | 2 +- 7 files changed, 371 insertions(+), 3 deletions(-) create mode 100644 docs/analysis-surfaces.md create mode 100644 docs/disclosure-report.md create mode 100644 docs/json-schemas.md diff --git a/COMMANDS.md b/COMMANDS.md index 21852b2..eaa29ad 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -14,6 +14,7 @@ - `resx cfg ` - `resx cfg --at ` - `resx cfg --ordinal ` +- `resx reconstruct-cfg ` - `resx intelli [function]` - `resx peinfo ` - `resx sections ` @@ -26,6 +27,7 @@ - `resx locate ` - `resx locate-sym ` - `resx explain ` +- `resx scan ` - `resx yara ` - `resx update` @@ -36,6 +38,9 @@ - `--recomp` - `--c-out ` - `--cfg text` +- `--reconstruct-cfg` +- `--thread-filter ` +- `--api-filter ` - `--funcs` - `--funcs-depth ` - `--explain` @@ -47,6 +52,7 @@ - `--unsafe-map-image` - `--hookchk` - `--intelli` +- `--hostile` - `--follow-jmp` - 
`--no-follow-jmp` - `--rebase ` @@ -64,6 +70,30 @@ - `--api` forces API/symbol-mode interpretation. - `dump --explain` reuses the same explanation engine inline for the current target. +## Reconstruct CFG + +- `resx reconstruct-cfg ` builds a bounded startup/TLS-to-exit graph. +- The graph follows intra-image calls, tail jumps, import edges, indirect-call annotations, recovered thread/workpool callbacks, and x64 unwind exception-handler edges. +- PDB symbols are used when available for names, prototypes, and size-backed decode bounds. +- `--thread-filter ` and `--api-filter ` focus the text output for non-interactive review. +- `--json` emits a versioned `reconstruct_cfg` report with roots, nested edge children, PDB status, statistics, and static-analysis notes. + +## Scan + +- `resx scan ` inventories `.exe`, `.dll`, and `.sys` files under a root. +- `--jsonl` emits one JSON object per image for agent and corpus workflows. +- `--extensions `, `--max-files `, `--max-file-mb `, and `--max-candidates ` control scan scope and output size. +- Results include image metadata, risk imports, anomalies, and ranked fuzz-target candidates. +- Candidate entries include `name`, `rva`, `source`, `score`, `reasons`, `input_surface`, `harness_kind`, `suggested_invocation`, and `confidence`; scores are triage hints, not exploitability claims. + +## JSON Output + +- JSON output uses `schema_version: 1`. +- Most commands emit `{ "schema_version": 1, "kind": "<kind>", "<kind>": ... }`, where the payload key repeats the command kind. +- `scan --json` emits `{ "tool": "resx", "schema_version": 1, "kind": "scan", "results": [...] }`. +- `scan --jsonl` emits one image report per line without the outer scan envelope. +- See [docs/json-schemas.md](docs/json-schemas.md) for field-level notes. + ## Symbol Flags - `--pdb ` @@ -116,17 +146,23 @@ - `dump --cfg text` can recover and render `Switch Map` sections for jump-table dispatchers. - `cfg` supports function names, `--at `, and `--ordinal ` just like `dump`.
- `cfg` and `dump` use the same instruction and API/symbol highlighting path for call targets and comments. +- `dump --funcs` discovers direct, import, IAT-indirect, register-indirect, and switch-dispatch call targets. - `dump --recomp` emits corrected bit-test branches and better local-call placeholders. +- `dump --recomp` uses PDB prototype text and x64 unwind metadata when available. - `dump --funcs-depth ` expands nested API call depth and accepts levels `1` through `5`. - `syms --verbose` can show exact PDB identity/load diagnostics, including RSDS-derived kernel PDB names. - `dump` can resolve internal names from enumerated PDB symbols when exports do not contain the target. -- `dump` can surface syscall service numbers and kernel targets for `Nt*` and `Zw*` stubs. +- `dump` can surface syscall service numbers and kernel targets for `Nt*`, `Zw*`, and Win32K GUI syscall stubs. +- Extensionless system image lookup supports `.dll`, `.exe`, and `.sys`, including `user32`, `win32u`, `win32k`, `win32kbase`, `win32kfull`, and `ntoskrnl`. - `locate` and `locate-sym` search only the priority set by default. - `callers` uses the priority set by default. - `--include-dir` and `--include-image` widen the scan beyond the priority set for `locate` and `callers`. - `callers --include-dir` will scan `.dll` and `.sys` images by default, and `.exe` as well when `--scan-exe` is set. - `--include` filters the entire callers scan list; `--scope-file` only filters files discovered through `--include-dir`. - `priority` opens the generated priority config JSON where you can edit directories, exact names, prefixes, and regexes. +- `reconstruct-cfg` reconstructs startup flow using PE entry/TLS/startup roots, direct calls, import calls, recovered callbacks, and unwind exception edges. +- `scan` inventories PE corpora and ranks fuzz-target candidates from exports, startup paths, risky imports, and section anomalies. 
+- `resx-palace/` is the local controlled fixture corpus that exercises discovery, recursive CFG, EH, typed reconstruction, indirect control flow, and scan output. ## Examples @@ -135,8 +171,11 @@ resx dump ntdll.dll NtOpenProcess --cfg text --hookchk resx cfg ntdll.dll --at 0x161F40 resx intelli suspicious.dll resx intelli suspicious.dll WinMain --hookchk --cfg text --strings +resx reconstruct-cfg suspicious.dll --depth 6 --max-total 300 resx dump ntoskrnl.exe KiSystemCall64 --cfg text --funcs --recomp resx dump ntoskrnl.exe NtQuerySystemInformation --cfg text +resx dump win32u NtUserGetMessage --funcs +resx peinfo win32kfull resx syms ntoskrnl.exe --verbose resx callers blackbird.sys BLACKBIRDNtAllocateVirtualMemoryHookStub --depth 2 resx callers ntoskrnl.exe NtOpenProcess --include-dir C:\Work\Drivers --depth 2 @@ -146,5 +185,6 @@ resx locate NtOpenProcess resx locate NtOpenProcess --include-dir C:\Work\Drivers resx locate-sym NtWriteVirtualMemory --include-image .\mydriver.sys resx explain NtQuerySystemInformation --api +resx scan C:\Windows\System32\drivers --jsonl --max-files 200 resx update ``` diff --git a/README.md b/README.md index 1ee03fa..a92eb64 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,8 @@ RESX is a Windows binary analysis toolkit for terminal-first reversing, symbol-b ## Documentation - [CLI documentation](docs/cli.md) +- [Analysis surfaces](docs/analysis-surfaces.md) +- [JSON schemas](docs/json-schemas.md) - [VS Code extension documentation](docs/vscode-extension.md) ## Screenshots @@ -68,9 +70,13 @@ Common commands: ```powershell resx dump resx cfg +resx reconstruct-cfg resx intelli [function] resx peinfo resx locate resx locate-sym +resx scan resx syms ``` + +Newer analysis surfaces include PDB/export-backed function discovery, recursive startup CFG reconstruction, x64 unwind and exception-handler edges, typed pseudo-C reconstruction, indirect-control-flow annotations, Win32K GUI syscall tracing, scan-driven fuzz target ranking, and 
versioned JSON output for automation. diff --git a/docs/analysis-surfaces.md b/docs/analysis-surfaces.md new file mode 100644 index 0000000..b62336c --- /dev/null +++ b/docs/analysis-surfaces.md @@ -0,0 +1,94 @@ +# RESX Analysis Surfaces + +This page summarizes the analysis surfaces added around function discovery, recursive CFG reconstruction, exception metadata, typed reconstruction, indirect control flow, corpus scanning, and the `resx-palace` test corpus. + +## Function Discovery + +RESX discovers callable targets from several sources and keeps the source visible in reports: + +- Export tables provide named entry points and ordinal-backed targets. +- PDB symbols provide internal functions, sizes, and prototype/type text when symbols are available. +- PE startup metadata contributes entry point, TLS callbacks, and startup handoff candidates. +- Disassembly contributes direct `CALL` and tail `JMP` targets. +- Import tables identify external API edges and IAT-backed call sites. +- Switch-dispatch recovery contributes jump-table targets where the dispatcher can be statically recovered. + +Use `resx dump --funcs` for a local call map. Use `--funcs-depth ` to recurse into internal callees; it accepts levels `1` through `5`. + +## Recursive CFG + +`resx reconstruct-cfg ` builds a bounded startup/TLS-to-exit graph. The graph starts from PE entry and startup roots, follows intra-image call and jump targets, annotates import edges, and nests recovered children until `--depth` or `--max-total` stops expansion. + +Useful controls: + +- `--depth ` limits recursive expansion depth. +- `--max-total ` caps the total number of expanded functions. +- `--thread-filter ` focuses text output on thread-related paths. +- `--api-filter ` focuses text output on matching APIs or functions. +- `--json` emits the structured `reconstruct_cfg` report.
+ +Each function node reports name, kind, RVA/VA, section, symbol source/category, PDB prototype text when present, decode bound, status, return sites, and outgoing edges. + +## Unwind And EH + +RESX reads x64 `.pdata` / `UNWIND_INFO` where available: + +- `dump --json` can include `data.unwind[]` entries with runtime-function ranges, unwind-info RVA, prolog size, unwind code count, flags, and exception-handler RVA. +- `dump --recomp` emits a `.pdata` comment above reconstructed C-like output. +- `reconstruct-cfg` follows executable exception-handler RVAs as `exception-handler` edges tagged with `try-except` and `unwind`. + +Language-specific scope tables are not fully expanded; exception edges are best-effort static edges from unwind metadata. + +## Typed IR And Reconstruction + +The current typed reconstruction surface is PDB-backed and C-like: + +- `resx types --pdb ` inventories PDB type records, members, and symbol references. +- `dump --recomp` uses PDB function type text when available to recover return type, calling convention, and parameters. +- `dump --recomp` also uses unwind metadata to annotate frame and handler state. +- Without PDB types, reconstruction falls back to architecture defaults and observed argument-register usage. + +This is not a full decompiler IR contract. Treat it as a typed pseudo-C and typed-IR reporting surface for review, triage, and agent consumption. + +## Indirect Control Flow + +Indirect-control-flow visibility appears in disassembly, call maps, CFG output, and JSON: + +- IAT-backed memory calls are resolved to imported DLL/function names when possible. +- Register-indirect calls and jumps are traced backward through nearby register assignments. +- `--hostile` enables more aggressive reverse-index and indirect-JMP emission paths for hostile or obfuscated samples. +- API call JSON includes `is_indirect`, `indirect_method`, and `switch_cases` fields. 
+- `reconstruct-cfg` counts indirect edges and tags unresolved or partially resolved indirect calls. + +Static recovery may miss data-dependent dispatch, generated code, or targets materialized outside the local decode window. + +## Fuzz Target Output + +`resx scan ` recursively inventories PE corpora and ranks fuzz-target candidates. It emits JSON by default and JSON Lines with `--jsonl`. + +Candidate scoring uses: + +- Image kind, including driver detection. +- Entry point and exported function names. +- Risk imports such as IOCTL, parser, decompression, crypto, network, file, registry, and kernel buffer APIs. +- Section anomalies and header corruption. +- Driver and WDF-style naming patterns. + +Each candidate includes `name`, `rva`, `source`, `score`, `reasons`, `input_surface`, `harness_kind`, `suggested_invocation`, and `confidence`. Scores are triage hints, not exploitability claims. + +## resx-palace Test Corpus + +`resx-palace/` is the local corpus root for exercising these surfaces against controlled fixtures. It contains Windows C sources, a local build script, and generated DLL/EXE samples under `resx-palace/build/` when compiled. + +Use it for fixtures that need to stress: + +- Export and PDB-backed function discovery. +- Recursive direct-call and tail-call CFG expansion. +- x64 unwind and exception-handler edges. +- PDB type/prototype-backed reconstruction. +- IAT, register-indirect, and switch-dispatch control flow. +- Scan output and fuzz-candidate ranking. + +The current corpus builds `resx_palace.dll` and `resx_palace_probe.exe`. The DLL exports `ResxParsePacket`, `ResxDeviceIoctlDispatch`, `ResxThreadCallbackEntry`, `ResxSwitchJumpTableDispatch`, and `ResxIndirectCallMessage`; the integration suite uses RESX itself to verify PE metadata, exports, dumps, CFG output, recursive reconstruction, and scan/fuzz-candidate output. 
+ +Keep generated binaries, source fixtures, and runner scripts isolated under `resx-palace/` so docs and schema expectations can describe the intended test surface without mixing fixture code into RESX source. diff --git a/docs/cli.md b/docs/cli.md index 80f5811..21540f6 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -22,6 +22,7 @@ resx dump --ordinal resx cfg resx cfg --at resx cfg --ordinal +resx reconstruct-cfg resx intelli [function] resx peinfo resx sections @@ -34,6 +35,7 @@ resx callers resx locate resx locate-sym resx explain +resx scan resx yara resx update resx help @@ -55,12 +57,14 @@ resx iat kernel32.dll resx pechk suspicious.dll resx dump ntoskrnl.exe KiSystemCall64 --cfg text --funcs --recomp resx dump ntoskrnl.exe NtQuerySystemInformation --cfg text +resx reconstruct-cfg suspicious.dll --depth 6 --max-total 300 resx callers .\blackbird.sys BLACKBIRDNtAllocateVirtualMemoryHookStub --depth 2 resx callers ntoskrnl.exe PsOpenProcess --include-dir C:\Windows\System32\drivers --scope-file *.sys resx locate NtOpenProcess resx locate NtOpenProcess --include-dir C:\Work\Drivers resx locate-sym RtlpHeapHandleError resx explain NtQuerySystemInformation --api +resx scan C:\Windows\System32\drivers --jsonl --max-files 200 resx syms ntoskrnl.exe --verbose resx yara suspicious.dll .\rules\triage.yar ``` @@ -68,13 +72,26 @@ resx yara suspicious.dll .\rules\triage.yar ## Notable Capabilities - Export and PDB-backed symbol resolution +- Function discovery from exports, PDB symbols, startup roots, direct calls, imports, and switch-dispatch targets - Targeted disassembly with `--at`, ordinal, and symbol lookup flows -- Recovered CFG text output and switch-dispatch analysis +- Recovered CFG text output, recursive startup CFG reconstruction, and switch-dispatch analysis +- x64 unwind metadata and exception-handler edge reporting +- Startup-to-exit flow reconstruction with thread/workpool callback and exception-handler edges +- Indirect-control-flow annotations for IAT, 
register-indirect, unresolved, and switch-dispatch paths +- Typed pseudo-C reconstruction from PDB function prototypes and unwind metadata +- Corpus scanning with risk imports and fuzz-target candidate ranking - Nested API call expansion with `--funcs-depth 1..5` - Syscall service number and kernel-target surfacing for `Nt*` and `Zw*` stubs +- Win32K GUI syscall support through `user32`, `win32u`, `win32k`, `win32kbase`, and `win32kfull` - Pseudo-C reconstruction with `--recomp` - PE metadata, mitigation, signer, and debug-directory inspection - Priority-based locate and caller tracing workflows +- Versioned JSON output for dump, reconstruct-cfg, scan, types, metadata, explain, and caller workflows + +## Analysis And Schema References + +- [Analysis surfaces](analysis-surfaces.md) +- [JSON schemas](json-schemas.md) ## References diff --git a/docs/disclosure-report.md b/docs/disclosure-report.md new file mode 100644 index 0000000..20dc057 --- /dev/null +++ b/docs/disclosure-report.md @@ -0,0 +1,110 @@ +# RESX Diamond Integration Disclosure And Review Report + +## Scope + +Diamond was a separate untracked Rust crate that duplicated the RESX source tree and added two command families that RESX did not expose: + +- `reconstruct-cfg`: startup/TLS-to-exit flow reconstruction with intra-image calls, imports, recovered thread/workpool callbacks, and unwind exception-handler edges. +- `scan`: recursive PE corpus inventory with risk imports, anomalies, and ranked fuzz-target candidates. + +Those capabilities are now integrated into the `resx` crate and exposed as RESX commands. + +## Documented Analysis Surfaces + +- Function discovery from exports, PDB symbols, startup roots, direct calls, imports, IAT slots, and switch-dispatch recovery. +- Recursive CFG reconstruction from PE entry/TLS/startup roots with bounded nested child expansion. +- x64 unwind and exception-handler reporting through dump metadata, reconstruction comments, and `reconstruct-cfg` exception edges. 
+- Typed reconstruction via PDB-backed function prototypes, `types` inventory output, and pseudo-C emission. +- Indirect-control-flow reporting for IAT memory calls, register-indirect calls/jumps, unresolved indirect paths, and switch-dispatch cases. +- Fuzz target output from recursive corpus scanning with risk imports, anomalies, candidate scores, and candidate reasons. +- JSON schema notes for `dump`, `reconstruct_cfg`, `scan`, and `types`. +- `resx-palace/` test corpus with controlled DLL/EXE fixtures covering these analysis surfaces. + +## Task Split + +- Core RESX integration: command routing, config flags, analysis modules, dump JSON surfaces, PE unwind metadata, and Diamond removal. +- Sample corpus and tests: `resx-palace/` fixture sources, local build script, and Windows integration test coverage. +- Documentation and schemas: command docs, analysis-surface notes, JSON field notes, and this disclosure/review report. +- VS Code extension: command-palette entries, runner wiring, and webview rendering for reconstruct-CFG and scan output. + +## Integrated Into RESX + +- Added `resx/src/analysis/reconstruct.rs`. +- Added `resx/src/commands/reconstruct_cfg.rs`. +- Added `resx/src/commands/scan.rs`. +- Registered the new modules in `resx/src/analysis/mod.rs` and `resx/src/commands/mod.rs`. +- Added CLI/config support for: + - `resx reconstruct-cfg ` + - `resx --reconstruct-cfg` + - `--thread-filter ` + - `--api-filter ` + - `resx scan ` + - `--jsonl` + - `--extensions ` + - `--max-files ` + - `--max-file-mb ` + - `--max-candidates ` +- Updated help routing so the shorthand commands rewrite to RESX-native flags. +- Updated scan JSON metadata from `tool: "diamond"` to `tool: "resx"`. +- Updated README, CLI docs, and command reference to disclose the new RESX command surface. +- Added docs for analysis surfaces and JSON schema expectations. + +## JSON Schema Surface + +- Versioned command JSON uses `schema_version: 1`. 
+- `reconstruct-cfg --json` reports image identity, PDB status, root flow functions, nested edges, statistics, and static-analysis notes under `reconstruct_cfg`. +- `scan --json` reports a `tool: "resx"` envelope with root, file counts, and image results; `scan --jsonl` emits one image report per line. +- `dump --json` exposes function discovery, recursive CFG, typed IR, indirect-flow metadata, indirect call metadata through `api_calls[]`, and unwind/EH metadata through `data.unwind[]`. +- `types --json` exposes PDB type entries, members, and symbol references for typed analysis consumers. + +## resx-palace Corpus + +- `resx-palace/` is a local test corpus with source fixtures, a Windows build script, and generated binaries under `resx-palace/build/`. +- The corpus builds `resx_palace.dll` and `resx_palace_probe.exe` with exported parser, IOCTL, callback/thread, switch/jump-table, and indirect-call functions. +- The corpus covers export function discovery, recursive direct/tail-call CFG, unwind/EH metadata, typed reconstruction output, indirect control flow, and scan/fuzz-candidate output. +- The corpus is separate from RESX Rust and VS Code source so fixtures can evolve without changing the analyzer implementation. + +## Removed Diamond + +- Removed `diamond` from the workspace members. +- Changed the workspace default member from `diamond` to `resx`. +- Deleted all Diamond crate source files. +- Deleted the old `docs/diamond.md` planning document. +- Verified that no Diamond source files remain with `rg --files diamond`. +- Verified that user-facing RESX/docs references no longer contain Diamond branding outside this disclosure report. + +## Verification + +- `git diff --check` completed without whitespace errors; Git only reported CRLF normalization warnings.
+- `cargo fmt -p resx -- --check` +- `cargo check -p resx --locked` +- `cargo test -p resx --locked` +- `npx tsc -p .\tsconfig.webview.json --noEmit` +- `npx tsc -p .\tsconfig.json --noEmit` +- `resx-palace/scripts/build.ps1` +- `cargo run -p resx --locked -- peinfo resx-palace\build\resx_palace.dll --json --quiet --no-color` +- `cargo run -p resx --locked -- eat resx-palace\build\resx_palace.dll --json --quiet --no-color` +- `cargo run -p resx --locked -- dump resx-palace\build\resx_palace.dll ResxParsePacket --json --quiet --no-color --funcs --cfg text` +- `cargo run -p resx --locked -- cfg resx-palace\build\resx_palace.dll ResxSwitchJumpTableDispatch --quiet --no-color` +- `cargo run -p resx --locked -- reconstruct-cfg resx-palace\build\resx_palace.dll --json --quiet --no-color --depth 3 --max-total 80` +- `cargo run -p resx --locked -- scan resx-palace\build --json --quiet --no-color --max-files 10` + +Cargo was run with an external `CARGO_TARGET_DIR` because the repository `target` directory is not writable under the current sandbox ACLs. + +## Sample Test Results + +- `peinfo` identified `resx_palace.dll` as an x64 DLL with 5 exports, 69 imports, startup roots, and x64 runtime/unwind metadata. +- `eat` resolved all intended exports: `ResxDeviceIoctlDispatch`, `ResxIndirectCallMessage`, `ResxParsePacket`, `ResxSwitchJumpTableDispatch`, and `ResxThreadCallbackEntry`. +- `dump --json` for `ResxParsePacket` emitted instructions, CFG data, function discovery, recursive CFG, typed IR, indirect-flow data, and unwind metadata. +- `cfg` for `ResxSwitchJumpTableDispatch` produced a graph with switch-dispatch control flow and an unresolved register-indirect jump annotation. +- `reconstruct-cfg --json` reported startup roots, nested internal call expansion, import edges, indirect edges, exception edges, and truncation statistics under the `reconstruct_cfg` envelope. 
+- `scan --json` reported both corpus binaries and ranked the intended DLL exports as fuzz candidates, including IOCTL, structured-input, callback/thread, switch-dispatch, and message/indirect-call surfaces. + +## Residual Limitations + +- `reconstruct-cfg` is static best-effort analysis. Runtime dispatch, data-dependent branches, generated code, and unresolved indirect calls may be incomplete. +- `scan` uses PE metadata, import names, export names, and section heuristics. Candidate ranking is triage-oriented, not a proof of exploitability. +- PDB-backed names, prototypes, and size bounds are used when available, but the command continues with reduced fidelity when symbols are unavailable. +- Typed IR is a lightweight reporting layer for review and automation, not a full SSA/decompiler contract. +- The legacy text CFG renderer can still linearize adjacent table bytes in some switch fixtures; the recursive CFG and indirect-flow JSON now expose the dispatch surface, but perfect jump-table block exclusion remains future hardening. +- Win32K syscall target resolution depends on available exports or PDB symbols in the local `win32k*` images; RESX still reports the user-mode stub and service number when kernel symbols are unavailable. diff --git a/docs/json-schemas.md b/docs/json-schemas.md new file mode 100644 index 0000000..3f87106 --- /dev/null +++ b/docs/json-schemas.md @@ -0,0 +1,101 @@ +# RESX JSON Schemas + +RESX JSON output is versioned with `schema_version: 1`. Most commands use a top-level envelope with the command kind as the payload key, for example `dump`, `peinfo`, `reconstruct_cfg`, `types`, or `explain`. 
+ +## Common Envelope + +```json +{ + "schema_version": 1, + "kind": "dump", + "dump": {} +} +``` + +List-style commands use the item key directly: + +```json +{ + "schema_version": 1, + "kind": "types", + "types": [] +} +``` + +## Dump + +`resx dump --json` emits: + +- Image identity: `dll`, `dll_path`, `image_base`, `arch`, `entry_point`, `size_of_image`. +- Function identity: `function`, `rva`, `va`, `rebased_va`, `size_bytes`, `insn_count`. +- Analysis arrays: `instructions`, `xrefs`, `strings`, `api_calls`, `hook_indicators`, `intelli_findings`. +- Optional surfaces: `data`, `recomp`, `cfg`, `api_call_tree`, `current_syscall`, `edrchk`, `explain`, `function_discovery`, `recursive_cfg`, `typed_ir`, and `indirect_flow`. + +`api_calls[]` includes `rva`, `kind`, `target_rva`, `label`, `dll`, `is_import`, `is_indirect`, optional `indirect_method`, optional `switch_cases`, and optional `syscall`. + +`data.unwind[]` includes `begin_rva`, `end_rva`, `unwind_info_rva`, `prolog_size`, `unwind_codes`, `flags`, parsed unwind operations, saved registers, stack allocation size, chained parent metadata, epilog scopes, and optional `exception_handler_rva` / `handler_data_rva`. + +## Reconstruct CFG + +`resx reconstruct-cfg --json` emits: + +```json +{ + "schema_version": 1, + "kind": "reconstruct_cfg", + "reconstruct_cfg": { + "image": "", + "path": "", + "arch": "x64", + "image_base": "0x0000000000000000", + "entry_point": "0x00000000", + "pdb": {}, + "roots": [], + "stats": {}, + "notes": [] + } +} +``` + +Function nodes in `roots[]` and child edges contain: + +- Function fields: `name`, `kind`, `rva`, `va`, `section`, `symbol_source`, `symbol_category`, `symbol_size`, `prototype`, `decode_bound`, `thread_lane`, `note`, `status`, `returns`, `edges`. +- Edge fields: `site_rva`, `kind`, `target`, `target_rva`, `target_va`, `target_source`, `target_category`, `thread_lane`, `tags`, `detail`, `relation`, and optional `child`. 
+- Stats: `roots`, `functions_expanded`, `call_edges`, `import_edges`, `indirect_edges`, `thread_edges`, `workpool_edges`, `thread_api_edges`, `exception_edges`, `cycle_edges`, `truncated_edges`, `decode_errors`. + +## Scan + +`resx scan --json` emits: + +```json +{ + "tool": "resx", + "schema_version": 1, + "kind": "scan", + "root": "", + "files_seen": 0, + "files_reported": 0, + "results": [] +} +``` + +Each `results[]` item contains `path`, `name`, `kind`, `arch`, `size_bytes`, `entry_point`, export/import/runtime-function counts, discovered-function counts, function-source counts, indirect edge/table counts, input-surface tags, a fuzz manifest, `risk_score`, `risk_imports`, `candidates`, `anomalies`, and `pdb_name`. + +Each `candidates[]` item contains `name`, `rva`, `source`, `score`, `reasons`, `input_surface`, `harness_kind`, `suggested_invocation`, and `confidence`. + +`--jsonl` emits one image report per line without the outer scan envelope. Each line has the same shape as an item from `results[]`. + +## Types + +`resx types --json` emits `types[]` entries with: + +- Type identity: `type_id`, `name`, `kind`, `size`. +- Reference counts: `symbol_count`, `function_count`, `data_count`. +- Structure data: `members[]` with `name`, `offset`, `type_id`, `type_name`, `size`. +- Symbol references: `refs[]` with `name`, `kind`, `rva`, `size`, `type_id`. + +## Compatibility Notes + +- Hexadecimal addresses are serialized as strings to avoid width loss. +- Optional fields are often omitted when empty. +- `schema_version` is the compatibility key; consumers should ignore unknown fields and require only the fields they use. diff --git a/docs/vscode-extension.md b/docs/vscode-extension.md index e2131a1..ffd4346 100644 --- a/docs/vscode-extension.md +++ b/docs/vscode-extension.md @@ -31,7 +31,7 @@ Install the generated `.vsix` from VS Code with `Extensions: Install from VSIX.. 
- Disassembly, xrefs, CFG, recompilation, hex view, and nested API call trees - Symbol and PDB controls, including explicit symbol path and PDB file settings - Command palette entry points for `RESX: Locate`, `RESX: Locate Symbol`, and `RESX: Dump` -- Syscall stub annotations and direct follow-through into kernel targets +- Syscall stub annotations and direct follow-through into kernel targets, including Win32K GUI syscall paths - `Dev` trace tab for `resx` command invocations, arguments, timing, and stderr/error output ## Command Palette