From 4b66d2d4ff22ce3c08edae2eb9210814adf936fc Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Thu, 5 Jun 2025 00:35:23 -0700 Subject: [PATCH 01/12] WIP implementation --- objdiff-cli/src/views/function_diff.rs | 1 + objdiff-core/Cargo.toml | 2 +- objdiff-core/config-schema.json | 17 +- objdiff-core/src/arch/mod.rs | 20 +- objdiff-core/src/arch/ppc.rs | 487 ++++++++++++++++++++++++- objdiff-core/src/diff/display.rs | 61 ++-- objdiff-core/src/obj/mod.rs | 16 + objdiff-core/src/obj/read.rs | 67 +++- objdiff-core/src/util.rs | 34 ++ objdiff-gui/src/views/appearance.rs | 5 + objdiff-gui/src/views/diff.rs | 13 + objdiff-gui/src/views/function_diff.rs | 1 + objdiff-gui/src/views/symbol_diff.rs | 8 + 13 files changed, 682 insertions(+), 50 deletions(-) diff --git a/objdiff-cli/src/views/function_diff.rs b/objdiff-cli/src/views/function_diff.rs index 3366eef0..a0472e5d 100644 --- a/objdiff-cli/src/views/function_diff.rs +++ b/objdiff-cli/src/views/function_diff.rs @@ -570,6 +570,7 @@ impl FunctionDiffUi { DiffTextColor::Normal => Color::Gray, DiffTextColor::Dim => Color::DarkGray, DiffTextColor::Bright => Color::White, + DiffTextColor::DataFlow => Color::LightCyan, DiffTextColor::Replace => Color::Cyan, DiffTextColor::Delete => Color::Red, DiffTextColor::Insert => Color::Green, diff --git a/objdiff-core/Cargo.toml b/objdiff-core/Cargo.toml index c4857507..6a12d5de 100644 --- a/objdiff-core/Cargo.toml +++ b/objdiff-core/Cargo.toml @@ -175,7 +175,7 @@ time = { version = "0.3", optional = true } encoding_rs = { version = "0.8.35", optional = true } [target.'cfg(windows)'.dependencies] -winapi = { version = "0.3", optional = true } +winapi = { version = "0.3", optional = true, features = ["winbase"] } # For Linux static binaries, use rustls [target.'cfg(target_os = "linux")'.dependencies] diff --git a/objdiff-core/config-schema.json b/objdiff-core/config-schema.json index d35d12b4..2a307ab4 100644 --- a/objdiff-core/config-schema.json +++ b/objdiff-core/config-schema.json @@ 
-25,6 +25,20 @@ } ] }, + { + "id": "analyzeDataFlow", + "type": "boolean", + "default": false, + "name": "(Experimental) Perform data flow analysis", + "description": "Use data flow analysis to display known information about register contents where possible" + }, + { + "id": "showDataFlow", + "type": "boolean", + "default": true, + "name": "Show data flow", + "description": "Show data flow analysis results in place of register name where present" + }, { "id": "spaceBetweenArgs", "type": "boolean", @@ -264,7 +278,8 @@ "id": "ppc", "name": "PowerPC", "properties": [ - "ppc.calculatePoolRelocations" + "ppc.calculatePoolRelocations", + "analyzeDataFlow" ] }, { diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 5cedba81..321edc08 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -1,5 +1,5 @@ use alloc::{borrow::Cow, boxed::Box, format, string::String, vec::Vec}; -use core::{ffi::CStr, fmt, fmt::Debug}; +use core::{ffi::CStr, fmt::{self, Debug}}; use anyhow::{Result, bail}; use encoding_rs::SHIFT_JIS; @@ -7,12 +7,10 @@ use object::Endian as _; use crate::{ diff::{ - DiffObjConfig, - display::{ContextItem, HoverItem, InstructionPart}, + display::{ContextItem, HoverItem, InstructionPart}, DiffObjConfig }, obj::{ - InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, RelocationFlags, - ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, SymbolKind, + FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, SymbolKind }, util::ReallySigned, }; @@ -31,6 +29,7 @@ pub mod superh; pub mod x86; /// Represents the type of data associated with an instruction +#[derive(PartialEq)] pub enum DataType { Int8, Int16, @@ -335,6 +334,17 @@ pub trait Arch: Send + Sync + Debug { Vec::new() } + // Perform detailed data flow analysis + fn data_flow_analysis( + &self, + _obj: 
&Object, + _symbol: &Symbol, + _code: &[u8], + _relocations: &[Relocation], + ) -> Option> { + None + } + fn implcit_addend( &self, file: &object::File<'_>, diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index f62b38fa..8f2b8706 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -5,6 +5,8 @@ use alloc::{ vec::Vec, }; +use itertools::Itertools; +use std::ops::{Index, IndexMut}; use anyhow::{Result, bail, ensure}; use cwextab::{ExceptionTableData, decode_extab}; use flagset::Flags; @@ -18,9 +20,10 @@ use crate::{ display::{ContextItem, HoverItem, HoverItemColor, InstructionPart, SymbolNavigationKind}, }, obj::{ - InstructionRef, Object, Relocation, RelocationFlags, ResolvedInstructionRef, - ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet, + FlowAnalysisResult, FlowAnalysisValue, InstructionRef, Object, Relocation, RelocationFlags, + ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet }, + util::{RawFloat, RawDouble}, }; // Relative relocation, can be Simm, Offset or BranchDest @@ -43,6 +46,18 @@ fn is_rel_abs_arg(arg: &ppc750cl::Argument) -> bool { fn is_offset_arg(arg: &ppc750cl::Argument) -> bool { matches!(arg, ppc750cl::Argument::Offset(_)) } +fn is_store_instruction(op: ppc750cl::Opcode) -> bool { + use ppc750cl::Opcode; + match op { + Opcode::Stbux | Opcode::Stbx | Opcode::Stfdux | Opcode::Stfdx | Opcode::Stfiwx | + Opcode::Stfsux | Opcode::Stfsx | Opcode::Sthbrx | Opcode::Sthux | Opcode::Sthx | + Opcode::Stswi | Opcode::Stswx | Opcode::Stwbrx | Opcode::Stwcx_ | Opcode::Stwux | + Opcode::Stwx | Opcode::Stwu | Opcode::Stb | Opcode::Stbu | Opcode::Sth | Opcode::Sthu | + Opcode::Stmw | Opcode::Stfs | Opcode::Stfsu | Opcode::Stfd | Opcode::Stfdu => true, + _ => false, + } +} + #[derive(Debug)] pub struct ArchPpc { /// Exception info @@ -157,6 +172,7 @@ impl Arch for ArchPpc { Ok(()) } + // Could be replaced by data_flow_analysis once that feature stabilizes fn 
generate_pooled_relocations( &self, address: u64, @@ -166,6 +182,16 @@ impl Arch for ArchPpc { ) -> Vec { generate_fake_pool_relocations_for_function(address, code, relocations, symbols) } + + fn data_flow_analysis( + &self, + obj: &Object, + symbol: &Symbol, + code: &[u8], + relocations: &[Relocation], + ) -> Option> { + Some(ppc_data_flow_analysis(obj, symbol, code, relocations)) + } fn implcit_addend( &self, @@ -501,6 +527,463 @@ fn make_symbol_ref(symbol: &object::Symbol) -> Result { Ok(ExtabSymbolRef { original_index: symbol.index().0 - 1, name, demangled_name }) } +#[derive(Default, PartialEq, Eq, Copy, Hash, Clone, Debug)] +enum RegisterContent { + #[default] + Unknown, + Variable, // Multiple potential values + FloatConstant(RawFloat), + DoubleConstant(RawDouble), + IntConstant(i32), + InputRegister(u8), + Symbol(usize), +} + +impl std::fmt::Display for RegisterContent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RegisterContent::Unknown => write!(f, "unknown"), + RegisterContent::Variable => write!(f, "variable"), + RegisterContent::IntConstant(i) => + // -i is safe because it's at most a 16 bit constant in the i32 + if *i >= 0 { write!(f, "0x{:x}", i) } else { write!(f, "-0x{:x}", -i) }, + RegisterContent::FloatConstant(RawFloat(fp)) => write!(f, "{fp:?}f"), + RegisterContent::DoubleConstant(RawDouble(fp)) => write!(f, "{fp:?}d"), + RegisterContent::InputRegister(p) => write!(f, "input{p}"), + RegisterContent::Symbol(_u) => write!(f, "relocation"), + } + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] +struct RegisterState { + gpr: [RegisterContent; 32], + fpr: [RegisterContent; 32], +} + +impl RegisterState { + fn new() -> Self { + RegisterState { + gpr: [RegisterContent::Unknown; 32], + fpr: [RegisterContent::Unknown; 32], + } + } + + // During a function call, these registers must be assumed trashed. 
+ fn clear_volatile(&mut self) { + self[ppc750cl::GPR(0)] = RegisterContent::Unknown; + for i in 0..=13 { + self[ppc750cl::GPR(i)] = RegisterContent::Unknown; + } + for i in 0..=13 { + self[ppc750cl::FPR(i)] = RegisterContent::Unknown; + } + } + + // Mark potential input values. + // Subsequent flow analysis will "realize" that they are not actually inputs if + // they get overwritten with another value before getting read. + fn set_potential_inputs(&mut self) { + for g_reg in 3..=13 { + self[ppc750cl::GPR(g_reg)] = RegisterContent::InputRegister(g_reg); + } + for f_reg in 1..=13 { + self[ppc750cl::FPR(f_reg)] = RegisterContent::InputRegister(f_reg); + } + } + + // If there is no value yet, we can take the new known value. + // If there's a known value different from the new value, the content + // must be variable. + // Returns whether the current value was updated. + fn unify_values(current: &mut RegisterContent, new: &RegisterContent) -> bool { + if *current == *new { + false + } else { + if *current == RegisterContent::Unknown { + *current = *new; + true + } else if *current == RegisterContent::Variable { + // Already variable + false + } else { + *current = RegisterContent::Variable; + true + } + } + } + + // Unify currently known register contents in a given situation with new + // information about the register contents in that situation. + // Currently unknown register contents can be filled, but if there are + // conflicting contents, the register is marked as variable.
+ fn unify(&mut self, other: &RegisterState) -> bool { + let mut updated = false; + for i in 0..32 { + updated |= Self::unify_values(&mut self.gpr[i], &other.gpr[i]); + updated |= Self::unify_values(&mut self.fpr[i], &other.fpr[i]); + } + updated + } +} + +impl Index<ppc750cl::GPR> for RegisterState { + type Output = RegisterContent; + fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { + &self.gpr[gpr.0 as usize] + } +} +impl IndexMut<ppc750cl::GPR> for RegisterState { + fn index_mut(&mut self, gpr: ppc750cl::GPR) -> &mut Self::Output { + &mut self.gpr[gpr.0 as usize] + } +} + +impl Index<ppc750cl::FPR> for RegisterState { + type Output = RegisterContent; + fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { + &self.fpr[fpr.0 as usize] + } +} +impl IndexMut<ppc750cl::FPR> for RegisterState { + fn index_mut(&mut self, fpr: ppc750cl::FPR) -> &mut Self::Output { + &mut self.fpr[fpr.0 as usize] + } +} + +fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, args: &[ppc750cl::Argument; 5]) { + use ppc750cl::{Opcode, Argument, GPR}; + match (op, args[0], args[1], args[2]) { + (Opcode::Or, Argument::GPR(a), Argument::GPR(b), Argument::GPR(c)) => { + // Move is implemented as or with self for ints + if b == c { + registers[a] = registers[b]; + } else { + registers[a] = RegisterContent::Unknown; + } + } + (Opcode::Fmr, Argument::FPR(a), Argument::FPR(b), _) => { + registers[a] = registers[b]; + } + (Opcode::Addi, Argument::GPR(a), Argument::GPR(GPR(0)), Argument::Simm(c)) => { + // Load immediate implemented as addi with addend = r0 + // Let Addi with other addends fall through to the case which + // overwrites the destination + registers[a] = RegisterContent::IntConstant(c.0 as i32); + } + (Opcode::Bcctr, _, _, _) => { + // Called a function pointer, may have erased volatile registers + registers.clear_volatile(); + } + (Opcode::B, _, _, _) => { + if get_branch_offset(args) == 0 { + // Call to another function + registers.clear_volatile(); + } + } + (Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | +
Opcode::Stfsu | Opcode::Stfdu, _, _, Argument::GPR(rel)) => { + // Storing with update, clear updated register (third arg) + registers[rel] = RegisterContent::Unknown; + } + (Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | + Opcode::Stfsux | Opcode::Stfdux, _, Argument::GPR(rel), _) => { + // Storing indexed with update, clear updated register (second arg) + registers[rel] = RegisterContent::Unknown; + } + (_, _, _, _) if is_store_instruction(*op) => { + // Every remaining store (including stmw, stfsx/stfdx, stfiwx and + // the byte-reversed forms) only reads its operands, so it must not + // fall through to the catch-all arms that discard args[0]. + } + (Opcode::Lmw, Argument::GPR(target), _, _) => { + // `lmw` overwrites all registers from rd through r31 inclusive. + for reg in target.0..=31 { + registers[GPR(reg)] = RegisterContent::Unknown; + } + } + (_, Argument::GPR(a), _, _) => { + // Other operations which write to GPR a + registers[a] = RegisterContent::Unknown; + } + (_, Argument::FPR(a), _, _) => { + // Other operations which write to FPR a + registers[a] = RegisterContent::Unknown; + } + (_, _, _, _) => {} + } + +} + +fn get_branch_offset(args: &[ppc750cl::Argument; 5]) -> i32 { + for arg in args.iter() { + if let ppc750cl::Argument::BranchDest(dest) = arg { + return dest.0 / 4; + } + } + return 0; +} + +#[derive(Debug, Default)] +struct PPCFlowAnalysisResult { + argument_contents: BTreeMap<(u64, u8), FlowAnalysisValue>, +} + +impl PPCFlowAnalysisResult { + fn set_argument_value_at_address(&mut self, address: u64, argument: u8, value: FlowAnalysisValue) { + self.argument_contents.insert((address, argument), value); + } + + fn new() -> Self { + PPCFlowAnalysisResult { argument_contents: Default::default() } + } +} + +impl FlowAnalysisResult for PPCFlowAnalysisResult { + fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue> { + self.argument_contents.get(&(address, argument)) + } +} + +fn clamp_text_length(s: String, max: usize) -> String { + if s.len() <= max { + s + } else { + format!("{}…",
s.chars().take(max.saturating_sub(3)).collect::<String>()) + } +} + +// Executing op with args at cur_address, update current_state with symbols that +// come from relocations. That is, references to globals, floating point +// constants, string constants, etc. +fn fill_registers_from_relocations( + current_state: &mut RegisterState, + obj: &Object, + cur_addr: u32, + op: ppc750cl::Opcode, + args: &[ppc750cl::Argument; 5], + relocations: &[Relocation], +) { + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + if let Some(reloc) = reloc { + let bytes = obj.symbol_data(reloc.target_symbol).unwrap_or(&[]); + let content = match guess_data_type_from_load_store_inst_op(op) { + Some(DataType::Float) => RegisterContent::FloatConstant(RawFloat(match obj.endianness { + object::Endianness::Little => f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4])), + object::Endianness::Big => f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4])), + })), + Some(DataType::Double) => RegisterContent::DoubleConstant(RawDouble(match obj.endianness { + object::Endianness::Little => f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])), + object::Endianness::Big => f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8])), + })), + _ => RegisterContent::Symbol(reloc.target_symbol), + }; + // Only update the register state for loads. We may store to a reloc + // address but that doesn't update register contents.
+ if !is_store_instruction(op) { + match (op, args[0]) { + // Everything else is a load of some sort + (_, ppc750cl::Argument::GPR(gpr)) => { + current_state[gpr] = content; + } + (_, ppc750cl::Argument::FPR(fpr)) => { + current_state[fpr] = content; + } + _ => {} + } + } + } +} + +fn ppc_data_flow_analysis( + obj: &Object, + func_symbol: &Symbol, + code: &[u8], + relocations: &[Relocation], +) -> Box { + use std::collections::HashSet; + use ppc750cl::InsIter; + use std::collections::VecDeque; + let instructions = InsIter::new(code, func_symbol.address as u32).map(|(_addr, ins)| { + (ins.op, ins.basic().args) + }).collect_vec(); + + let func_address = func_symbol.address; + + // Get initial register values from function parameters + let mut initial_register_state = RegisterState::new(); + initial_register_state.set_potential_inputs(); + + let mut execution_queue = VecDeque::<(usize, RegisterState)>::new(); + execution_queue.push_back((0, initial_register_state)); + + // Execute the instructions against abstract data + let mut failsafe_counter = 0; + let mut taken_branches = HashSet::<(usize, RegisterState)>::new(); + let mut register_state_at = Vec::<RegisterState>::new(); + register_state_at.resize_with(instructions.len(), RegisterState::new); + while let Some((mut index, mut current_state)) = execution_queue.pop_front() { + while let Some((op, args)) = instructions.get(index) { + // Record the state at this index + // If recording does not result in any changes to the known values + // we're done, because the subsequent values are a function of the + // current values so we'll get the same result as the last time + // we went down this path. + if !register_state_at[index].unify(&current_state) { + break; + } + + // Execute the instruction to update the state + execute_instruction(&mut current_state, op, args); + + // Fill in register state coming from relocations at this line. This + // handles references to global variables, floating point constants, + // etc.
+ let cur_addr = (func_address as u32) + ((index * 4) as u32); + fill_registers_from_relocations(&mut current_state, obj, cur_addr, *op, args, relocations); + + // Add conditional branches to execution queue + // Only take a given (address, register state) combination once. If + // the known register state is different we have to take the branch + // again to stabilize the known values for backwards branches. + if op == &ppc750cl::Opcode::Bc { + let branch_state = (index, current_state.clone()); + if !taken_branches.contains(&branch_state) { + let offset = get_branch_offset(args); + let target_index = ((index as i32) + offset) as usize; + execution_queue.push_back((target_index, current_state.clone())); + taken_branches.insert(branch_state); + + // We should never hit this case, but avoid getting stuck in + // an infinite loop if we hit some kind of bad behavior. + failsafe_counter += 1; + if failsafe_counter > 256 { + println!("Analysis of {} failed to stabilize", func_symbol.name); + return Default::default(); + } + } + } + + // Update index + if op == &ppc750cl::Opcode::B { + // Unconditional branch + let offset = get_branch_offset(args); + if offset > 0 { + // Jump table or branch to over else clause. + index += offset as usize; + } else if offset == 0 { + // Function call with relocation. We'll return to + // the next instruction. + index += 1; + } else { + // Unconditional branch (E.g.: loop { ... }) + // Also some compilations of loops put the conditional at + // the end and B to it for the check of the first iteration. 
+ let branch_state = (index, current_state.clone()); + if taken_branches.contains(&branch_state) { + break; + } + taken_branches.insert(branch_state); + index = ((index as i32) + offset) as usize; + } + } else { + // Normal execution of next instruction + index += 1; + } + } + } + + // Store the relevant data flow values for simplified instructions + generate_flow_analysis_result(&obj, func_address, code, register_state_at, relocations) +} + +// Write the relevant part of the flow analysis out into the FlowAnalysisResult +// the rest of the application will use to query results of the flow analysis. +// Flow analysis will compute the known contents of every register at every +// line, but we only need to record the values of registers that are actually +// referenced at each line. +fn generate_flow_analysis_result( + obj: &Object, + base_address: u64, + code: &[u8], + register_state_at: Vec::, + relocations: &[Relocation] +) -> Box { + use ppc750cl::{InsIter, Argument, Offset, GPR}; + let mut analysis_result = PPCFlowAnalysisResult::new(); + for (addr, ins) in InsIter::new(code, 0) { + let ins_address = base_address + (addr as u64); + let index = addr / 4; + let ppc750cl::ParsedIns {mnemonic, args} = ins.simplified(); + + // Special case to show float and double constants on the line where + // they are being loaded. + // We need to do this before we break out on showing relocations in the + // subsequent if statement. 
+ if ins.op == ppc750cl::Opcode::Lfs || ins.op == ppc750cl::Opcode::Lfd { + // The value is set on the line AFTER the load, get it from there + if let Some(next_state) = register_state_at.get(index as usize + 1) { + // When loading from SDA it will be a relocation so Reg+Offset will both be zero + match (args[0], args[1], args[2]) { + (Argument::FPR(fpr), Argument::Offset(Offset(0)), Argument::GPR(GPR(0))) => { + analysis_result.set_argument_value_at_address(ins_address, 1, + FlowAnalysisValue::Text(format!("{}", next_state[fpr]))); + continue; + } + _ => {} + } + } + } + + // If we're already showing relocations on a line don't also show data flow + if relocations.iter().any(|r| (r.address & !3) == ins_address) { + continue; + } + + let is_store = mnemonic.starts_with("st"); + let default_register_state = RegisterState::new(); + let registers = register_state_at.get(index as usize).unwrap_or(&default_register_state); + for (arg_index, arg) in args.into_iter().enumerate() { + // Hacky shorthand for determining which arguments are sources, + // We only want to show data flow for source registers, not target + // registers. Technically there are some non-"st_" operations which + // read from their first argument but they're rare. 
+ if (arg_index == 0) && !is_store { + continue; + } + + let content = match arg { + Argument::GPR(gpr) => Some(registers[gpr]), + Argument::FPR(fpr) => Some(registers[fpr]), + _ => None, + }; + let analysis_value = match content { + Some(RegisterContent::Symbol(s)) => { + obj.symbols.get(s).map(|sym| + FlowAnalysisValue::Text( + clamp_text_length(sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(), 20))) + } + Some(RegisterContent::InputRegister(reg)) => { + let reg_name = match arg { + Argument::GPR(_) => format!("input_r{reg}"), + Argument::FPR(_) => format!("input_f{reg}"), + _ => panic!("Register content should only be in a register"), + }; + Some(FlowAnalysisValue::Text(reg_name)) + } + Some(RegisterContent::Unknown) | Some(RegisterContent::Variable) => None, + Some(value) => Some(FlowAnalysisValue::Text(format!("{value}"))), + None => None, + }; + if let Some(analysis_value) = analysis_value { + analysis_result.set_argument_value_at_address(ins_address, arg_index as u8, analysis_value); + } + } + } + + Box::new(analysis_result) +} + fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option { use ppc750cl::Opcode; match inst_op { diff --git a/objdiff-core/src/diff/display.rs b/objdiff-core/src/diff/display.rs index 7f928e35..503228cc 100644 --- a/objdiff-core/src/diff/display.rs +++ b/objdiff-core/src/diff/display.rs @@ -12,11 +12,9 @@ use itertools::Itertools; use regex::Regex; use crate::{ - diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff}, - obj::{ - InstructionArg, InstructionArgValue, Object, ParsedInstruction, ResolvedInstructionRef, - ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag, SymbolKind, - }, + diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff}, obj::{ + FlowAnalysisValue, InstructionArg, InstructionArgValue, Object, ParsedInstruction, ResolvedInstructionRef, ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag, 
SymbolKind + } }; #[derive(Debug, Clone)] @@ -46,12 +44,13 @@ pub enum DiffText<'a> { #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] pub enum DiffTextColor { #[default] - Normal, // Grey - Dim, // Dark grey - Bright, // White - Replace, // Blue - Delete, // Red - Insert, // Green + Normal, // Grey + Dim, // Dark grey + Bright, // White + DataFlow, // Light blue + Replace, // Blue + Delete, // Red + Insert, // Green Rotating(u8), } @@ -186,6 +185,11 @@ pub fn display_row( } let mut arg_idx = 0; let mut displayed_relocation = false; + let analysis_result = if diff_config.show_data_flow { + obj.flow_analysis_results.get(&resolved.symbol.address) + } else { + None + }; obj.arch.display_instruction(resolved, diff_config, &mut |part| match part { InstructionPart::Basic(text) => { if text.chars().all(|c| c == ' ') { @@ -208,15 +212,30 @@ pub fn display_row( if arg == InstructionArg::Reloc { displayed_relocation = true; } - match (arg, resolved.ins_ref.branch_dest) { - (InstructionArg::Value(value), _) => cb(DiffTextSegment { - text: DiffText::Argument(value), - color: diff_index + let data_flow_value = + analysis_result.map(|result| + result.as_ref().get_argument_value_at_address( + ins_ref.address, (arg_idx - 1) as u8)).flatten(); + match (arg, data_flow_value, resolved.ins_ref.branch_dest) { + // If we have a flow analysis result, always use that over anything else. 
+ (InstructionArg::Value(_) | InstructionArg::Reloc, Some(FlowAnalysisValue::Text(text)), _) => { + cb(DiffTextSegment { + text: DiffText::Argument(InstructionArgValue::Opaque(Cow::Borrowed(text))), + color: DiffTextColor::DataFlow, + pad_to: 0, + }) + }, + (InstructionArg::Value(value), None, _) => { + let color = diff_index .get() - .map_or(base_color, |i| DiffTextColor::Rotating(i as u8)), - pad_to: 0, - }), - (InstructionArg::Reloc, None) => { + .map_or(base_color, |i| DiffTextColor::Rotating(i as u8)); + cb(DiffTextSegment { + text: DiffText::Argument(value), + color: color, + pad_to: 0, + }) + }, + (InstructionArg::Reloc, _, None) => { let resolved = resolved.relocation.unwrap(); let color = diff_index .get() @@ -235,9 +254,9 @@ pub fn display_row( } Ok(()) } - (InstructionArg::BranchDest(dest), _) | + (InstructionArg::BranchDest(dest), _, _) | // If the relocation was resolved to a branch destination, emit that instead. - (InstructionArg::Reloc, Some(dest)) => { + (InstructionArg::Reloc, _, Some(dest)) => { if let Some(addr) = dest.checked_sub(resolved.symbol.address) { cb(DiffTextSegment { text: DiffText::BranchDest(addr), diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index e182ed9a..42ec8c6d 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -13,6 +13,7 @@ use core::{ fmt, num::{NonZeroU32, NonZeroU64}, }; +use std::collections::HashMap; use flagset::{FlagSet, flags}; @@ -233,6 +234,15 @@ pub enum SymbolKind { Section, } +#[derive(Debug)] +pub enum FlowAnalysisValue { + Text(String), +} + +pub trait FlowAnalysisResult : std::fmt::Debug + Send { + fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue>; +} + #[derive(Debug, Clone, Eq, PartialEq, Hash, Default)] pub struct Symbol { pub name: String, @@ -260,6 +270,7 @@ pub struct Object { pub path: Option, #[cfg(feature = "std")] pub timestamp: Option, + pub flow_analysis_results: HashMap>, } impl Default for Object 
{ @@ -274,6 +285,7 @@ impl Default for Object { path: None, #[cfg(feature = "std")] timestamp: None, + flow_analysis_results: HashMap::>::new(), } } } @@ -312,6 +324,10 @@ impl Object { pub fn symbol_by_name(&self, name: &str) -> Option { self.symbols.iter().position(|symbol| symbol.section.is_some() && symbol.name == name) } + + pub fn has_flow_analysis_result(&self) -> bool { + !self.flow_analysis_results.is_empty() + } } #[derive(Debug, Clone, Eq, PartialEq, Hash)] diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index b5cef3b5..ec5727e7 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -432,17 +432,18 @@ fn map_relocations( Ok(()) } -fn calculate_pooled_relocations( - arch: &dyn Arch, - sections: &mut [Section], - symbols: &[Symbol], -) -> Result<()> { - for (section_index, section) in sections.iter_mut().enumerate() { +fn perform_data_flow_analysis(obj: &mut Object, config: &DiffObjConfig) -> Result<()> { + // If neither of these settings are on, no flow analysis to perform + if !config.analyze_data_flow && !config.ppc_calculate_pool_relocations { + return Ok(()); + } + + let mut generated_relocations = Vec::<(usize, Vec)>::new(); + for (section_index, section) in obj.sections.iter().enumerate() { if section.kind != SectionKind::Code { continue; } - let mut fake_pool_relocs = Vec::new(); - for symbol in symbols { + for symbol in obj.symbols.iter() { if symbol.section != Some(section_index) { continue; } @@ -457,14 +458,36 @@ fn calculate_pooled_relocations( symbol.address + symbol.size ) })?; - fake_pool_relocs.append(&mut arch.generate_pooled_relocations( - symbol.address, - code, - §ion.relocations, - symbols, - )); + + // Optional pooled relocation computation + // Long view: This could be replaced by the full data flow analysis + // once that feature has stabilized. 
+ if config.ppc_calculate_pool_relocations { + let relocations = obj.arch.generate_pooled_relocations( + symbol.address, + code, + §ion.relocations, + &obj.symbols); + generated_relocations.push((section_index, relocations)); + } + + // Optional full data flow analysis + if config.analyze_data_flow { + obj.arch.data_flow_analysis( + &obj, + symbol, + code, + §ion.relocations, + ).and_then(|flow_result| { + obj.flow_analysis_results.insert(symbol.address, flow_result) + }); + } } - section.relocations.append(&mut fake_pool_relocs); + } + for (section_index, mut relocations) in generated_relocations { + obj.sections[section_index].relocations.append(&mut relocations); + } + for section in obj.sections.iter_mut() { section.relocations.sort_by_key(|r| r.address); } Ok(()) @@ -865,15 +888,12 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result { let (mut symbols, symbol_indices) = map_symbols(arch.as_ref(), &obj_file, §ions, §ion_indices, split_meta.as_ref())?; map_relocations(arch.as_ref(), &obj_file, &mut sections, §ion_indices, &symbol_indices)?; - if config.ppc_calculate_pool_relocations { - calculate_pooled_relocations(arch.as_ref(), &mut sections, &symbols)?; - } parse_line_info(&obj_file, &mut sections, §ion_indices, data)?; if config.combine_data_sections || config.combine_text_sections { combine_sections(&mut sections, &mut symbols, config)?; } arch.post_init(§ions, &symbols); - Ok(Object { + let mut obj = Object { arch, endianness: obj_file.endianness(), symbols, @@ -883,7 +903,14 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result { path: None, #[cfg(feature = "std")] timestamp: None, - }) + flow_analysis_results: Default::default(), + }; + + // Need to construct the obj first so that we have a convinient package to + // pass to flow analysis. Then the flow analysis will mutate obj adding + // additional data to it. 
+ perform_data_flow_analysis(&mut obj, &config)?; + Ok(obj) } #[cfg(feature = "std")] diff --git a/objdiff-core/src/util.rs b/objdiff-core/src/util.rs index 81995f19..b7323a4a 100644 --- a/objdiff-core/src/util.rs +++ b/objdiff-core/src/util.rs @@ -5,6 +5,8 @@ use anyhow::{Result, ensure}; use num_traits::PrimInt; use object::{Endian, Object}; +use std::hash::{Hash, Hasher}; + // https://stackoverflow.com/questions/44711012/how-do-i-format-a-signed-integer-to-a-sign-aware-hexadecimal-representation pub struct ReallySigned(pub N); @@ -59,3 +61,35 @@ pub fn align_u64_to(len: u64, align: u64) -> u64 { len + ((align - (len % align) pub fn align_data_slice_to(data: &mut Vec, align: u64) { data.resize(align_u64_to(data.len() as u64, align) as usize, 0); } + +// Float where we specifically care about comparing the raw bits rather than +// caring about IEEE semantics. +#[derive(Copy, Clone, Debug)] +pub struct RawFloat(pub f32); +impl PartialEq for RawFloat { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} +impl Eq for RawFloat {} +impl Hash for RawFloat { + fn hash(&self, state: &mut H) { + self.0.to_bits().hash(state) + } +} + +// Double where we specifically care about comparing the raw bits rather than +// caring about IEEE semantics. 
+#[derive(Copy, Clone, Debug)] +pub struct RawDouble(pub f64); +impl PartialEq for RawDouble { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} +impl Eq for RawDouble {} +impl Hash for RawDouble { + fn hash(&self, state: &mut H) { + self.0.to_bits().hash(state) + } +} diff --git a/objdiff-gui/src/views/appearance.rs b/objdiff-gui/src/views/appearance.rs index 805d4b9e..6a106f56 100644 --- a/objdiff-gui/src/views/appearance.rs +++ b/objdiff-gui/src/views/appearance.rs @@ -23,6 +23,8 @@ pub struct Appearance { #[serde(skip)] pub highlight_color: Color32, // WHITE #[serde(skip)] + pub dataflow_color: Color32, // + #[serde(skip)] pub replace_color: Color32, // LIGHT_BLUE #[serde(skip)] pub insert_color: Color32, // GREEN @@ -61,6 +63,7 @@ impl Default for Appearance { emphasized_text_color: Color32::LIGHT_GRAY, deemphasized_text_color: Color32::DARK_GRAY, highlight_color: Color32::WHITE, + dataflow_color: Color32::from_rgb(0, 128, 128), replace_color: Color32::LIGHT_BLUE, insert_color: Color32::GREEN, delete_color: Color32::from_rgb(200, 40, 41), @@ -104,6 +107,7 @@ impl Appearance { self.emphasized_text_color = Color32::LIGHT_GRAY; self.deemphasized_text_color = Color32::DARK_GRAY; self.highlight_color = Color32::WHITE; + self.dataflow_color = Color32::from_rgb(0, 128, 128); self.replace_color = Color32::LIGHT_BLUE; self.insert_color = Color32::GREEN; self.delete_color = Color32::from_rgb(200, 40, 41); @@ -114,6 +118,7 @@ impl Appearance { self.emphasized_text_color = Color32::DARK_GRAY; self.deemphasized_text_color = Color32::LIGHT_GRAY; self.highlight_color = Color32::BLACK; + self.dataflow_color = Color32::from_rgb(0, 128, 128); self.replace_color = Color32::DARK_BLUE; self.insert_color = Color32::DARK_GREEN; self.delete_color = Color32::from_rgb(200, 40, 41); diff --git a/objdiff-gui/src/views/diff.rs b/objdiff-gui/src/views/diff.rs index 49dedee9..5266d8b8 100644 --- a/objdiff-gui/src/views/diff.rs +++ 
b/objdiff-gui/src/views/diff.rs @@ -281,6 +281,19 @@ pub fn diff_view_ui( }) }); } + + // Only need to check the first Object. Technically the first could not have a flow analysis + // result while the second does but we don't want to waste space on two separate checkboxes. + if result.first_obj.as_ref().is_some_and(|(first, _)| first.has_flow_analysis_result()) { + let mut placeholder = diff_config.show_data_flow; + if ui + .checkbox(&mut placeholder, "Show data flow") + .on_hover_text("Show data flow analysis results in place of register names") + .clicked() + { + ret = Some(DiffViewAction::ToggleShowDataFlow); + } + } } else if column == 1 { // Right column diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index dff2c15c..34e08173 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -174,6 +174,7 @@ fn diff_text_ui( DiffTextColor::Normal => appearance.text_color, DiffTextColor::Dim => appearance.deemphasized_text_color, DiffTextColor::Bright => appearance.emphasized_text_color, + DiffTextColor::DataFlow => appearance.dataflow_color, DiffTextColor::Replace => appearance.replace_color, DiffTextColor::Delete => appearance.delete_color, DiffTextColor::Insert => appearance.insert_color, diff --git a/objdiff-gui/src/views/symbol_diff.rs b/objdiff-gui/src/views/symbol_diff.rs index cfed70f2..ca6038bf 100644 --- a/objdiff-gui/src/views/symbol_diff.rs +++ b/objdiff-gui/src/views/symbol_diff.rs @@ -79,6 +79,8 @@ pub enum DiffViewAction { SetMapping(usize, usize), /// Set the show_mapped_symbols flag SetShowMappedSymbols(bool), + /// Toggle the show_data_flow flag + ToggleShowDataFlow, } #[derive(Debug, Clone, Default, Eq, PartialEq)] @@ -350,6 +352,12 @@ impl DiffViewState { DiffViewAction::SetShowMappedSymbols(value) => { self.symbol_state.show_mapped_symbols = value; } + DiffViewAction::ToggleShowDataFlow => { + let Ok(mut state) = state.write() else { + return; + }; + 
state.config.diff_obj_config.show_data_flow = !state.config.diff_obj_config.show_data_flow; + } } } From e9a22b85d7d3c71578e4c69d4dbd25d3e12857c9 Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 20:05:05 -0700 Subject: [PATCH 02/12] * Move flow analysis to dedicated file * Show string constants inline * Handle calls to MWCC "sled" helpers which otherwise disrupt flow analysis --- objdiff-core/src/arch/ppc/flow_analysis.rs | 573 +++++++++++++++++++ objdiff-core/src/arch/{ppc.rs => ppc/mod.rs} | 501 +--------------- 2 files changed, 579 insertions(+), 495 deletions(-) create mode 100644 objdiff-core/src/arch/ppc/flow_analysis.rs rename objdiff-core/src/arch/{ppc.rs => ppc/mod.rs} (62%) diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs new file mode 100644 index 00000000..2c51b604 --- /dev/null +++ b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -0,0 +1,573 @@ +use itertools::Itertools; +use ppc750cl::Simm; +use std::ops::{Index, IndexMut}; +use std::collections::BTreeMap; +use crate::{ + util::{RawFloat, RawDouble}, + obj::{FlowAnalysisValue, FlowAnalysisResult, Object, Symbol, Relocation}, + arch::DataType, +}; +use std::ffi::CStr; + +fn is_store_instruction(op: ppc750cl::Opcode) -> bool { + use ppc750cl::Opcode; + match op { + Opcode::Stbux | Opcode::Stbx | Opcode::Stfdux | Opcode::Stfdx | Opcode::Stfiwx | + Opcode::Stfsux | Opcode::Stfsx | Opcode::Sthbrx | Opcode::Sthux | Opcode::Sthx | + Opcode::Stswi | Opcode::Stswx | Opcode::Stwbrx | Opcode::Stwcx_ | Opcode::Stwux | + Opcode::Stwx | Opcode::Stwu | Opcode::Stb | Opcode::Stbu | Opcode::Sth | Opcode::Sthu | + Opcode::Stmw | Opcode::Stfs | Opcode::Stfsu | Opcode::Stfd | Opcode::Stfdu => true, + _ => false, + } +} + +pub fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option { + use ppc750cl::Opcode; + match inst_op { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | 
Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } +} + +#[derive(Default, PartialEq, Eq, Copy, Hash, Clone, Debug)] +enum RegisterContent { + #[default] + Unknown, + Variable, // Multiple potential values + FloatConstant(RawFloat), + DoubleConstant(RawDouble), + IntConstant(i32), + InputRegister(u8), + Symbol(usize), +} + +impl std::fmt::Display for RegisterContent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RegisterContent::Unknown => write!(f, "unknown"), + RegisterContent::Variable => write!(f, "variable"), + RegisterContent::IntConstant(i) => + // -i is safe because it's at most a 16 bit constant in the i32 + if *i >= 0 { write!(f, "0x{:x}", i) } else { write!(f, "-0x{:x}", -i) }, + RegisterContent::FloatConstant(RawFloat(fp)) => write!(f, "{fp:?}f"), + RegisterContent::DoubleConstant(RawDouble(fp)) => write!(f, "{fp:?}d"), + RegisterContent::InputRegister(p) => write!(f, "input{p}"), + RegisterContent::Symbol(_u) => write!(f, "relocation"), + } + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] +struct RegisterState { + gpr: [RegisterContent; 32], + fpr: [RegisterContent; 32], +} + +impl RegisterState { + fn 
new() -> Self { + RegisterState { + gpr: [RegisterContent::Unknown; 32], + fpr: [RegisterContent::Unknown; 32], + } + } + + // During a function call, these registers must be assumed trashed. + fn clear_volatile(&mut self) { + self[ppc750cl::GPR(0)] = RegisterContent::Unknown; + for i in 0..=13 { + self[ppc750cl::GPR(i)] = RegisterContent::Unknown; + } + for i in 0..=13 { + self[ppc750cl::FPR(i)] = RegisterContent::Unknown; + } + } + + // Mark potential input values. + // Subsequent flow analysis will "realize" that they are not actually inputs if + // they get overwritten with another value before getting read. + fn set_potential_inputs(&mut self) { + for g_reg in 3..=13 { + self[ppc750cl::GPR(g_reg)] = RegisterContent::InputRegister(g_reg); + } + for f_reg in 1..=13 { + self[ppc750cl::FPR(f_reg)] = RegisterContent::InputRegister(f_reg); + } + } + + // If the there is no value, we can take the new known value. + // If there's a known value different than the new value, the content + // must is variable. + // Returns whether the current value was updated. + fn unify_values(current: &mut RegisterContent, new: &RegisterContent) -> bool { + if *current == *new { + false + } else { + if *current == RegisterContent::Unknown { + *current = *new; + true + } else if *current == RegisterContent::Variable { + // Already variable + false + } else { + *current = RegisterContent::Variable; + true + } + } + } + + // Unify currently known register contents in a give situation with new + // information about the register contents in that situation. + // Currently unknown register contents can be filled, but if there are + // conflicting contents, we go back to unknown. 
+ fn unify(&mut self, other: &RegisterState) -> bool { + let mut updated = false; + for i in 0..32 { + updated |= Self::unify_values(&mut self.gpr[i], &other.gpr[i]); + updated |= Self::unify_values(&mut self.fpr[i], &other.fpr[i]); + } + updated + } +} + +impl Index for RegisterState { + type Output = RegisterContent; + fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { + &self.gpr[gpr.0 as usize] + } +} +impl IndexMut for RegisterState { + fn index_mut(&mut self, gpr: ppc750cl::GPR) -> &mut Self::Output { + &mut self.gpr[gpr.0 as usize] + } +} + +impl Index for RegisterState { + type Output = RegisterContent; + fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { + &self.fpr[fpr.0 as usize] + } +} +impl IndexMut for RegisterState { + fn index_mut(&mut self, fpr: ppc750cl::FPR) -> &mut Self::Output { + &mut self.fpr[fpr.0 as usize] + } +} + +fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, args: &[ppc750cl::Argument; 5]) { + use ppc750cl::{Opcode, Argument, GPR}; + match (op, args[0], args[1], args[2]) { + (Opcode::Or, Argument::GPR(a), Argument::GPR(b), Argument::GPR(c)) => { + // Move is implemented as or with self for ints + if b == c { + registers[a] = registers[b]; + } else { + registers[a] = RegisterContent::Unknown; + } + } + (Opcode::Fmr, Argument::FPR(a), Argument::FPR(b), _) => { + registers[a] = registers[b]; + } + (Opcode::Addi, Argument::GPR(a), Argument::GPR(GPR(0)), Argument::Simm(c)) => { + // Load immidiate implemented as addi with addend = r0 + // Let Addi with other addends fall through to the case which + // overwrites the destination + registers[a] = RegisterContent::IntConstant(c.0 as i32); + } + (Opcode::Bcctr, _, _, _) => { + // Called a function pointer, may have erased volatile registers + registers.clear_volatile(); + } + (Opcode::B, _, _, _) => { + if get_branch_offset(args) == 0 { + // Call to another function + registers.clear_volatile(); + } + } + (Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | + 
Opcode::Stfsu | Opcode::Stfdu, _, _, Argument::GPR(rel)) => { + // Storing with update, clear updated register (third arg) + registers[rel] = RegisterContent::Unknown; + } + (Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | + Opcode::Stfsux | Opcode::Stfdux, _, Argument::GPR(rel), _) => { + // Storing indexed with update, clear updated register (second arg) + registers[rel] = RegisterContent::Unknown; + } + (Opcode::Stb | Opcode::Sth | Opcode::Stw | + Opcode::Stbx | Opcode::Sthx | Opcode::Stwx | + Opcode::Stfs | Opcode::Stfd, _, _, _) => { + // Storing, does not change registers + } + (Opcode::Lmw, Argument::GPR(target), _, _) => { + // `lmw` overwrites all registers from rd to r31. + for reg in target.0..31 { + registers[GPR(reg)] = RegisterContent::Unknown; + } + } + (_, Argument::GPR(a), _, _) => { + // Other operations which write to GPR a + registers[a] = RegisterContent::Unknown; + } + (_, Argument::FPR(a), _, _) => { + // Other operations which write to FPR a + registers[a] = RegisterContent::Unknown; + } + (_, _, _, _) => {} + } + +} + +fn get_branch_offset(args: &[ppc750cl::Argument; 5]) -> i32 { + for arg in args.iter() { + if let ppc750cl::Argument::BranchDest(dest) = arg { + return dest.0 / 4; + } + } + return 0; +} + +#[derive(Debug, Default)] +struct PPCFlowAnalysisResult { + argument_contents: BTreeMap<(u64, u8), FlowAnalysisValue>, +} + +impl PPCFlowAnalysisResult { + fn set_argument_value_at_address(&mut self, address: u64, argument: u8, value: FlowAnalysisValue) { + self.argument_contents.insert((address, argument), value); + } + + fn new() -> Self { + PPCFlowAnalysisResult { argument_contents: Default::default() } + } +} + +impl FlowAnalysisResult for PPCFlowAnalysisResult { + fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue> { + self.argument_contents.get(&(address, argument)) + } +} + +fn clamp_text_length(s: String, max: usize) -> String { + if s.len() <= max { + s + } else { + format!("{}…", 
s.chars().take(max - 3).collect::()) + } +} + +// Executing op with args at cur_address, update current_state with symbols that +// come from relocations. That is, references to globals, floating point +// constants, string constants, etc. +fn fill_registers_from_relocation( + reloc: &Relocation, + current_state: &mut RegisterState, + obj: &Object, + op: ppc750cl::Opcode, + args: &[ppc750cl::Argument; 5], +) { + let content = if let Some(bytes) = obj.symbol_data(reloc.target_symbol) { + match guess_data_type_from_load_store_inst_op(op) { + Some(DataType::Float) => RegisterContent::FloatConstant(RawFloat(match obj.endianness { + object::Endianness::Little => f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4])), + object::Endianness::Big => f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4])), + })), + Some(DataType::Double) => RegisterContent::DoubleConstant(RawDouble(match obj.endianness { + object::Endianness::Little => f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])), + object::Endianness::Big => f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8])), + })), + _ => RegisterContent::Symbol(reloc.target_symbol), + } + } else { + RegisterContent::Symbol(reloc.target_symbol) + }; + // Only update the register state for loads. We may store to a reloc + // address but that doesn't update register contents. + if !is_store_instruction(op) { + match (op, args[0]) { + // Everything else is a load of some sort + (_, ppc750cl::Argument::GPR(gpr)) => { + current_state[gpr] = content; + } + (_, ppc750cl::Argument::FPR(fpr)) => { + current_state[fpr] = content; + } + _ => {} + } + } +} + +// Special helper fragments generated by MWCC. 
// Name prefixes of the special helper fragments ("sleds") generated by MWCC.
// See: https://github.com/encounter/decomp-toolkit/blob/main/src/analysis/pass.rs
const SLEDS: [&str; 6] =
    ["_savefpr_", "_restfpr_", "_savegpr_", "_restgpr_", "_savev", "_restv"];

/// Returns `true` when `name` begins with one of the `SLEDS` prefixes.
fn is_sled_function(name: &str) -> bool {
    for prefix in SLEDS {
        if name.starts_with(prefix) {
            return true;
        }
    }
    false
}
+ if !register_state_at[index].unify(¤t_state) && completed_first_pass { + break; + } + + // Get symbol used in this instruction + let cur_addr = (func_address as u32) + ((index * 4) as u32); + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + + // Is this a branch to a compiler generated helper? These helpers + // do not trash registers like normal function calls, so we don't + // want to treat this as normal execution. + let symbol = reloc.map(|r| obj.symbols.get(r.target_symbol)).flatten(); + let is_sled_invocation = symbol.is_some_and(|x| is_sled_function(&x.name)); + + // Execute the instruction to update the state + // Since sled invocations are only used to save / restore registers + // as part of prelude / cleanup in a function call we don't have to + // do any execution for them. + if !is_sled_invocation { + execute_instruction(&mut current_state, op, args); + } + + // Fill in register state coming from relocations at this line. This + // handles references to global variables, floating point constants, + // etc. + if let Some(reloc) = reloc { + fill_registers_from_relocation(&reloc, &mut current_state, obj, *op, args); + } + + // Add conditional branches to execution queue + // Only take a given (address, register state) combination once. If + // the known register state is different we have to take the branch + // again to stabilize the known values for backwards branches. + if op == &ppc750cl::Opcode::Bc { + let branch_state = (index, current_state.clone()); + if !taken_branches.contains(&branch_state) { + let offset = get_branch_offset(args); + let target_index = ((index as i32) + offset) as usize; + execution_queue.push_back((target_index, current_state.clone())); + taken_branches.insert(branch_state); + + // We should never hit this case, but avoid getting stuck in + // an infinite loop if we hit some kind of bad behavior. 
+ failsafe_counter += 1; + if failsafe_counter > 256 { + println!("Analysis of {} failed to stabilize", func_symbol.name); + return Box::new(PPCFlowAnalysisResult::new()); + } + } + } + + // Update index + if op == &ppc750cl::Opcode::B { + // Unconditional branch + let offset = get_branch_offset(args); + if offset > 0 { + // Jump table or branch to over else clause. + index += offset as usize; + } else if offset == 0 { + // Function call with relocation. We'll return to + // the next instruction. + index += 1; + } else { + // Unconditional branch (E.g.: loop { ... }) + // Also some compilations of loops put the conditional at + // the end and B to it for the check of the first iteration. + let branch_state = (index, current_state.clone()); + if taken_branches.contains(&branch_state) { + break; + } + taken_branches.insert(branch_state); + index = ((index as i32) + offset) as usize; + } + } else { + // Normal execution of next instruction + index += 1; + } + } + + // Mark that we've completed at least one pass over the function, at + // this point we can break out if the code we're running doesn't change + // any register outcomes. + completed_first_pass = true; + } + + // Store the relevant data flow values for simplified instructions + generate_flow_analysis_result(&obj, func_address, code, register_state_at, relocations) +} + +fn get_string_data( + obj: &Object, + symbol_index: usize, + offset: Simm, +) -> Option<&str> { + if let Some(sym) = obj.symbols.get(symbol_index) { + if sym.name.starts_with("@stringBase") && offset.0 != 0 { + if let Some(data) = obj.symbol_data(symbol_index) { + let bytes = &data[offset.0 as usize..]; + if let Ok(Ok(str)) = CStr::from_bytes_until_nul(bytes).map(|x| x.to_str()) { + return Some(str) + } + } + } + } + None +} + +// Write the relevant part of the flow analysis out into the FlowAnalysisResult +// the rest of the application will use to query results of the flow analysis. 
+// Flow analysis will compute the known contents of every register at every +// line, but we only need to record the values of registers that are actually +// referenced at each line. +fn generate_flow_analysis_result( + obj: &Object, + base_address: u64, + code: &[u8], + register_state_at: Vec::, + relocations: &[Relocation] +) -> Box { + use ppc750cl::{InsIter, Argument, Offset, GPR}; + let mut analysis_result = PPCFlowAnalysisResult::new(); + let default_register_state = RegisterState::new(); + for (addr, ins) in InsIter::new(code, 0) { + let ins_address = base_address + (addr as u64); + let index = addr / 4; + let ppc750cl::ParsedIns {mnemonic: _, args} = ins.simplified(); + + // Special case to show float and double constants on the line where + // they are being loaded. + // We need to do this before we break out on showing relocations in the + // subsequent if statement. + if ins.op == ppc750cl::Opcode::Lfs || ins.op == ppc750cl::Opcode::Lfd { + // The value is set on the line AFTER the load, get it from there + if let Some(next_state) = register_state_at.get(index as usize + 1) { + // When loading from SDA it will be a relocation so Reg+Offset will both be zero + match (args[0], args[1], args[2]) { + (Argument::FPR(fpr), Argument::Offset(Offset(0)), Argument::GPR(GPR(0))) => { + if let RegisterContent::Symbol(_index) = next_state[fpr] { + // We loaded a global variable, not a constant, + // don't do anything for this case. + } else { + analysis_result.set_argument_value_at_address(ins_address, 1, + FlowAnalysisValue::Text(format!("{}", next_state[fpr]))); + continue; + } + } + _ => {} + } + } + } + + // Special case to show string constants on the line where they are + // being indexed to. 
This will typically be "addi t, stringbase, offset" + let registers = register_state_at.get(index as usize).unwrap_or(&default_register_state); + if let (ppc750cl::Opcode::Addi, Argument::GPR(rel), Argument::Simm(offset)) = (ins.op, args[1], args[2]) { + if let RegisterContent::Symbol(sym_index) = registers[rel] { + if let Some(str) = get_string_data(obj, sym_index, offset) { + // Show the string constant in the analysis result + let formatted = format!("\"{}\"", str); + analysis_result.set_argument_value_at_address(ins_address, 2, + FlowAnalysisValue::Text(clamp_text_length(formatted, 20))); + // Don't continue, we want to show the stringbase value as well + } + } + } + + // If we're already showing relocations on a line don't also show data flow + if relocations.iter().any(|r| (r.address & !3) == ins_address) { + continue; + } + + let is_store = is_store_instruction(ins.op); + for (arg_index, arg) in args.into_iter().enumerate() { + // Hacky shorthand for determining which arguments are sources, + // We only want to show data flow for source registers, not target + // registers. Technically there are some non-"st_" operations which + // read from their first argument but they're rare. 
+ if (arg_index == 0) && !is_store { + continue; + } + + let content = match arg { + Argument::GPR(gpr) => Some(registers[gpr]), + Argument::FPR(fpr) => Some(registers[fpr]), + _ => None, + }; + let analysis_value = match content { + Some(RegisterContent::Symbol(s)) => { + obj.symbols.get(s).map(|sym| + FlowAnalysisValue::Text( + clamp_text_length(sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(), 20))) + } + Some(RegisterContent::InputRegister(reg)) => { + let reg_name = match arg { + Argument::GPR(_) => format!("input_r{reg}"), + Argument::FPR(_) => format!("input_f{reg}"), + _ => panic!("Register content should only be in a register"), + }; + Some(FlowAnalysisValue::Text(reg_name)) + } + Some(RegisterContent::Unknown) | Some(RegisterContent::Variable) => None, + Some(value) => Some(FlowAnalysisValue::Text(format!("{value}"))), + None => None, + }; + if let Some(analysis_value) = analysis_value { + analysis_result.set_argument_value_at_address(ins_address, arg_index as u8, analysis_value); + } + } + } + + Box::new(analysis_result) +} \ No newline at end of file diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc/mod.rs similarity index 62% rename from objdiff-core/src/arch/ppc.rs rename to objdiff-core/src/arch/ppc/mod.rs index 8f2b8706..734ca10c 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -5,8 +5,6 @@ use alloc::{ vec::Vec, }; -use itertools::Itertools; -use std::ops::{Index, IndexMut}; use anyhow::{Result, bail, ensure}; use cwextab::{ExceptionTableData, decode_extab}; use flagset::Flags; @@ -20,12 +18,13 @@ use crate::{ display::{ContextItem, HoverItem, HoverItemColor, InstructionPart, SymbolNavigationKind}, }, obj::{ - FlowAnalysisResult, FlowAnalysisValue, InstructionRef, Object, Relocation, RelocationFlags, + FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet }, - util::{RawFloat, RawDouble}, }; 
+mod flow_analysis; + // Relative relocation, can be Simm, Offset or BranchDest fn is_relative_arg(arg: &ppc750cl::Argument) -> bool { matches!( @@ -46,18 +45,6 @@ fn is_rel_abs_arg(arg: &ppc750cl::Argument) -> bool { fn is_offset_arg(arg: &ppc750cl::Argument) -> bool { matches!(arg, ppc750cl::Argument::Offset(_)) } -fn is_store_instruction(op: ppc750cl::Opcode) -> bool { - use ppc750cl::Opcode; - match op { - Opcode::Stbux | Opcode::Stbx | Opcode::Stfdux | Opcode::Stfdx | Opcode::Stfiwx | - Opcode::Stfsux | Opcode::Stfsx | Opcode::Sthbrx | Opcode::Sthux | Opcode::Sthx | - Opcode::Stswi | Opcode::Stswx | Opcode::Stwbrx | Opcode::Stwcx_ | Opcode::Stwux | - Opcode::Stwx | Opcode::Stwu | Opcode::Stb | Opcode::Stbu | Opcode::Sth | Opcode::Sthu | - Opcode::Stmw | Opcode::Stfs | Opcode::Stfsu | Opcode::Stfd | Opcode::Stfdu => true, - _ => false, - } -} - #[derive(Debug)] pub struct ArchPpc { /// Exception info @@ -190,7 +177,7 @@ impl Arch for ArchPpc { code: &[u8], relocations: &[Relocation], ) -> Option> { - Some(ppc_data_flow_analysis(obj, symbol, code, relocations)) + Some(flow_analysis::ppc_data_flow_analysis(obj, symbol, code, relocations)) } fn implcit_addend( @@ -251,7 +238,7 @@ impl Arch for ArchPpc { return Some(DataType::String); } let opcode = ppc750cl::Opcode::from(resolved.ins_ref.opcode as u8); - if let Some(ty) = guess_data_type_from_load_store_inst_op(opcode) { + if let Some(ty) = flow_analysis::guess_data_type_from_load_store_inst_op(opcode) { // Numeric type. 
return Some(ty); } @@ -527,482 +514,6 @@ fn make_symbol_ref(symbol: &object::Symbol) -> Result { Ok(ExtabSymbolRef { original_index: symbol.index().0 - 1, name, demangled_name }) } -#[derive(Default, PartialEq, Eq, Copy, Hash, Clone, Debug)] -enum RegisterContent { - #[default] - Unknown, - Variable, // Multiple potential values - FloatConstant(RawFloat), - DoubleConstant(RawDouble), - IntConstant(i32), - InputRegister(u8), - Symbol(usize), -} - -impl std::fmt::Display for RegisterContent { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - RegisterContent::Unknown => write!(f, "unknown"), - RegisterContent::Variable => write!(f, "variable"), - RegisterContent::IntConstant(i) => - // -i is safe because it's at most a 16 bit constant in the i32 - if *i >= 0 { write!(f, "0x{:x}", i) } else { write!(f, "-0x{:x}", -i) }, - RegisterContent::FloatConstant(RawFloat(fp)) => write!(f, "{fp:?}f"), - RegisterContent::DoubleConstant(RawDouble(fp)) => write!(f, "{fp:?}d"), - RegisterContent::InputRegister(p) => write!(f, "input{p}"), - RegisterContent::Symbol(_u) => write!(f, "relocation"), - } - } -} - -#[derive(Clone, PartialEq, Eq, Hash)] -struct RegisterState { - gpr: [RegisterContent; 32], - fpr: [RegisterContent; 32], -} - -impl RegisterState { - fn new() -> Self { - RegisterState { - gpr: [RegisterContent::Unknown; 32], - fpr: [RegisterContent::Unknown; 32], - } - } - - // During a function call, these registers must be assumed trashed. - fn clear_volatile(&mut self) { - self[ppc750cl::GPR(0)] = RegisterContent::Unknown; - for i in 0..=13 { - self[ppc750cl::GPR(i)] = RegisterContent::Unknown; - } - for i in 0..=13 { - self[ppc750cl::FPR(i)] = RegisterContent::Unknown; - } - } - - // Mark potential input values. - // Subsequent flow analysis will "realize" that they are not actually inputs if - // they get overwritten with another value before getting read. 
- fn set_potential_inputs(&mut self) { - for g_reg in 3..=13 { - self[ppc750cl::GPR(g_reg)] = RegisterContent::InputRegister(g_reg); - } - for f_reg in 1..=13 { - self[ppc750cl::FPR(f_reg)] = RegisterContent::InputRegister(f_reg); - } - } - - // If the there is no value, we can take the new known value. - // If there's a known value different than the new value, the content - // must is variable. - // Returns whether the current value was updated. - fn unify_values(current: &mut RegisterContent, new: &RegisterContent) -> bool { - if *current == *new { - false - } else { - if *current == RegisterContent::Unknown { - *current = *new; - true - } else if *current == RegisterContent::Variable { - // Already variable - false - } else { - *current = RegisterContent::Variable; - true - } - } - } - - // Unify currently known register contents in a give situation with new - // information about the register contents in that situation. - // Currently unknown register contents can be filled, but if there are - // conflicting contents, we go back to unknown. 
- fn unify(&mut self, other: &RegisterState) -> bool { - let mut updated = false; - for i in 0..32 { - updated |= Self::unify_values(&mut self.gpr[i], &other.gpr[i]); - updated |= Self::unify_values(&mut self.fpr[i], &other.fpr[i]); - } - updated - } -} - -impl Index for RegisterState { - type Output = RegisterContent; - fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { - &self.gpr[gpr.0 as usize] - } -} -impl IndexMut for RegisterState { - fn index_mut(&mut self, gpr: ppc750cl::GPR) -> &mut Self::Output { - &mut self.gpr[gpr.0 as usize] - } -} - -impl Index for RegisterState { - type Output = RegisterContent; - fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { - &self.fpr[fpr.0 as usize] - } -} -impl IndexMut for RegisterState { - fn index_mut(&mut self, fpr: ppc750cl::FPR) -> &mut Self::Output { - &mut self.fpr[fpr.0 as usize] - } -} - -fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, args: &[ppc750cl::Argument; 5]) { - use ppc750cl::{Opcode, Argument, GPR}; - match (op, args[0], args[1], args[2]) { - (Opcode::Or, Argument::GPR(a), Argument::GPR(b), Argument::GPR(c)) => { - // Move is implemented as or with self for ints - if b == c { - registers[a] = registers[b]; - } else { - registers[a] = RegisterContent::Unknown; - } - } - (Opcode::Fmr, Argument::FPR(a), Argument::FPR(b), _) => { - registers[a] = registers[b]; - } - (Opcode::Addi, Argument::GPR(a), Argument::GPR(GPR(0)), Argument::Simm(c)) => { - // Load immidiate implemented as addi with addend = r0 - // Let Addi with other addends fall through to the case which - // overwrites the destination - registers[a] = RegisterContent::IntConstant(c.0 as i32); - } - (Opcode::Bcctr, _, _, _) => { - // Called a function pointer, may have erased volatile registers - registers.clear_volatile(); - } - (Opcode::B, _, _, _) => { - if get_branch_offset(args) == 0 { - // Call to another function - registers.clear_volatile(); - } - } - (Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | - 
Opcode::Stfsu | Opcode::Stfdu, _, _, Argument::GPR(rel)) => { - // Storing with update, clear updated register (third arg) - registers[rel] = RegisterContent::Unknown; - } - (Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | - Opcode::Stfsux | Opcode::Stfdux, _, Argument::GPR(rel), _) => { - // Storing indexed with update, clear updated register (second arg) - registers[rel] = RegisterContent::Unknown; - } - (Opcode::Stb | Opcode::Sth | Opcode::Stw | - Opcode::Stbx | Opcode::Sthx | Opcode::Stwx | - Opcode::Stfs | Opcode::Stfd, _, _, _) => { - // Storing, does not change registers - } - (Opcode::Lmw, Argument::GPR(target), _, _) => { - // `lmw` overwrites all registers from rd to r31. - for reg in target.0..31 { - registers[GPR(reg)] = RegisterContent::Unknown; - } - } - (_, Argument::GPR(a), _, _) => { - // Other operations which write to GPR a - registers[a] = RegisterContent::Unknown; - } - (_, Argument::FPR(a), _, _) => { - // Other operations which write to FPR a - registers[a] = RegisterContent::Unknown; - } - (_, _, _, _) => {} - } - -} - -fn get_branch_offset(args: &[ppc750cl::Argument; 5]) -> i32 { - for arg in args.iter() { - if let ppc750cl::Argument::BranchDest(dest) = arg { - return dest.0 / 4; - } - } - return 0; -} - -#[derive(Debug, Default)] -struct PPCFlowAnalysisResult { - argument_contents: BTreeMap<(u64, u8), FlowAnalysisValue>, -} - -impl PPCFlowAnalysisResult { - fn set_argument_value_at_address(&mut self, address: u64, argument: u8, value: FlowAnalysisValue) { - self.argument_contents.insert((address, argument), value); - } - - fn new() -> Self { - PPCFlowAnalysisResult { argument_contents: Default::default() } - } -} - -impl FlowAnalysisResult for PPCFlowAnalysisResult { - fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue> { - self.argument_contents.get(&(address, argument)) - } -} - -fn clamp_text_length(s: String, max: usize) -> String { - if s.len() <= max { - s - } else { - format!("{}…", 
s.chars().take(max - 3).collect::()) - } -} - -// Executing op with args at cur_address, update current_state with symbols that -// come from relocations. That is, references to globals, floating point -// constants, string constants, etc. -fn fill_registers_from_relocations( - current_state: &mut RegisterState, - obj: &Object, - cur_addr: u32, - op: ppc750cl::Opcode, - args: &[ppc750cl::Argument; 5], - relocations: &[Relocation], -) { - let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); - if let Some(reloc) = reloc { - let bytes = obj.symbol_data(reloc.target_symbol).unwrap_or(&[]); - let content = match guess_data_type_from_load_store_inst_op(op) { - Some(DataType::Float) => RegisterContent::FloatConstant(RawFloat(match obj.endianness { - object::Endianness::Little => f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4])), - object::Endianness::Big => f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4])), - })), - Some(DataType::Double) => RegisterContent::DoubleConstant(RawDouble(match obj.endianness { - object::Endianness::Little => f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])), - object::Endianness::Big => f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8])), - })), - _ => RegisterContent::Symbol(reloc.target_symbol), - }; - // Only update the register state for loads. We may store to a reloc - // address but that doesn't update register contents. 
- if !is_store_instruction(op) { - match (op, args[0]) { - // Everything else is a load of some sort - (_, ppc750cl::Argument::GPR(gpr)) => { - current_state[gpr] = content; - } - (_, ppc750cl::Argument::FPR(fpr)) => { - current_state[fpr] = content; - } - _ => {} - } - } - } -} - -fn ppc_data_flow_analysis( - obj: &Object, - func_symbol: &Symbol, - code: &[u8], - relocations: &[Relocation], -) -> Box { - use std::collections::HashSet; - use ppc750cl::InsIter; - use std::collections::VecDeque; - let instructions = InsIter::new(code, func_symbol.address as u32).map(|(_addr, ins)| { - (ins.op, ins.basic().args) - }).collect_vec(); - - let func_address = func_symbol.address; - - // Get initial register values from function parameters - let mut initial_register_state = RegisterState::new(); - initial_register_state.set_potential_inputs(); - - let mut execution_queue = VecDeque::<(usize, RegisterState)>::new(); - execution_queue.push_back((0, initial_register_state)); - - // Execute the instructions against abstract data - let mut failsafe_counter = 0; - let mut taken_branches = HashSet::<(usize, RegisterState)>::new(); - let mut register_state_at = Vec::::new(); - register_state_at.resize_with(instructions.len(), RegisterState::new); - while let Some((mut index, mut current_state)) = execution_queue.pop_front() { - while let Some((op, args)) = instructions.get(index) { - // Record the state at this index - // If recording does not result in any changes to the known values - // we're done, because the subsequent values are a function of the - // current values so we'll get the same result as the last time - // we went down this path. - if !register_state_at[index].unify(¤t_state) { - break; - } - - // Execute the instruction to update the state - execute_instruction(&mut current_state, op, args); - - // Fill in register state coming from relocations at this line. This - // handles references to global variables, floating point constants, - // etc. 
- let cur_addr = (func_address as u32) + ((index * 4) as u32); - fill_registers_from_relocations(&mut current_state, obj, cur_addr, *op, args, relocations); - - // Add conditional branches to execution queue - // Only take a given (address, register state) combination once. If - // the known register state is different we have to take the branch - // again to stabilize the known values for backwards branches. - if op == &ppc750cl::Opcode::Bc { - let branch_state = (index, current_state.clone()); - if !taken_branches.contains(&branch_state) { - let offset = get_branch_offset(args); - let target_index = ((index as i32) + offset) as usize; - execution_queue.push_back((target_index, current_state.clone())); - taken_branches.insert(branch_state); - - // We should never hit this case, but avoid getting stuck in - // an infinite loop if we hit some kind of bad behavior. - failsafe_counter += 1; - if failsafe_counter > 256 { - println!("Analysis of {} failed to stabilize", func_symbol.name); - return Default::default(); - } - } - } - - // Update index - if op == &ppc750cl::Opcode::B { - // Unconditional branch - let offset = get_branch_offset(args); - if offset > 0 { - // Jump table or branch to over else clause. - index += offset as usize; - } else if offset == 0 { - // Function call with relocation. We'll return to - // the next instruction. - index += 1; - } else { - // Unconditional branch (E.g.: loop { ... }) - // Also some compilations of loops put the conditional at - // the end and B to it for the check of the first iteration. 
- let branch_state = (index, current_state.clone()); - if taken_branches.contains(&branch_state) { - break; - } - taken_branches.insert(branch_state); - index = ((index as i32) + offset) as usize; - } - } else { - // Normal execution of next instruction - index += 1; - } - } - } - - // Store the relevant data flow values for simplified instructions - generate_flow_analysis_result(&obj, func_address, code, register_state_at, relocations) -} - -// Write the relevant part of the flow analysis out into the FlowAnalysisResult -// the rest of the application will use to query results of the flow analysis. -// Flow analysis will compute the known contents of every register at every -// line, but we only need to record the values of registers that are actually -// referenced at each line. -fn generate_flow_analysis_result( - obj: &Object, - base_address: u64, - code: &[u8], - register_state_at: Vec::, - relocations: &[Relocation] -) -> Box { - use ppc750cl::{InsIter, Argument, Offset, GPR}; - let mut analysis_result = PPCFlowAnalysisResult::new(); - for (addr, ins) in InsIter::new(code, 0) { - let ins_address = base_address + (addr as u64); - let index = addr / 4; - let ppc750cl::ParsedIns {mnemonic, args} = ins.simplified(); - - // Special case to show float and double constants on the line where - // they are being loaded. - // We need to do this before we break out on showing relocations in the - // subsequent if statement. 
- if ins.op == ppc750cl::Opcode::Lfs || ins.op == ppc750cl::Opcode::Lfd { - // The value is set on the line AFTER the load, get it from there - if let Some(next_state) = register_state_at.get(index as usize + 1) { - // When loading from SDA it will be a relocation so Reg+Offset will both be zero - match (args[0], args[1], args[2]) { - (Argument::FPR(fpr), Argument::Offset(Offset(0)), Argument::GPR(GPR(0))) => { - analysis_result.set_argument_value_at_address(ins_address, 1, - FlowAnalysisValue::Text(format!("{}", next_state[fpr]))); - continue; - } - _ => {} - } - } - } - - // If we're already showing relocations on a line don't also show data flow - if relocations.iter().any(|r| (r.address & !3) == ins_address) { - continue; - } - - let is_store = mnemonic.starts_with("st"); - let default_register_state = RegisterState::new(); - let registers = register_state_at.get(index as usize).unwrap_or(&default_register_state); - for (arg_index, arg) in args.into_iter().enumerate() { - // Hacky shorthand for determining which arguments are sources, - // We only want to show data flow for source registers, not target - // registers. Technically there are some non-"st_" operations which - // read from their first argument but they're rare. 
- if (arg_index == 0) && !is_store { - continue; - } - - let content = match arg { - Argument::GPR(gpr) => Some(registers[gpr]), - Argument::FPR(fpr) => Some(registers[fpr]), - _ => None, - }; - let analysis_value = match content { - Some(RegisterContent::Symbol(s)) => { - obj.symbols.get(s).map(|sym| - FlowAnalysisValue::Text( - clamp_text_length(sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(), 20))) - } - Some(RegisterContent::InputRegister(reg)) => { - let reg_name = match arg { - Argument::GPR(_) => format!("input_r{reg}"), - Argument::FPR(_) => format!("input_f{reg}"), - _ => panic!("Register content should only be in a register"), - }; - Some(FlowAnalysisValue::Text(reg_name)) - } - Some(RegisterContent::Unknown) | Some(RegisterContent::Variable) => None, - Some(value) => Some(FlowAnalysisValue::Text(format!("{value}"))), - None => None, - }; - if let Some(analysis_value) = analysis_value { - analysis_result.set_argument_value_at_address(ins_address, arg_index as u8, analysis_value); - } - } - } - - Box::new(analysis_result) -} - -fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option { - use ppc750cl::Opcode; - match inst_op { - Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), - Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), - Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), - Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), - Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), - Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), - - Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), - Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), - Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), - Opcode::Stfs | Opcode::Stfsu | 
Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), - Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), - _ => None, - } -} - #[derive(Debug)] struct PoolReference { addr_src_gpr: ppc750cl::GPR, @@ -1019,7 +530,7 @@ fn get_pool_reference_for_inst( ) -> Option { use ppc750cl::{Argument, Opcode}; let args = &simplified.args; - if guess_data_type_from_load_store_inst_op(opcode).is_some() { + if flow_analysis::guess_data_type_from_load_store_inst_op(opcode).is_some() { match (args[1], args[2]) { (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { // e.g. lwz. Immediate offset. From 11c530ec02408b59cef1c6e1274b9e6f5e1c000c Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 20:27:05 -0700 Subject: [PATCH 03/12] Run cargo insta review --- objdiff-core/tests/snapshots/arch_arm__read_arm.snap | 1 + objdiff-core/tests/snapshots/arch_arm__read_thumb.snap | 1 + 2 files changed, 2 insertions(+) diff --git a/objdiff-core/tests/snapshots/arch_arm__read_arm.snap b/objdiff-core/tests/snapshots/arch_arm__read_arm.snap index ec9917a0..f3df1fe8 100644 --- a/objdiff-core/tests/snapshots/arch_arm__read_arm.snap +++ b/objdiff-core/tests/snapshots/arch_arm__read_arm.snap @@ -1954,4 +1954,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_arm__read_thumb.snap b/objdiff-core/tests/snapshots/arch_arm__read_thumb.snap index 94eb2105..9e861939 100644 --- a/objdiff-core/tests/snapshots/arch_arm__read_thumb.snap +++ b/objdiff-core/tests/snapshots/arch_arm__read_thumb.snap @@ -3826,4 +3826,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } From 862f7f18670bdfa9fd573bf6676f50682f8d60cf Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 20:38:58 -0700 Subject: [PATCH 04/12] Apply clippy feedback --- objdiff-core/src/arch/ppc/flow_analysis.rs | 265 +++++++++++++-------- 
objdiff-core/src/diff/display.rs | 6 +- objdiff-core/src/obj/read.rs | 2 +- 3 files changed, 166 insertions(+), 107 deletions(-) diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs index 2c51b604..2dfb65f9 100644 --- a/objdiff-core/src/arch/ppc/flow_analysis.rs +++ b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -1,24 +1,43 @@ -use itertools::Itertools; -use ppc750cl::Simm; -use std::ops::{Index, IndexMut}; -use std::collections::BTreeMap; use crate::{ - util::{RawFloat, RawDouble}, - obj::{FlowAnalysisValue, FlowAnalysisResult, Object, Symbol, Relocation}, arch::DataType, + obj::{FlowAnalysisResult, FlowAnalysisValue, Object, Relocation, Symbol}, + util::{RawDouble, RawFloat}, }; +use itertools::Itertools; +use ppc750cl::Simm; +use std::collections::BTreeMap; use std::ffi::CStr; +use std::ops::{Index, IndexMut}; fn is_store_instruction(op: ppc750cl::Opcode) -> bool { use ppc750cl::Opcode; - match op { - Opcode::Stbux | Opcode::Stbx | Opcode::Stfdux | Opcode::Stfdx | Opcode::Stfiwx | - Opcode::Stfsux | Opcode::Stfsx | Opcode::Sthbrx | Opcode::Sthux | Opcode::Sthx | - Opcode::Stswi | Opcode::Stswx | Opcode::Stwbrx | Opcode::Stwcx_ | Opcode::Stwux | - Opcode::Stwx | Opcode::Stwu | Opcode::Stb | Opcode::Stbu | Opcode::Sth | Opcode::Sthu | - Opcode::Stmw | Opcode::Stfs | Opcode::Stfsu | Opcode::Stfd | Opcode::Stfdu => true, - _ => false, - } + matches!(op, + Opcode::Stbux + | Opcode::Stbx + | Opcode::Stfdux + | Opcode::Stfdx + | Opcode::Stfiwx + | Opcode::Stfsux + | Opcode::Stfsx + | Opcode::Sthbrx + | Opcode::Sthux + | Opcode::Sthx + | Opcode::Stswi + | Opcode::Stswx + | Opcode::Stwbrx + | Opcode::Stwcx_ + | Opcode::Stwux + | Opcode::Stwx + | Opcode::Stwu + | Opcode::Stb + | Opcode::Stbu + | Opcode::Sth + | Opcode::Sthu + | Opcode::Stmw + | Opcode::Stfs + | Opcode::Stfsu + | Opcode::Stfd + | Opcode::Stfdu) } pub fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option { @@ -58,8 +77,14 @@ impl 
std::fmt::Display for RegisterContent { RegisterContent::Unknown => write!(f, "unknown"), RegisterContent::Variable => write!(f, "variable"), RegisterContent::IntConstant(i) => - // -i is safe because it's at most a 16 bit constant in the i32 - if *i >= 0 { write!(f, "0x{:x}", i) } else { write!(f, "-0x{:x}", -i) }, + // -i is safe because it's at most a 16 bit constant in the i32 + { + if *i >= 0 { + write!(f, "0x{:x}", i) + } else { + write!(f, "-0x{:x}", -i) + } + } RegisterContent::FloatConstant(RawFloat(fp)) => write!(f, "{fp:?}f"), RegisterContent::DoubleConstant(RawDouble(fp)) => write!(f, "{fp:?}d"), RegisterContent::InputRegister(p) => write!(f, "input{p}"), @@ -76,10 +101,7 @@ struct RegisterState { impl RegisterState { fn new() -> Self { - RegisterState { - gpr: [RegisterContent::Unknown; 32], - fpr: [RegisterContent::Unknown; 32], - } + RegisterState { gpr: [RegisterContent::Unknown; 32], fpr: [RegisterContent::Unknown; 32] } } // During a function call, these registers must be assumed trashed. 
@@ -112,17 +134,15 @@ impl RegisterState { fn unify_values(current: &mut RegisterContent, new: &RegisterContent) -> bool { if *current == *new { false + } else if *current == RegisterContent::Unknown { + *current = *new; + true + } else if *current == RegisterContent::Variable { + // Already variable + false } else { - if *current == RegisterContent::Unknown { - *current = *new; - true - } else if *current == RegisterContent::Variable { - // Already variable - false - } else { - *current = RegisterContent::Variable; - true - } + *current = RegisterContent::Variable; + true } } @@ -164,8 +184,12 @@ impl IndexMut for RegisterState { } } -fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, args: &[ppc750cl::Argument; 5]) { - use ppc750cl::{Opcode, Argument, GPR}; +fn execute_instruction( + registers: &mut RegisterState, + op: &ppc750cl::Opcode, + args: &[ppc750cl::Argument; 5], +) { + use ppc750cl::{Argument, GPR, Opcode}; match (op, args[0], args[1], args[2]) { (Opcode::Or, Argument::GPR(a), Argument::GPR(b), Argument::GPR(c)) => { // Move is implemented as or with self for ints @@ -194,19 +218,37 @@ fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, arg registers.clear_volatile(); } } - (Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | - Opcode::Stfsu | Opcode::Stfdu, _, _, Argument::GPR(rel)) => { + ( + Opcode::Stbu | Opcode::Sthu | Opcode::Stwu | Opcode::Stfsu | Opcode::Stfdu, + _, + _, + Argument::GPR(rel), + ) => { // Storing with update, clear updated register (third arg) registers[rel] = RegisterContent::Unknown; } - (Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | - Opcode::Stfsux | Opcode::Stfdux, _, Argument::GPR(rel), _) => { + ( + Opcode::Stbux | Opcode::Sthux | Opcode::Stwux | Opcode::Stfsux | Opcode::Stfdux, + _, + Argument::GPR(rel), + _, + ) => { // Storing indexed with update, clear updated register (second arg) registers[rel] = RegisterContent::Unknown; } - (Opcode::Stb | Opcode::Sth | Opcode::Stw | - 
Opcode::Stbx | Opcode::Sthx | Opcode::Stwx | - Opcode::Stfs | Opcode::Stfd, _, _, _) => { + ( + Opcode::Stb + | Opcode::Sth + | Opcode::Stw + | Opcode::Stbx + | Opcode::Sthx + | Opcode::Stwx + | Opcode::Stfs + | Opcode::Stfd, + _, + _, + _, + ) => { // Storing, does not change registers } (Opcode::Lmw, Argument::GPR(target), _, _) => { @@ -225,7 +267,6 @@ fn execute_instruction(registers: &mut RegisterState, op: &ppc750cl::Opcode, arg } (_, _, _, _) => {} } - } fn get_branch_offset(args: &[ppc750cl::Argument; 5]) -> i32 { @@ -234,7 +275,7 @@ fn get_branch_offset(args: &[ppc750cl::Argument; 5]) -> i32 { return dest.0 / 4; } } - return 0; + 0 } #[derive(Debug, Default)] @@ -243,7 +284,12 @@ struct PPCFlowAnalysisResult { } impl PPCFlowAnalysisResult { - fn set_argument_value_at_address(&mut self, address: u64, argument: u8, value: FlowAnalysisValue) { + fn set_argument_value_at_address( + &mut self, + address: u64, + argument: u8, + value: FlowAnalysisValue, + ) { self.argument_contents.insert((address, argument), value); } @@ -253,17 +299,17 @@ impl PPCFlowAnalysisResult { } impl FlowAnalysisResult for PPCFlowAnalysisResult { - fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue> { + fn get_argument_value_at_address( + &self, + address: u64, + argument: u8, + ) -> Option<&FlowAnalysisValue> { self.argument_contents.get(&(address, argument)) } } fn clamp_text_length(s: String, max: usize) -> String { - if s.len() <= max { - s - } else { - format!("{}…", s.chars().take(max - 3).collect::()) - } + if s.len() <= max { s } else { format!("{}…", s.chars().take(max - 3).collect::()) } } // Executing op with args at cur_address, update current_state with symbols that @@ -278,14 +324,26 @@ fn fill_registers_from_relocation( ) { let content = if let Some(bytes) = obj.symbol_data(reloc.target_symbol) { match guess_data_type_from_load_store_inst_op(op) { - Some(DataType::Float) => RegisterContent::FloatConstant(RawFloat(match 
obj.endianness { - object::Endianness::Little => f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4])), - object::Endianness::Big => f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4])), - })), - Some(DataType::Double) => RegisterContent::DoubleConstant(RawDouble(match obj.endianness { - object::Endianness::Little => f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])), - object::Endianness::Big => f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8])), - })), + Some(DataType::Float) => { + RegisterContent::FloatConstant(RawFloat(match obj.endianness { + object::Endianness::Little => { + f32::from_le_bytes(bytes.try_into().unwrap_or([0; 4])) + } + object::Endianness::Big => { + f32::from_be_bytes(bytes.try_into().unwrap_or([0; 4])) + } + })) + } + Some(DataType::Double) => { + RegisterContent::DoubleConstant(RawDouble(match obj.endianness { + object::Endianness::Little => { + f64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])) + } + object::Endianness::Big => { + f64::from_be_bytes(bytes.try_into().unwrap_or([0; 8])) + } + })) + } _ => RegisterContent::Symbol(reloc.target_symbol), } } else { @@ -309,14 +367,7 @@ fn fill_registers_from_relocation( // Special helper fragments generated by MWCC. 
// See: https://github.com/encounter/decomp-toolkit/blob/main/src/analysis/pass.rs -const SLEDS: [&str; 6] = [ - "_savefpr_", - "_restfpr_", - "_savegpr_", - "_restgpr_", - "_savev", - "_restv", -]; +const SLEDS: [&str; 6] = ["_savefpr_", "_restfpr_", "_savegpr_", "_restgpr_", "_savev", "_restv"]; fn is_sled_function(name: &str) -> bool { SLEDS.iter().any(|sled| name.starts_with(sled)) @@ -328,12 +379,12 @@ pub fn ppc_data_flow_analysis( code: &[u8], relocations: &[Relocation], ) -> Box { - use std::collections::HashSet; use ppc750cl::InsIter; + use std::collections::HashSet; use std::collections::VecDeque; - let instructions = InsIter::new(code, func_symbol.address as u32).map(|(_addr, ins)| { - (ins.op, ins.basic().args) - }).collect_vec(); + let instructions = InsIter::new(code, func_symbol.address as u32) + .map(|(_addr, ins)| (ins.op, ins.basic().args)) + .collect_vec(); let func_address = func_symbol.address; @@ -366,11 +417,11 @@ pub fn ppc_data_flow_analysis( // Get symbol used in this instruction let cur_addr = (func_address as u32) + ((index * 4) as u32); let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); - + // Is this a branch to a compiler generated helper? These helpers // do not trash registers like normal function calls, so we don't // want to treat this as normal execution. - let symbol = reloc.map(|r| obj.symbols.get(r.target_symbol)).flatten(); + let symbol = reloc.and_then(|r| obj.symbols.get(r.target_symbol)); let is_sled_invocation = symbol.is_some_and(|x| is_sled_function(&x.name)); // Execute the instruction to update the state @@ -385,7 +436,7 @@ pub fn ppc_data_flow_analysis( // handles references to global variables, floating point constants, // etc. 
if let Some(reloc) = reloc { - fill_registers_from_relocation(&reloc, &mut current_state, obj, *op, args); + fill_registers_from_relocation(reloc, &mut current_state, obj, *op, args); } // Add conditional branches to execution queue @@ -445,20 +496,16 @@ pub fn ppc_data_flow_analysis( } // Store the relevant data flow values for simplified instructions - generate_flow_analysis_result(&obj, func_address, code, register_state_at, relocations) + generate_flow_analysis_result(obj, func_address, code, register_state_at, relocations) } -fn get_string_data( - obj: &Object, - symbol_index: usize, - offset: Simm, -) -> Option<&str> { +fn get_string_data(obj: &Object, symbol_index: usize, offset: Simm) -> Option<&str> { if let Some(sym) = obj.symbols.get(symbol_index) { if sym.name.starts_with("@stringBase") && offset.0 != 0 { if let Some(data) = obj.symbol_data(symbol_index) { let bytes = &data[offset.0 as usize..]; if let Ok(Ok(str)) = CStr::from_bytes_until_nul(bytes).map(|x| x.to_str()) { - return Some(str) + return Some(str); } } } @@ -475,16 +522,16 @@ fn generate_flow_analysis_result( obj: &Object, base_address: u64, code: &[u8], - register_state_at: Vec::, - relocations: &[Relocation] + register_state_at: Vec, + relocations: &[Relocation], ) -> Box { - use ppc750cl::{InsIter, Argument, Offset, GPR}; + use ppc750cl::{Argument, GPR, InsIter, Offset}; let mut analysis_result = PPCFlowAnalysisResult::new(); let default_register_state = RegisterState::new(); for (addr, ins) in InsIter::new(code, 0) { let ins_address = base_address + (addr as u64); let index = addr / 4; - let ppc750cl::ParsedIns {mnemonic: _, args} = ins.simplified(); + let ppc750cl::ParsedIns { mnemonic: _, args } = ins.simplified(); // Special case to show float and double constants on the line where // they are being loaded. 
@@ -494,18 +541,20 @@ fn generate_flow_analysis_result( // The value is set on the line AFTER the load, get it from there if let Some(next_state) = register_state_at.get(index as usize + 1) { // When loading from SDA it will be a relocation so Reg+Offset will both be zero - match (args[0], args[1], args[2]) { - (Argument::FPR(fpr), Argument::Offset(Offset(0)), Argument::GPR(GPR(0))) => { - if let RegisterContent::Symbol(_index) = next_state[fpr] { - // We loaded a global variable, not a constant, - // don't do anything for this case. - } else { - analysis_result.set_argument_value_at_address(ins_address, 1, - FlowAnalysisValue::Text(format!("{}", next_state[fpr]))); - continue; - } + if let (Argument::FPR(fpr), Argument::Offset(Offset(0)), Argument::GPR(GPR(0))) = + (args[0], args[1], args[2]) + { + if let RegisterContent::Symbol(_index) = next_state[fpr] { + // We loaded a global variable, not a constant, + // don't do anything for this case. + } else { + analysis_result.set_argument_value_at_address( + ins_address, + 1, + FlowAnalysisValue::Text(format!("{}", next_state[fpr])), + ); + continue; } - _ => {} } } } @@ -513,13 +562,18 @@ fn generate_flow_analysis_result( // Special case to show string constants on the line where they are // being indexed to. 
This will typically be "addi t, stringbase, offset" let registers = register_state_at.get(index as usize).unwrap_or(&default_register_state); - if let (ppc750cl::Opcode::Addi, Argument::GPR(rel), Argument::Simm(offset)) = (ins.op, args[1], args[2]) { + if let (ppc750cl::Opcode::Addi, Argument::GPR(rel), Argument::Simm(offset)) = + (ins.op, args[1], args[2]) + { if let RegisterContent::Symbol(sym_index) = registers[rel] { if let Some(str) = get_string_data(obj, sym_index, offset) { // Show the string constant in the analysis result let formatted = format!("\"{}\"", str); - analysis_result.set_argument_value_at_address(ins_address, 2, - FlowAnalysisValue::Text(clamp_text_length(formatted, 20))); + analysis_result.set_argument_value_at_address( + ins_address, + 2, + FlowAnalysisValue::Text(clamp_text_length(formatted, 20)), + ); // Don't continue, we want to show the stringbase value as well } } @@ -546,11 +600,12 @@ fn generate_flow_analysis_result( _ => None, }; let analysis_value = match content { - Some(RegisterContent::Symbol(s)) => { - obj.symbols.get(s).map(|sym| - FlowAnalysisValue::Text( - clamp_text_length(sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(), 20))) - } + Some(RegisterContent::Symbol(s)) => obj.symbols.get(s).map(|sym| { + FlowAnalysisValue::Text(clamp_text_length( + sym.demangled_name.as_ref().unwrap_or(&sym.name).clone(), + 20, + )) + }), Some(RegisterContent::InputRegister(reg)) => { let reg_name = match arg { Argument::GPR(_) => format!("input_r{reg}"), @@ -564,10 +619,14 @@ fn generate_flow_analysis_result( None => None, }; if let Some(analysis_value) = analysis_value { - analysis_result.set_argument_value_at_address(ins_address, arg_index as u8, analysis_value); + analysis_result.set_argument_value_at_address( + ins_address, + arg_index as u8, + analysis_value, + ); } } } Box::new(analysis_result) -} \ No newline at end of file +} diff --git a/objdiff-core/src/diff/display.rs b/objdiff-core/src/diff/display.rs index 
503228cc..1690e980 100644 --- a/objdiff-core/src/diff/display.rs +++ b/objdiff-core/src/diff/display.rs @@ -213,9 +213,9 @@ pub fn display_row( displayed_relocation = true; } let data_flow_value = - analysis_result.map(|result| + analysis_result.and_then(|result| result.as_ref().get_argument_value_at_address( - ins_ref.address, (arg_idx - 1) as u8)).flatten(); + ins_ref.address, (arg_idx - 1) as u8)); match (arg, data_flow_value, resolved.ins_ref.branch_dest) { // If we have a flow analysis result, always use that over anything else. (InstructionArg::Value(_) | InstructionArg::Reloc, Some(FlowAnalysisValue::Text(text)), _) => { @@ -231,7 +231,7 @@ pub fn display_row( .map_or(base_color, |i| DiffTextColor::Rotating(i as u8)); cb(DiffTextSegment { text: DiffText::Argument(value), - color: color, + color, pad_to: 0, }) }, diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index ec5727e7..3bd28f17 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -909,7 +909,7 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result { // Need to construct the obj first so that we have a convinient package to // pass to flow analysis. Then the flow analysis will mutate obj adding // additional data to it. - perform_data_flow_analysis(&mut obj, &config)?; + perform_data_flow_analysis(&mut obj, config)?; Ok(obj) } From fcbe72bdb006d89752a86285492ee9300892ee8b Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 21:12:04 -0700 Subject: [PATCH 05/12] Update more tests. 
--- objdiff-core/tests/snapshots/arch_mips__read_mips.snap | 1 + objdiff-core/tests/snapshots/arch_ppc__read_extab.snap | 1 + objdiff-core/tests/snapshots/arch_ppc__read_ppc.snap | 1 + 3 files changed, 3 insertions(+) diff --git a/objdiff-core/tests/snapshots/arch_mips__read_mips.snap b/objdiff-core/tests/snapshots/arch_mips__read_mips.snap index b24dd19f..b67679a7 100644 --- a/objdiff-core/tests/snapshots/arch_mips__read_mips.snap +++ b/objdiff-core/tests/snapshots/arch_mips__read_mips.snap @@ -1490,4 +1490,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_ppc__read_extab.snap b/objdiff-core/tests/snapshots/arch_ppc__read_extab.snap index 0753fd77..909b2a32 100644 --- a/objdiff-core/tests/snapshots/arch_ppc__read_extab.snap +++ b/objdiff-core/tests/snapshots/arch_ppc__read_extab.snap @@ -548,4 +548,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_ppc__read_ppc.snap b/objdiff-core/tests/snapshots/arch_ppc__read_ppc.snap index 6c73593d..32c49c19 100644 --- a/objdiff-core/tests/snapshots/arch_ppc__read_ppc.snap +++ b/objdiff-core/tests/snapshots/arch_ppc__read_ppc.snap @@ -581,4 +581,5 @@ Object { ), path: None, timestamp: None, + flow_analysis_results: {}, } From f37fab58fd300e0f13917fe022e40d69951cb534 Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 21:40:34 -0700 Subject: [PATCH 06/12] Remove std use from ppc flow analysis --- objdiff-core/src/arch/ppc/flow_analysis.rs | 19 ++++++++--------- objdiff-core/src/arch/ppc/mod.rs | 2 +- objdiff-core/src/obj/mod.rs | 5 ++--- objdiff-core/src/util.rs | 24 ++++++++++++++-------- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs index 2dfb65f9..cbb38e96 100644 --- a/objdiff-core/src/arch/ppc/flow_analysis.rs +++ 
b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -5,9 +5,9 @@ use crate::{ }; use itertools::Itertools; use ppc750cl::Simm; -use std::collections::BTreeMap; -use std::ffi::CStr; -use std::ops::{Index, IndexMut}; +use alloc::collections::{BTreeMap, BTreeSet}; +use core::ffi::CStr; +use core::ops::{Index, IndexMut}; fn is_store_instruction(op: ppc750cl::Opcode) -> bool { use ppc750cl::Opcode; @@ -59,7 +59,7 @@ pub fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Opt } } -#[derive(Default, PartialEq, Eq, Copy, Hash, Clone, Debug)] +#[derive(Default, PartialEq, Eq, Copy, Clone, Debug, PartialOrd, Ord)] enum RegisterContent { #[default] Unknown, @@ -71,8 +71,8 @@ enum RegisterContent { Symbol(usize), } -impl std::fmt::Display for RegisterContent { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl core::fmt::Display for RegisterContent { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { RegisterContent::Unknown => write!(f, "unknown"), RegisterContent::Variable => write!(f, "variable"), @@ -93,7 +93,7 @@ impl std::fmt::Display for RegisterContent { } } -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Clone, PartialEq, Eq, Ord, PartialOrd)] struct RegisterState { gpr: [RegisterContent; 32], fpr: [RegisterContent; 32], @@ -380,8 +380,7 @@ pub fn ppc_data_flow_analysis( relocations: &[Relocation], ) -> Box { use ppc750cl::InsIter; - use std::collections::HashSet; - use std::collections::VecDeque; + use alloc::collections::VecDeque; let instructions = InsIter::new(code, func_symbol.address as u32) .map(|(_addr, ins)| (ins.op, ins.basic().args)) .collect_vec(); @@ -397,7 +396,7 @@ pub fn ppc_data_flow_analysis( // Execute the instructions against abstract data let mut failsafe_counter = 0; - let mut taken_branches = HashSet::<(usize, RegisterState)>::new(); + let mut taken_branches = BTreeSet::<(usize, RegisterState)>::new(); let mut register_state_at = Vec::::new(); let mut 
completed_first_pass = false; register_state_at.resize_with(instructions.len(), RegisterState::new); diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index 734ca10c..4aec52c1 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -662,7 +662,7 @@ fn make_fake_pool_reloc( // and returns a Vec of "fake pool relocations" that simulate what a relocation for that instruction // would look like if data hadn't been pooled. // This method tries to follow the function's proper control flow. It keeps track of a queue of -// states it hasn't traversed yet, where each state holds an instruction address and a HashMap of +// states it hasn't traversed yet, where each state holds an instruction address and a map of // which registers hold which pool relocations at that point. // When a conditional or unconditional branch is encountered, the destination of the branch is added // to the queue. Conditional branches will traverse both the path where the branch is taken and the diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 42ec8c6d..044f339e 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -13,7 +13,6 @@ use core::{ fmt, num::{NonZeroU32, NonZeroU64}, }; -use std::collections::HashMap; use flagset::{FlagSet, flags}; @@ -270,7 +269,7 @@ pub struct Object { pub path: Option, #[cfg(feature = "std")] pub timestamp: Option, - pub flow_analysis_results: HashMap>, + pub flow_analysis_results: BTreeMap>, } impl Default for Object { @@ -285,7 +284,7 @@ impl Default for Object { path: None, #[cfg(feature = "std")] timestamp: None, - flow_analysis_results: HashMap::>::new(), + flow_analysis_results: BTreeMap::>::new(), } } } diff --git a/objdiff-core/src/util.rs b/objdiff-core/src/util.rs index b7323a4a..3c81b73c 100644 --- a/objdiff-core/src/util.rs +++ b/objdiff-core/src/util.rs @@ -5,8 +5,6 @@ use anyhow::{Result, ensure}; use num_traits::PrimInt; use object::{Endian, 
Object}; -use std::hash::{Hash, Hasher}; - // https://stackoverflow.com/questions/44711012/how-do-i-format-a-signed-integer-to-a-sign-aware-hexadecimal-representation pub struct ReallySigned(pub N); @@ -72,9 +70,14 @@ impl PartialEq for RawFloat { } } impl Eq for RawFloat {} -impl Hash for RawFloat { - fn hash(&self, state: &mut H) { - self.0.to_bits().hash(state) +impl Ord for RawFloat { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.0.to_bits().cmp(&other.0.to_bits()) + } +} +impl PartialOrd for RawFloat { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } @@ -88,8 +91,13 @@ impl PartialEq for RawDouble { } } impl Eq for RawDouble {} -impl Hash for RawDouble { - fn hash(&self, state: &mut H) { - self.0.to_bits().hash(state) +impl Ord for RawDouble { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.0.to_bits().cmp(&other.0.to_bits()) + } +} +impl PartialOrd for RawDouble { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } From a7f59b2c099b7ef9a43480b905fdcc69f5139f6e Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 21:56:28 -0700 Subject: [PATCH 07/12] Try to make wasm build work again --- objdiff-core/src/arch/ppc/flow_analysis.rs | 63 +++++++++++----------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs index cbb38e96..2c81c67f 100644 --- a/objdiff-core/src/arch/ppc/flow_analysis.rs +++ b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -3,41 +3,44 @@ use crate::{ obj::{FlowAnalysisResult, FlowAnalysisValue, Object, Relocation, Symbol}, util::{RawDouble, RawFloat}, }; -use itertools::Itertools; -use ppc750cl::Simm; use alloc::collections::{BTreeMap, BTreeSet}; +use alloc::{boxed::Box, format, string::String, vec::Vec}; use core::ffi::CStr; use core::ops::{Index, IndexMut}; +use itertools::Itertools; +use ppc750cl::Simm; fn is_store_instruction(op: ppc750cl::Opcode) 
-> bool { use ppc750cl::Opcode; - matches!(op, + matches!( + op, Opcode::Stbux - | Opcode::Stbx - | Opcode::Stfdux - | Opcode::Stfdx - | Opcode::Stfiwx - | Opcode::Stfsux - | Opcode::Stfsx - | Opcode::Sthbrx - | Opcode::Sthux - | Opcode::Sthx - | Opcode::Stswi - | Opcode::Stswx - | Opcode::Stwbrx - | Opcode::Stwcx_ - | Opcode::Stwux - | Opcode::Stwx - | Opcode::Stwu - | Opcode::Stb - | Opcode::Stbu - | Opcode::Sth - | Opcode::Sthu - | Opcode::Stmw - | Opcode::Stfs - | Opcode::Stfsu - | Opcode::Stfd - | Opcode::Stfdu) + | Opcode::Stbx + | Opcode::Stfdux + | Opcode::Stfdx + | Opcode::Stfiwx + | Opcode::Stfsux + | Opcode::Stfsx + | Opcode::Sthbrx + | Opcode::Sthux + | Opcode::Sthx + | Opcode::Stswi + | Opcode::Stswx + | Opcode::Stwbrx + | Opcode::Stwcx_ + | Opcode::Stwux + | Opcode::Stwx + | Opcode::Stwu + | Opcode::Stb + | Opcode::Stbu + | Opcode::Sth + | Opcode::Sthu + | Opcode::Stmw + | Opcode::Stfs + | Opcode::Stfsu + | Opcode::Stfd + | Opcode::Stfdu + ) } pub fn guess_data_type_from_load_store_inst_op(inst_op: ppc750cl::Opcode) -> Option { @@ -379,8 +382,8 @@ pub fn ppc_data_flow_analysis( code: &[u8], relocations: &[Relocation], ) -> Box { - use ppc750cl::InsIter; use alloc::collections::VecDeque; + use ppc750cl::InsIter; let instructions = InsIter::new(code, func_symbol.address as u32) .map(|(_addr, ins)| (ins.op, ins.basic().args)) .collect_vec(); @@ -454,7 +457,7 @@ pub fn ppc_data_flow_analysis( // an infinite loop if we hit some kind of bad behavior. 
failsafe_counter += 1; if failsafe_counter > 256 { - println!("Analysis of {} failed to stabilize", func_symbol.name); + //println!("Analysis of {} failed to stabilize", func_symbol.name); return Box::new(PPCFlowAnalysisResult::new()); } } From e1a008694f89d3606176adf3f0e5e0058cf23b73 Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 21:58:48 -0700 Subject: [PATCH 08/12] More test changes --- objdiff-core/tests/snapshots/arch_x86__read_x86.snap | 1 + objdiff-core/tests/snapshots/arch_x86__read_x86_64.snap | 1 + objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable.snap | 1 + .../tests/snapshots/arch_x86__read_x86_local_labels.snap | 1 + 4 files changed, 4 insertions(+) diff --git a/objdiff-core/tests/snapshots/arch_x86__read_x86.snap b/objdiff-core/tests/snapshots/arch_x86__read_x86.snap index dfba4557..92dc5cc0 100644 --- a/objdiff-core/tests/snapshots/arch_x86__read_x86.snap +++ b/objdiff-core/tests/snapshots/arch_x86__read_x86.snap @@ -207,4 +207,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_x86__read_x86_64.snap b/objdiff-core/tests/snapshots/arch_x86__read_x86_64.snap index 3c684df5..cc95831a 100644 --- a/objdiff-core/tests/snapshots/arch_x86__read_x86_64.snap +++ b/objdiff-core/tests/snapshots/arch_x86__read_x86_64.snap @@ -1574,4 +1574,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable.snap b/objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable.snap index c77cc991..6ef775b6 100644 --- a/objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable.snap +++ b/objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable.snap @@ -311,4 +311,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } diff --git a/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap 
b/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap index 95929d75..3e5131fe 100644 --- a/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap +++ b/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap @@ -160,4 +160,5 @@ Object { split_meta: None, path: None, timestamp: None, + flow_analysis_results: {}, } From ad01306472181caf842f531a94c0108ed1e10b03 Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Sun, 8 Jun 2025 22:04:34 -0700 Subject: [PATCH 09/12] Probably last wasm fix --- objdiff-core/src/arch/ppc/mod.rs | 1 + objdiff-core/src/obj/mod.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index 4aec52c1..0eaf37b2 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -3,6 +3,7 @@ use alloc::{ string::{String, ToString}, vec, vec::Vec, + boxed::Box, }; use anyhow::{Result, bail, ensure}; diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 044f339e..061103e7 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -238,7 +238,7 @@ pub enum FlowAnalysisValue { Text(String), } -pub trait FlowAnalysisResult : std::fmt::Debug + Send { +pub trait FlowAnalysisResult : core::fmt::Debug + Send { fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue>; } From 8bf0629747dd19b28118b635991f86a920ec9c6b Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Mon, 9 Jun 2025 10:07:37 -0700 Subject: [PATCH 10/12] Formatting --- objdiff-core/src/arch/mod.rs | 12 +++++-- objdiff-core/src/arch/ppc/flow_analysis.rs | 40 ++++++++++++---------- objdiff-core/src/arch/ppc/mod.rs | 6 ++-- objdiff-core/src/diff/display.rs | 11 +++--- objdiff-core/src/obj/mod.rs | 14 ++++---- objdiff-core/src/obj/read.rs | 14 +++----- objdiff-core/src/util.rs | 24 ++++--------- objdiff-gui/src/views/appearance.rs | 2 +- objdiff-gui/src/views/diff.rs | 3 +- 
objdiff-gui/src/views/symbol_diff.rs | 3 +- 10 files changed, 65 insertions(+), 64 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 321edc08..40eddff0 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -1,5 +1,8 @@ use alloc::{borrow::Cow, boxed::Box, format, string::String, vec::Vec}; -use core::{ffi::CStr, fmt::{self, Debug}}; +use core::{ + ffi::CStr, + fmt::{self, Debug}, +}; use anyhow::{Result, bail}; use encoding_rs::SHIFT_JIS; @@ -7,10 +10,13 @@ use object::Endian as _; use crate::{ diff::{ - display::{ContextItem, HoverItem, InstructionPart}, DiffObjConfig + DiffObjConfig, + display::{ContextItem, HoverItem, InstructionPart}, }, obj::{ - FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, SymbolKind + FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, + RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, + SymbolKind, }, util::ReallySigned, }; diff --git a/objdiff-core/src/arch/ppc/flow_analysis.rs b/objdiff-core/src/arch/ppc/flow_analysis.rs index 2c81c67f..ddbfe125 100644 --- a/objdiff-core/src/arch/ppc/flow_analysis.rs +++ b/objdiff-core/src/arch/ppc/flow_analysis.rs @@ -1,14 +1,23 @@ +use alloc::{ + boxed::Box, + collections::{BTreeMap, BTreeSet}, + format, + string::String, + vec::Vec, +}; +use core::{ + ffi::CStr, + ops::{Index, IndexMut}, +}; + +use itertools::Itertools; +use ppc750cl::Simm; + use crate::{ arch::DataType, obj::{FlowAnalysisResult, FlowAnalysisValue, Object, Relocation, Symbol}, util::{RawDouble, RawFloat}, }; -use alloc::collections::{BTreeMap, BTreeSet}; -use alloc::{boxed::Box, format, string::String, vec::Vec}; -use core::ffi::CStr; -use core::ops::{Index, IndexMut}; -use itertools::Itertools; -use ppc750cl::Simm; fn is_store_instruction(op: 
ppc750cl::Opcode) -> bool { use ppc750cl::Opcode; @@ -165,9 +174,8 @@ impl RegisterState { impl Index for RegisterState { type Output = RegisterContent; - fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { - &self.gpr[gpr.0 as usize] - } + + fn index(&self, gpr: ppc750cl::GPR) -> &Self::Output { &self.gpr[gpr.0 as usize] } } impl IndexMut for RegisterState { fn index_mut(&mut self, gpr: ppc750cl::GPR) -> &mut Self::Output { @@ -177,9 +185,8 @@ impl IndexMut for RegisterState { impl Index for RegisterState { type Output = RegisterContent; - fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { - &self.fpr[fpr.0 as usize] - } + + fn index(&self, fpr: ppc750cl::FPR) -> &Self::Output { &self.fpr[fpr.0 as usize] } } impl IndexMut for RegisterState { fn index_mut(&mut self, fpr: ppc750cl::FPR) -> &mut Self::Output { @@ -296,9 +303,7 @@ impl PPCFlowAnalysisResult { self.argument_contents.insert((address, argument), value); } - fn new() -> Self { - PPCFlowAnalysisResult { argument_contents: Default::default() } - } + fn new() -> Self { PPCFlowAnalysisResult { argument_contents: Default::default() } } } impl FlowAnalysisResult for PPCFlowAnalysisResult { @@ -372,9 +377,7 @@ fn fill_registers_from_relocation( // See: https://github.com/encounter/decomp-toolkit/blob/main/src/analysis/pass.rs const SLEDS: [&str; 6] = ["_savefpr_", "_restfpr_", "_savegpr_", "_restgpr_", "_savev", "_restv"]; -fn is_sled_function(name: &str) -> bool { - SLEDS.iter().any(|sled| name.starts_with(sled)) -} +fn is_sled_function(name: &str) -> bool { SLEDS.iter().any(|sled| name.starts_with(sled)) } pub fn ppc_data_flow_analysis( obj: &Object, @@ -383,6 +386,7 @@ pub fn ppc_data_flow_analysis( relocations: &[Relocation], ) -> Box { use alloc::collections::VecDeque; + use ppc750cl::InsIter; let instructions = InsIter::new(code, func_symbol.address as u32) .map(|(_addr, ins)| (ins.op, ins.basic().args)) diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index 
0eaf37b2..a968ba46 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -1,9 +1,9 @@ use alloc::{ + boxed::Box, collections::{BTreeMap, BTreeSet}, string::{String, ToString}, vec, vec::Vec, - boxed::Box, }; use anyhow::{Result, bail, ensure}; @@ -20,7 +20,7 @@ use crate::{ }, obj::{ FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags, - ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet + ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet, }, }; @@ -170,7 +170,7 @@ impl Arch for ArchPpc { ) -> Vec { generate_fake_pool_relocations_for_function(address, code, relocations, symbols) } - + fn data_flow_analysis( &self, obj: &Object, diff --git a/objdiff-core/src/diff/display.rs b/objdiff-core/src/diff/display.rs index 1690e980..439847d1 100644 --- a/objdiff-core/src/diff/display.rs +++ b/objdiff-core/src/diff/display.rs @@ -12,9 +12,12 @@ use itertools::Itertools; use regex::Regex; use crate::{ - diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff}, obj::{ - FlowAnalysisValue, InstructionArg, InstructionArgValue, Object, ParsedInstruction, ResolvedInstructionRef, ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag, SymbolKind - } + diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff}, + obj::{ + FlowAnalysisValue, InstructionArg, InstructionArgValue, Object, ParsedInstruction, + ResolvedInstructionRef, ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag, + SymbolKind, + }, }; #[derive(Debug, Clone)] @@ -44,7 +47,7 @@ pub enum DiffText<'a> { #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] pub enum DiffTextColor { #[default] - Normal, // Grey + Normal, // Grey Dim, // Dark grey Bright, // White DataFlow, // Light blue diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 061103e7..94f0228d 100644 --- a/objdiff-core/src/obj/mod.rs +++ 
b/objdiff-core/src/obj/mod.rs @@ -238,8 +238,12 @@ pub enum FlowAnalysisValue { Text(String), } -pub trait FlowAnalysisResult : core::fmt::Debug + Send { - fn get_argument_value_at_address(&self, address: u64, argument: u8) -> Option<&FlowAnalysisValue>; +pub trait FlowAnalysisResult: core::fmt::Debug + Send { + fn get_argument_value_at_address( + &self, + address: u64, + argument: u8, + ) -> Option<&FlowAnalysisValue>; } #[derive(Debug, Clone, Eq, PartialEq, Hash, Default)] @@ -323,10 +327,8 @@ impl Object { pub fn symbol_by_name(&self, name: &str) -> Option { self.symbols.iter().position(|symbol| symbol.section.is_some() && symbol.name == name) } - - pub fn has_flow_analysis_result(&self) -> bool { - !self.flow_analysis_results.is_empty() - } + + pub fn has_flow_analysis_result(&self) -> bool { !self.flow_analysis_results.is_empty() } } #[derive(Debug, Clone, Eq, PartialEq, Hash)] diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index 3bd28f17..f00fa8b5 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -467,20 +467,16 @@ fn perform_data_flow_analysis(obj: &mut Object, config: &DiffObjConfig) -> Resul symbol.address, code, §ion.relocations, - &obj.symbols); + &obj.symbols, + ); generated_relocations.push((section_index, relocations)); } // Optional full data flow analysis if config.analyze_data_flow { - obj.arch.data_flow_analysis( - &obj, - symbol, - code, - §ion.relocations, - ).and_then(|flow_result| { - obj.flow_analysis_results.insert(symbol.address, flow_result) - }); + obj.arch.data_flow_analysis(&obj, symbol, code, §ion.relocations).and_then( + |flow_result| obj.flow_analysis_results.insert(symbol.address, flow_result), + ); } } } diff --git a/objdiff-core/src/util.rs b/objdiff-core/src/util.rs index 3c81b73c..1651f205 100644 --- a/objdiff-core/src/util.rs +++ b/objdiff-core/src/util.rs @@ -65,20 +65,14 @@ pub fn align_data_slice_to(data: &mut Vec, align: u64) { #[derive(Copy, Clone, Debug)] pub struct 
RawFloat(pub f32); impl PartialEq for RawFloat { - fn eq(&self, other: &Self) -> bool { - self.0.to_bits() == other.0.to_bits() - } + fn eq(&self, other: &Self) -> bool { self.0.to_bits() == other.0.to_bits() } } impl Eq for RawFloat {} impl Ord for RawFloat { - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.to_bits().cmp(&other.0.to_bits()) - } + fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.to_bits().cmp(&other.0.to_bits()) } } impl PartialOrd for RawFloat { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } // Double where we specifically care about comparing the raw bits rather than @@ -86,18 +80,12 @@ impl PartialOrd for RawFloat { #[derive(Copy, Clone, Debug)] pub struct RawDouble(pub f64); impl PartialEq for RawDouble { - fn eq(&self, other: &Self) -> bool { - self.0.to_bits() == other.0.to_bits() - } + fn eq(&self, other: &Self) -> bool { self.0.to_bits() == other.0.to_bits() } } impl Eq for RawDouble {} impl Ord for RawDouble { - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.to_bits().cmp(&other.0.to_bits()) - } + fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.0.to_bits().cmp(&other.0.to_bits()) } } impl PartialOrd for RawDouble { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } diff --git a/objdiff-gui/src/views/appearance.rs b/objdiff-gui/src/views/appearance.rs index 6a106f56..6f9acbb2 100644 --- a/objdiff-gui/src/views/appearance.rs +++ b/objdiff-gui/src/views/appearance.rs @@ -23,7 +23,7 @@ pub struct Appearance { #[serde(skip)] pub highlight_color: Color32, // WHITE #[serde(skip)] - pub dataflow_color: Color32, // + pub dataflow_color: Color32, // #[serde(skip)] pub replace_color: Color32, // LIGHT_BLUE #[serde(skip)] diff --git a/objdiff-gui/src/views/diff.rs b/objdiff-gui/src/views/diff.rs 
index 5266d8b8..dbf31ab3 100644 --- a/objdiff-gui/src/views/diff.rs +++ b/objdiff-gui/src/views/diff.rs @@ -284,7 +284,8 @@ pub fn diff_view_ui( // Only need to check the first Object. Technically the first could not have a flow analysis // result while the second does but we don't want to waste space on two separate checkboxes. - if result.first_obj.as_ref().is_some_and(|(first, _)| first.has_flow_analysis_result()) { + if result.first_obj.as_ref().is_some_and(|(first, _)| first.has_flow_analysis_result()) + { let mut placeholder = diff_config.show_data_flow; if ui .checkbox(&mut placeholder, "Show data flow") diff --git a/objdiff-gui/src/views/symbol_diff.rs b/objdiff-gui/src/views/symbol_diff.rs index ca6038bf..15ad3b75 100644 --- a/objdiff-gui/src/views/symbol_diff.rs +++ b/objdiff-gui/src/views/symbol_diff.rs @@ -356,7 +356,8 @@ impl DiffViewState { let Ok(mut state) = state.write() else { return; }; - state.config.diff_obj_config.show_data_flow = !state.config.diff_obj_config.show_data_flow; + state.config.diff_obj_config.show_data_flow = + !state.config.diff_obj_config.show_data_flow; } } } From 39d9fa667be81267a1baa3189a89c07f4859987b Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Mon, 9 Jun 2025 10:45:17 -0700 Subject: [PATCH 11/12] Fix WASM --- objdiff-wasm/src/api.rs | 1 + objdiff-wasm/wit/objdiff.wit | 1 + 2 files changed, 2 insertions(+) diff --git a/objdiff-wasm/src/api.rs b/objdiff-wasm/src/api.rs index 9110e4bf..fa6af650 100644 --- a/objdiff-wasm/src/api.rs +++ b/objdiff-wasm/src/api.rs @@ -401,6 +401,7 @@ impl From for DiffTextColor { diff::display::DiffTextColor::Replace => DiffTextColor::Replace, diff::display::DiffTextColor::Delete => DiffTextColor::Delete, diff::display::DiffTextColor::Insert => DiffTextColor::Insert, + diff::display::DiffTextColor::DataFlow => DiffTextColor::DataFlow, diff::display::DiffTextColor::Rotating(v) => DiffTextColor::Rotating(v), } } diff --git a/objdiff-wasm/wit/objdiff.wit b/objdiff-wasm/wit/objdiff.wit index 
6e9598f8..31b781e3 100644 --- a/objdiff-wasm/wit/objdiff.wit +++ b/objdiff-wasm/wit/objdiff.wit @@ -201,6 +201,7 @@ interface display { dim, bright, replace, + data-flow, delete, insert, rotating(u8), From 975d4a11b560e5157c0eee7a225fe1c957c3c2d4 Mon Sep 17 00:00:00 2001 From: Mark Langen Date: Mon, 9 Jun 2025 10:51:53 -0700 Subject: [PATCH 12/12] One more clippy thing --- objdiff-core/src/obj/read.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index f00fa8b5..f6934875 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -474,7 +474,7 @@ fn perform_data_flow_analysis(obj: &mut Object, config: &DiffObjConfig) -> Resul // Optional full data flow analysis if config.analyze_data_flow { - obj.arch.data_flow_analysis(&obj, symbol, code, §ion.relocations).and_then( + obj.arch.data_flow_analysis(obj, symbol, code, §ion.relocations).and_then( |flow_result| obj.flow_analysis_results.insert(symbol.address, flow_result), ); }