Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 109 additions & 4 deletions meld-core/src/adapter/fact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3260,8 +3260,9 @@ impl FactStyleGenerator {
let caller_param_count = caller_type.params.len();
let _caller_result_count = caller_type.results.len();

// Find callee's memory index for the event buffer scratch space
// Find memory indices for cross-memory operations
let callee_memory = crate::merger::component_memory_index(merged, site.to_component);
let caller_memory = crate::merger::component_memory_index(merged, site.from_component);

// Determine the [async-lift] entry's param count from its type.
// The caller may have extra params (e.g., retptr for multi-value results)
Expand Down Expand Up @@ -3289,8 +3290,98 @@ impl FactStyleGenerator {
let l_p1 = l_packed + 4;
let l_p2 = l_packed + 5;

// 6 locals for callback loop + 3 for string copy (src_ptr, src_len, dst_ptr)
let mut body = Function::new([(9, wasm_encoder::ValType::I32)]);
// 6 locals for callback loop + 4 for string copy (src_ptr, src_len, dst_ptr, new_ptr)
let mut body = Function::new([(10, wasm_encoder::ValType::I32)]);

// Step 0.5: Copy string/list params from caller to callee memory.
//
// The pointer_pair_positions from the resolver are in CALLEE component
// type order. But the adapter's locals are in CALLER order (from the
// caller's canon lower). These may differ if the component type
// reorders params.
//
// Instead of using the resolver's positions, compute positions from
// the caller's flat param types: find (i32, i32) pairs that could be
// (ptr, len) strings/lists.
let callee_realloc = crate::merger::component_realloc_index(merged, site.to_component);

// Detect pointer pairs in caller params: consecutive (i32, i32) pairs,
// excluding the trailing retptr when one is present. This is a heuristic:
// string and list params are always (ptr: i32, len: i32), but the scan
// matches on value types alone, so two adjacent plain i32 params also match.
let caller_ptr_positions: Vec<u32> = if site.crosses_memory && callee_realloc.is_some() {
let params = &caller_type.params;
let has_retptr =
caller_type.results.is_empty() && caller_param_count > callee_param_count;
let effective_len = if has_retptr {
params.len() - 1
} else {
params.len()
};
let mut positions = Vec::new();
let mut i = 0;
while i + 1 < effective_len {
if params[i] == wasm_encoder::ValType::I32
&& params[i + 1] == wasm_encoder::ValType::I32
{
// Only accept the pair if the resolver reported at least one pointer
// pair for this call site. NOTE: this checks that the resolver's list
// is non-empty; it does not check that position `i` is in that list.
if site
.requirements
.pointer_pair_positions
.iter()
.any(|_| true)
{
positions.push(i as u32);
i += 2; // skip the len
continue;
}
}
i += 1;
}
positions
} else {
Vec::new()
};

let has_param_copies = !caller_ptr_positions.is_empty();

if has_param_copies {
log::debug!(
"async adapter param copy: export={} caller_positions={:?} resolver_positions={:?}",
site.export_name,
caller_ptr_positions,
site.requirements.pointer_pair_positions,
);
let realloc = callee_realloc.unwrap();
// For each (ptr, len) pair in the caller's params, allocate in
// callee memory and copy the data from caller memory.
for &ptr_pos in &caller_ptr_positions {
let ptr_local = ptr_pos;
let len_local = ptr_local + 1;
let l_new_ptr = l_p2 + 4; // reuse scratch local

// Allocate in callee memory: cabi_realloc(0, 0, 1, len)
body.instruction(&Instruction::I32Const(0));
body.instruction(&Instruction::I32Const(0));
body.instruction(&Instruction::I32Const(1));
body.instruction(&Instruction::LocalGet(len_local));
body.instruction(&Instruction::Call(realloc));
body.instruction(&Instruction::LocalSet(l_new_ptr));

// Copy from caller memory to callee memory
body.instruction(&Instruction::LocalGet(l_new_ptr)); // dst
body.instruction(&Instruction::LocalGet(ptr_local)); // src
body.instruction(&Instruction::LocalGet(len_local)); // len
body.instruction(&Instruction::MemoryCopy {
dst_mem: callee_memory,
src_mem: caller_memory,
});

// Replace the ptr param with the new callee-memory ptr
body.instruction(&Instruction::LocalGet(l_new_ptr));
body.instruction(&Instruction::LocalSet(ptr_local));
}
}

// Step 1: Call [async-lift] entry with callee's params
// (skip retptr if caller has more params than callee)
Expand Down Expand Up @@ -3420,6 +3511,21 @@ impl FactStyleGenerator {
.map(|(_, name)| name)
.unwrap_or(&site.export_name);

log::debug!(
"async adapter shim lookup: func_name='{}' to_comp={} shims={:?}",
adapter_func_name,
site.to_component,
merged
.task_return_shims
.values()
.map(|s| (
s.component_idx,
s.original_func_name.as_str(),
s.result_globals.iter().map(|(g, _)| *g).collect::<Vec<_>>()
))
.collect::<Vec<_>>(),
);

let shim_info = merged
.task_return_shims
.values()
Expand All @@ -3443,7 +3549,6 @@ impl FactStyleGenerator {
// Detect retptr convention: caller has more params than callee
// and returns void — the last caller param is the result pointer.
let uses_retptr = caller_type.results.is_empty() && caller_param_count > callee_param_count;
let caller_memory = crate::merger::component_memory_index(merged, site.from_component);

// Find caller's cabi_realloc for cross-memory string copying
let caller_realloc = crate::merger::component_realloc_index(merged, site.from_component);
Expand Down
22 changes: 22 additions & 0 deletions meld-core/src/component_wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,28 @@ fn assemble_component(
}

ImportResolution::TaskBuiltin { op } => {
// Check if this task.return has a shim export in the fused module.
// If so, alias the shim instead of using canonical task.return.
if let P3BuiltinOp::TaskReturn { .. } = op {
let shim_name = format!("$task_return_shim_{}", i);
let has_shim = fused_info
.exports
.iter()
.any(|(n, k, _)| *k == wasmparser::ExternalKind::Func && *n == shim_name);
if has_shim {
let mut alias_section = ComponentAliasSection::new();
alias_section.alias(Alias::CoreInstanceExport {
instance: fused_instance,
kind: ExportKind::Func,
name: &shim_name,
});
component.section(&alias_section);
lowered_func_indices.push(core_func_idx);
core_func_idx += 1;
continue;
}
}

let mut canon = CanonicalFunctionSection::new();
match op {
P3BuiltinOp::TaskReturn {
Expand Down
58 changes: 51 additions & 7 deletions meld-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,8 +718,16 @@ impl Fuser {
origin: (comp_idx, 0, u32::MAX),
});

// Remap the task.return import to the shim in function_index_map
// for all modules of this component
// Export the shim so the component wrapper can alias it
// instead of using canonical task.return.
merged.exports.push(merger::MergedExport {
name: format!("$task_return_shim_{}", import_idx),
kind: wasm_encoder::ExportKind::Func,
index: shim_func_idx,
});

// Remap the task.return import to the shim in function_index_map.
// Only match direct imports with the fused name.
let component = &self.components[comp_idx];
for (mod_idx, module) in component.core_modules.iter().enumerate() {
let mut func_idx = 0u32;
Expand Down Expand Up @@ -749,6 +757,10 @@ impl Fuser {
}
}

// Note: intra-component forwarding functions (call_indirect table[N])
// for this task.return are handled by the component wrapper, which
// provides the shim export ($task_return_shim_N) as the table entry.

// Store shim info for the adapter to use
merged.task_return_shims.insert(
import_idx as u32,
Expand All @@ -761,14 +773,17 @@ impl Fuser {
},
);

let shim = &merged.task_return_shims[&(import_idx as u32)];
log::info!(
"task.return shim: import {} '{}' → shim func {} with {} globals",
"task.return shim: import {} '{}' orig='{}' → shim func {} globals {:?}",
import_idx,
imp.name,
shim_func_idx,
merged.task_return_shims[&(import_idx as u32)]
.result_globals
.len(),
shim.original_func_name,
shim.shim_func,
shim.result_globals
.iter()
.map(|(g, _)| *g)
.collect::<Vec<_>>(),
);
}

Expand Down Expand Up @@ -811,6 +826,35 @@ impl Fuser {
}
}

// Patch element segments: replace task.return import references
// with shim function references. This ensures that indirect calls
// through element-segment-initialized tables call the shim instead
// of the (stub) import.
if !merged.task_return_shims.is_empty() {
// Build a map: import merged index → shim func index
let mut import_to_shim: HashMap<u32, u32> = HashMap::new();
for (import_idx, shim_info) in &merged.task_return_shims {
import_to_shim.insert(*import_idx, shim_info.shim_func);
}

for elem in &mut merged.elements {
if let crate::segments::ReindexedElementItems::Functions(ref mut indices) =
elem.items
{
for idx in indices.iter_mut() {
if let Some(&shim_idx) = import_to_shim.get(idx) {
log::debug!(
"element segment: replaced import {} with shim {}",
idx,
shim_idx,
);
*idx = shim_idx;
}
}
}
}
}

Ok(())
}

Expand Down
5 changes: 5 additions & 0 deletions meld-core/src/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2516,6 +2516,11 @@ pub(crate) fn component_memory_index(merged: &MergedModule, comp_idx: usize) ->

/// Find the merged function index of a component's cabi_realloc.
pub(crate) fn component_realloc_index(merged: &MergedModule, comp_idx: usize) -> Option<u32> {
// Prefer module 0's realloc (the main module)
if let Some(&idx) = merged.realloc_map.get(&(comp_idx, 0)) {
return Some(idx);
}
// Fallback: any module's realloc for this component
for (&(ci, _mi), &merged_idx) in &merged.realloc_map {
if ci == comp_idx {
return Some(merged_idx);
Expand Down
9 changes: 9 additions & 0 deletions meld-core/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2316,6 +2316,15 @@ impl Resolver {
}
requirements.pointer_pair_positions = to_component
.pointer_pair_param_positions(comp_params);
log::debug!(
"pointer_pair_positions for {}: {:?} (comp_params={:?})",
*func_name,
requirements.pointer_pair_positions,
comp_params
.iter()
.map(|(n, t)| (n.as_str(), format!("{:?}", t)))
.collect::<Vec<_>>(),
);
requirements.result_pointer_pair_offsets =
to_component.pointer_pair_result_offsets(results);
// Compute copy layouts for each pointer pair
Expand Down
Loading