Skip to content

Commit 273736e

Browse files
authored
Merge pull request #55 from molpopgen/sample_info
Add SamplesInfo
2 parents d43e374 + 3cba755 commit 273736e

File tree

7 files changed

+75
-31
lines changed

7 files changed

+75
-31
lines changed

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
mod edge_buffer;
4242
mod error;
4343
pub mod nested_forward_list;
44+
mod samples_info;
4445
mod segment;
4546
mod simplification_buffers;
4647
mod simplification_common;
@@ -54,6 +55,7 @@ mod tsdef;
5455

5556
pub use edge_buffer::EdgeBuffer;
5657
pub use error::ForrusttsError;
58+
pub use samples_info::SamplesInfo;
5759
pub use segment::Segment;
5860
pub use simplification_buffers::SimplificationBuffers;
5961
pub use simplification_flags::SimplificationFlags;

src/samples_info.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
use crate::IdType;
2+
3+
/// Information about samples used for
4+
/// table simplification.
5+
#[derive(Default)]
6+
pub struct SamplesInfo {
7+
/// A list of sample IDs.
8+
/// Can include both "alive" and
9+
/// "ancient/remembered/preserved" sample
10+
/// nodes.
11+
pub samples: Vec<IdType>,
12+
/// When using [``EdgeBuffer``] to record transmission
13+
/// events, this list must contain a list of all node IDs
14+
/// alive the last time simplification happened. Here,
15+
/// "alive" means "could leave more descendants".
16+
/// At the *start* of a simulation, this should be filled
17+
/// with a list of "founder" node IDs.
18+
pub edge_buffer_founder_nodes: Vec<IdType>,
19+
}
20+
21+
impl SamplesInfo {
22+
/// Generate a new instance.
23+
pub fn new() -> Self {
24+
SamplesInfo {
25+
samples: vec![],
26+
edge_buffer_founder_nodes: vec![],
27+
}
28+
}
29+
}
30+
31+
#[cfg(test)]
32+
mod test {
33+
use super::*;
34+
35+
#[test]
36+
fn test_default() {
37+
let s: SamplesInfo = Default::default();
38+
assert!(s.samples.is_empty());
39+
assert!(s.edge_buffer_founder_nodes.is_empty());
40+
}
41+
}

src/simplification_common.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/// functions
33
use crate::simplification_logic;
44
use crate::ForrusttsError;
5+
use crate::SamplesInfo;
56
use crate::SimplificationBuffers;
67
use crate::SimplificationFlags;
78
use crate::SimplificationOutput;
@@ -14,7 +15,7 @@ fn setup_idmap(nodes: &[Node], idmap: &mut Vec<IdType>) {
1415
}
1516

1617
pub fn setup_simplification(
17-
samples: &[IdType],
18+
samples: &SamplesInfo,
1819
tables: &TableCollection,
1920
flags: SimplificationFlags,
2021
state: &mut SimplificationBuffers,
@@ -38,7 +39,7 @@ pub fn setup_simplification(
3839
state.ancestry.reset(tables.num_nodes());
3940

4041
simplification_logic::record_sample_nodes(
41-
&samples,
42+
&samples.samples,
4243
&tables,
4344
&mut state.new_nodes,
4445
&mut state.ancestry,

src/simplify_from_edge_buffer.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::simplification_logic;
33
use crate::tables::*;
44
use crate::EdgeBuffer;
55
use crate::ForrusttsError;
6+
use crate::SamplesInfo;
67
use crate::Segment;
78
use crate::SimplificationBuffers;
89
use crate::SimplificationFlags;
@@ -28,12 +29,12 @@ impl ParentLocation {
2829

2930
fn find_pre_existing_edges(
3031
tables: &TableCollection,
31-
alive_at_last_simplification: &[IdType],
32+
edge_buffer_founder_nodes: &[IdType],
3233
edge_buffer: &EdgeBuffer,
3334
) -> Result<Vec<ParentLocation>, ForrusttsError> {
3435
let mut alive_with_new_edges: Vec<i32> = vec![];
3536

36-
for a in alive_at_last_simplification {
37+
for a in edge_buffer_founder_nodes {
3738
if edge_buffer.head(*a)? != EdgeBuffer::null() {
3839
alive_with_new_edges.push(*a);
3940
}
@@ -130,8 +131,9 @@ fn process_births_from_buffer(
130131
///
131132
/// # Parameters
132133
///
133-
/// * `samples`:
134-
/// * `alive_at_last_simplification`:
134+
/// * `samples`: Instance of [``SamplesInfo``]. The field
135+
/// [``SamplesInfo::edge_buffer_founder_nodes``]
136+
/// must be populated. See [``EdgeBuffer``] for details.
135137
/// * `flags`: modify the behavior of the simplification algorithm.
136138
/// * `state`: These are the internal data structures used
137139
/// by the simplification algorithm.
@@ -146,8 +148,7 @@ fn process_births_from_buffer(
146148
/// The input tables must be sorted.
147149
/// See [``TableCollection::sort_tables_for_simplification``].
148150
pub fn simplify_from_edge_buffer(
149-
samples: &[IdType],
150-
alive_at_last_simplification: &[IdType],
151+
samples: &SamplesInfo,
151152
flags: SimplificationFlags,
152153
state: &mut SimplificationBuffers,
153154
edge_buffer: &mut EdgeBuffer,
@@ -158,7 +159,7 @@ pub fn simplify_from_edge_buffer(
158159

159160
// Process all edges since the last simplification.
160161
let mut max_time = Time::MIN;
161-
for n in alive_at_last_simplification {
162+
for n in samples.edge_buffer_founder_nodes.iter() {
162163
max_time = std::cmp::max(max_time, tables.node(*n).time);
163164
}
164165
for (i, _) in edge_buffer.head_itr().rev().enumerate() {
@@ -185,7 +186,7 @@ pub fn simplify_from_edge_buffer(
185186
}
186187

187188
let existing_edges =
188-
find_pre_existing_edges(&tables, &alive_at_last_simplification, &edge_buffer)?;
189+
find_pre_existing_edges(&tables, &samples.edge_buffer_founder_nodes, &edge_buffer)?;
189190

190191
let mut edge_i = 0;
191192
let num_edges = tables.num_edges();

src/simplify_tables.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::simplification_common::*;
22
use crate::tables::*;
33
use crate::ForrusttsError;
4-
use crate::IdType;
4+
use crate::SamplesInfo;
55
use crate::SimplificationBuffers;
66
use crate::SimplificationFlags;
77
use crate::SimplificationOutput;
@@ -26,7 +26,7 @@ use crate::SimplificationOutput;
2626
/// [``simplify_tables``] to keep memory allocations
2727
/// persistent between simplifications.
2828
pub fn simplify_tables_without_state(
29-
samples: &[IdType],
29+
samples: &SamplesInfo,
3030
flags: SimplificationFlags,
3131
tables: &mut TableCollection,
3232
output: &mut SimplificationOutput,
@@ -56,7 +56,7 @@ pub fn simplify_tables_without_state(
5656
/// The input tables must be sorted.
5757
/// See [``TableCollection::sort_tables_for_simplification``].
5858
pub fn simplify_tables(
59-
samples: &[IdType],
59+
samples: &SamplesInfo,
6060
flags: SimplificationFlags,
6161
state: &mut SimplificationBuffers,
6262
tables: &mut TableCollection,

src/test_simplify_tables.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod test {
88
use crate::tsdef::{IdType, Position, Time};
99
use crate::wright_fisher::*;
1010
use crate::ForrusttsError;
11+
use crate::SamplesInfo;
1112
use crate::SimplificationFlags;
1213
use crate::SimplificationOutput;
1314
use crate::TableCollection;
@@ -89,10 +90,10 @@ mod test {
8990

9091
// Now, sort and simplify the tables we got from the sim:
9192
tables.sort_tables_for_simplification();
92-
let mut samples: Vec<IdType> = vec![];
93+
let mut samples = SamplesInfo::new();
9394
for (i, n) in tables.nodes().iter().enumerate() {
9495
if n.time == num_generations {
95-
samples.push(i as IdType);
96+
samples.samples.push(i as IdType);
9697
}
9798
}
9899

@@ -122,8 +123,8 @@ mod test {
122123
assert!(rv == 0);
123124
let rv = tskr::tsk_table_collection_simplify(
124125
tsk_tables.as_mut_ptr(),
125-
samples.as_ptr(),
126-
samples.len() as u32,
126+
samples.samples.as_ptr(),
127+
samples.samples.len() as u32,
127128
0,
128129
std::ptr::null_mut(),
129130
);

src/wright_fisher.rs

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::tables::{validate_edge_table, TableCollection};
1111
use crate::tsdef::*;
1212
use crate::EdgeBuffer;
1313
use crate::ForrusttsError;
14+
use crate::SamplesInfo;
1415
use crate::Segment;
1516
use crate::SimplificationBuffers;
1617
use crate::SimplificationFlags;
@@ -65,7 +66,6 @@ type VecBirth = Vec<Birth>;
6566
struct PopulationState {
6667
pub parents: VecParent,
6768
pub births: VecBirth,
68-
pub alive_at_last_simplification: Vec<IdType>,
6969
pub edge_buffer: EdgeBuffer,
7070
pub tables: TableCollection,
7171
}
@@ -75,7 +75,6 @@ impl PopulationState {
7575
PopulationState {
7676
parents: vec![],
7777
births: vec![],
78-
alive_at_last_simplification: vec![],
7978
edge_buffer: EdgeBuffer::new(),
8079
tables: TableCollection::new(genome_length).unwrap(),
8180
}
@@ -291,17 +290,17 @@ fn recombination_breakpoints(
291290
}
292291
}
293292

294-
fn fill_samples(parents: &[Parent], samples: &mut Vec<IdType>) {
295-
samples.clear();
293+
fn fill_samples(parents: &[Parent], samples: &mut SamplesInfo) {
294+
samples.samples.clear();
296295
for p in parents {
297-
samples.push(p.node0);
298-
samples.push(p.node1);
296+
samples.samples.push(p.node0);
297+
samples.samples.push(p.node1);
299298
}
300299
}
301300

302301
fn sort_and_simplify(
303302
flags: SimulationFlags,
304-
samples: &[IdType],
303+
samples: &SamplesInfo,
305304
state: &mut SimplificationBuffers,
306305
pop: &mut PopulationState,
307306
output: &mut SimplificationOutput,
@@ -341,7 +340,6 @@ fn sort_and_simplify(
341340
} else {
342341
simplify_from_edge_buffer(
343342
samples,
344-
&pop.alive_at_last_simplification,
345343
SimplificationFlags::empty(),
346344
state,
347345
&mut pop.edge_buffer,
@@ -354,7 +352,7 @@ fn sort_and_simplify(
354352

355353
fn simplify_and_remap_nodes(
356354
flags: SimulationFlags,
357-
samples: &mut Vec<IdType>,
355+
samples: &mut SamplesInfo,
358356
state: &mut SimplificationBuffers,
359357
pop: &mut PopulationState,
360358
output: &mut SimplificationOutput,
@@ -368,10 +366,10 @@ fn simplify_and_remap_nodes(
368366
}
369367

370368
if flags.contains(SimulationFlags::BUFFER_EDGES) {
371-
pop.alive_at_last_simplification.clear();
369+
samples.edge_buffer_founder_nodes.clear();
372370
for p in &pop.parents {
373-
pop.alive_at_last_simplification.push(p.node0);
374-
pop.alive_at_last_simplification.push(p.node1);
371+
samples.edge_buffer_founder_nodes.push(p.node0);
372+
samples.edge_buffer_founder_nodes.push(p.node1);
375373
}
376374
}
377375
}
@@ -523,7 +521,7 @@ pub fn neutral_wf(
523521
rng.set(params.seed);
524522

525523
let mut pop = PopulationState::new(pop_params.genome_length);
526-
let mut samples: Vec<IdType> = vec![];
524+
let mut samples: SamplesInfo = Default::default();
527525
let mut breakpoints = vec![];
528526

529527
// Record nodes for the first generation
@@ -535,7 +533,7 @@ pub fn neutral_wf(
535533
}
536534

537535
for i in 0..pop.tables.num_nodes() {
538-
pop.alive_at_last_simplification.push(i as IdType);
536+
samples.edge_buffer_founder_nodes.push(i as IdType);
539537
}
540538

541539
let mut simplified = false;

0 commit comments

Comments
 (0)