Skip to content

Commit e1490ce

Browse files
dtwf/ped stats tests
1 parent c6e9573 commit e1490ce

File tree

1 file changed

+136
-9
lines changed

1 file changed

+136
-9
lines changed

verification.py

+136-9
Original file line numberDiff line numberDiff line change
@@ -1658,26 +1658,32 @@ def run_dtwf_pedigree_comparison(
16581658
recombination_rate=0,
16591659
num_replicates=100,
16601660
pedigree_sim_direction="forward",
1661+
additional_nodes=None,
16611662
):
1662-
df = pd.DataFrame()
1663-
16641663
def replicates_data(replicates, model):
16651664
data = collections.defaultdict(list)
16661665
for ts in replicates:
16671666
t_mrca = np.zeros(ts.num_trees)
16681667
num_roots = np.zeros(ts.num_trees)
16691668
t_intervals = []
1669+
total_branch_length = np.zeros(ts.num_trees)
16701670
for tree in ts.trees():
16711671
t_mrca[tree.index] = max(tree.time(root) for root in tree.roots)
16721672
t_intervals.append(tree.interval)
16731673
num_roots[tree.index] = tree.num_roots
1674+
total_branch_length[tree.index] = tree.total_branch_length
16741675
data["num_roots"].append(np.mean(num_roots))
16751676
data["tmrca_mean"].append(np.mean(t_mrca))
16761677
data["num_trees"].append(ts.num_trees)
16771678
data["intervals"].append(t_intervals)
1679+
data["total_branch_length"].append(np.mean(total_branch_length))
16781680
data["model"].append(model)
16791681
return pd.DataFrame(data)
16801682

1683+
df_list = []
1684+
coalescing_segments_only = True
1685+
if additional_nodes is not None:
1686+
coalescing_segments_only = False
16811687
for _ in range(num_replicates):
16821688
pedigree = pedigrees.sim_pedigree(
16831689
population_size=N, end_time=end_time, direction=pedigree_sim_direction
@@ -1687,8 +1693,11 @@ def replicates_data(replicates, model):
16871693
initial_state=pedigree,
16881694
recombination_rate=recombination_rate,
16891695
model="fixed_pedigree",
1696+
additional_nodes=additional_nodes,
1697+
coalescing_segments_only=coalescing_segments_only,
16901698
)
1691-
df = df.append(replicates_data([ts_ped], "dtwf|ped"))
1699+
1700+
df_list.append(replicates_data([ts_ped], "dtwf|ped"))
16921701

16931702
dtwf_replicates = msprime.sim_ancestry(
16941703
samples=N,
@@ -1698,14 +1707,16 @@ def replicates_data(replicates, model):
16981707
sequence_length=sequence_length,
16991708
model="dtwf",
17001709
num_replicates=num_replicates,
1710+
additional_nodes=additional_nodes,
1711+
coalescing_segments_only=coalescing_segments_only,
17011712
)
1702-
df = df.append(replicates_data(dtwf_replicates, "dtwf"))
1703-
return df
1713+
df_list.append(replicates_data(dtwf_replicates, "dtwf"))
1714+
return pd.concat(df_list)
17041715

17051716
def plot_coalescent_stats(self, df):
17061717
df_ped = df[df.model == "dtwf|ped"]
17071718
df_dtwf = df[df.model == "dtwf"]
1708-
for stat in ["tmrca_mean", "num_trees", "num_roots"]:
1719+
for stat in ["tmrca_mean", "num_trees", "num_roots", "total_branch_length"]:
17091720
plot_qq(df_ped[stat], df_dtwf[stat])
17101721
pyplot.xlabel("dtwf|ped")
17111722
pyplot.ylabel("dtwf")
@@ -1724,6 +1735,14 @@ def _run(self, **kwargs):
17241735
df = self.run_dtwf_pedigree_comparison(**kwargs)
17251736
self.plot_coalescent_stats(df)
17261737

1738+
1739+
class DtwfVsPedigreeSimple(DtwfVsPedigree):
1740+
"""
1741+
Running a simulation through a pedigree with population size N
1742+
should be identical to running a DTWF simulation of the same
1743+
size.
1744+
"""
1745+
17271746
def test_dtwf_vs_pedigree_single_locus_n50(self):
17281747
self._run(N=50, end_time=500, num_replicates=100)
17291748

@@ -1772,6 +1791,44 @@ def test_dtwf_vs_pedigree_recomb_discrete_hotspots(self):
17721791
)
17731792

17741793

1794+
class DtwfVsPedigreeAdditionalNodes(DtwfVsPedigree):
1795+
"""
1796+
Running a simulation through a pedigree with population size N
1797+
should be identical to running a DTWF simulation of the same
1798+
size. Tests impact of registering additional nodes.
1799+
"""
1800+
1801+
def test_dtwf_vs_pedigree_many_roots_add_nodes(self):
1802+
additional_nodes = (
1803+
msprime.NodeType.RECOMBINANT
1804+
| msprime.NodeType.PASS_THROUGH
1805+
| msprime.NodeType.COMMON_ANCESTOR
1806+
)
1807+
self._run(
1808+
N=500,
1809+
end_time=100,
1810+
num_replicates=100,
1811+
sequence_length=100,
1812+
recombination_rate=0.0001,
1813+
additional_nodes=additional_nodes,
1814+
)
1815+
1816+
def test_dtwf_vs_pedigree_few_roots_add_nodes(self):
1817+
additional_nodes = (
1818+
msprime.NodeType.RECOMBINANT
1819+
| msprime.NodeType.PASS_THROUGH
1820+
| msprime.NodeType.COMMON_ANCESTOR
1821+
)
1822+
self._run(
1823+
N=10,
1824+
end_time=1000,
1825+
num_replicates=100,
1826+
sequence_length=100,
1827+
recombination_rate=0.0001,
1828+
additional_nodes=additional_nodes,
1829+
)
1830+
1831+
17751832
class DtwfVsRecapitatedPedigree(Test):
17761833
"""
17771834
Running a simulation through a pedigree with population size N
@@ -1912,7 +1969,7 @@ class DtwfVsCoalescent(Test):
19121969
"""
19131970

19141971
def run_dtwf_coalescent_stats(self, **kwargs):
1915-
df = pd.DataFrame()
1972+
df_list = []
19161973

19171974
for model in ["hudson", "dtwf"]:
19181975
kwargs["model"] = model
@@ -1930,8 +1987,8 @@ def run_dtwf_coalescent_stats(self, **kwargs):
19301987
data["num_trees"].append(ts.num_trees)
19311988
data["intervals"].append(t_intervals)
19321989
data["model"].append(model)
1933-
df = df.append(pd.DataFrame(data))
1934-
return df
1990+
df_list.append(pd.DataFrame(data))
1991+
return pd.concat(df_list)
19351992

19361993
def plot_dtwf_coalescent_stats(self, df):
19371994
df_hudson = df[df.model == "hudson"]
@@ -2232,6 +2289,76 @@ def test_dtwf_vs_coalescent_2_pops_high_asymm_mig(self):
22322289
)
22332290

22342291

2292+
class DtwfVsCoalescentAdditionalNodes(DtwfVsCoalescent):
2293+
"""
2294+
Comparison of DTWF with additional nodes against coalescent sims
2295+
without additional nodes.
2296+
"""
2297+
2298+
def run_dtwf_coalescent_stats(self, **kwargs):
2299+
df_list = []
2300+
2301+
for model in ["dtwf", "hudson"]:
2302+
kwargs["model"] = model
2303+
if model == "hudson":
2304+
kwargs["additional_nodes"] = None
2305+
kwargs["coalescing_segments_only"] = True
2306+
logging.debug(f"Running: {kwargs}")
2307+
data = collections.defaultdict(list)
2308+
replicates = msprime.sim_ancestry(**kwargs)
2309+
for ts in replicates:
2310+
tss = ts.simplify()
2311+
t_mrca = np.zeros(tss.num_trees)
2312+
t_intervals = []
2313+
for tree in tss.trees():
2314+
t_mrca[tree.index] = tree.time(tree.root)
2315+
t_intervals.append(tree.interval)
2316+
data["tmrca_mean"].append(np.mean(t_mrca))
2317+
data["num_trees"].append(tss.num_trees)
2318+
data["intervals"].append(t_intervals)
2319+
data["model"].append(model)
2320+
df_list.append(pd.DataFrame(data))
2321+
return pd.concat(df_list)
2322+
2323+
def test_dtwf_vs_coalescent_pass_through_only(self):
2324+
"""
2325+
Checks the DTWF against the standard coalescent while
2326+
registering all pass through nodes.
2327+
"""
2328+
additional_nodes = msprime.NodeType.PASS_THROUGH
2329+
self._run(
2330+
samples=10,
2331+
population_size=1000,
2332+
recombination_rate=1e-5,
2333+
num_replicates=300,
2334+
sequence_length=1000,
2335+
discrete_genome=True,
2336+
coalescing_segments_only=False,
2337+
additional_nodes=additional_nodes,
2338+
)
2339+
2340+
def test_dtwf_vs_coalescent_all_additional_nodes(self):
2341+
"""
2342+
Checks the DTWF against the standard coalescent while
2343+
registering all possible additional nodes.
2344+
"""
2345+
additional_nodes = (
2346+
msprime.NodeType.RECOMBINANT
2347+
| msprime.NodeType.PASS_THROUGH
2348+
| msprime.NodeType.COMMON_ANCESTOR
2349+
)
2350+
self._run(
2351+
samples=10,
2352+
population_size=1000,
2353+
recombination_rate=1e-5,
2354+
num_replicates=300,
2355+
sequence_length=1000,
2356+
discrete_genome=True,
2357+
coalescing_segments_only=False,
2358+
additional_nodes=additional_nodes,
2359+
)
2360+
2361+
22352362
class DtwfVsSlim(Test):
22362363
"""
22372364
Tests where we compare the DTWF with SLiM simulations.

0 commit comments

Comments
 (0)