@@ -1658,26 +1658,32 @@ def run_dtwf_pedigree_comparison(
1658
1658
recombination_rate = 0 ,
1659
1659
num_replicates = 100 ,
1660
1660
pedigree_sim_direction = "forward" ,
1661
+ additional_nodes = None ,
1661
1662
):
1662
- df = pd .DataFrame ()
1663
-
1664
1663
def replicates_data(replicates, model):
    """
    Summarise every tree sequence in ``replicates`` as one DataFrame row.

    For each tree sequence the following columns are recorded:

    - ``num_roots``: unweighted mean number of roots per tree.
    - ``tmrca_mean``: unweighted mean, over trees, of the time of the
      oldest root in each tree.
    - ``num_trees``: number of trees in the tree sequence.
    - ``intervals``: list of the genomic interval of each tree.
    - ``total_branch_length``: unweighted mean total branch length
      per tree.
    - ``model``: the ``model`` label passed in, repeated on every row.

    Returns a ``pandas.DataFrame`` with one row per tree sequence.
    """
    columns = collections.defaultdict(list)
    for ts in replicates:
        oldest_root_times = []
        tree_intervals = []
        root_counts = []
        branch_lengths = []
        for tree in ts.trees():
            # A tree may have more than one root, so take the oldest
            # root as that tree's TMRCA.
            oldest_root_times.append(
                max(tree.time(root) for root in tree.roots)
            )
            tree_intervals.append(tree.interval)
            root_counts.append(tree.num_roots)
            branch_lengths.append(tree.total_branch_length)

        # Insertion order here fixes the column order of the DataFrame.
        columns["num_roots"].append(np.mean(root_counts))
        columns["tmrca_mean"].append(np.mean(oldest_root_times))
        columns["num_trees"].append(ts.num_trees)
        columns["intervals"].append(tree_intervals)
        columns["total_branch_length"].append(np.mean(branch_lengths))
        columns["model"].append(model)
    return pd.DataFrame(columns)
1680
1682
1683
+ df_list = []
1684
+ coalescing_segments_only = True
1685
+ if additional_nodes is not None :
1686
+ coalescing_segments_only = False
1681
1687
for _ in range (num_replicates ):
1682
1688
pedigree = pedigrees .sim_pedigree (
1683
1689
population_size = N , end_time = end_time , direction = pedigree_sim_direction
@@ -1687,8 +1693,11 @@ def replicates_data(replicates, model):
1687
1693
initial_state = pedigree ,
1688
1694
recombination_rate = recombination_rate ,
1689
1695
model = "fixed_pedigree" ,
1696
+ additional_nodes = additional_nodes ,
1697
+ coalescing_segments_only = coalescing_segments_only ,
1690
1698
)
1691
- df = df .append (replicates_data ([ts_ped ], "dtwf|ped" ))
1699
+
1700
+ df_list .append (replicates_data ([ts_ped ], "dtwf|ped" ))
1692
1701
1693
1702
dtwf_replicates = msprime .sim_ancestry (
1694
1703
samples = N ,
@@ -1698,14 +1707,16 @@ def replicates_data(replicates, model):
1698
1707
sequence_length = sequence_length ,
1699
1708
model = "dtwf" ,
1700
1709
num_replicates = num_replicates ,
1710
+ additional_nodes = additional_nodes ,
1711
+ coalescing_segments_only = coalescing_segments_only ,
1701
1712
)
1702
- df = df .append (replicates_data (dtwf_replicates , "dtwf" ))
1703
- return df
1713
+ df_list .append (replicates_data (dtwf_replicates , "dtwf" ))
1714
+ return pd . concat ( df_list )
1704
1715
1705
1716
def plot_coalescent_stats (self , df ):
1706
1717
df_ped = df [df .model == "dtwf|ped" ]
1707
1718
df_dtwf = df [df .model == "dtwf" ]
1708
- for stat in ["tmrca_mean" , "num_trees" , "num_roots" ]:
1719
+ for stat in ["tmrca_mean" , "num_trees" , "num_roots" , "total_branch_length" ]:
1709
1720
plot_qq (df_ped [stat ], df_dtwf [stat ])
1710
1721
pyplot .xlabel ("dtwf|ped" )
1711
1722
pyplot .ylabel ("dtwf" )
@@ -1724,6 +1735,14 @@ def _run(self, **kwargs):
1724
1735
df = self .run_dtwf_pedigree_comparison (** kwargs )
1725
1736
self .plot_coalescent_stats (df )
1726
1737
1738
+
1739
+ class DtwfVsPedigreeSimple (DtwfVsPedigree ):
1740
+ """
1741
+ Running a simulation through a pedigree with population size N
1742
+ should be identical to running a DTWF simulation of the same
1743
+ size.
1744
+ """
1745
+
1727
1746
def test_dtwf_vs_pedigree_single_locus_n50(self):
    """
    Run the comparison with N=50, an end time of 500 and 100 replicates;
    all other settings are left at the defaults of ``_run``.
    """
    settings = {"N": 50, "end_time": 500, "num_replicates": 100}
    self._run(**settings)
1729
1748
@@ -1772,6 +1791,44 @@ def test_dtwf_vs_pedigree_recomb_discrete_hotspots(self):
1772
1791
)
1773
1792
1774
1793
1794
class DtwfVsPedigreeAdditionalNodes(DtwfVsPedigree):
    """
    A simulation through a pedigree of population size N should match
    a DTWF simulation of the same size. These tests check that this
    still holds when additional node types are being recorded.
    """

    def test_dtwf_vs_pedigree_many_roots_add_nodes(self):
        # N=500 with only 100 generations of pedigree; presumably this
        # leaves many roots per tree, as the test name suggests.
        node_types = (
            msprime.NodeType.RECOMBINANT
            | msprime.NodeType.PASS_THROUGH
            | msprime.NodeType.COMMON_ANCESTOR
        )
        settings = dict(
            N=500,
            end_time=100,
            num_replicates=100,
            sequence_length=100,
            recombination_rate=0.0001,
            additional_nodes=node_types,
        )
        self._run(**settings)

    def test_dtwf_vs_pedigree_few_roots_add_nodes(self):
        # N=10 with 1000 generations of pedigree; presumably this
        # leaves few roots per tree, as the test name suggests.
        node_types = (
            msprime.NodeType.RECOMBINANT
            | msprime.NodeType.PASS_THROUGH
            | msprime.NodeType.COMMON_ANCESTOR
        )
        settings = dict(
            N=10,
            end_time=1000,
            num_replicates=100,
            sequence_length=100,
            recombination_rate=0.0001,
            additional_nodes=node_types,
        )
        self._run(**settings)
1830
+
1831
+
1775
1832
class DtwfVsRecapitatedPedigree (Test ):
1776
1833
"""
1777
1834
Running a simulation through a pedigree with population size N
@@ -1912,7 +1969,7 @@ class DtwfVsCoalescent(Test):
1912
1969
"""
1913
1970
1914
1971
def run_dtwf_coalescent_stats (self , ** kwargs ):
1915
- df = pd . DataFrame ()
1972
+ df_list = []
1916
1973
1917
1974
for model in ["hudson" , "dtwf" ]:
1918
1975
kwargs ["model" ] = model
@@ -1930,8 +1987,8 @@ def run_dtwf_coalescent_stats(self, **kwargs):
1930
1987
data ["num_trees" ].append (ts .num_trees )
1931
1988
data ["intervals" ].append (t_intervals )
1932
1989
data ["model" ].append (model )
1933
- df = df .append (pd .DataFrame (data ))
1934
- return df
1990
+ df_list .append (pd .DataFrame (data ))
1991
+ return pd . concat ( df_list )
1935
1992
1936
1993
def plot_dtwf_coalescent_stats (self , df ):
1937
1994
df_hudson = df [df .model == "hudson" ]
@@ -2232,6 +2289,76 @@ def test_dtwf_vs_coalescent_2_pops_high_asymm_mig(self):
2232
2289
)
2233
2290
2234
2291
2292
class DtwfVsCoalescentAdditionalNodes(DtwfVsCoalescent):
    """
    Compares DTWF simulations that record additional nodes against
    coalescent ("hudson") simulations that record none.
    """

    def run_dtwf_coalescent_stats(self, **kwargs):
        """
        Simulate under "dtwf" and then "hudson" and collect summary
        statistics for every replicate.

        ``kwargs`` are forwarded to ``msprime.sim_ancestry``. For the
        "hudson" run ``additional_nodes`` is forced to ``None`` and
        ``coalescing_segments_only`` to ``True``, whatever the caller
        supplied. Each tree sequence is simplified before it is measured.

        Returns a ``pandas.DataFrame`` with one row per replicate and the
        columns ``tmrca_mean``, ``num_trees``, ``intervals`` and ``model``.
        """
        frames = []
        for model in ("dtwf", "hudson"):
            sim_kwargs = {**kwargs, "model": model}
            if model == "hudson":
                # The coalescent reference run never records extra nodes.
                sim_kwargs["additional_nodes"] = None
                sim_kwargs["coalescing_segments_only"] = True
            logging.debug(f"Running: {sim_kwargs}")

            columns = collections.defaultdict(list)
            for ts in msprime.sim_ancestry(**sim_kwargs):
                # NOTE(review): presumably simplify() strips the extra
                # recorded nodes so both models are measured on
                # comparable trees - confirm.
                simplified = ts.simplify()
                root_times = []
                tree_intervals = []
                for tree in simplified.trees():
                    root_times.append(tree.time(tree.root))
                    tree_intervals.append(tree.interval)
                columns["tmrca_mean"].append(np.mean(root_times))
                columns["num_trees"].append(simplified.num_trees)
                columns["intervals"].append(tree_intervals)
                columns["model"].append(model)
            frames.append(pd.DataFrame(columns))
        return pd.concat(frames)

    def test_dtwf_vs_coalescent_pass_through_only(self):
        """
        Compares the DTWF with the standard coalescent when only
        pass-through nodes are recorded in addition.
        """
        node_types = msprime.NodeType.PASS_THROUGH
        settings = dict(
            samples=10,
            population_size=1000,
            recombination_rate=1e-5,
            num_replicates=300,
            sequence_length=1000,
            discrete_genome=True,
            coalescing_segments_only=False,
            additional_nodes=node_types,
        )
        self._run(**settings)

    def test_dtwf_vs_coalescent_all_additional_nodes(self):
        """
        Compares the DTWF with the standard coalescent when recombinant,
        pass-through and common-ancestor nodes are all recorded.
        """
        node_types = (
            msprime.NodeType.RECOMBINANT
            | msprime.NodeType.PASS_THROUGH
            | msprime.NodeType.COMMON_ANCESTOR
        )
        settings = dict(
            samples=10,
            population_size=1000,
            recombination_rate=1e-5,
            num_replicates=300,
            sequence_length=1000,
            discrete_genome=True,
            coalescing_segments_only=False,
            additional_nodes=node_types,
        )
        self._run(**settings)
2360
+
2361
+
2235
2362
class DtwfVsSlim (Test ):
2236
2363
"""
2237
2364
Tests where we compare the DTWF with SLiM simulations.
0 commit comments