@@ -7141,18 +7141,180 @@ def test_bad_seq_len(self):
7141
7141
ts .shift (1 , sequence_length = 1 )
7142
7142
7143
7143
7144
class TestMerge:
    """Tests for the ``merge`` method of tree sequences."""

    def test_empty(self):
        """Merging an empty tree sequence with itself stays empty."""
        ts = tskit.TableCollection(2).tree_sequence()
        merged_ts = ts.merge(ts, node_mapping=[])
        assert merged_ts.num_nodes == 0
        assert merged_ts.num_edges == 0
        assert merged_ts.sequence_length == 2

    def test_simple(self):
        """Split one tree's edges and mutations in two, then merge them back."""
        # Cut up a single tree into alternating edges and mutations, then merge
        ts = tskit.Tree.generate_comb(4, span=10).tree_sequence
        ts = msprime.sim_mutations(ts, rate=0.1, random_seed=1)
        # Make sure there are both singly and multiply mutated sites.
        mut_counts = np.bincount(ts.mutations_site, minlength=ts.num_sites)
        assert min(mut_counts) == 1
        assert max(mut_counts) > 1
        tables1 = ts.dump_tables()
        tables1.mutations.clear()
        tables2 = tables1.copy()
        # Deal the mutations out alternately between the two table collections.
        i = 0
        for s in ts.sites():
            for m in s.mutations:
                i += 1
                if i % 2:
                    tables1.mutations.append(m.replace(parent=tskit.NULL))
                else:
                    tables2.mutations.append(m.replace(parent=tskit.NULL))
        tables1.simplify()
        tables2.simplify()
        assert tables1.sites.num_rows != ts.num_sites
        # Deal the edges out alternately too: odd ids to tables1, even to tables2.
        tables1.edges.clear()
        tables2.edges.clear()
        for e in ts.edges():
            if e.id % 2:
                tables1.edges.append(e)
            else:
                tables2.edges.append(e)
        ts1 = tables1.tree_sequence()
        ts2 = tables2.tree_sequence()
        new_ts = ts1.merge(ts2, node_mapping=np.arange(ts.num_nodes)).simplify()
        assert new_ts.equals(ts, ignore_provenance=True)

    def test_multi_tree(self):
        """Merge the two halves of a multi-tree sequence cut at position 1.5."""
        ts = msprime.sim_ancestry(
            2, sequence_length=4, recombination_rate=1, random_seed=1
        )
        ts = msprime.sim_mutations(ts, rate=1, random_seed=1)
        assert ts.num_trees > 3
        assert ts.num_mutations > 4
        ts1 = ts.keep_intervals([[0, 1.5]], simplify=False)
        ts2 = ts.keep_intervals([[1.5, 4]], simplify=False)
        new_ts = ts1.merge(
            ts2, node_mapping=np.arange(ts.num_nodes), add_populations=False
        )
        # Before simplifying, the merged result has one more tree than the input.
        assert new_ts.num_trees == ts.num_trees + 1
        new_ts = new_ts.simplify()
        # The comparison result was previously discarded; assert it so that a
        # failed round trip actually fails the test.
        assert new_ts.equals(ts, ignore_provenance=True)

    def test_new_individuals(self):
        """Check node and individual counts when only two nodes are mapped."""
        ts1 = msprime.sim_ancestry(2, sequence_length=1, random_seed=1)
        ts2 = msprime.sim_ancestry(2, sequence_length=1, random_seed=2)
        tables = ts2.dump_tables()
        tables.edges.clear()
        ts2 = tables.tree_sequence()
        node_map = np.full(ts2.num_nodes, tskit.NULL)
        node_map[0:2] = [0, 1]  # map first two nodes to themselves
        ts_merged = ts1.merge(ts2, node_mapping=node_map)
        assert ts_merged.num_nodes == ts1.num_nodes + ts2.num_nodes - 2
        assert ts1.num_individuals == 2
        assert ts_merged.num_individuals == 3

    def test_popcheck(self):
        """Differing population metadata raises unless the check is disabled."""
        tables = tskit.TableCollection(1)
        p1 = tables.populations.add_row(b"foo")
        p2 = tables.populations.add_row(b"bar")
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE, population=p1)
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE, population=p2)
        ts1 = tables.tree_sequence()
        # ts2 differs from ts1 only in the metadata of population 0.
        tables.populations[0] = tables.populations[0].replace(metadata=b"baz")
        ts2 = tables.tree_sequence()
        with pytest.raises(ValueError, match="Non-matching populations"):
            ts1.merge(ts2, node_mapping=[0, 1])
        ts1.merge(ts2, node_mapping=[0, 1], check_populations=False)
        # Check with add_populations=False
        ts1.merge(ts2, node_mapping=[-1, 1])  # only merge the last one
        with pytest.raises(ValueError, match="Non-matching populations"):
            ts1.merge(ts2, node_mapping=[-1, 1], add_populations=False)

        with pytest.raises(ValueError, match="Non-matching populations"):
            ts1.simplify([0]).merge(ts2, node_mapping=[-1, 1])

    def test_isolated_mutations(self):
        """Both mutations on a shared isolated node survive the merge."""
        tables = tskit.TableCollection(1)
        u = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
        s = tables.sites.add_row(0.5, "A")
        tables.mutations.add_row(s, u, derived_state="T", time=1, metadata=b"xxx")
        ts1 = tables.tree_sequence()
        # ts2 carries the same mutation but with a different time and metadata.
        tables.mutations[0] = tables.mutations[0].replace(time=0.5, metadata=b"yyy")
        ts2 = tables.tree_sequence()
        ts_merge = ts1.merge(ts2, node_mapping=[0])
        assert ts_merge.num_sites == 1
        assert ts_merge.num_mutations == 2
        assert ts_merge.mutation(0).time == 1
        assert ts_merge.mutation(0).parent == tskit.NULL
        assert ts_merge.mutation(0).metadata == b"xxx"
        assert ts_merge.mutation(1).time == 0.5
        assert ts_merge.mutation(1).parent == 0
        assert ts_merge.mutation(1).metadata == b"yyy"

    def test_identity(self):
        """Merging a single-node tree sequence with itself changes nothing."""
        tables = tskit.TableCollection(1)
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
        ts = tables.tree_sequence()
        ts_merge = ts.merge(ts, node_mapping=[0])
        assert ts.equals(ts_merge, ignore_provenance=True)

    def test_provenance(self):
        """A provenance record is added unless record_provenance=False."""
        tables = tskit.TableCollection(1)
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
        ts = tables.tree_sequence()
        ts_merge = ts.merge(ts, node_mapping=[0], record_provenance=False)
        assert ts_merge.num_provenances == ts.num_provenances
        ts_merge = ts.merge(ts, node_mapping=[0])
        assert ts_merge.num_provenances == ts.num_provenances + 1
        # The newest record should describe the merge call and its arguments.
        prov = json.loads(ts_merge.provenance(-1).record)
        assert prov["parameters"]["command"] == "merge"
        assert prov["parameters"]["node_mapping"] == [0]
        assert prov["parameters"]["add_populations"] is True
        assert prov["parameters"]["check_populations"] is True

    def test_bad_sequence_length(self):
        """Merging tree sequences of different sequence lengths raises."""
        ts1 = tskit.TableCollection(1).tree_sequence()
        ts2 = tskit.TableCollection(2).tree_sequence()
        with pytest.raises(ValueError, match="sequence length"):
            ts1.merge(ts2, node_mapping=[])

    def test_bad_node_mapping(self):
        """A three-entry node_mapping for a five-leaf comb tree raises."""
        ts = tskit.Tree.generate_comb(5).tree_sequence
        with pytest.raises(ValueError, match="node_mapping"):
            ts.merge(ts, node_mapping=[0, 1, 2])

    def test_bad_populations(self):
        """With add_populations=False, a population missing from self raises."""
        tables = tskit.TableCollection(1)
        p1 = tables.populations.add_row()
        p2 = tables.populations.add_row()
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE, population=p1)
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE, population=p1)
        tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE, population=p2)
        ts2 = tables.tree_sequence()
        # Simplifying to the first two samples leaves ts1 with one population.
        ts1 = ts2.simplify([0, 1])
        assert ts1.num_populations == 1
        assert ts2.num_populations == 2
        ts2.merge(ts1, [0, -1], check_populations=False, add_populations=False)
        with pytest.raises(ValueError, match="population not present"):
            ts1.merge(ts2, [0, -1, -1], check_populations=False, add_populations=False)
7299
+
7300
+
7144
7301
class TestConcatenate :
7145
7302
def test_simple (self ):
7146
7303
ts1 = tskit .Tree .generate_comb (5 , span = 2 ).tree_sequence
7304
+ ts1 = msprime .sim_mutations (ts1 , rate = 1 , random_seed = 1 )
7147
7305
ts2 = tskit .Tree .generate_balanced (5 , arity = 3 , span = 3 ).tree_sequence
7306
+ ts2 = msprime .sim_mutations (ts2 , rate = 1 , random_seed = 1 )
7148
7307
assert ts1 .num_samples == ts2 .num_samples
7149
7308
assert ts1 .num_nodes != ts2 .num_nodes
7150
7309
joint_ts = ts1 .concatenate (ts2 )
7151
7310
assert joint_ts .num_nodes == ts1 .num_nodes + ts2 .num_nodes - 5
7152
7311
assert joint_ts .sequence_length == ts1 .sequence_length + ts2 .sequence_length
7153
7312
assert joint_ts .num_samples == ts1 .num_samples
7313
+ assert joint_ts .num_sites == ts1 .num_sites + ts2 .num_sites
7314
+ assert joint_ts .num_mutations == ts1 .num_mutations + ts2 .num_mutations
7154
7315
ts3 = joint_ts .delete_intervals ([[2 , 5 ]]).rtrim ()
7155
7316
# Have to simplify here, to remove the redundant nodes
7317
+ ts3 .tables .assert_equals (ts1 .tables , ignore_provenance = True )
7156
7318
assert ts3 .equals (ts1 .simplify (), ignore_provenance = True )
7157
7319
ts4 = joint_ts .delete_intervals ([[0 , 2 ]]).ltrim ()
7158
7320
assert ts4 .equals (ts2 .simplify (), ignore_provenance = True )
@@ -7200,22 +7362,29 @@ def test_internal_samples(self):
7200
7362
nodes_flags [:] = tskit .NODE_IS_SAMPLE
7201
7363
nodes_flags [- 1 ] = 0 # Only root is not a sample
7202
7364
tables .nodes .flags = nodes_flags
7203
- ts = tables .tree_sequence ()
7365
+ ts = msprime .sim_mutations (tables .tree_sequence (), rate = 0.5 , random_seed = 1 )
7366
+ assert ts .num_mutations > 0
7367
+ assert ts .num_mutations > ts .num_sites
7204
7368
joint_ts = ts .concatenate (ts )
7205
7369
assert joint_ts .num_samples == ts .num_samples
7206
7370
assert joint_ts .num_nodes == ts .num_nodes + 1
7371
+ assert joint_ts .num_mutations == ts .num_mutations * 2
7372
+ assert joint_ts .num_sites == ts .num_sites * 2
7207
7373
assert joint_ts .sequence_length == ts .sequence_length * 2
7208
7374
7209
7375
def test_some_shared_samples(self):
    """Concatenate a comb and a balanced tree that each have node 5 as a sample.

    Checks that the joined result keeps the sample count of the first input,
    has the edges of both inputs, and has a single root in every tree.
    """
    inputs = []
    for make_tree in (tskit.Tree.generate_comb, tskit.Tree.generate_balanced):
        tc = make_tree(5).tree_sequence.dump_tables()
        # Flag node 5 as a sample in addition to the existing samples.
        tc.nodes[5] = tc.nodes[5].replace(flags=tskit.NODE_IS_SAMPLE)
        inputs.append(tc.tree_sequence())
    ts1, ts2 = inputs
    assert ts1.num_samples == ts2.num_samples

    joint_ts = ts1.concatenate(ts2)
    assert joint_ts.num_samples == ts1.num_samples
    assert joint_ts.num_edges == ts1.num_edges + ts2.num_edges
    for joined_tree in joint_ts.trees():
        assert joined_tree.num_roots == 1
7219
7388
7220
7389
def test_provenance (self ):
7221
7390
ts = tskit .Tree .generate_comb (2 ).tree_sequence
@@ -7233,9 +7402,12 @@ def test_unequal_samples(self):
7233
7402
with pytest .raises (ValueError , match = "must have the same number of samples" ):
7234
7403
ts1 .concatenate (ts2 )
7235
7404
7236
- @pytest .mark .skip (
7237
- reason = "union bug: https://github.com/tskit-dev/tskit/issues/3168"
7238
- )
7405
def test_different_sample_numbers(self):
    """Concatenating a 5-leaf comb with a 4-leaf balanced tree raises."""
    comb_ts = tskit.Tree.generate_comb(5, span=2).tree_sequence
    balanced_ts = tskit.Tree.generate_balanced(4, arity=3, span=3).tree_sequence
    expected_message = "must have the same number of samples"
    with pytest.raises(ValueError, match=expected_message):
        comb_ts.concatenate(balanced_ts)
7410
+
7239
7411
def test_duplicate_ts (self ):
7240
7412
ts1 = tskit .Tree .generate_comb (3 , span = 4 ).tree_sequence
7241
7413
ts = ts1 .keep_intervals ([[0 , 1 ]]).trim () # a quarter of the original
0 commit comments