@@ -355,6 +355,104 @@ def test_wrong_format(self):
355
355
with self .assertRaises (exceptions .FileFormatError ):
356
356
tszip .decompress (self .path )
357
357
358
def test_struct_metadata_roundtrip(self):
    """
    Top-level metadata encoded with the ``struct`` codec must decode to the
    same value after a compress/decompress cycle.
    """
    # Each array element is stored as an unsigned 32-bit integer ("I").
    uint32_item = {
        "type": "integer",
        "binaryFormat": "I",
    }
    struct_schema = {
        "codec": "struct",
        "type": "object",
        "properties": {
            "reverse_node_map": {
                "type": "array",
                "items": uint32_item,
            }
        },
    }
    node_map = [847973, 1442881, 356055, 2542708, 285222, 175110]

    tables = msprime.simulate(10, random_seed=1).dump_tables()
    tables.metadata_schema = tskit.MetadataSchema(struct_schema)
    tables.metadata = {"reverse_node_map": node_map}
    source_ts = tables.tree_sequence()

    tszip.compress(source_ts, self.path)
    restored_ts = tszip.decompress(self.path)
    self.assertEqual(restored_ts.metadata, source_ts.metadata)
385
+
386
def test_utf8_time_units_roundtrip(self):
    """
    A ``time_units`` string containing non-ASCII characters must come back
    unchanged from a compress/decompress cycle.
    """
    # Greek, Nordic and Chinese characters: each one needs more than a
    # single byte when encoded as UTF-8 (code points above 127).
    unicode_units = "μβrånches per γενεᾱ 世代"

    tables = msprime.simulate(10, random_seed=1).dump_tables()
    tables.time_units = unicode_units
    source_ts = tables.tree_sequence()

    tszip.compress(source_ts, self.path)
    restored_ts = tszip.decompress(self.path)
    self.assertEqual(restored_ts.time_units, source_ts.time_units)
397
+
398
def test_json_metadata_roundtrip(self):
    """
    Top-level metadata encoded with the ``json`` codec, including non-ASCII
    text, must be recovered after a compress/decompress cycle, and the
    metadata schema must be recovered along with it.
    """

    def of_type(type_name):
        # Build a fresh leaf schema each time so no dict objects are shared.
        return {"type": type_name}

    json_schema = {
        "codec": "json",
        "type": "object",
        "properties": {
            "description": of_type("string"),
            "sample_count": of_type("integer"),
            "parameters": {
                "type": "object",
                "properties": {
                    "Ne": of_type("number"),
                    "mutation_rate": of_type("number"),
                    "recombination_rate": of_type("number"),
                },
            },
            "tags": {"type": "array", "items": of_type("string")},
            "version": of_type("number"),
            "unicode_text": of_type("string"),
            "author": of_type("string"),
        },
    }

    simulation_parameters = {
        "Ne": 1000,
        "mutation_rate": 1e-8,
        "recombination_rate": 1e-8,
    }
    expected_metadata = {
        "description": "Test tree sequence with JSON metadata",
        "sample_count": 10,
        "parameters": simulation_parameters,
        "tags": ["test", "simulation", "msprime"],
        "version": 1.0,
        # Code points above 127, including characters outside the BMP.
        "unicode_text": "Héllo Wørld! 你好世界 🧬🌳",
        # Accented Latin characters.
        "author": "José María González-Pérez",
    }

    tables = msprime.simulate(10, random_seed=1).dump_tables()
    tables.metadata_schema = tskit.MetadataSchema(json_schema)
    tables.metadata = expected_metadata
    source_ts = tables.tree_sequence()

    tszip.compress(source_ts, self.path)
    restored_ts = tszip.decompress(self.path)

    self.assertEqual(restored_ts.metadata, expected_metadata)
    self.assertEqual(restored_ts.metadata_schema, source_ts.metadata_schema)
445
+
446
def test_raw_metadata_with_high_bytes(self):
    """
    Raw ``bytes`` assigned as top-level metadata (this test sets no schema)
    must be returned byte-for-byte after a compress/decompress cycle, even
    when some byte values are above 127.
    """
    # Byte values 65, 66, 200, 150, 255, 128: the last four are > 127.
    payload = b"AB\xc8\x96\xff\x80"

    tables = msprime.simulate(10, random_seed=1).dump_tables()
    tables.metadata = payload
    source_ts = tables.tree_sequence()

    tszip.compress(source_ts, self.path)
    restored_ts = tszip.decompress(self.path)
    self.assertEqual(restored_ts.metadata, payload)
455
+
358
456
359
457
class TestFileErrors (unittest .TestCase ):
360
458
"""
@@ -411,3 +509,20 @@ def test_open_both(self):
411
509
ts = tszip .load (files / "1.0.0.trees.tsz" )
412
510
ts2 = tszip .load (files / "1.0.0.trees" )
413
511
assert ts == ts2
512
+
513
def test_issue95_metadata_dtype_regression(self):
    """
    Files with struct metadata that were compressed by version <=0.2.5,
    which stored metadata as the wrong dtype, must still decompress and
    yield the same metadata as the reference tree sequence.
    """
    data_dir = pathlib.Path(__file__).parent / "files"

    reference_ts = tszip.load(data_dir / "issue95_metadata_dtype.trees")
    # This archive was written by 0.2.5; loading it should now succeed.
    restored_ts = tszip.load(data_dir / "issue95_metadata_bug.tsz")

    restored_metadata = restored_ts.metadata
    reference_metadata = reference_ts.metadata

    assert restored_metadata == reference_metadata
    assert isinstance(restored_metadata, dict)
    assert "reverse_node_map" in restored_metadata
    restored_len = len(restored_metadata["reverse_node_map"])
    reference_len = len(reference_metadata["reverse_node_map"])
    assert restored_len == reference_len
0 commit comments