|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to test zarr cross-version compatibility. |
| 4 | +Usage: python test_zarr_cross_version.py [write|read] <filename> |
| 5 | +""" |
| 6 | +import pathlib |
| 7 | +import sys |
| 8 | + |
| 9 | +import msprime |
| 10 | +import tskit |
| 11 | + |
| 12 | +# Add parent directory to path so we can import tszip |
| 13 | +sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) |
| 14 | + |
| 15 | +import tszip # noqa: E402 |
| 16 | + |
| 17 | + |
def all_fields_ts(edge_metadata=True, migrations=True):
    """
    Build a tree sequence that has data in every table and field.

    Edge metadata is left unset when ``edge_metadata`` is False, and no
    migrations are recorded or added when ``migrations`` is False (this is
    needed to test simplify, which doesn't allow either).
    """
    demography = msprime.Demography()
    population_sizes = (
        ("A", 10_000),
        ("B", 5_000),
        ("C", 1_000),
        ("D", 500),
        ("E", 100),
    )
    for pop_name, size in population_sizes:
        demography.add_population(name=pop_name, initial_size=size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

    ancestry = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demography,
        sequence_length=5,
        random_seed=42,
        recombination_rate=1,
        record_migrations=migrations,
        record_provenance=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=0.001, random_seed=42)
    tables = mutated.dump_tables()

    # Rebuild the individuals table so every row carries flags, a location
    # and parents derived from its row index.
    original_individuals = tables.individuals.copy()
    tables.individuals.clear()
    for idx, row in enumerate(original_individuals):
        updated = row.replace(
            flags=idx, location=[idx, idx + 1], parents=[idx - 1, idx - 1]
        )
        tables.individuals.append(updated)

    # Rebuild the nodes table so that all columns have unique values.
    num_individuals = len(tables.individuals)
    num_populations = len(tables.populations)
    original_nodes = tables.nodes.copy()
    tables.nodes.clear()
    for idx, row in enumerate(original_nodes):
        updated = row.replace(
            flags=idx,
            time=row.time + 0.00001 * idx,
            individual=idx % num_individuals,
            population=idx % num_populations,
        )
        tables.nodes.append(updated)

    if migrations:
        tables.migrations.add_row(left=0, right=1, node=21, source=1, dest=3, time=1001)

    # Attach JSON metadata to every row of every table, except the tables
    # the caller asked to leave alone (provenances are always skipped).
    skipped = {"provenances"}
    if not migrations:
        skipped.add("migrations")
    if not edge_metadata:
        skipped.add("edges")
    for table_name, table in tables.table_name_map.items():
        if table_name in skipped:
            continue
        table.metadata_schema = tskit.MetadataSchema.permissive_json()
        encoded_rows = [
            f'{{"foo":"n_{table_name}_{row}"}}' for row in range(len(table))
        ]
        packed, packed_offset = tskit.pack_strings(encoded_rows)
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)

    # Top-level metadata and time units.
    tables.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.metadata = "Test metadata"
    tables.time_units = "Test time units"

    # Reference sequence: one "A" per unit of sequence length.
    tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.reference_sequence.metadata = "Test reference metadata"
    tables.reference_sequence.data = "A" * int(mutated.sequence_length)
    tables.reference_sequence.url = "http://example.com/a_reference"

    # Add some more rows to provenance to have enough for testing.
    for stamp in map(str, range(3)):
        tables.provenances.add_row(record="A", timestamp=stamp)

    return tables.tree_sequence()
| 97 | + |
| 98 | + |
def write_test_file(filename):
    """
    Compress the all-fields tree sequence to ``filename`` using the
    currently installed zarr version, then decompress it again and assert
    that the tables survived the round trip unchanged.
    """
    expected = all_fields_ts()
    tszip.compress(expected, filename)
    roundtripped = tszip.decompress(filename)
    expected.tables.assert_equals(roundtripped.tables)
| 105 | + |
| 106 | + |
def read_test_file(filename):
    """
    Read a test file with the currently installed zarr version.

    The only check performed is that ``tszip.decompress`` succeeds; the
    decompressed contents are not compared against anything. On any failure
    the file name and the exception are reported on stderr and the process
    exits with status 1, so the calling test harness can both detect the
    failure and see what caused it.
    """
    try:
        tszip.decompress(filename)
    except Exception as exc:
        # Report the cause rather than exiting silently: a cross-version
        # read failure is otherwise impossible to diagnose from the logs.
        print(
            f"Failed to decompress {filename!r}: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)
| 113 | + |
| 114 | + |
def main(argv=None):
    """
    Command-line entry point: dispatch to the write or read action.

    ``argv`` is the list of arguments after the program name; when it is
    None, ``sys.argv[1:]`` is used. Exactly two arguments are expected: the
    action (``write`` or ``read``) and the file name. Anything else prints
    the usage line to stderr and exits with status 2, so that a mistyped
    action cannot be mistaken for a successful run.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2 or args[0] not in ("write", "read"):
        print(
            "Usage: python test_zarr_cross_version.py [write|read] <filename>",
            file=sys.stderr,
        )
        sys.exit(2)
    action, filename = args
    if action == "write":
        write_test_file(filename)
    else:
        read_test_file(filename)


if __name__ == "__main__":
    main()
0 commit comments