Skip to content

Commit f33675d

Browse files
committed
Change Ne to population size in API
1 parent b9bf7d8 commit f33675d

File tree

4 files changed

+52
-36
lines changed

4 files changed

+52
-36
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ tsdate.egg-info
99
data/prior_1000df.txt
1010
*.ipynb
1111
.ipynb*
12+
build/
13+
.*.swp

tsdate/cli.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,11 @@ def tsdate_cli_parser():
8080
help="The path and name of output file where the dated tree \
8181
sequence will saved.",
8282
)
83+
# TODO array specification from file?
8384
parser.add_argument(
84-
"Ne", type=float, help="estimated effective (diploid) population size."
85+
"population_size",
86+
type=float,
87+
help="estimated effective (diploid) population size.",
8588
)
8689
parser.add_argument(
8790
"-m",
@@ -190,7 +193,7 @@ def run_date(args):
190193
dated_ts = tsdate.date(
191194
ts,
192195
args.mutation_rate,
193-
args.Ne,
196+
args.population_size,
194197
recombination_rate=args.recombination_rate,
195198
probability_space=args.probability_space,
196199
method=args.method,

tsdate/core.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ def constrain_ages_topo(ts, post_mn, eps, nodes_to_date=None, progress=False):
932932
def date(
933933
tree_sequence,
934934
mutation_rate,
935-
Ne=None,
935+
population_size=None,
936936
recombination_rate=None,
937937
time_units=None,
938938
priors=None,
@@ -966,10 +966,12 @@ def date(
966966
967967
:param TreeSequence tree_sequence: The input :class:`tskit.TreeSequence`, treated as
968968
one whose non-sample nodes are undated.
969-
:param float Ne: The estimated (diploid) effective population size used to construct
970-
the (default) conditional coalescent prior. This is what is used when ``priors``
971-
is ``None``: a positive ``Ne`` value is therefore required in this case.
972-
Conversely, if ``priors`` is not ``None``, no ``Ne`` value should be given.
969+
:param float population_size: The estimated (diploid) effective population
970+
size used to construct the (default) conditional coalescent prior. This may
971+
be a single value, or a two-column array of epoch breakpoints and effective
972+
population sizes within epochs. These are used when ``priors`` is ``None``.
973+
Conversely, if ``priors`` is not ``None``, no ``population_size`` value
974+
should be given.
973975
:param float mutation_rate: The estimated mutation rate per unit of genome per
974976
unit time. If provided, the dating algorithm will use a mutation rate clock to
975977
help estimate node dates. Default: ``None``
@@ -979,12 +981,12 @@ def date(
979981
:param str time_units: The time units used by the ``mutation_rate`` and
980982
``recombination_rate`` values, and stored in the ``time_units`` attribute of the
981983
output tree sequence. If the conditional coalescent prior is used,
982-
then this is also applies to the value of ``Ne``, which in standard coalescent
983-
theory is measured in generations. Therefore if you wish to use mutation and
984-
recombination rates measured in (say) years, and are using the conditional
985-
coalescent prior, the ``Ne`` value which you provide must be scaled by
986-
multiplying by the number of years per generation. If ``None`` (default), assume
987-
``"generations"``.
984+
then this is also applies to the value of ``population_size``, which in
985+
standard coalescent theory is measured in generations. Therefore if you
986+
wish to use mutation and recombination rates measured in (say) years,
987+
and are using the conditional coalescent prior, the ``population_size``
988+
value which you provide must be scaled by multiplying by the number of
989+
years per generation. If ``None`` (default), assume ``"generations"``.
988990
:param NodeGridValues priors: NodeGridValue object containing the prior probabilities
989991
for each node at a set of discrete time points. If ``None`` (default), use the
990992
conditional coalescent prior with a standard set of time points as given by
@@ -1016,7 +1018,7 @@ def date(
10161018
time_units = "generations"
10171019
tree_sequence, dates, posteriors, timepoints, eps, nds = get_dates(
10181020
tree_sequence,
1019-
Ne=Ne,
1021+
population_size=population_size,
10201022
mutation_rate=mutation_rate,
10211023
recombination_rate=recombination_rate,
10221024
priors=priors,
@@ -1034,7 +1036,7 @@ def date(
10341036
tables,
10351037
"date",
10361038
mutation_rate=mutation_rate,
1037-
Ne=Ne,
1039+
population_size=population_size,
10381040
recombination_rate=recombination_rate,
10391041
progress=progress,
10401042
**kwargs,
@@ -1051,7 +1053,7 @@ def date(
10511053
def get_dates(
10521054
tree_sequence,
10531055
mutation_rate,
1054-
Ne=None,
1056+
population_size=None,
10551057
recombination_rate=None,
10561058
priors=None,
10571059
*,
@@ -1087,24 +1089,24 @@ def get_dates(
10871089
approx_priors = True
10881090

10891091
if priors is None:
1090-
if Ne is None:
1092+
if population_size is None:
10911093
raise ValueError(
1092-
"Must specify Ne if priors are not already built using \
1093-
tsdate.build_prior_grid()"
1094+
"Must specify population_size if priors are not already built \
1095+
using tsdate.build_prior_grid()"
10941096
)
10951097
priors = prior.build_grid(
10961098
tree_sequence,
1097-
Ne=Ne,
1099+
population_size=population_size,
10981100
eps=eps,
10991101
progress=progress,
11001102
approximate_priors=approx_priors,
11011103
)
11021104
else:
11031105
logging.info("Using user-specified priors")
1104-
if Ne is not None:
1106+
if population_size is not None:
11051107
raise ValueError(
1106-
"Cannot specify Ne in tsdate.date() or tsdate.get_dates() if \
1107-
specifying priors from tsdate.build_prior_grid()"
1108+
"Cannot specify population_size in tsdate.date() or tsdate.get_dates() \
1109+
if specifying priors from tsdate.build_prior_grid()"
11081110
)
11091111
priors = priors
11101112

tsdate/prior.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,11 @@ class ConditionalCoalescentTimes:
7070
"""
7171

7272
def __init__(
73-
self, precalc_approximation_n, Ne, prior_distr="lognorm", progress=False
73+
self,
74+
precalc_approximation_n,
75+
population_size,
76+
prior_distr="lognorm",
77+
progress=False,
7478
):
7579
"""
7680
:param bool precalc_approximation_n: the size of tree used for
@@ -79,7 +83,7 @@ def __init__(
7983
and therefore do not allow approximate priors to be used
8084
"""
8185
self.n_approx = precalc_approximation_n
82-
self.Ne = Ne
86+
self.population_size = population_size
8387
self.prior_store = {}
8488
self.progress = progress
8589

@@ -177,8 +181,8 @@ def add(self, total_tips, approximate=None):
177181
priors[1] = PriorParams(alpha=0, beta=1, mean=0, var=0)
178182
for var, tips in zip(variances, all_tips):
179183
# NB: it should be possible to vectorize this in numpy
180-
var = var * ((2 * self.Ne) ** 2)
181-
expectation = self.tau_expect(tips, total_tips) * 2 * self.Ne
184+
var = var * ((2 * self.Ne) ** 2) # TODO
185+
expectation = self.tau_expect(tips, total_tips) * 2 * self.Ne # TODO
182186
alpha, beta = self.func_approx(expectation, var)
183187
priors[tips] = PriorParams(
184188
alpha=alpha, beta=beta, mean=expectation, var=var
@@ -947,7 +951,9 @@ def gamma_cdf(t_set, alpha, beta):
947951
return np.insert(t_set, 0, 0)
948952

949953

950-
def fill_priors(node_parameters, timepoints, ts, Ne, *, prior_distr, progress=False):
954+
def fill_priors(
955+
node_parameters, timepoints, ts, population_size, *, prior_distr, progress=False
956+
):
951957
"""
952958
Take the alpha and beta values from the node_parameters array, which contains
953959
one row for each node in the TS (including fixed nodes)
@@ -993,7 +999,7 @@ def fill_priors(node_parameters, timepoints, ts, Ne, *, prior_distr, progress=Fa
993999

9941000
def build_grid(
9951001
tree_sequence,
996-
Ne,
1002+
population_size,
9971003
timepoints=20,
9981004
*,
9991005
approximate_priors=False,
@@ -1011,9 +1017,11 @@ def build_grid(
10111017
10121018
:param TreeSequence tree_sequence: The input :class:`tskit.TreeSequence`, treated as
10131019
undated.
1014-
:param float Ne: The estimated (diploid) effective population size: must be
1015-
specified. Using standard (unscaled) values for ``Ne`` results in a prior where
1016-
times are measures in generations.
1020+
:param float population_size: The estimated (diploid) effective population
1021+
size: must be specified. May be a single value, or a two-column array with
1022+
epoch breakpoints and effective population sizes. Using standard (unscaled)
1023+
values for ``population_size`` results in a prior where times are measures
1024+
in generations.
10171025
:param int_or_array_like timepoints: The number of quantiles used to create the
10181026
time slices, or manually-specified time slices as a numpy array. Default: 20
10191027
:param bool approximate_priors: Whether to use a precalculated approximate prior or
@@ -1033,8 +1041,9 @@ def build_grid(
10331041
inference and a discretised time grid
10341042
:rtype: base.NodeGridValues Object
10351043
"""
1036-
if Ne <= 0:
1037-
raise ValueError("Parameter 'Ne' must be greater than 0")
1044+
# TODO
1045+
if population_size <= 0:
1046+
raise ValueError("Parameter 'population_size' must be greater than 0")
10381047
if approximate_priors:
10391048
if not approx_prior_size:
10401049
approx_prior_size = 1000
@@ -1053,7 +1062,7 @@ def build_grid(
10531062
span_data = SpansBySamples(contmpr_ts, progress=progress, allow_unary=allow_unary)
10541063

10551064
base_priors = ConditionalCoalescentTimes(
1056-
approx_prior_size, Ne, prior_distribution, progress=progress
1065+
approx_prior_size, population_size, prior_distribution, progress=progress
10571066
)
10581067

10591068
base_priors.add(contmpr_ts.num_samples, approximate_priors)
@@ -1088,7 +1097,7 @@ def build_grid(
10881097
prior_params,
10891098
timepoints,
10901099
tree_sequence,
1091-
Ne,
1100+
population_size,
10921101
prior_distr=prior_distribution,
10931102
progress=progress,
10941103
)

0 commit comments

Comments
 (0)