-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathestim_routes.py
executable file
·77 lines (56 loc) · 2.44 KB
/
estim_routes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Estimate frequencies of alternative allele in locations and clusters
"""
import numpy as np
import os
from itertools import product
from pop_hist_opt_wright_fisher import HistOpt, PopData, ClusterCov
import warnings
# warnings.filterwarnings("ignore", category=RuntimeWarning)
def run_freq_bayes(cl_tag, dist_tag, pop_tag):
"""
Run optimisation in particular cluster and species type, under particular hypothesis
:param cl_tag: tag of cluster
:param dist_tag: tag of hypothesis
:param pop_tag: desi/kabuli tag
:return:
"""
seeds = [239, 30, 123] # Seeds for three chains
for iseed in seeds:
np.random.seed(seed=iseed) # Set random seed
file_ndata = path_data + 'n_data_{}.txt'.format(pop_tag) # file with counts of alternative allele
file_qdata = path_data + 'q_data_{}.txt'.format(pop_tag) # file with total allele counts
file_cov = path_cov + '{}_cov_{}.txt'.format(cl_tag, dist_tag) # file with covariance matrix
data = PopData(file_qdata, file_ndata) # Dataset
mx_cov = ClusterCov(file_cov) # Covariance matrix
opt_hist = HistOpt(mx_cov, data, ab_flag=True, ilr_flag=True) # Optimizer
path_res = path_mcmc + '{}_{}_{}_{}/'.format(cl_tag, dist_tag, pop_tag, iseed) # Path for MCMC
print(path_res)
opt_hist.optimise(path_res, n_thr=30) # Run optimiser
# ------------------------------------------------
# PATHS
# ------------------------------------------------
path_pref = 'data/'
path_data = path_pref + 'samples/'
path_cov = path_pref + 'cov_mx/'
path_mcmc = path_pref + 'mcmc/'
if not os.path.exists(path_mcmc):
os.mkdir(path_mcmc)
# ------------------------------------------------
# TAGS
# ------------------------------------------------
cluster_tags = ['ETHI', 'IND', 'LEB', 'MOR', 'TUR', 'UZB']
dist_tags = ['routes', 'lin']
pop_tags = ['desi', 'kabuli']
# Combinations to skip: we do not have Kabulis in Ethiopid and India
skip_combinations = [('kabuli', 'ETHI', 'lin'),
('kabuli', 'ETHI', 'routes'),
('kabuli', 'IND', 'lin'),
('kabuli', 'IND', 'routes')]
# ------------------------------------------------
# OPTIMISATION
# ------------------------------------------------
for pop_tag, cl_tag, dist_tag in product(pop_tags, cluster_tags, dist_tags):
if (pop_tag, cl_tag, dist_tag) in skip_combinations:
continue
run_freq_bayes(cl_tag, dist_tag, pop_tag)