Skip to content

Commit 760433a

Browse files
authored
Merge pull request #29 from IntegerLimit/mr-bayes-support-iqtree-3
Support Exporting Model and Partition Selections to MrBayes
2 parents 6c6f978 + df6e521 commit 760433a

19 files changed

+501
-29
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ tags
1414
test_scripts/iqtree_test_cmds.txt
1515
pllrepo/
1616
build/
17+
cmake-build-debug/
1718
/Default-clang
1819
.idea/
1920
.idea/codeStyleSettings.xml

alignment/alignment.cpp

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include <Eigen/LU>
2525
#ifdef USE_BOOST
26+
#include <boost/bimap.hpp>
2627
#include <boost/math/distributions/binomial.hpp>
2728
#endif
2829

@@ -62,6 +63,8 @@ char genetic_code23[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSS
6263
char genetic_code24[] = "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"; // Pterobranchia mitochondrial
6364
char genetic_code25[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLF"; // Candidate Division SR1 and Gracilibacteria
6465

66+
boost::bimap<int, char*> genetic_code_map;
67+
6568
Alignment::Alignment()
6669
: vector<Pattern>()
6770
{
@@ -1949,7 +1952,32 @@ void Alignment::convertStateStr(string &str, SeqType seq_type) {
19491952
(*it) = convertState(*it, seq_type);
19501953
}
19511954
*/
1952-
1955+
1956+
boost::bimap<int, char*> getGeneticCodeMap() {
1957+
if (!genetic_code_map.empty()) return genetic_code_map;
1958+
1959+
genetic_code_map.insert({1, genetic_code1});
1960+
genetic_code_map.insert({2, genetic_code2});
1961+
genetic_code_map.insert({3, genetic_code3});
1962+
genetic_code_map.insert({4, genetic_code4});
1963+
genetic_code_map.insert({5, genetic_code5});
1964+
genetic_code_map.insert({6, genetic_code6});
1965+
genetic_code_map.insert({9, genetic_code9});
1966+
genetic_code_map.insert({10, genetic_code10});
1967+
genetic_code_map.insert({11, genetic_code11});
1968+
genetic_code_map.insert({12, genetic_code12});
1969+
genetic_code_map.insert({13, genetic_code13});
1970+
genetic_code_map.insert({14, genetic_code14});
1971+
genetic_code_map.insert({16, genetic_code16});
1972+
genetic_code_map.insert({21, genetic_code21});
1973+
genetic_code_map.insert({22, genetic_code22});
1974+
genetic_code_map.insert({23, genetic_code23});
1975+
genetic_code_map.insert({24, genetic_code24});
1976+
genetic_code_map.insert({25, genetic_code25});
1977+
1978+
return genetic_code_map;
1979+
}
1980+
19531981
void Alignment::initCodon(char *gene_code_id) {
19541982
// build index from 64 codons to non-stop codons
19551983
int transl_table = 1;
@@ -1959,30 +1987,13 @@ void Alignment::initCodon(char *gene_code_id) {
19591987
} catch (string &str) {
19601988
outError("Wrong genetic code ", gene_code_id);
19611989
}
1962-
switch (transl_table) {
1963-
case 1: genetic_code = genetic_code1; break;
1964-
case 2: genetic_code = genetic_code2; break;
1965-
case 3: genetic_code = genetic_code3; break;
1966-
case 4: genetic_code = genetic_code4; break;
1967-
case 5: genetic_code = genetic_code5; break;
1968-
case 6: genetic_code = genetic_code6; break;
1969-
case 9: genetic_code = genetic_code9; break;
1970-
case 10: genetic_code = genetic_code10; break;
1971-
case 11: genetic_code = genetic_code11; break;
1972-
case 12: genetic_code = genetic_code12; break;
1973-
case 13: genetic_code = genetic_code13; break;
1974-
case 14: genetic_code = genetic_code14; break;
1975-
case 15: genetic_code = genetic_code15; break;
1976-
case 16: genetic_code = genetic_code16; break;
1977-
case 21: genetic_code = genetic_code21; break;
1978-
case 22: genetic_code = genetic_code22; break;
1979-
case 23: genetic_code = genetic_code23; break;
1980-
case 24: genetic_code = genetic_code24; break;
1981-
case 25: genetic_code = genetic_code25; break;
1982-
default:
1990+
auto code_map = getGeneticCodeMap();
1991+
auto found = code_map.left.find(transl_table);
1992+
if (found == code_map.left.end()) {
19831993
outError("Wrong genetic code ", gene_code_id);
1984-
break;
1994+
return;
19851995
}
1996+
genetic_code = found->second;
19861997
} else {
19871998
genetic_code = genetic_code1;
19881999
}
@@ -2015,6 +2026,15 @@ void Alignment::initCodon(char *gene_code_id) {
20152026
// cout << "num_states = " << num_states << endl;
20162027
}
20172028

2029+
int Alignment::getGeneticCodeId() {
2030+
if (seq_type != SEQ_CODON || genetic_code == nullptr) return 0;
2031+
2032+
auto code_map = getGeneticCodeMap();
2033+
auto found = code_map.right.find(genetic_code);
2034+
if (found == code_map.right.end()) return 0;
2035+
return found->second;
2036+
}
2037+
20182038
int getMorphStates(StrVector &sequences) {
20192039
char maxstate = 0;
20202040
for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++)

alignment/alignment.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,12 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
10581058
*/
10591059
void extractMapleFile(const std::string& aln_name, const InputType& format);
10601060

1061+
/**
1062+
* Get the numerical id of the genetic code
1063+
* @return id the genetic code id, or 0 if not a codon type
1064+
*/
1065+
int getGeneticCodeId();
1066+
10611067
protected:
10621068

10631069

main/phyloanalysis.cpp

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,10 @@ void printOutfilesInfo(Params &params, IQTree &tree) {
13301330
if (params.optimize_linked_gtr) {
13311331
cout << " GTRPMIX nex file: " << params.out_prefix << ".GTRPMIX.nex" << endl;
13321332
}
1333+
1334+
if (params.mr_bayes_output) {
1335+
cout << " MrBayes block written to: " << params.out_prefix << ".mr_bayes.nex" << endl;
1336+
}
13331337
cout << endl;
13341338

13351339
}
@@ -3307,6 +3311,91 @@ SuperNeighbor* findRootedNeighbour(SuperNeighbor* super_root, int part_id) {
33073311
return nullptr;
33083312
}
33093313

3314+
void printMrBayesBlockFile(Params &params, IQTree* &iqtree) {
3315+
ofstream out;
3316+
string filename = string(params.out_prefix) + ".mr_bayes.nex";
3317+
try {
3318+
out.exceptions(ios::failbit | ios::badbit);
3319+
out.open(filename);
3320+
3321+
// Write Warning
3322+
out << "#nexus" << endl << endl
3323+
<<"[This MrBayes Block Declaration provides the basic "
3324+
<< (iqtree->isSuperTree() ? "partition structure and models" : "models")
3325+
<< " from the IQTree Run.]" << endl
3326+
<< "[Note that MrBayes does not support a large collection of models, so defaults of 'nst=6' for DNA and 'gtr' for Protein will be used if a model that does not exist in MrBayes is used.]" << endl
3327+
<< "[Furthermore, the Model Parameter '+R' will be replaced by '+G+I'.]" << endl
3328+
<< "[This should be used as a Template Only.]" << endl << endl;
3329+
3330+
// Begin File, Print Charsets
3331+
out << "begin mrbayes;" << endl;
3332+
} catch (ios::failure &) {
3333+
outError(ERR_WRITE_OUTPUT, filename);
3334+
}
3335+
3336+
if (!iqtree->isSuperTree()) {
3337+
out << " [IQTree inferred model " << iqtree->getModelName() << ", ";
3338+
iqtree->getModel()->printMrBayesModelText(out, "all", "");
3339+
3340+
out << endl << "end;" << endl;
3341+
out.close();
3342+
return;
3343+
}
3344+
3345+
auto stree = (PhyloSuperTree*) iqtree;
3346+
auto saln = (SuperAlignment*) stree->aln;
3347+
auto size = stree->size();
3348+
3349+
for (int part = 0; part < size; part++) {
3350+
string name = saln->partitions[part]->name;
3351+
replace(name.begin(), name.end(), '+', '_');
3352+
out << " charset " << name << " = ";
3353+
3354+
string pos = saln->partitions[part]->position_spec;
3355+
replace(pos.begin(), pos.end(), ',' , ' ');
3356+
out << pos << ";" << endl;
3357+
}
3358+
3359+
// Create Partition
3360+
out << endl << " partition iqtree = " << size << ": ";
3361+
for (int part = 0; part < size; part++) {
3362+
if (part != 0) out << ", ";
3363+
3364+
string name = saln->partitions[part]->name;
3365+
replace(name.begin(), name.end(), '+', '_');
3366+
out << name;
3367+
}
3368+
out << ";" << endl;
3369+
3370+
// Set Partition for Use
3371+
out << " set partition = iqtree;" << endl << endl;
3372+
3373+
// Partition-Specific Model Information
3374+
for (int part = 0; part < size; part++) {
3375+
PhyloTree* curr_tree = stree->at(part);
3376+
3377+
out << " [Subset #" << part + 1 << ": IQTree inferred model " << curr_tree->getModelName() << ", ";
3378+
curr_tree->getModel()->printMrBayesModelText(out,
3379+
convertIntToString(part + 1),
3380+
saln->partitions[part]->name);
3381+
out << endl;
3382+
}
3383+
3384+
// Partition Type Settings
3385+
if (params.partition_type != TOPO_UNLINKED) {
3386+
out << " unlink statefreq=(all) revmat=(all) shape=(all) pinvar=(all) tratio=(all);" << endl;
3387+
if (params.partition_type != BRLEN_FIX) {
3388+
out << " prset applyto=(all) ratepr=variable;" << endl;
3389+
if (params.partition_type != BRLEN_SCALE) {
3390+
out << " unlink brlens=(all);" << endl;
3391+
}
3392+
}
3393+
}
3394+
3395+
out << "end;" << endl;
3396+
out.close();
3397+
}
3398+
33103399
/************************************************************
33113400
* MAIN TREE RECONSTRUCTION
33123401
***********************************************************/
@@ -3810,8 +3899,14 @@ void runTreeReconstruction(Params &params, IQTree* &iqtree) {
38103899

38113900
}
38123901
if (iqtree->isSuperTree()) {
3813-
((PhyloSuperTree*) iqtree)->computeBranchLengths();
3814-
((PhyloSuperTree*) iqtree)->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
3902+
auto stree = (PhyloSuperTree*) iqtree;
3903+
stree->computeBranchLengths();
3904+
stree->printBestPartitionParams((string(params.out_prefix) + ".best_model.nex").c_str());
3905+
}
3906+
if (params.mr_bayes_output) {
3907+
cout << endl << "Writing MrBayes Block Files..." << endl;
3908+
printMrBayesBlockFile(params, iqtree);
3909+
cout << endl;
38153910
}
38163911

38173912
cout << "BEST SCORE FOUND : " << iqtree->getCurScore() << endl;

main/phylotesting.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,13 @@ string convertSeqTypeToSeqTypeName(SeqType seq_type);
811811

812812
string detectSeqTypeName(string model_name);
813813

814+
/**
815+
* get string name from a SeqType object
816+
* @param seq_type input sequence type
817+
* @return name
818+
*/
819+
string getSeqTypeName(SeqType seq_type);
820+
814821
/****************************************************/
815822
/* Q MATRICES NESTING CHECK */
816823
/****************************************************/

model/modelbin.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,31 @@ string ModelBIN::getNameParams(bool show_fixed_params) {
7676
}
7777
return retname.str();
7878
}
79+
80+
void ModelBIN::printMrBayesModelText(ofstream& out, string partition, string charset) {
81+
RateHeterogeneity* rate = phylo_tree->getRate();
82+
bool equal_freq = (freq_type == FREQ_EQUAL);
83+
84+
// Free Rate should be substituted by +G (+I not supported)
85+
bool has_gamma = rate->getGammaShape() != 0.0 || rate->isFreeRate();
86+
87+
// MrBayes's Binary Model is 'F81-like'.
88+
out << "using MrBayes model F81" << (has_gamma ? "+G" : "") << (equal_freq ? "+FQ" : "") << "]" << endl;
89+
if (rate->isFreeRate() || rate->getPInvar() > 0.0) {
90+
out << " [+I modifier ignored, not supported by MrBayes for binary data]" << endl;
91+
outWarning("MrBayes does not support Invariable Sites with Binary Data! +I has been ignored!");
92+
}
93+
94+
// Lset Parameters
95+
out << " lset applyto=(" << partition << ") rates=";
96+
if (has_gamma) {
97+
// Rate Categories + Gamma
98+
out << "gamma";
99+
} else
100+
out << "equal";
101+
102+
out << ";" << endl;
103+
104+
if (equal_freq)
105+
out << " prset applyto=(" << partition << ") statefreqpr=fixed(equal);" << endl;
106+
}

model/modelbin.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ class ModelBIN : public ModelMarkov
5555
*/
5656
virtual void startCheckpoint();
5757

58+
/**
59+
* Print the model information in a format that can be accepted by MrBayes, using lset and prset.<br>
60+
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes.
61+
* @param out the ofstream to print the result to
62+
* @param partition the partition to apply lset and prset to
63+
* @param charset the current partition's charset.
64+
*/
65+
virtual void printMrBayesModelText(ofstream& out, string partition, string charset);
5866
};
5967

6068
#endif

model/modelcodon.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,41 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) {
11821182
return score;
11831183
}
11841184

1185+
void ModelCodon::printMrBayesModelText(ofstream& out, string partition, string charset) {
1186+
// NST should be 1 if fix_kappa is true (no ts/tv ratio), else it should be 2
1187+
// GTR codon is not available in IQTREE
1188+
int nst = fix_kappa ? 1 : 2;
1189+
int code = phylo_tree->aln->getGeneticCodeId();
1190+
string mr_bayes_code = getMrBayesGeneticCode(code);
1191+
bool code_not_supported = mr_bayes_code.empty();
1192+
RateHeterogeneity* rate = phylo_tree->getRate();
1193+
1194+
string model_name = fix_kappa ? "JC" : "HKY";
1195+
1196+
// If this model is a Empirical / Semi-Empirical Model
1197+
if (num_params == 0 || name.find('_') != string::npos) {
1198+
nst = 6;
1199+
model_name = "GTR";
1200+
}
1201+
1202+
out << "using MrBayes model " << model_name << "]" << endl;
1203+
1204+
if (rate->isFreeRate() || rate->getGammaShape() > 0.0 || rate->getPInvar() > 0.0) {
1205+
out << " [Rate modifiers (+I, +G, +R) ignored, not supported by MrBayes codon models]" << endl;
1206+
outWarning("MrBayes Codon Models do not support any Heterogenity Rate Modifiers! (+I, +G, +R) They have been ignored!");
1207+
}
1208+
1209+
if (code_not_supported) {
1210+
outWarning("MrBayes Does Not Support Codon Code " + convertIntToString(code) + "! Defaulting to Universal (Code 1).");
1211+
mr_bayes_code = "universal";
1212+
}
1213+
1214+
out << " [IQTree genetic code " << code << ", using MrBayes genetic code " << mr_bayes_code << "]" << endl;
1215+
if (code_not_supported)
1216+
out << " [Genetic code " << code << " is not supported by MrBayes, defaulting to universal (code 1)]" << endl;
1217+
1218+
out << " lset applyto=(" << partition << ") nucmodel=codon omegavar=equal nst=" << nst << " code=" << mr_bayes_code << ";" << endl;
1219+
}
11851220

11861221
void ModelCodon::writeInfo(ostream &out) {
11871222
if (name.find('_') == string::npos)

model/modelcodon.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,15 @@ class ModelCodon: public ModelMarkov {
222222
*/
223223
virtual bool getVariables(double *variables);
224224

225+
/**
226+
* Print the model information in a format that can be accepted by MrBayes, using lset and prset.<br>
227+
* By default, it simply prints a warning to the log and to the stream, stating that this model is not supported by MrBayes.
228+
* @param out the ofstream to print the result to
229+
* @param partition the partition to apply lset and prset to
230+
* @param charset the current partition's charset.
231+
*/
232+
virtual void printMrBayesModelText(ofstream& out, string partition, string charset);
233+
225234
};
226235

227236
#endif /* MODELCODON_H_ */

0 commit comments

Comments
 (0)