Skip to content

Commit

Permalink
clean
Browse files Browse the repository at this point in the history
  • Loading branch information
zqfang committed Aug 16, 2024
1 parent 8adc2c1 commit 48eb460
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 55 deletions.
22 changes: 21 additions & 1 deletion haplomap/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,31 @@ build/bin/haplomap ghmap -p ${HOME}/data/test_traits.txt \
By default. Output result are gene-summrized (see `haplomap ghmap --help`).
Recommend adding `-a` flag, which will output gene-oriented format results.

**NOTE 2:** strain order in (-p) should keep the same to the (-b). That's, eblocks (-s)
**NOTE 2:** strain order in (-p) should keep the same to the eblocks (-s)


## Input
see `example` folder for test cases.

### convert
vcf file from variant calling tool.
vcf must be genotyped (has the GT field)

**Encoding**
1. SNV: A->A, T->T, G->G, C->C
2. INDEL/SV:
- reference: A
- deletion: G
- duplication or tanderm duplication: G
- insertion: G
- inversion: G
- breakend: G


### annotate

For SVs, must contain SVTYPE, and SVLEN

### eblocks:
- Strain file (-s):
- Tree column txt file: "#Abbrev \t (Optional) \t Values "
Expand Down
4 changes: 2 additions & 2 deletions haplomap/include/haplolib.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ class SNPInfo
std::string name;
int chrIdx; // chromosome index
int position; // location on chromosome
char ref;
char ref; // reference allele char
// string of alleles "ACGTD?" "D" is deletion, "?" is unspecified.
char *alleles;
// string of haplotype numbers (not printable).
char *pattern;
std::map<std::string, std::string> geneCodonMap; // Map gene names to codons
std::map<std::string, std::string> geneCodonMap; // Map gene names to codons/annotations
bool frozen; // when true, pattern is uniquified and should not be changed.
bool used; // Is this SNP in a chosen block?
bool qMarks; //Are there any unkowns in this SNP?
Expand Down
17 changes: 10 additions & 7 deletions haplomap/include/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct VCFOptions
class Variant {

public:
bool isSNP;
int TYPE; // snv: 1, indel: 2, sv: 3, unknown: 0
std::string CHROM;
int POS;
std::string ID;
Expand All @@ -75,7 +75,8 @@ class Variant {

private:
void parseFORMAT(const std::vector<std::string> & rec);
bool issnp();
bool isSNP();
bool isSV(); // is structral variant ?
float max(const std::string & fmt);
};

Expand All @@ -91,14 +92,14 @@ class VCF
void parseRecords();
void parseHeader();
void checkSamples();
bool parseStructralVariant(std::vector<std::string> & alleles,
bool parseStructralVariant(std::string& alleles,
std::vector<std::string>& alts, std::vector<int>& hasAlt);
bool parseSNP(std::vector<std::string> & alleles,
bool parseSNP(std::string& alleles,
std::vector<std::string>& alts, std::vector<int>& hasAlt);
void writeSNP(std::vector<std::string> &alleles);
void writeTPED(std::vector<std::string> &alleles);
void writeSNP(std::string& alleles);
void writeTPED(std::string& alleles);
void writeTFAM();
void writeStructralVariant(std::vector<std::string> &alleles, const char* varType);
void writeStructralVariant(std::string& alleles);

private:
std::string line;
Expand All @@ -113,6 +114,8 @@ class VCF
// read input file
std::istream* input;
VCFOptions* opts;
char getAllele(std::string& alleles, int i) { return alleles[i]; }
void setAllele(std::string& alleles, int i, char a) { alleles[i] = a; }
};

#endif
16 changes: 15 additions & 1 deletion haplomap/src/eblocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,21 @@ void readAlleleInfoCompact(char *fname, char* refgenome)
{
std::cout<<"Reference Genome is found: "<<refgenome<<std::endl;
}

/// check input strains can be found in the database
for (int j = 0; j < strainAbbrevs.size(); j++)
{
int idx = allStrains.hasIndex(strainAbbrevs.eltOf(j));
if (idx < 0)
{
std::cerr<<"Fatal error: "<<strainAbbrevs.eltOf(j)<< " is not found in the variant database!"<<std::endl;
std::cerr<<"--> Variant database: "<<fname<<std::endl;
std::cerr<<"--> Variant database has names: "<<std::endl;;
allStrains.dump();
std::exit(1);
}

}

// read the lines with the SNP info
while ((numtoks = rdr.getLine()) >= 0)
{
Expand Down
8 changes: 4 additions & 4 deletions haplomap/src/ehaploblocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,15 @@ EblockOptions *parseEblockOptions(int argc, char **argv)
if (argc == 1)
{
std::cout<<usage<<std::endl;
exit(1);
std::exit(1);
}

// isn't there some way to do this automatically based on above info?
if (NULL == opts->strainsFileName)
{
std::cerr << usage << std::endl;
std::cerr << "Required arg missing: strains file name (-s)" << std::endl;
exit(1);
std::exit(1);
}
else if (NULL == opts->allelesFileName)
{
Expand Down Expand Up @@ -229,11 +229,11 @@ int main_eblocks(int argc, char **argv)
// Can't do this earlier because numStrains is used in constructor.
snpVec.reserve(approxNumSNPs);

// FIXME: Add option for "compact SNP file"
// read variant database
beginPhase("reading compact alleles file");
readAlleleInfoCompact(opts->allelesFileName, opts->refStrainName);
endPhase();
std::string chr = chromosomes.eltOf(0);
// std::string chr = chromosomes.eltOf(0);
// if no gene names file specified, don't read it, omit gene names.
if (opts->genesFileName)
{
Expand Down
6 changes: 4 additions & 2 deletions haplomap/src/haplolib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,12 @@ std::string SNPInfo::allelesToPattern(void)
map[(int)'G'] = '?';
map[(int)'T'] = '?';
map[(int)'?'] = '?';
map[(int)'N'] = '?'; // ref allele, used in indel or sv
map[(int)'D'] = '?'; // deletion
map[(int)'U'] = '?'; // duplication
map[(int)'I'] = '?'; // insertion
map[(int)'U'] = '?'; // duplication, or tanderm duplication
map[(int)'S'] = '?'; // insertion
map[(int)'V'] = '?'; // inversion
map[(int)'K'] = '?'; // breakend
// update Ref if defined
int eqclass = 0;
if (this->ref != '?')
Expand Down
Loading

0 comments on commit 48eb460

Please sign in to comment.