Skip to content

Commit

Permalink
basic json output for trace assembly
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Jun 5, 2019
1 parent 5eef136 commit b460a0e
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 8 deletions.
24 changes: 22 additions & 2 deletions src/assemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ namespace tracy {
DnaScore<int32_t> aliscore;
boost::filesystem::path reference;
boost::filesystem::path alignment;
boost::filesystem::path outfile;
std::vector<boost::filesystem::path> ab;
};

Expand Down Expand Up @@ -91,6 +92,7 @@ namespace tracy {
boost::program_options::options_description otp("Output options");
otp.add_options()
("alignment,a", boost::program_options::value<boost::filesystem::path>(&c.alignment)->default_value("al.fa.gz"), "vertical alignment")
("outfile,o", boost::program_options::value<boost::filesystem::path>(&c.outfile)->default_value("out.json"), "output file")
;

boost::program_options::options_description hidden("Hidden options");
Expand Down Expand Up @@ -196,9 +198,11 @@ namespace tracy {
int32_t bestScore = std::max(gsFwd, gsRev);
scoreIdx.push_back(std::make_pair(-bestScore, i));
if (gsFwd >= gsRev) {
std::cerr << "Forward alignment" << std::endl;
traceProfiles.push_back(ptrace);
sequences.push_back(primarySeq);
} else {
std::cerr << "Reverse alignment" << std::endl;
traceProfiles.push_back(prevtrace);
reverseComplement(primarySeq);
sequences.push_back(primarySeq);
Expand Down Expand Up @@ -235,8 +239,14 @@ namespace tracy {
std::string cs;
consensus(c, align, gapped, cs);

// Create gapped traces
// Output gapped traces
std::ofstream rfile(c.outfile.c_str());
rfile << "{" << std::endl;
msa(rfile, align);
rfile << "\"gappedTraces\": " << std::endl;
rfile << "[" << std::endl;
for(uint32_t i = 0; i < scoreIdx.size(); ++i) {
if (i!=0) rfile << ", ";
Trace tr;
if (!readab(c.ab[scoreIdx[0].second].string(), tr)) return -1;

Expand All @@ -256,8 +266,18 @@ namespace tracy {

// Debug
//traceTxtOut("debug.trimmed.trace.txt", nbc, ntr);
//alignmentTracePadding(final, tr, bc, i+1, ntr, nbc);

// Trace padding with gaps
BaseCalls padbc;
Trace padtr;
alignmentTracePadding(align, ntr, nbc, i+1, padtr, padbc);

// Append gapped trace to output
assemblyTrace(rfile, padbc, padtr, c.ab[scoreIdx[i].second].stem().string());
}
rfile << "]" << std::endl;
rfile << "}" << std::endl;
rfile.close();

// Output vertical alignment
boost::iostreams::filtering_ostream rcfile;
Expand Down
96 changes: 91 additions & 5 deletions src/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ Contact: Tobias Rausch ([email protected])

#include <boost/progress.hpp>

#include <nlohmann/json.hpp>

#include "abif.h"
#include "fmindex.h"
#include "version.h"
Expand Down Expand Up @@ -212,6 +210,90 @@ namespace tracy
rfile.close();
}


template<typename TOFStream, typename TAlign>
inline void
msa(TOFStream& rfile, TAlign const& align) {
rfile << "\"msa\": {" << std::endl;
for(uint32_t i = 0; i<align.shape()[0]; ++i) {
if (i!=0) rfile << "," << std::endl;
rfile << "\"altalign\": \"";
for(uint32_t j = 0; j<align.shape()[1]; ++j) rfile << align[i][j];
rfile << "\"" << std::endl;
}
rfile << "}," << std::endl;
}

template<typename TOFStream>
inline void
assemblyTrace(TOFStream& rfile, BaseCalls& bc, Trace const& tr, std::string const& traceFileName) {
typedef Trace::TValue TValue;

// Output trace
rfile << "{" << std::endl;
rfile << "\"traceFileName\": \"" << traceFileName << "\"," << std::endl;
rfile << "\"pos\": [";
for(uint32_t i = 0; i<tr.traceACGT[0].size(); ++i) {
if (i!=0) rfile << ", ";
rfile << (i+1);
}
rfile << "]," << std::endl;
rfile << "\"peakA\": [";
for(uint32_t i = 0; i<tr.traceACGT[0].size(); ++i) {
if (i!=0) rfile << ", ";
rfile << tr.traceACGT[0][i];
}
rfile << "]," << std::endl;
rfile << "\"peakC\": [";
for(uint32_t i = 0; i<tr.traceACGT[0].size(); ++i) {
if (i!=0) rfile << ", ";
rfile << tr.traceACGT[1][i];
}
rfile << "]," << std::endl;
rfile << "\"peakG\": [";
for(uint32_t i = 0; i<tr.traceACGT[0].size(); ++i) {
if (i!=0) rfile << ", ";
rfile << tr.traceACGT[2][i];
}
rfile << "]," << std::endl;
rfile << "\"peakT\": [";
for(uint32_t i = 0; i<tr.traceACGT[0].size(); ++i) {
if (i!=0) rfile << ", ";
rfile << tr.traceACGT[3][i];
}
rfile << "]," << std::endl;

// Basecalls
uint32_t bcpos = 0;
TValue idx = bc.bcPos[0];
rfile << "\"basecallPos\": [";
for(int32_t i = 0; i < (int32_t) tr.traceACGT[0].size(); ++i) {
if (idx == i) {
if (i!=bc.bcPos[0]) rfile << ", ";
rfile << (i+1);
if (bcpos < bc.bcPos.size() - 1) idx = bc.bcPos[++bcpos];
}
}
rfile << "]," << std::endl;
bcpos = 0;
idx = bc.bcPos[0];
uint32_t gaplessbcpos = 0;
rfile << "\"basecalls\": {";
for(int32_t i = 0; i < (int32_t) tr.traceACGT[0].size(); ++i) {
if (idx == i) {
if (i!=bc.bcPos[0]) rfile << ", ";
if (bc.primary[bcpos] != '-') {
rfile << "\"" << (i+1) << "\"" << ":" << "\"" << (++gaplessbcpos) << ":" << bc.primary[bcpos];
if (bc.primary[bcpos] != bc.secondary[bcpos]) rfile << "|" << bc.secondary[bcpos];
rfile << "\"";
} else rfile << "\"" << (i+1) << "\"" << ":" << "\"-\"";
if (bcpos < bc.bcPos.size() - 1) idx = bc.bcPos[++bcpos];
}
}
rfile << "}" << std::endl;
rfile << "}" << std::endl;
}

inline std::pair<int32_t, int32_t>
xWindowViewport(BaseCalls const& bc, int32_t const pos) {
int32_t lb = bc.bcPos[pos] + 1;
Expand Down Expand Up @@ -351,7 +433,7 @@ namespace tracy

template<typename TAlign>
inline void
alignmentTracePadding(TAlign const& align, Trace const& tr, BaseCalls const& bc, Trace& ntr, BaseCalls& nbc) {
alignmentTracePadding(TAlign const& align, Trace const& tr, BaseCalls const& bc, uint32_t const alignRow, Trace& ntr, BaseCalls& nbc) {
typedef Trace::TMountains TMountains;
typedef Trace::TValue TValue;
typedef std::vector<uint32_t> TVecVal;
Expand All @@ -372,7 +454,7 @@ namespace tracy
bool ingap = false;
uint32_t gapsize = 0;
for(uint32_t j = 0; j<align.shape()[1]; ++j) {
if (align[0][j] == '-') {
if (align[alignRow][j] == '-') {
if (ingap) ++gapsize;
else {
gapsize = 1;
Expand Down Expand Up @@ -456,7 +538,11 @@ namespace tracy
}
}


template<typename TAlign>
inline void
alignmentTracePadding(TAlign const& align, Trace const& tr, BaseCalls const& bc, Trace& ntr, BaseCalls& nbc) {
alignmentTracePadding(align, tr, bc, 0, ntr, nbc);
}

}

Expand Down
2 changes: 1 addition & 1 deletion src/trim.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ namespace tracy {
nbc.primary.push_back(bc.primary[bcpos]);
nbc.secondary.push_back(bc.secondary[bcpos]);
nbc.consensus.push_back(bc.consensus[bcpos]);
ntr.qual.push_back(tr.qual[bcpos]);
}
if (bcpos < bc.bcPos.size() - 1) idx = bc.bcPos[++bcpos];
if (bcpos == trimLeft) {
Expand All @@ -134,7 +135,6 @@ namespace tracy {
ntr.traceACGT[1].push_back(tr.traceACGT[1][tracePos]);
ntr.traceACGT[2].push_back(tr.traceACGT[2][tracePos]);
ntr.traceACGT[3].push_back(tr.traceACGT[3][tracePos]);
ntr.qual.push_back(tr.qual[tracePos]);
++newTracePos;
}
}
Expand Down

0 comments on commit b460a0e

Please sign in to comment.