Skip to content

Commit 96f5be1

Browse files
authored
Merge branch 'main' into help_and_docs
2 parents dec69d3 + 02c1200 commit 96f5be1

File tree

6 files changed

+65
-17
lines changed

6 files changed

+65
-17
lines changed

tracetrack/entities/record.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,14 @@ class TraceSeqRecord(Record):
106106
"""
107107
Class for storing information loaded from trace files (.ab1 files).
108108
"""
109-
def __init__(self, seq, quality, id=None, traces=None, base_locations=None, reference=None, reverse: bool = None):
109+
def __init__(self, seq, quality, f, id=None, traces=None, base_locations=None, reference=None,
110+
reverse: bool = None):
110111
"""
111112
This constructor is only called by read function below or from other functions starting with an instance, so
112113
the parameters need not be understood in too much detail - they reflect the structure of a trace file.
113114
:param seq: Seq object with DNA sequence
114115
:param quality: list of int representing per base quality
116+
:param f: mixed peaks detection threshold
115117
:param traces: dict containing a list of trace values for each base
116118
:param base_locations: list of positions (int) on the x axis, where each base is called
117119
:param reference: reference sequence in the DB, only added after instantiating
@@ -122,7 +124,8 @@ def __init__(self, seq, quality, id=None, traces=None, base_locations=None, refe
122124
self.traces = traces
123125
self.base_locations = base_locations
124126
self.quality = quality
125-
self.mixed_peaks = self.find_mixed_peaks()
127+
self.f = f
128+
self.mixed_peaks = self.find_mixed_peaks(f)
126129
# should the original sequence be stored because of finding mixed peaks? Or just ignore all N's...
127130
self.reference = reference
128131
self.reverse = reverse
@@ -148,8 +151,8 @@ def read(cls, path):
148151
# get locations in trace array for all bases
149152
base_locations = list(record.annotations['abif_raw']["PLOC1"])
150153

151-
return cls(record.seq, record.letter_annotations['phred_quality'], id=name, traces=traces,
152-
base_locations=base_locations)
154+
return cls(record.seq, record.letter_annotations['phred_quality'], f=0.15, id=name,
155+
traces=traces, base_locations=base_locations)
153156

154157
def filter_sequence_by_quality(self, threshold, end_threshold):
155158
"""
@@ -167,7 +170,8 @@ def filter_sequence_by_quality(self, threshold, end_threshold):
167170
traces=self.traces,
168171
base_locations=self.base_locations,
169172
reference=self.reference,
170-
reverse=self.reverse
173+
reverse=self.reverse,
174+
f=self.f
171175
)
172176

173177
def reverse_complement(self, **kwargs):
@@ -183,7 +187,8 @@ def reverse_complement(self, **kwargs):
183187
traces={str(Seq(base).reverse_complement()): values[::-1] for base, values in self.traces.items()},
184188
base_locations=[num_locations - i - 1 for i in self.base_locations[::-1]],
185189
reverse=False,
186-
reference=self.reference
190+
reference=self.reference,
191+
f=self.f
187192
)
188193

189194
def has_base_above_threshold(self):
@@ -242,7 +247,7 @@ def peak_borders(self, i: int):
242247
end = min(pos + width - 2, len(self.traces['A']))
243248
return start, end
244249

245-
def find_mixed_peaks(self, fraction: float = 0.15):
250+
def find_mixed_peaks(self, fraction: float):
246251
"""
247252
For each position of the sequence, determine if the peak in the chromatogram is "mixed".
248253
Disregard mixed signals in regions with low signal to noise ratio (generally bad quality region)
@@ -253,12 +258,12 @@ def find_mixed_peaks(self, fraction: float = 0.15):
253258
stn = [self.signal_to_noise(i) for i in range(len(self.base_locations))]
254259
avg_stn = sum(stn) / len(stn)
255260
mixed_peaks = []
261+
threshold = max(25, avg_stn * 1.35)
256262

257263
for i, pos in enumerate(self.base_locations):
258264
stn_local = stn[i-10:i] + stn[i+1:i+10]
259265
signal_to_noise = sum(stn_local) / 20
260266

261-
threshold = max(25, avg_stn * 1.35)
262267
if signal_to_noise < threshold:
263268
continue
264269
# bad StN ratio -> disregard potential mixed positions
@@ -269,13 +274,22 @@ def find_mixed_peaks(self, fraction: float = 0.15):
269274
peaks = {base: values[pos] for base, values in self.traces.items()}
270275
if base != "N":
271276
main_peak = peaks[base.upper()]
272-
for letter, area in areas.items():
273-
# check for both area and height of peak
274-
if base != letter and area > (areas[base.upper()] * fraction) and peaks[letter] > (main_peak * fraction) \
275-
and self.is_concave(pos, letter):
276-
mixed_peaks.append(i)
277+
else:
278+
# If the called base is "N", we use the highest peak present as the main peak
279+
main_peak = max(peaks.values())
280+
base = max(peaks, key=peaks.get)
281+
for letter, area in areas.items():
282+
# check for both area and height of peak
283+
if base != letter and area > (areas[base.upper()] * fraction) and peaks[letter] > (main_peak * fraction) \
284+
and self.is_concave(pos, letter):
285+
mixed_peaks.append(i)
277286
return mixed_peaks
278287

288+
def re_find_mixed_peaks(self, f):
    """
    Recompute the mixed peak positions of an already constructed record.

    :param f: mixed peaks detection threshold passed on to find_mixed_peaks
    """
    recalculated = self.find_mixed_peaks(f)
    self.mixed_peaks = recalculated
    # the threshold is stored only after the detection call has returned
    self.f = f
292+
279293
def signal_to_noise(self, i: int):
280294
"""
281295
Calculate the signal to noise ratio of a position as the ratio of the primary peak area to the sum of all other

tracetrack/flask_server.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,16 @@ def alignment_settings_post(task_id, settings_task=None):
238238
threshold = int(request.form['threshold'])
239239
end_threshold = int(request.form['end_threshold'])
240240
separate = 'separate' in request.form
241+
fraction = request.form['fraction']
242+
messages = []
243+
try:
244+
fraction_f = float(fraction)
245+
if fraction_f <= 0 or fraction_f > 1:
246+
messages.append(f"The number provided for 'f' ({fraction}) is outside of the [0, 1] interval. Using default threshold 0.15.")
247+
fraction_f = 0.15
248+
except ValueError:
249+
messages.append(f"The input for 'f' ({fraction}) could not be converted to a number. Using default threshold 0.15.")
250+
fraction_f = 0.15
241251

242252
result = scheduler.get_result(task_id)
243253
if isinstance(result, Exception):
@@ -256,8 +266,9 @@ def alignment_settings_post(task_id, settings_task=None):
256266
direction = str(request.form[f'dir_for_{seq_id}'])
257267
dir_flag = True if direction == "Rev" else False
258268
seq.flag_as_reverse(dir_flag)
269+
seq.re_find_mixed_peaks(fraction_f)
259270

260-
new_id = schedule_tasks(sequences, population_names, db, separate, threshold, end_threshold)
271+
new_id = schedule_tasks(sequences, population_names, db, separate, threshold, end_threshold, messages)
261272
return redirect("/results/" + task_id + "/" + new_id)
262273

263274

tracetrack/server_utils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -558,13 +558,18 @@ def get_color_bin(value, thresholds):
558558

559559

560560
def trace_record_dict(trace: AlignedTrace) -> dict:
    """
    Build a dict describing one aligned trace (sequence, per-base trace values, base
    locations, aligned positions and id).

    Trace values lying more than 112 points to the left of the location of the first
    aligned position are replaced with zeros in the returned dict. Each channel is
    copied into a new list first, so the values stored on ``trace.record`` are left
    untouched.

    :param trace: aligned trace; its ``record`` supplies seq, traces, base_locations and id
    :return: dict with keys 'sequence', 'traces', 'locations', 'alignedPositions' and 'id'
    """
    left_margin = 112  # 112 points to the left of start of reference are still shown
    record = trace.record
    aligned_positions = trace.get_aligned_positions()
    d = {
        'sequence': str(record.seq),
        # copy every channel so that the masking below does not alter record.traces
        'traces': [{'base': base, 'values': list(record.traces[base])} for base in 'GATC'],
        'locations': record.base_locations,
        'alignedPositions': aligned_positions,
        'id': record.id
    }
    if not aligned_positions:
        # nothing is aligned, so there is no reference start to mask against
        return d
    last = max(0, d["locations"][aligned_positions[0]] - left_margin)
    for tr in d["traces"]:
        values = tr["values"]
        # never write past the end of a channel
        count = min(last, len(values))
        values[:count] = [0] * count
    return d
568573

569574

570575
def position_dict(alignment: Alignment, position: int) -> dict:
@@ -586,13 +591,14 @@ def hash_trace_name(pop_number, seq_number):
586591
return f"{pop_number}_{seq_number}"
587592

588593

589-
def schedule_tasks(sequences, population_names, db, separate, threshold, end_threshold):
594+
def schedule_tasks(sequences, population_names, db, separate, threshold, end_threshold, messages):
590595
settings = Settings(threshold, end_threshold, separate)
591596
inputs = []
592597
inputs_sorted = []
593598
for population_seqs, population_name in zip(sequences, population_names):
594599
new_seqlist = []
595-
warnings = []
600+
warnings = [] + messages
601+
messages = []
596602
for record in population_seqs:
597603
new_record = record.filter_sequence_by_quality(threshold, end_threshold)
598604
if new_record.has_base_above_threshold():

tracetrack/templates/help.html

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ <h3>End Trimming Score Threshold</h3>
3737
As the quality towards the ends of trace files tends to decrease, both ends of sequences are discarded until three bases in a row pass the
3838
trimming threshold. This can also be set by the user and should be higher than the score threshold, otherwise it has no effect.
3939
</p>
40+
<h3>Threshold for calling mixed peaks (<i>f</i>)</h3>
41+
<p>
42+
The application detects positions where more than one trace peak is present, resulting in one primary and one or more secondary base calls.
43+
This threshold specifies the minimal fraction of the area of the primary peak that another peak has to attain in order to be considered a secondary peak.
44+
</p>
45+
46+
4047
<h3>Matching to Reference</h3>
4148
<p>
4249
Trace files are matched to reference sequences automatically. TraceTrack first checks if the reference ID is contained in the trace file name,

tracetrack/templates/index.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ <h4 class="badged-header"><span class="badge badge-primary">2</span>References</
5050
<div class="example-used" style="display: none;">
5151
<h4>Using example files</h4>
5252
</div>
53+
<div >
54+
55+
</div>
5356

5457
<br>
5558
<div class="form-group">

tracetrack/templates/settings.html

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ <h4 class="badged-header"><span class="badge badge-primary">3</span>Alignment Se
3838
</div>
3939
</div>
4040
<br>
41+
<div class="form-row">
42+
<div class="form-group">
43+
<input type="text" class="form-control" value="0.15" id="fraction" name="fraction" placeholder="0.15">
44+
<label class="formGroupExampleInput" for="fraction">Threshold for calling mixed peaks (<i>f</i>)</label>
45+
</div>
46+
</div>
47+
<br>
4148
<div class="form-row">
4249
<h4 class="badged-header"><span class="badge badge-primary">4</span>Reference Assignment</h4>
4350
{% for trace_key, trace in ref_assignment.items() %}

0 commit comments

Comments
 (0)