Skip to content

Commit

Permalink
Merge pull request #562 from Proteobench/fix-general-dia-issues
Browse files Browse the repository at this point in the history
Fix several param parsing issues from #560
  • Loading branch information
RobbinBouwmeester authored Jan 31, 2025
2 parents 898c608 + 30b3abf commit 46ee938
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 52 deletions.
38 changes: 14 additions & 24 deletions proteobench/io/params/alphadia.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,23 +191,6 @@ def extract_file_version(line: str) -> str:
return match.group(1) if match else None


def add_fdr_parameters(parameter_dict: dict, parsed_settings: dict) -> None:
    """
    Add the FDR parameters to the parameter dictionary in place.

    The keys ``ident_fdr_psm``, ``ident_fdr_peptide`` and ``ident_fdr_protein`` are always
    written. The key that corresponds to the configured group level receives the FDR value;
    the remaining keys are set to ``None``.

    Args:
        parameter_dict (dict): The dictionary where the FDR parameters will be added.
        parsed_settings (dict): The parsed settings containing the FDR values. The ``"fdr"``
            entry must provide ``"fdr"`` (convertible to ``float``) and ``"group_level"``
            (a string; surrounding whitespace is ignored).

    Raises:
        KeyError: If the ``"fdr"`` section or one of its entries is missing, or if the group
            level has no known mapping to an FDR parameter.
        ValueError: If the FDR value cannot be converted to ``float``.
    """
    fdr_settings = parsed_settings["fdr"]
    fdr_value = float(fdr_settings["fdr"])
    fdr_level = fdr_settings["group_level"].strip()

    # Only the protein group level is mapped to a parameter key.
    level_mapping = {"proteins": "ident_fdr_protein"}
    if fdr_level not in level_mapping:
        # Keep raising KeyError (what the plain lookup raised), but say what went wrong
        # instead of surfacing just the offending key. Nothing has been written to
        # parameter_dict at this point.
        raise KeyError(
            f"Unsupported FDR group level {fdr_level!r}; expected one of {sorted(level_mapping)}"
        )

    fdr_parameters = {"ident_fdr_psm": None, "ident_fdr_peptide": None, "ident_fdr_protein": None}
    fdr_parameters[level_mapping[fdr_level]] = fdr_value
    parameter_dict.update(fdr_parameters)


def get_min_max(list_of_elements: list) -> Tuple[int, int]:
"""
Extracts the minimum and maximum values from a list of elements.
Expand Down Expand Up @@ -241,10 +224,13 @@ def extract_params(fname: str) -> ProteoBenchParameters:
with open(fname) as f:
lines_read = f.readlines()
lines = [line for line in lines_read if "──" in line]
version = extract_file_version(lines_read[6])
version_line = [line for line in lines_read if "version" in line][0]
version = extract_file_version(version_line)
except:
lines = [l for l in fname.read().decode("utf-8").splitlines() if "──" in l]
version = extract_file_version(lines[6])
lines_read = [l for l in fname.read().decode("utf-8").splitlines()]
lines = [line for line in lines_read if "──" in line]
version_line = [line for line in lines_read if "version" in line][0]
version = extract_file_version(version_line)

line_generator = iter(lines)
first_line = next(line_generator)
Expand All @@ -255,17 +241,22 @@ def extract_params(fname: str) -> ProteoBenchParameters:
precursor_charges = get_min_max(parsed_settings["library_prediction"]["precursor_charge"])

prec_tol = float(parsed_settings["search"]["target_ms1_tolerance"])
prec_tol_string = f"[-{prec_tol} ppm, {prec_tol} ppm]"
frag_tol = float(parsed_settings["search"]["target_ms2_tolerance"])
frag_tol_string = f"[-{frag_tol} ppm, {frag_tol} ppm]"

parameters = {
"software_name": "AlphaDIA",
"search_engine": "AlphaDIA",
"software_version": version,
"search_engine_version": version,
"enable_match_between_runs": False,
"precursor_mass_tolerance": prec_tol,
"fragment_mass_tolerance": frag_tol,
"enzyme": parsed_settings["library_prediction"]["enzyme"].strip(),
"precursor_mass_tolerance": prec_tol_string,
"fragment_mass_tolerance": frag_tol_string,
"ident_fdr_psm": parsed_settings["fdr"]["fdr"],
"ident_fdr_peptide": None,
"ident_fdr_protein": parsed_settings["fdr"]["fdr"],
"enzyme": parsed_settings["library_prediction"]["enzyme"].strip().capitalize(),
"allowed_miscleavages": int(parsed_settings["library_prediction"]["missed_cleavages"]),
"min_peptide_length": peptide_lengths[0],
"max_peptide_length": peptide_lengths[1],
Expand All @@ -280,7 +271,6 @@ def extract_params(fname: str) -> ProteoBenchParameters:
"predictors_library": "Built-in",
}

add_fdr_parameters(parameters, parsed_settings)
return ProteoBenchParameters(**parameters)


Expand Down
4 changes: 4 additions & 0 deletions proteobench/io/params/diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ def parse_protein_inference_method(cmdline_dict: dict) -> str:
return pg_level_mapping[pg_setting]
except KeyError:
Exception(f"Unexpected setting passed to --pg-level in diann.exe: {pg_setting}")
else:
return (
"Genes" # Default value, when --pg-level is not changed in the GUI it does not appear in the command string
)


def parse_quantification_strategy(cmdline_dict: dict):
Expand Down
2 changes: 1 addition & 1 deletion proteobench/io/params/fragger.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def extract_params(file: BytesIO) -> ProteoBenchParameters:

elif fragpipe_params.loc["diann.run-dia-nn"] == "true":
params.ident_fdr_protein = fragpipe_params.loc["diann.q-value"]
params.ident_fdr_peptide = fragpipe_params.loc["diann.q-value"]
params.ident_fdr_peptide = None
params.ident_fdr_psm = fragpipe_params.loc["diann.q-value"]
params.abundance_normalization_ions = None

Expand Down
12 changes: 9 additions & 3 deletions proteobench/io/params/spectronaut.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def extract_value(lines: List[str], search_term: str) -> Optional[str]:
return next((clean_text(line.split(search_term)[1]) for line in lines if search_term in line), None)


def extract_mass_tolerance(lines: List[str], search_term: str) -> Optional[str]:
def extract_mass_tolerance(lines: List[str], search_term: str, mass_analyzer="Orbitrap") -> Optional[str]:
"""
Extract the mass tolerance value associated with a search term, with special handling for "System Default".
Expand All @@ -47,7 +47,13 @@ def extract_mass_tolerance(lines: List[str], search_term: str) -> Optional[str]:
Optional[str]: The extracted mass tolerance value, or None if the search term is not found.
"""
value = next((clean_text(line.split(search_term)[1]) for line in lines if search_term in line), None)
value = "40 ppm" if value == "System Default" else value
if value == "System Default":
if mass_analyzer in (["Orbitrap", "TOF", "BrukerTOF"]):
value = "40 ppm"
elif mass_analyzer == "WatersTOF":
value = "80 ppm"
elif mass_analyzer == "IonTrap":
value = "0.5 th"
return value


Expand Down Expand Up @@ -102,7 +108,7 @@ def read_spectronaut_settings(file_path: str) -> ProteoBenchParameters:
params.ident_fdr_psm = float(extract_value(lines, "Precursor Qvalue Cutoff:"))
params.ident_fdr_peptide = None
params.ident_fdr_protein = float(extract_value(lines, "Protein Qvalue Cutoff (Experiment):"))
params.enable_match_between_runs = None
params.enable_match_between_runs = False
_precursor_mass_tolerance = extract_mass_tolerance(lines, "MS1 Mass Tolerance Strategy:")
params.precursor_mass_tolerance = f"[-{_precursor_mass_tolerance}, {_precursor_mass_tolerance}]"
_fragment_mass_tolerance = extract_mass_tolerance(lines, "MS2 Mass Tolerance Strategy:")
Expand Down
11 changes: 4 additions & 7 deletions test/params/DIANN_1.7.16.log.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ software_name,DIA-NN
software_version,1.7.16
search_engine,DIA-NN
search_engine_version,1.7.16
ident_fdr_psm,
ident_fdr_psm,None
ident_fdr_peptide,0.01
ident_fdr_protein,0.01
enable_match_between_runs,True
Expand All @@ -16,11 +16,8 @@ max_peptide_length,30
fixed_mods,Carbamidomethyl (C)
variable_mods,Oxidation (M)
max_mods,1
min_precursor_charge,
max_precursor_charge,
scan_window,10
min_precursor_charge,None
max_precursor_charge,None
quantification_method,QuantUMS high-precision
second_pass,False
protein_inference,
predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}"
protein_inference,Genes
abundance_normalization_ions,
7 changes: 2 additions & 5 deletions test/params/DIANN_WU304578_report.log.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ software_name,DIA-NN
software_version,1.8.2 beta 8
search_engine,DIA-NN
search_engine_version,1.8.2 beta 8
ident_fdr_psm,
ident_fdr_psm,None
ident_fdr_peptide,0.01
ident_fdr_protein,0.01
enable_match_between_runs,True
Expand All @@ -18,9 +18,6 @@ variable_mods,UniMod:35/15.994915/M
max_mods,1
min_precursor_charge,2
max_precursor_charge,3
scan_window,13
quantification_method,QuantUMS high-precision
second_pass,False
protein_inference,
predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}"
protein_inference,Genes
abundance_normalization_ions,
5 changes: 1 addition & 4 deletions test/params/DIANN_output_20240229_report.log.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ software_name,DIA-NN
software_version,1.8.2 beta 22
search_engine,DIA-NN
search_engine_version,1.8.2 beta 22
ident_fdr_psm,
ident_fdr_psm,None
ident_fdr_peptide,0.01
ident_fdr_protein,0.01
enable_match_between_runs,True
Expand All @@ -18,9 +18,6 @@ variable_mods,UniMod:35/15.994915/M
max_mods,1
min_precursor_charge,1
max_precursor_charge,4
scan_window,13
quantification_method,QuantUMS high-accuracy
second_pass,True
protein_inference,Protein_names
predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}"
abundance_normalization_ions,
5 changes: 1 addition & 4 deletions test/params/Version1_9_Predicted_Library_report.log.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ software_name,DIA-NN
software_version,1.9
search_engine,DIA-NN
search_engine_version,1.9
ident_fdr_psm,
ident_fdr_psm,None
ident_fdr_peptide,0.01
ident_fdr_protein,0.01
enable_match_between_runs,True
Expand All @@ -18,9 +18,6 @@ variable_mods,"UniMod:35/15.994915/M,UniMod:1/42.010565/*n"
max_mods,1
min_precursor_charge,1
max_precursor_charge,4
scan_window,13
quantification_method,QuantUMS high-precision
second_pass,True
protein_inference,Protein_names
predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}"
abundance_normalization_ions,
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ search_engine_version,19.2.240905.62635
ident_fdr_psm,0.01
ident_fdr_peptide,
ident_fdr_protein,0.01
enable_match_between_runs,
enable_match_between_runs,False
precursor_mass_tolerance,"[-40 ppm, 40 ppm]"
fragment_mass_tolerance,"[-40 ppm, 40 ppm]"
enzyme,Trypsin/P
Expand All @@ -18,9 +18,9 @@ variable_mods,"Acetyl (Protein N-term), Oxidation (M)"
max_mods,5
min_precursor_charge,
max_precursor_charge,
scan_window,Dynamic
quantification_method,MS2
second_pass,directDIA+ (Deep)
protein_inference,IDPicker
predictors_library,False
abundance_normalization_ions,True
scan_window,Dynamic
second_pass,directDIA+ (Deep)
predictors_library,False
23 changes: 23 additions & 0 deletions test_proline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
,0
software_name,ProlineStudio
software_version,2.3.0-SNAPSHOT_2024-09-11T06:45:20Z_jenkins
search_engine,Mascot
search_engine_version,2.8.3
ident_fdr_psm,0.01
ident_fdr_peptide,
ident_fdr_protein,
enable_match_between_runs,True
precursor_mass_tolerance,"[-10.0 ppm, 10.0 ppm]"
fragment_mass_tolerance,"[-0.02 Da, 0.02 Da]"
enzyme,Trypsin/P
allowed_miscleavages,2
min_peptide_length,7
max_peptide_length,
fixed_mods,Carbamidomethyl (C)
variable_mods,Acetyl (Protein N-term); Oxidation (M)
max_mods,
min_precursor_charge,2
max_precursor_charge,3
quantification_method,
protein_inference,
abundance_normalization_ions,

0 comments on commit 46ee938

Please sign in to comment.