diff --git a/proteobench/io/params/alphadia.py b/proteobench/io/params/alphadia.py index 12180ad9..eb91298e 100644 --- a/proteobench/io/params/alphadia.py +++ b/proteobench/io/params/alphadia.py @@ -191,23 +191,6 @@ def extract_file_version(line: str) -> str: return match.group(1) if match else None -def add_fdr_parameters(parameter_dict: dict, parsed_settings: dict) -> None: - """ - Adds FDR parameters (e.g., ident_fdr_psm, ident_fdr_peptide) to the parameter dictionary. - - Args: - parameter_dict (dict): The dictionary where the FDR parameters will be added. - parsed_settings (dict): The parsed settings containing the FDR values. - """ - fdr_value = float(parsed_settings["fdr"]["fdr"]) - fdr_level = parsed_settings["fdr"]["group_level"].strip() - - level_mapping = {"proteins": "ident_fdr_protein"} - fdr_parameters = {"ident_fdr_psm": None, "ident_fdr_peptide": None, "ident_fdr_protein": None} - fdr_parameters[level_mapping[fdr_level]] = fdr_value - parameter_dict.update(fdr_parameters) - - def get_min_max(list_of_elements: list) -> Tuple[int, int]: """ Extracts the minimum and maximum values from a list of elements. @@ -241,10 +224,13 @@ def extract_params(fname: str) -> ProteoBenchParameters: with open(fname) as f: lines_read = f.readlines() lines = [line for line in lines_read if "──" in line] - version = extract_file_version(lines_read[6]) + version_line = [line for line in lines_read if "version" in line][0] + version = extract_file_version(version_line) except: - lines = [l for l in fname.read().decode("utf-8").splitlines() if "──" in l] - version = extract_file_version(lines[6]) + lines_read = [l for l in fname.read().decode("utf-8").splitlines()] + lines = [line for line in lines_read if "──" in line] + version_line = [line for line in lines_read if "version" in line][0] + version = extract_file_version(version_line) line_generator = iter(lines) first_line = next(line_generator) @@ -255,7 +241,9 @@ def extract_params(fname: str) -> ProteoBenchParameters: precursor_charges = get_min_max(parsed_settings["library_prediction"]["precursor_charge"]) prec_tol = float(parsed_settings["search"]["target_ms1_tolerance"]) + prec_tol_string = f"[-{prec_tol} ppm, {prec_tol} ppm]" frag_tol = float(parsed_settings["search"]["target_ms2_tolerance"]) + frag_tol_string = f"[-{frag_tol} ppm, {frag_tol} ppm]" parameters = { "software_name": "AlphaDIA", @@ -263,9 +251,12 @@ def extract_params(fname: str) -> ProteoBenchParameters: "software_version": version, "search_engine_version": version, "enable_match_between_runs": False, - "precursor_mass_tolerance": prec_tol, - "fragment_mass_tolerance": frag_tol, - "enzyme": parsed_settings["library_prediction"]["enzyme"].strip(), + "precursor_mass_tolerance": prec_tol_string, + "fragment_mass_tolerance": frag_tol_string, + "ident_fdr_psm": parsed_settings["fdr"]["fdr"], + "ident_fdr_peptide": None, + "ident_fdr_protein": parsed_settings["fdr"]["fdr"], + "enzyme": parsed_settings["library_prediction"]["enzyme"].strip().capitalize(), "allowed_miscleavages": int(parsed_settings["library_prediction"]["missed_cleavages"]), "min_peptide_length": peptide_lengths[0], "max_peptide_length": peptide_lengths[1], @@ -280,7 +271,6 @@ def extract_params(fname: str) -> ProteoBenchParameters: "predictors_library": "Built-in", } - add_fdr_parameters(parameters, parsed_settings) return ProteoBenchParameters(**parameters) diff --git a/proteobench/io/params/diann.py b/proteobench/io/params/diann.py index ef25eca5..e1d77d06 100644 --- a/proteobench/io/params/diann.py +++ b/proteobench/io/params/diann.py @@ -217,6 +217,10 @@ def parse_protein_inference_method(cmdline_dict: dict) -> str: return pg_level_mapping[pg_setting] except KeyError: Exception(f"Unexpected setting passed to --pg-level in diann.exe: {pg_setting}") + else: + return ( + "Genes" # Default value, when --pg-level is not changed in the GUI it does not appear in the command string + ) def parse_quantification_strategy(cmdline_dict: dict): diff --git a/proteobench/io/params/fragger.py b/proteobench/io/params/fragger.py index 0a41fa20..7dc21e9c 100644 --- a/proteobench/io/params/fragger.py +++ b/proteobench/io/params/fragger.py @@ -160,7 +160,7 @@ def extract_params(file: BytesIO) -> ProteoBenchParameters: elif fragpipe_params.loc["diann.run-dia-nn"] == "true": params.ident_fdr_protein = fragpipe_params.loc["diann.q-value"] - params.ident_fdr_peptide = fragpipe_params.loc["diann.q-value"] + params.ident_fdr_peptide = None params.ident_fdr_psm = fragpipe_params.loc["diann.q-value"] params.abundance_normalization_ions = None diff --git a/proteobench/io/params/spectronaut.py b/proteobench/io/params/spectronaut.py index 0a7be7c0..e278880a 100644 --- a/proteobench/io/params/spectronaut.py +++ b/proteobench/io/params/spectronaut.py @@ -35,7 +35,7 @@ def extract_value(lines: List[str], search_term: str) -> Optional[str]: return next((clean_text(line.split(search_term)[1]) for line in lines if search_term in line), None) -def extract_mass_tolerance(lines: List[str], search_term: str) -> Optional[str]: +def extract_mass_tolerance(lines: List[str], search_term: str, mass_analyzer="Orbitrap") -> Optional[str]: """ Extract the mass tolerance value associated with a search term, with special handling for "System Default". @@ -47,7 +47,13 @@ def extract_mass_tolerance(lines: List[str], search_term: str) -> Optional[str]: Optional[str]: The extracted mass tolerance value, or None if the search term is not found. """ value = next((clean_text(line.split(search_term)[1]) for line in lines if search_term in line), None) - value = "40 ppm" if value == "System Default" else value + if value == "System Default": + if mass_analyzer in (["Orbitrap", "TOF", "BrukerTOF"]): + value = "40 ppm" + elif mass_analyzer == "WatersTOF": + value = "80 ppm" + elif mass_analyzer == "IonTrap": + value = "0.5 th" return value @@ -102,7 +108,7 @@ def read_spectronaut_settings(file_path: str) -> ProteoBenchParameters: params.ident_fdr_psm = float(extract_value(lines, "Precursor Qvalue Cutoff:")) params.ident_fdr_peptide = None params.ident_fdr_protein = float(extract_value(lines, "Protein Qvalue Cutoff (Experiment):")) - params.enable_match_between_runs = None + params.enable_match_between_runs = False _precursor_mass_tolerance = extract_mass_tolerance(lines, "MS1 Mass Tolerance Strategy:") params.precursor_mass_tolerance = f"[-{_precursor_mass_tolerance}, {_precursor_mass_tolerance}]" _fragment_mass_tolerance = extract_mass_tolerance(lines, "MS2 Mass Tolerance Strategy:") diff --git a/test/params/DIANN_1.7.16.log.csv b/test/params/DIANN_1.7.16.log.csv index 2eacf877..dafaefcd 100644 --- a/test/params/DIANN_1.7.16.log.csv +++ b/test/params/DIANN_1.7.16.log.csv @@ -3,7 +3,7 @@ software_name,DIA-NN software_version,1.7.16 search_engine,DIA-NN search_engine_version,1.7.16 -ident_fdr_psm, +ident_fdr_psm,None ident_fdr_peptide,0.01 ident_fdr_protein,0.01 enable_match_between_runs,True @@ -16,11 +16,8 @@ max_peptide_length,30 fixed_mods,Carbamidomethyl (C) variable_mods,Oxidation (M) max_mods,1 -min_precursor_charge, -max_precursor_charge, -scan_window,10 +min_precursor_charge,None +max_precursor_charge,None quantification_method,QuantUMS high-precision -second_pass,False -protein_inference, -predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}" +protein_inference,Genes abundance_normalization_ions, diff --git a/test/params/DIANN_WU304578_report.log.csv b/test/params/DIANN_WU304578_report.log.csv index a24818b0..5b54c503 100644 --- a/test/params/DIANN_WU304578_report.log.csv +++ b/test/params/DIANN_WU304578_report.log.csv @@ -3,7 +3,7 @@ software_name,DIA-NN software_version,1.8.2 beta 8 search_engine,DIA-NN search_engine_version,1.8.2 beta 8 -ident_fdr_psm, +ident_fdr_psm,None ident_fdr_peptide,0.01 ident_fdr_protein,0.01 enable_match_between_runs,True @@ -18,9 +18,6 @@ variable_mods,UniMod:35/15.994915/M max_mods,1 min_precursor_charge,2 max_precursor_charge,3 -scan_window,13 quantification_method,QuantUMS high-precision -second_pass,False -protein_inference, -predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}" +protein_inference,Genes abundance_normalization_ions, diff --git a/test/params/DIANN_output_20240229_report.log.csv b/test/params/DIANN_output_20240229_report.log.csv index af0661ca..d79b696b 100644 --- a/test/params/DIANN_output_20240229_report.log.csv +++ b/test/params/DIANN_output_20240229_report.log.csv @@ -3,7 +3,7 @@ software_name,DIA-NN software_version,1.8.2 beta 22 search_engine,DIA-NN search_engine_version,1.8.2 beta 22 -ident_fdr_psm, +ident_fdr_psm,None ident_fdr_peptide,0.01 ident_fdr_protein,0.01 enable_match_between_runs,True @@ -18,9 +18,6 @@ variable_mods,UniMod:35/15.994915/M max_mods,1 min_precursor_charge,1 max_precursor_charge,4 -scan_window,13 quantification_method,QuantUMS high-accuracy -second_pass,True protein_inference,Protein_names -predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}" abundance_normalization_ions, diff --git a/test/params/Version1_9_Predicted_Library_report.log.csv b/test/params/Version1_9_Predicted_Library_report.log.csv index 8864db9a..ae6a5e1a 100644 --- a/test/params/Version1_9_Predicted_Library_report.log.csv +++ b/test/params/Version1_9_Predicted_Library_report.log.csv @@ -3,7 +3,7 @@ software_name,DIA-NN software_version,1.9 search_engine,DIA-NN search_engine_version,1.9 -ident_fdr_psm, +ident_fdr_psm,None ident_fdr_peptide,0.01 ident_fdr_protein,0.01 enable_match_between_runs,True @@ -18,9 +18,6 @@ variable_mods,"UniMod:35/15.994915/M,UniMod:1/42.010565/*n" max_mods,1 min_precursor_charge,1 max_precursor_charge,4 -scan_window,13 quantification_method,QuantUMS high-precision -second_pass,True protein_inference,Protein_names -predictors_library,"{'RT': 'DIANN', 'IM': 'DIANN', 'MS2_int': 'DIANN'}" abundance_normalization_ions, diff --git a/test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.csv b/test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.csv index 357a4599..0dafad56 100644 --- a/test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.csv +++ b/test/params/spectronaut_Experiment1_ExperimentSetupOverview_BGS_Factory_Settings.csv @@ -6,7 +6,7 @@ search_engine_version,19.2.240905.62635 ident_fdr_psm,0.01 ident_fdr_peptide, ident_fdr_protein,0.01 -enable_match_between_runs, +enable_match_between_runs,False precursor_mass_tolerance,"[-40 ppm, 40 ppm]" fragment_mass_tolerance,"[-40 ppm, 40 ppm]" enzyme,Trypsin/P @@ -18,9 +18,9 @@ variable_mods,"Acetyl (Protein N-term), Oxidation (M)" max_mods,5 min_precursor_charge, max_precursor_charge, -scan_window,Dynamic quantification_method,MS2 -second_pass,directDIA+ (Deep) protein_inference,IDPicker -predictors_library,False abundance_normalization_ions,True +scan_window,Dynamic +second_pass,directDIA+ (Deep) +predictors_library,False diff --git a/test_proline.csv b/test_proline.csv new file mode 100644 index 00000000..67aa6048 --- /dev/null +++ b/test_proline.csv @@ -0,0 +1,23 @@ +,0 +software_name,ProlineStudio +software_version,2.3.0-SNAPSHOT_2024-09-11T06:45:20Z_jenkins +search_engine,Mascot +search_engine_version,2.8.3 +ident_fdr_psm,0.01 +ident_fdr_peptide, +ident_fdr_protein, +enable_match_between_runs,True +precursor_mass_tolerance,"[-10.0 ppm, 10.0 ppm]" +fragment_mass_tolerance,"[-0.02 Da, 0.02 Da]" +enzyme,Trypsin/P +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length, +fixed_mods,Carbamidomethyl (C) +variable_mods,Acetyl (Protein N-term); Oxidation (M) +max_mods, +min_precursor_charge,2 +max_precursor_charge,3 +quantification_method, +protein_inference, +abundance_normalization_ions,