diff --git a/.gitignore b/.gitignore index 5a2dce5..5f73537 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .gitignore /venv -/.pytest_cache \ No newline at end of file +/.pytest_cache +__pycache__/ diff --git a/__pycache__/data_loading.cpython-312.pyc b/__pycache__/data_loading.cpython-312.pyc new file mode 100644 index 0000000..32e51da Binary files /dev/null and b/__pycache__/data_loading.cpython-312.pyc differ diff --git a/__pycache__/processing.cpython-312.pyc b/__pycache__/processing.cpython-312.pyc new file mode 100644 index 0000000..e9a0c26 Binary files /dev/null and b/__pycache__/processing.cpython-312.pyc differ diff --git a/processing.py b/processing.py index 5acd427..bf3c1a2 100644 --- a/processing.py +++ b/processing.py @@ -14,6 +14,9 @@ # Pattern 4: full numeric _NUMERIC_PATTERN = re.compile(r'^\d+$') +# Default spectrum dictionary for missing matches (ensures consistent schema with None values) +_DEFAULT_SPEC = {'title': None, 'mz_array': None, 'intensity_array': None, 'pepmass': None} + def extract_index_from_spectra_ref(s: Optional[str]) -> Optional[str]: """ @@ -98,16 +101,14 @@ def map_psms_to_spectra(spectra: List[Dict], psm_df: pd.DataFrame) -> pd.DataFra # ⚡ OPTIMIZATION: Convert list of dicts to DataFrame directly instead of repeated apply calls # Original: Multiple apply calls (4x iteration over full dataset) + # Improvement: Use .tolist() for faster iteration and explicit columns to skip schema inference. + # Also use _DEFAULT_SPEC to ensure missing rows have None instead of NaN for correct logic in app.py. - # Convert matched Series to list, replacing NaNs with empty dicts for DataFrame construction - specs_list = [x if isinstance(x, dict) else {} for x in matched_spec_series] - specs_df = pd.DataFrame(specs_list) - specs_df.index = psm_df.index # Align index with original DataFrame + # Convert matched Series to list, replacing NaNs with default dict for DataFrame construction + specs_list = [x if isinstance(x, dict) else _DEFAULT_SPEC for x in matched_spec_series.tolist()] - # Ensure required columns exist (if no spectra matched or mock data missing keys) - for col in ['title', 'mz_array', 'intensity_array', 'pepmass']: - if col not in specs_df.columns: - specs_df[col] = None + specs_df = pd.DataFrame(specs_list, columns=['title', 'mz_array', 'intensity_array', 'pepmass']) + specs_df.index = psm_df.index # Align index with original DataFrame mappings = pd.DataFrame({ 'psm_index': psm_df.index, diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..9d7b5e0 Binary files /dev/null and b/tests/__pycache__/__init__.cpython-312.pyc differ diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..982d72a Binary files /dev/null and b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_extract_index_from_spectra_ref.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_extract_index_from_spectra_ref.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..e8bb93a Binary files /dev/null and b/tests/__pycache__/test_extract_index_from_spectra_ref.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_integration.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_integration.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..fdfcd99 Binary files /dev/null and b/tests/__pycache__/test_integration.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_load_mgf.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_load_mgf.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..e5d01cf Binary files /dev/null and b/tests/__pycache__/test_load_mgf.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_load_mztab.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_load_mztab.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..62e4241 Binary files /dev/null and b/tests/__pycache__/test_load_mztab.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_map_psms_to_spectra.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_map_psms_to_spectra.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..bd89d2d Binary files /dev/null and b/tests/__pycache__/test_map_psms_to_spectra.cpython-312-pytest-9.0.2.pyc differ