lilab-bcb · yihming · Jan 22, 2026 · Jan 21, 2026 · Jan 21, 2026 · Jan 22, 2026
diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']
 
     steps:
     - uses: actions/checkout@v2

diff --git a/pegasus/annotate_cluster/annotate_cluster.py b/pegasus/annotate_cluster/annotate_cluster.py
@@ -9,6 +9,7 @@
 from io import IOBase
 
 import logging
+
 logger = logging.getLogger(__name__)
 
 from pegasusio import timer, MultimodalData, UnimodalData
@@ -30,8 +31,7 @@ def evaluate(
         de_down: pd.DataFrame,
         thre: float,
     ):
-        """ Calculate score for matching a cluster with a putative cell type.
-        """
+        """Calculate score for matching a cluster with a putative cell type."""
         self.score = self.avgp = 0.0
         self.weak_support = []
         self.strong_support = []
@@ -56,14 +56,10 @@ def evaluate(
 
                         if fc >= thre:
                             numer += 2.0
-                            self.strong_support.append(
-                                (marker, f"{percent:.2f}%")
-                            )
+                            self.strong_support.append((marker, f"{percent:.2f}%"))
                         else:
                             numer += 1.0 + (fc - 1.0) / (thre - 1.0)
-                            self.weak_support.append(
-                                (marker, f"{percent:.2f}%")
-                            )
+                            self.weak_support.append((marker, f"{percent:.2f}%"))
                 else:
                     assert sign == "-"
                     if gsym not in de_up.index:
@@ -76,14 +72,10 @@ def evaluate(
                             percent = de_down.at[gsym, "percent"]
                             if fc >= thre:
                                 numer += 2.0
-                                self.strong_support.append(
-                                    (marker, f"{percent:.2f}%")
-                                )
+                                self.strong_support.append((marker, f"{percent:.2f}%"))
                             else:
                                 numer += 1.0 + (fc - 1.0) / (thre - 1.0)
-                                self.weak_support.append(
-                                    (marker, f"{percent:.2f}%")
-                                )
+                                self.weak_support.append((marker, f"{percent:.2f}%"))
                         elif not self.ignore_nonde:
                             numer += 1.0
                             self.weak_support.append((marker, "N/A"))
@@ -116,8 +108,7 @@ def __init__(self, markers: Dict, genes: List[str]) -> None:
         self.recalibrate(self.object, genes)
 
     def recalibrate(self, obj: dict, genes: List[str]) -> None:
-        """ Remove markers that are not expressed (not in genes) and calculate partial weights for existing genes.
-        """
+        """Remove markers that are not expressed (not in genes) and calculate partial weights for existing genes."""
         for celltype in obj["cell_types"]:
             denom = 0.0
             for marker_set in celltype["markers"]:
@@ -141,8 +132,7 @@ def evaluate(
         ignore_nonde: bool = False,
         obj: dict = None,
     ):
-        """ Evaluate a cluster to determine its putative cell type.
-        """
+        """Evaluate a cluster to determine its putative cell type."""
         if obj is None:
             obj = self.object
 
@@ -172,16 +162,17 @@ def report(
         ct_list: List["CellType"],
         space: int = 4,
     ) -> None:
-        """ Write putative cell type reports to fout.
-        """
+        """Write putative cell type reports to fout."""
         for ct in ct_list:
             fout.write(" " * space + str(ct) + "\n")
             if ct.subtypes is not None:
                 self.report(fout, ct.subtypes, space + 4)
 
 
 def infer_cluster_names(
-    cell_type_dict: Dict[str, List["CellType"]], threshold: float = 0.5, is_human_immune: bool = False
+    cell_type_dict: Dict[str, List["CellType"]],
+    threshold: float = 0.5,
+    is_human_immune: bool = False,
 ) -> List[str]:
     """Decide cluster names based on cell types automatically.
 
@@ -222,25 +213,38 @@ def infer_cluster_names(
                 subname = None
                 has_naive_t = False
                 for subt in ct.subtypes:
-                    if subt.score >= threshold and (subt.name != "T regulatory cell" or subt.avgp > 0.5):
+                    if subt.score >= threshold and (
+                        subt.name != "T regulatory cell" or subt.avgp > 0.5
+                    ):
                         if subt.name == "Naive T cell" and subt.score >= 0.6:
                             has_naive_t = True
                         elif subname is None:
                             subname = subt.name
                 if subname is None:
                     cell_name = "Naive T cell" if has_naive_t else "T cell"
                 elif has_naive_t and (subname in ["T helper cell", "Cytotoxic T cell"]):
-                    cell_name = "CD4+ Naive T cell" if subname == "T helper cell" else "CD8+ Naive T cell"
+                    cell_name = (
+                        "CD4+ Naive T cell"
+                        if subname == "T helper cell"
+                        else "CD8+ Naive T cell"
+                    )
                 else:
                     cell_name = subname
             elif is_human_immune and ct.name == "CD1C+ dendritic cell":
                 cell_name = ct.name
                 for ctype in ct_list[1:]:
-                    if ctype.score >= threshold and ctype.name == "CLEC9A+ dendritic cell":
+                    if (
+                        ctype.score >= threshold
+                        and ctype.name == "CLEC9A+ dendritic cell"
+                    ):
                         cell_name = "Conventional dendritic cell (CD1C+/CLEC9A+)"
                         break
             else:
-                while ct.subtypes is not None and len(ct.subtypes) > 0 and ct.subtypes[0].score >= threshold:
+                while (
+                    ct.subtypes is not None
+                    and len(ct.subtypes) > 0
+                    and ct.subtypes[0].score >= threshold
+                ):
                     ct = ct.subtypes[0]
                 cell_name = ct.name
 
@@ -315,7 +319,8 @@ def infer_cell_types(
     if output_file is not None:
         fout = open(output_file, "w")
 
-    import pkg_resources
+    from importlib import resources
+
     predefined_markers = dict(
         human_immune="human_immune_cell_markers.json",
         mouse_immune="mouse_immune_cell_markers.json",
@@ -327,12 +332,13 @@ def infer_cell_types(
     )
 
     if isinstance(markers, str):
-        tokens = markers.split(',')
+        tokens = markers.split(",")
         markers = None
         for token in tokens:
             if token in predefined_markers:
-                token = pkg_resources.resource_filename(
-                    "pegasus.annotate_cluster", predefined_markers[token]
+                token = str(
+                    resources.files("pegasus.annotate_cluster")
+                    / predefined_markers[token]
                 )
             with open(token) as fin:
                 tmp_dict = json.load(fin)
@@ -379,7 +385,9 @@ def infer_cell_types(
             de_up["fc"] = 2.0 ** de_up["fc"]
             de_down["fc"] = 2.0 ** de_down["fc"]
 
-        results = anno.evaluate(de_up, de_down, threshold=threshold, ignore_nonde=ignore_nonde)
+        results = anno.evaluate(
+            de_up, de_down, threshold=threshold, ignore_nonde=ignore_nonde
+        )
 
         if output_file is not None:
             fout.write(f"Cluster {clust_id}:\n")
@@ -394,7 +402,7 @@ def infer_cell_types(
 
 
 def annotate(
-    data: Union[MultimodalData, UnimodalData,AnnData],
+    data: Union[MultimodalData, UnimodalData, AnnData],
     name: str,
     based_on: str,
     anno_dict: Union[Dict[str, str], List[str]],
@@ -425,10 +433,15 @@ def annotate(
     >>> pg.annotate(data, 'anno', 'louvain_labels', ['T cell', 'B cell'])
     """
     if isinstance(anno_dict, list):
-        cluster_ids = data.obs[based_on].cat.categories.values.astype('str')
+        cluster_ids = data.obs[based_on].cat.categories.values.astype("str")
         anno_dict = dict(zip(cluster_ids, anno_dict))
-    from natsort import natsorted 
-    data.obs[name] = pd.Categorical([anno_dict[x] for x in data.obs[based_on]], categories = natsorted(np.unique(list(anno_dict.values()))))
+    from natsort import natsorted
+
+    data.obs[name] = pd.Categorical(
+        [anno_dict[x] for x in data.obs[based_on]],
+        categories=natsorted(np.unique(list(anno_dict.values()))),
+    )
+
 
 @timer(logger=logger)
 def run_annotate_cluster(
@@ -441,8 +454,7 @@ def run_annotate_cluster(
     threshold: float = 0.5,
     ignore_nonde: bool = False,
 ) -> None:
-    """ For command line use.
-    """
+    """For command line use."""
     from pegasusio import read_input
 
     data = read_input(input_file, mode="r")
@@ -459,8 +471,8 @@ def run_annotate_cluster(
 
 
 def annotate_data_object(input_file: str, annotation: str) -> None:
-    """ For command line use.
-        annotation:  anno_name:clust_name:cell_type1;...cell_typen
+    """For command line use.
+    annotation:  anno_name:clust_name:cell_type1;...cell_typen
     """
     from pegasusio import read_input, write_output
 

diff --git a/pegasus/check_sample_indexes/check_sample_indexes.py b/pegasus/check_sample_indexes/check_sample_indexes.py
@@ -3,13 +3,13 @@
 from sys import exit
 
 import json
-import pkg_resources
 
+from importlib import resources
 from typing import List, Dict, Tuple
 
 import logging
-logger = logging.getLogger(__name__)
 
+logger = logging.getLogger(__name__)
 
 
 def load_json_index(input_file: str) -> Dict[str, List[str]]:
@@ -23,23 +23,35 @@ def load_json_index(input_file: str) -> Dict[str, List[str]]:
 
 def load_chromium_indexes() -> Tuple[dict, dict]:
     # Load chromium index sets
-    GA_indexes = load_json_index(pkg_resources.resource_filename("pegasus.check_sample_indexes", "chromium-shared-sample-indexes-plate.json"))
-    NA_indexes = load_json_index(pkg_resources.resource_filename("pegasus.check_sample_indexes", "Chromium-i7-Multiplex-Kit-N-Set-A-sample-indexes-plate.json"))
+    GA_indexes = load_json_index(
+        str(
+            resources.files("pegasus.check_sample_indexes")
+            / "chromium-shared-sample-indexes-plate.json"
+        )
+    )
+    NA_indexes = load_json_index(
+        str(
+            resources.files("pegasus.check_sample_indexes")
+            / "Chromium-i7-Multiplex-Kit-N-Set-A-sample-indexes-plate.json"
+        )
+    )
     return GA_indexes, NA_indexes
 
 
-def load_index_file(index_file: str, GA_indexes: Dict[str, List[str]], NA_indexes: Dict[str, List[str]]) -> List[str]:
+def load_index_file(
+    index_file: str, GA_indexes: Dict[str, List[str]], NA_indexes: Dict[str, List[str]]
+) -> List[str]:
     # Load index file
     index_arr = []
     with open(index_file) as fin:
         for line in fin:
-            index = line.strip().split(',')[0]
+            index = line.strip().split(",")[0]
             if index in GA_indexes:
                 index_arr.extend([(x, index) for x in GA_indexes[index]])
             elif index in NA_indexes:
                 index_arr.extend([(x, index) for x in NA_indexes[index]])
             else:
-                index_arr.append((index, 'orig'))
+                index_arr.append((index, "orig"))
     return index_arr
 
 
@@ -79,12 +91,21 @@ def run_check_sample_indexes(index_file, n_mis=1, n_report=-1):
     min_hd, min_i, min_j = calc_min_hamming_dist(index_arr)
 
     n_mismatch = (min_hd - 1) // 2
-    barcode1 = index_arr[min_i][0] if index_arr[min_i][1] == 'orig' else f"{index_arr[min_i][1]}({index_arr[min_i][0]})"
-    barcode2 = index_arr[min_j][0] if index_arr[min_j][1] == 'orig' else f"{index_arr[min_j][1]}({index_arr[min_j][0]})"
-
-    logger.info(f"Minimum hamming distance is {min_hd}, achieved between {barcode1} and {barcode2}. A n_mis = {n_mismatch} can be set.")
+    barcode1 = (
+        index_arr[min_i][0]
+        if index_arr[min_i][1] == "orig"
+        else f"{index_arr[min_i][1]}({index_arr[min_i][0]})"
+    )
+    barcode2 = (
+        index_arr[min_j][0]
+        if index_arr[min_j][1] == "orig"
+        else f"{index_arr[min_j][1]}({index_arr[min_j][0]})"
+    )
+
+    logger.info(
+        f"Minimum hamming distance is {min_hd}, achieved between {barcode1} and {barcode2}. A n_mis = {n_mismatch} can be set."
+    )
 
-
     if n_mismatch < n_mis:
         logger.error(f"Index collision detected in {index_file} with n_mis = {n_mis}!")
     elif n_report > 0:

diff --git a/pegasus/tools/utils.py b/pegasus/tools/utils.py
@@ -169,29 +169,31 @@ def check_batch_key(data: Union[MultimodalData, UnimodalData], batch: Union[str,
 
 
 
-import pkg_resources
+from importlib import resources
+
+data_path = resources.files("pegasus") / "data_files"
 
 predefined_signatures = dict(
-    cell_cycle_human=pkg_resources.resource_filename("pegasus", "data_files/cell_cycle_human.gmt"),
-    cell_cycle_mouse=pkg_resources.resource_filename("pegasus", "data_files/cell_cycle_mouse.gmt"),
-    gender_human=pkg_resources.resource_filename("pegasus", "data_files/gender_human.gmt"),
-    gender_mouse=pkg_resources.resource_filename("pegasus", "data_files/gender_mouse.gmt"),
-    mitochondrial_genes_human=pkg_resources.resource_filename("pegasus", "data_files/mitochondrial_genes_human.gmt"),
-    mitochondrial_genes_mouse=pkg_resources.resource_filename("pegasus", "data_files/mitochondrial_genes_mouse.gmt"),
-    ribosomal_genes_human=pkg_resources.resource_filename("pegasus", "data_files/ribosomal_genes_human.gmt"),
-    ribosomal_genes_mouse=pkg_resources.resource_filename("pegasus", "data_files/ribosomal_genes_mouse.gmt"),
-    apoptosis_human=pkg_resources.resource_filename("pegasus", "data_files/apoptosis_human.gmt"),
-    apoptosis_mouse=pkg_resources.resource_filename("pegasus", "data_files/apoptosis_mouse.gmt"),
-    human_lung=pkg_resources.resource_filename("pegasus", "data_files/human_lung.gmt"),
-    mouse_lung=pkg_resources.resource_filename("pegasus", "data_files/mouse_lung.gmt"),
-    mouse_brain=pkg_resources.resource_filename("pegasus", "data_files/mouse_brain.gmt"),
-    mouse_liver=pkg_resources.resource_filename("pegasus", "data_files/mouse_liver.gmt"),
-    emt_human=pkg_resources.resource_filename("pegasus", "data_files/emt_human.gmt"),
+    cell_cycle_human=str(data_path / "cell_cycle_human.gmt"),
+    cell_cycle_mouse=str(data_path / "cell_cycle_mouse.gmt"),
+    gender_human=str(data_path / "gender_human.gmt"),
+    gender_mouse=str(data_path / "gender_mouse.gmt"),
+    mitochondrial_genes_human=str(data_path / "mitochondrial_genes_human.gmt"),
+    mitochondrial_genes_mouse=str(data_path / "mitochondrial_genes_mouse.gmt"),
+    ribosomal_genes_human=str(data_path / "ribosomal_genes_human.gmt"),
+    ribosomal_genes_mouse=str(data_path / "ribosomal_genes_mouse.gmt"),
+    apoptosis_human=str(data_path / "apoptosis_human.gmt"),
+    apoptosis_mouse=str(data_path / "apoptosis_mouse.gmt"),
+    human_lung=str(data_path / "human_lung.gmt"),
+    mouse_lung=str(data_path / "mouse_lung.gmt"),
+    mouse_brain=str(data_path / "mouse_brain.gmt"),
+    mouse_liver=str(data_path / "mouse_liver.gmt"),
+    emt_human=str(data_path / "emt_human.gmt"),
 )
 
 predefined_pathways = dict(
-    hallmark=pkg_resources.resource_filename("pegasus", "data_files/h.all.v7.5.1.symbols.gmt"),
-    canonical_pathways=pkg_resources.resource_filename("pegasus", "data_files/c2.cp.v7.5.1.symbols.gmt"),
+    hallmark=str(data_path / "h.all.v7.5.1.symbols.gmt"),
+    canonical_pathways=str(data_path / "c2.cp.v7.5.1.symbols.gmt"),
 )
 
 def load_signatures_from_file(input_file: str) -> Dict[str, List[str]]:

diff --git a/setup.py b/setup.py
@@ -36,11 +36,11 @@
         "Topic :: Software Development :: Build Tools",
         "Topic :: Scientific/Engineering :: Bio-Informatics",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: 3.14",
     ],
     keywords="single cell/nucleus genomics analysis",
     packages=find_packages(),
@@ -61,7 +61,7 @@
         pseudobulk=["pydeseq2", "gseapy"],
         all=["fitsne", "louvain", "scanorama", "torch", "harmony-pytorch", "nmf-torch", "rpy2", "forceatlas2-python", "scvi-tools", "pydeseq2", "gseapy"]
     ),
-    python_requires="~=3.9",
+    python_requires="~=3.10",
     package_data={
         "pegasus.annotate_cluster": [
             "human_immune_cell_markers.json",

diff --git a/tests/run_inmf.sh b/tests/run_inmf.sh
@@ -1,5 +1,5 @@
 pegasus aggregate_matrix tests/data/count_matrix.csv tests/aggr
 
 if [ -f "tests/aggr.zarr.zip" ]; then
-    pegasus cluster -p 2 --output-h5ad --output-loom --correct-batch-effect --correction-method inmf --louvain --umap tests/aggr.zarr.zip tests/inmf_result
+    pegasus cluster -p 2 --output-h5ad --output-loom --correct-batch-effect --correction-method inmf --leiden --umap tests/aggr.zarr.zip tests/inmf_result
 fi
diff --git a/tests/run_one_sample.sh b/tests/run_one_sample.sh
@@ -1 +1 @@
-pegasus cluster -p 2 --min-genes 500 --max-genes 6000 --mito-prefix mt- --percent-mito 20.0 --output-filtration-results --output-h5ad --output-loom --plot-filtration-results --plot-hvf --louvain --leiden --umap --fle tests/data/heart_1k_v3/filtered_feature_bc_matrix.h5 tests/one_sample_result
+pegasus cluster -p 2 --min-genes 500 --max-genes 6000 --mito-prefix mt- --percent-mito 20.0 --output-filtration-results --output-h5ad --output-loom --plot-filtration-results --plot-hvf --leiden --umap --fle tests/data/heart_1k_v3/filtered_feature_bc_matrix.h5 tests/one_sample_result
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
1		-pegasus cluster -p 2 --min-genes 500 --max-genes 6000 --mito-prefix mt- --percent-mito 20.0 --output-filtration-results --output-h5ad --output-loom --plot-filtration-results --plot-hvf --louvain --leiden --umap --fle tests/data/heart_1k_v3/filtered_feature_bc_matrix.h5 tests/one_sample_result
	1	+pegasus cluster -p 2 --min-genes 500 --max-genes 6000 --mito-prefix mt- --percent-mito 20.0 --output-filtration-results --output-h5ad --output-loom --plot-filtration-results --plot-hvf --leiden --umap --fle tests/data/heart_1k_v3/filtered_feature_bc_matrix.h5 tests/one_sample_result