From 5f90418e515192a51e15fb076faa6d0cfa6493cc Mon Sep 17 00:00:00 2001 From: Blampey Quentin Date: Fri, 15 Sep 2023 15:28:00 +0200 Subject: [PATCH] qptif to sdata --- config/ihc_prism/config_v0.toml | 11 ++++++ pyproject.toml | 47 ++++++++++++++++++++++++ setup.py | 4 ++ sopa/io/explorer/categories.py | 21 ++++++----- sopa/io/qptif.py | 65 +++++++++++++++++---------------- 5 files changed, 107 insertions(+), 41 deletions(-) create mode 100644 config/ihc_prism/config_v0.toml create mode 100644 pyproject.toml create mode 100644 setup.py diff --git a/config/ihc_prism/config_v0.toml b/config/ihc_prism/config_v0.toml new file mode 100644 index 00000000..e271e11a --- /dev/null +++ b/config/ihc_prism/config_v0.toml @@ -0,0 +1,11 @@ +[reader] +type = "qptiff" + +[reader.channels_renaming] +'DAPI MSI' = 'DAPI' +'Opal 780 MSI' = 'CK' +'Opal 480 MSI' = 'CD3' +'Cy5 MSI' = 'CD31' +'FITC' = 'FAP' +'Cy3' = 'CD68' +'Texas Red' = 'CD20' diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8462bf40 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,47 @@ +[tool.poetry] +name = "sopa" +version = "0.0.1" +description = "Spatial-omics preprocessing and analysis" +authors = ["Quentin Blampey "] +packages = [{ include = "sopa" }] + +[tool.poetry.dependencies] +python = ">=3.9,<3.11" +botocore = "^1.31.47" +spatialdata = "^0.0.12" + +[tool.poetry.group.dev.dependencies] +black = "^22.8.0" +isort = "^5.10.1" +pytest = "^7.1.3" +ipykernel = "^6.25.2" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" + +[tool.black] +line-length = 100 +include = '\.pyi?$' +exclude = ''' +/( + \.eggs # exclude a few common directories in the + | \.git # root of the project + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + )/ +''' + +[tool.isort] +profile = "black" +skip_glob = ["*/__init__.py"] diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..7f1a1763 --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + +if __name__ == "__main__": + setup() diff --git a/sopa/io/explorer/categories.py b/sopa/io/explorer/categories.py index fcf26b96..6e42fc38 100644 --- a/sopa/io/explorer/categories.py +++ b/sopa/io/explorer/categories.py @@ -1,5 +1,5 @@ +import anndata import numpy as np -import pandas as pd import zarr @@ -15,7 +15,7 @@ def add_group(root: zarr.Group, index: int, values: np.ndarray, categories: list group.array("indptr", indptr, dtype="uint32", chunks=(len(indptr),)) -def write_groups(path: str, df: pd.DataFrame): +def write_groups(path: str, adata: anndata.AnnData): ATTRS = { "major_version": 1, "minor_version": 0, @@ -24,18 +24,19 @@ def write_groups(path: str, df: pd.DataFrame): "group_names": [], } + categorical_columns = [ + name for name, cat in adata.obs.dtypes.items() if cat == "category" + ] + with zarr.ZipStore(path, mode="w") as store: g = zarr.group(store=store) cell_groups = g.create_group("cell_groups") - i = 0 - for name in df.columns: - if df[name].dtype == "category": - categories = list(df[name].cat.categories) - ATTRS["grouping_names"].append(name) - ATTRS["group_names"].append(categories) + for i, name in enumerate(categorical_columns): + categories = list(adata.obs[name].cat.categories) + ATTRS["grouping_names"].append(name) + ATTRS["group_names"].append(categories) - add_group(cell_groups, i, df[name], categories) - i += 1 + add_group(cell_groups, i, adata.obs[name], categories) cell_groups.attrs.put(ATTRS) diff --git a/sopa/io/qptif.py b/sopa/io/qptif.py index df3daf8f..1dd2e685 100644 --- a/sopa/io/qptif.py +++ b/sopa/io/qptif.py @@ -3,52 +3,55 @@ import shutil from pathlib import Path +import dask.array as da import tifffile as tf - -from .explorer import write_ome_tif +import toml +import xarray as xr +from spatialdata import SpatialData +from spatialdata.models import Image2DModel +from spatialdata.transformations import Identity def get_channel_name(description): return re.search(r"(.*?)", description).group(1) -def read_series(path: Path) -> list[tf.TiffPageSeries]: - with tf.TiffFile(path) as tif: - return list(reversed(sorted(tif.series[0], key=lambda p: p.size))) - +def read_qptiff( + path: Path, channels_renaming: dict | None = None, image_models_kwargs: dict | None = None +) -> SpatialData: + image_models_kwargs = {} if image_models_kwargs is None else image_models_kwargs + if "chunks" not in image_models_kwargs: + image_models_kwargs["chunks"] = (1, 4096, 4096) -def write_zarr( - path: Path, - series: list[tf.TiffPageSeries], - names: list[str], - overwrite: bool = True, -) -> None: - import dask.array as da - import xarray as xr + with tf.TiffFile(path) as tif: + page_series = tif.series[0] + names = [get_channel_name(page.description) for page in page_series] - dask_array = da.asarray(series[0].asarray()) - xarr = xr.DataArray( - dask_array, dims=list(series[0]._axes.lower()), coords={"c": names} - ) - ds = xr.Dataset({"image": xarr}) + if channels_renaming is not None: + names = [channels_renaming[name] for name in names] - if path.exists(): - assert overwrite, f"Path {path} exists and overwrite is False" - shutil.rmtree(path) + image_name = Path(path).absolute().stem + image = Image2DModel.parse( + da.from_array(page_series.asarray(), chunks=image_models_kwargs["chunks"]), + dims=list(page_series._axes.lower()), + transformations={"pixels": Identity()}, + c_coords=names, + **image_models_kwargs, + ) - print("Saving xarray") - ds.to_zarr(path) + return SpatialData(images={image_name: image}) def main(args): - path, output = Path(args.path), Path(args.output) + path = Path(args.path) + output = path.with_suffix(".zarr") assert not output.exists(), f"Output path {output} already exists" - series = read_series(path) - names = [get_channel_name(page.description) for page in series[0]._pages] + config = toml.load(args.config) - write_ome_tif(output, series, names) + sdata = read_qptiff(path, channels_renaming=config["reader"]["channels_renaming"]) + sdata.write(output) if __name__ == "__main__": @@ -61,11 +64,11 @@ def main(args): help="Path to the qptiff file", ) parser.add_argument( - "-o", - "--output", + "-c", + "--config", type=str, required=True, - help="Path to the morphology.ome.tif file", + help="Path to the config file", ) main(parser.parse_args())