diff --git a/.circleci/config.yml b/.circleci/config.yml index ff30bc73..ada361e3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,7 +34,7 @@ jobs: source activate ./atlasenv conda list else - mamba env create -p ./atlasenv --file atlasenv.yml + conda env create -p ./atlasenv --file atlasenv.yml fi - save_cache: key: atlasenv-d-{{ checksum "atlasenv.yml" }} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..ae637b21 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# Start with the Miniconda base image +FROM continuumio/miniconda3:24.9.2-0 + +# Set the working directory in the container +WORKDIR /main + +# Copy the environment file and project code +COPY atlasenv.yml . + +# Create a user with a specific UID and GID +RUN groupadd -g 1000 atlasgroup && \ + useradd -m -u 1000 -g atlasgroup -s /bin/bash atlasuser + +# Set the HOME environment variable +ENV HOME=/home/atlasuser + +# Change ownership of the home directory +RUN chown -R atlasuser:atlasgroup $HOME + +# Switch to the new user +USER atlasuser + +# Create and activate the environment +RUN conda env create -n atlas -f atlasenv.yml && \ + conda clean -afy && \ + echo "source activate atlas" > ~/.bashrc + +# Set the working directory +WORKDIR /main + + +# Set the default command +CMD ["bash"] \ No newline at end of file diff --git a/README.md b/README.md index 445f8757..b236f4e1 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Metagenome-atlas is a easy-to-use metagenomic pipeline based on snakemake. 
It ha You can start using atlas with three commands: ``` - mamba install -y -c bioconda -c conda-forge metagenome-atlas={latest_version} + conda install -y -c bioconda -c conda-forge metagenome-atlas={latest_version} atlas init --db-dir databases path/to/fastq/files atlas run all ``` diff --git a/atlas/atlas.py b/atlas/atlas.py index 330d1487..ceadab31 100644 --- a/atlas/atlas.py +++ b/atlas/atlas.py @@ -7,7 +7,7 @@ import click -from snakemake.io import load_configfile +from snakemake.common.configfile import load_configfile from .make_config import validate_config from .init.atlas_init import run_init # , run_init_sra @@ -247,7 +247,7 @@ def run_download(db_dir, jobs, snakemake_args): cmd = ( "snakemake --snakefile {snakefile} " "--jobs {jobs} --rerun-incomplete " - "--conda-frontend mamba --scheduler greedy " + "--scheduler greedy " "--nolock --use-conda --conda-prefix {conda_prefix} " " --show-failed-logs " "--config database_dir='{db_dir}' {add_args} " diff --git a/atlas/make_config.py b/atlas/make_config.py index 96332497..3f21dcb8 100644 --- a/atlas/make_config.py +++ b/atlas/make_config.py @@ -1,6 +1,6 @@ from .default_values import * from snakemake.utils import update_config as snakemake_update_config -from snakemake.io import load_configfile +from snakemake.common.configfile import load_configfile import tempfile import sys import os diff --git a/atlasenv.yml b/atlasenv.yml index c1ec80ea..2df45857 100644 --- a/atlasenv.yml +++ b/atlasenv.yml @@ -3,10 +3,10 @@ channels: - bioconda - defaults dependencies: - - python >=3.8, < 3.12 - - mamba + - python >=3.10, < 3.12 + - libmamba= 2 - bbmap >= 39.01, <40 - - snakemake-minimal >= 7.18.1, <7.26 + - snakemake-minimal >= 8.12, <8.26 - pygments - networkx - graphviz @@ -14,5 +14,5 @@ dependencies: - pyarrow # for parquet reading - click >=7 - ruamel.yaml >=0.17 - - cookiecutter - wget + - snakemake-executor-plugin-slurm diff --git a/docs/index.rst b/docs/index.rst index 971b50ca..5b9794fa 100644 --- 
a/docs/index.rst +++ b/docs/index.rst @@ -26,7 +26,7 @@ It handles all steps from QC, Assembly, Binning, to Annotation. You can start using atlas with three commands:: - mamba install -c bioconda -c conda-forge metagenome-atlas={latest_version} + conda install -c bioconda -c conda-forge metagenome-atlas={latest_version} atlas init --db-dir databases path/to/fastq/files atlas run diff --git a/docs/usage/getting_started.rst b/docs/usage/getting_started.rst index 3c53f42b..e020df70 100644 --- a/docs/usage/getting_started.rst +++ b/docs/usage/getting_started.rst @@ -1,5 +1,5 @@ .. _conda: http://anaconda.org/ -.. _mamba: https://github.com/TheSnakePit/mamba + Getting Started *************** @@ -30,15 +30,8 @@ Setting strict channel priority can prevent quite some annoyances. The order is important by the way. -Install mamba ------------- - -Conda can be a bit slow because there are so many packages. A good way around this is to use mamba_ (another snake).:: - - conda install mamba - - -From now on, you can replace ``conda install`` with ``mamba install`` and see how much faster this snake is. +*Previously atlas recommended the use of mamba, which was faster. Since conda 24.9, conda uses the same library as a backend. +So we suggest updating conda (``conda update -n base conda``) and using it.* Install metagenome-atlas ------------------------ @@ -48,7 +41,7 @@ We also recommend to specify the latest version of metagenome-atlas. .. code-block:: bash - mamba create -y -n atlasenv metagenome-atlas={latest_version} + conda create -y -n atlasenv metagenome-atlas={latest_version} source activate atlasenv where `{latest_version}` should be replaced by @@ -73,7 +66,7 @@ Alternatively, you can install metagenome Atlas directly from GitHub. This allow # git checkout branchname # create dependencies for atlas - mamba env create -n atlas-dev --file atlasenv.yml + conda env create -n atlas-dev --file atlasenv.yml conda activate atlas-dev # install atlas version. 
Changes in the files are directly available in the atlas dev version diff --git a/docs/usage/output.rst b/docs/usage/output.rst index e0f2237a..b8b18362 100644 --- a/docs/usage/output.rst +++ b/docs/usage/output.rst @@ -1,7 +1,6 @@ .. |scheme| image:: ../../resources/images/atlas_list.png :alt: Atlas is a workflow for assembly and binning of metagenomic reads -.. _thesis: https://github.com/TheSnakePit/mamba Expected output *************** diff --git a/workflow/envs/fasta.yaml b/workflow/envs/fasta.yaml index 13fc46c2..7beaf1f7 100644 --- a/workflow/envs/fasta.yaml +++ b/workflow/envs/fasta.yaml @@ -3,7 +3,7 @@ channels: - bioconda - defaults dependencies: - - pyfastx=0.9 + - pyfastx=2.1 - pandas=1.2 - pyarrow - biopython diff --git a/workflow/envs/required_packages.yaml b/workflow/envs/required_packages.yaml index 9ec72359..07b3f638 100644 --- a/workflow/envs/required_packages.yaml +++ b/workflow/envs/required_packages.yaml @@ -9,4 +9,4 @@ dependencies: - bzip2 >=1.0 - pandas >=1.2, <2 - samtools >=1.13, <2 - - sambamba <1 + - sambamba diff --git a/workflow/rules/assemble.smk b/workflow/rules/assemble.smk index cb5b2ff3..83b7f594 100644 --- a/workflow/rules/assemble.smk +++ b/workflow/rules/assemble.smk @@ -505,7 +505,7 @@ if config["filter_contigs"]: resources: mem_mb=config["mem"] * 1000, wrapper: - "v1.19.0/bio/minimap2/aligner" + "v5.5.0/bio/minimap2/aligner" rule pileup_prefilter: input: @@ -635,7 +635,7 @@ rule align_reads_to_final_contigs: resources: mem_mb=config["mem"] * 1000, wrapper: - "v1.19.0/bio/minimap2/aligner" + "v5.5.0/bio/minimap2/aligner" rule pileup_contigs_sample: diff --git a/workflow/rules/derep.smk b/workflow/rules/derep.smk index 63312277..9e2c78c0 100644 --- a/workflow/rules/derep.smk +++ b/workflow/rules/derep.smk @@ -110,4 +110,4 @@ rule build_bin_report: log: "logs/binning/report_{binner}.log", script: - "../report/bin_report.py" + "../../report/bin_report.py" diff --git a/workflow/rules/genecatalog.smk 
b/workflow/rules/genecatalog.smk index a0ee605c..45521778 100644 --- a/workflow/rules/genecatalog.smk +++ b/workflow/rules/genecatalog.smk @@ -234,7 +234,7 @@ rule index_genecatalog: params: index_size="12G", wrapper: - "v1.19.0/bio/minimap2/index" + "v5.5.0/bio/minimap2/index" rule concat_all_reads: @@ -266,7 +266,7 @@ rule align_reads_to_Genecatalog: extra="-x sr --split-prefix {sample}_split_ ", sort="coordinate", wrapper: - "v1.19.0/bio/minimap2/aligner" + "v5.5.0/bio/minimap2/aligner" rule pileup_Genecatalog: diff --git a/workflow/rules/genomes.smk b/workflow/rules/genomes.smk index 32b78c9d..a993858c 100644 --- a/workflow/rules/genomes.smk +++ b/workflow/rules/genomes.smk @@ -206,7 +206,7 @@ if config["genome_aligner"] == "minimap": resources: mem_mb=config["mem"] * 1000, wrapper: - "v1.19.0/bio/minimap2/index" + "v5.5.0/bio/minimap2/index" rule align_reads_to_genomes: input: @@ -223,7 +223,7 @@ if config["genome_aligner"] == "minimap": resources: mem_mb=config["mem"] * 1000, wrapper: - "v1.19.0/bio/minimap2/aligner" + "v5.5.0/bio/minimap2/aligner" elif config["genome_aligner"] == "bwa": diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index bde9b98d..655272e9 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -158,8 +158,7 @@ if not SKIP_QC: dupesubs=config["duplicates_allow_substitutions"], only_optical=("t" if config.get("duplicates_only_optical") else "f"), log: - sterr="{sample}/logs/QC/deduplicate.err", - stout="{sample}/logs/QC/deduplicate.log", + "{sample}/logs/QC/deduplicate.log", conda: "%s/required_packages.yaml" % CONDAENV threads: config.get("threads", 1) @@ -177,8 +176,7 @@ if not SKIP_QC: " threads={threads} " " pigz=t unpigz=t " " -Xmx{resources.java_mem}G " - " 2> {log.sterr} " - " 1> {log.stout} " + " &> {log} " PROCESSED_STEPS.append("filtered") @@ -229,8 +227,7 @@ if not SKIP_QC: output.reads, key="out", allow_singletons=False ), log: - sterr="{sample}/logs/QC/quality_filter.err", - 
stout="{sample}/logs/QC/quality_filter.log", + "{sample}/logs/QC/quality_filter.log", conda: "%s/required_packages.yaml" % CONDAENV threads: config.get("threads", 1) @@ -260,8 +257,7 @@ if not SKIP_QC: " prealloc={params.prealloc} " " pigz=t unpigz=t " " -Xmx{resources.java_mem}G " - " 2> {log.sterr} " - " 1> {log.stout} " + " &> {log} " # if there are no references, decontamination will be skipped if len(config.get("contaminant_references", {}).keys()) > 0: