-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDockerfile
190 lines (158 loc) · 9.52 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Base image: Miniconda3, pinned to the 4.9.2 tag.
FROM continuumio/miniconda3:4.9.2
# continuumio/miniconda3 is FROM debian:latest
# Make RUN commands use `bash --login` (always source ~/.bashrc on each RUN)
# NOTE(review): later RUN steps use `conda activate`, `source` and `[ ... == ... ]`,
# so they presumably depend on this bash login shell -- confirm before changing SHELL.
SHELL ["/bin/bash", "--login", "-c"]
# Third-party tools are downloaded and built under this directory.
WORKDIR /opt/basestack_consensus/code
# Install the OS packages needed by the following steps:
#   - git, wget, unzip, bzip2, CA certificates: fetch and unpack third-party tools
#   - build-essential plus zlib/bz2/lzma headers: compile samtools from source
#   - libfontconfig1: runtime library
# All packages are installed with a single `apt-get install` (one per line, sorted,
# for readable diffs). curl is removed afterwards, as before. The apt caches and
# package lists are deleted in this same layer so they do not end up in the image;
# later steps that need apt run their own `apt-get update`.
# NOTE(review): package versions are not pinned and recommended packages are still
# installed, so the installed set matches what the previous version produced.
RUN apt-get update --allow-releaseinfo-change \
    && apt-get install -y \
        apt-transport-https \
        build-essential \
        bzip2 \
        ca-certificates \
        git \
        libbz2-dev \
        libfontconfig1 \
        liblzma-dev \
        unzip \
        wget \
        zlib1g-dev \
    && update-ca-certificates \
    && apt-get -qq -y remove curl \
    && apt-get -qq -y autoremove \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Download the third-party tools into the current WORKDIR:
#   OpenJDK 14, the samtools 1.10 sources, vcfigv, ONT Guppy (CPU build) 4.2.2,
#   artic-ncov2019 (with its submodules) and pangolin.
# Each archive is deleted right after extraction and each repository's .git
# directory is removed, all within this one layer.
# Because the .git directories are thrown away immediately, the repositories are
# cloned with --depth 1: the checked-out files are the same, only the unused
# history is no longer downloaded.
# NOTE(review): the git repositories are not pinned to a tag or commit, so a
# rebuild can pick up different upstream code.
# NOTE(review): `--no-check-certificate` disables TLS verification for the Guppy
# download. It is kept so the build keeps working, but it should be dropped if
# the mirror's certificate validates against the installed CA bundle.
RUN wget https://download.java.net/java/GA/jdk14/076bab302c7b4508975440c56f6cc26a/36/GPL/openjdk-14_linux-x64_bin.tar.gz \
    && tar -xzf openjdk-14_linux-x64_bin.tar.gz \
    && rm openjdk-14_linux-x64_bin.tar.gz \
    && wget https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 \
    && tar -xjf samtools-1.10.tar.bz2 \
    && rm samtools-1.10.tar.bz2 \
    && git clone --depth 1 https://github.com/mkirsche/vcfigv \
    && rm -rf vcfigv/.git \
    && wget --no-check-certificate https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_4.2.2_linux64.tar.gz \
    && tar -xzf ont-guppy-cpu_4.2.2_linux64.tar.gz \
    && rm ont-guppy-cpu_4.2.2_linux64.tar.gz \
    && git clone --depth 1 --recurse-submodules https://github.com/artic-network/artic-ncov2019 \
    && rm -rf artic-ncov2019/.git \
    && git clone --depth 1 https://github.com/cov-lineages/pangolin.git \
    && rm -rf pangolin/.git
WORKDIR /opt/basestack_consensus/code/samtools-1.10
# Compile samtools 1.10 from the unpacked sources and install it; later steps
# call `samtools` by name from PATH.
# --without-curses: build without the curses library (no curses headers are
# installed in this image).
# make runs one job per available CPU to shorten the build.
RUN ./configure --without-curses \
    && make -j"$(nproc)" \
    && make install
# Put conda's bin directory first on PATH (key=value form, consistent with the
# other ENV PATH instructions in this file).
ENV PATH="/opt/conda/bin:${PATH}"
# Make sure the base environment runs Python 3, bring conda itself up to date,
# and purge conda's package caches in the same layer.
RUN conda install -y python=3 \
    && conda update -y conda \
    && conda clean --all --yes
# install TeX libraries
WORKDIR /opt/basestack_consensus
# Download the TinyTeX installer, rewrite its default TEXDIR so TinyTeX lands in
# /opt/basestack_consensus/.TinyTeX, and run it.
# pipefail makes the build stop when the download (or the sed) fails; without it
# only the exit status of the final `sh` counts and an empty script "succeeds".
RUN set -o pipefail \
    && wget -qO- "https://yihui.name/gh/tinytex/tools/install-unx.sh" \
       | sed 's@TEXDIR=${TINYTEX_DIR:-~/.TinyTeX}@TEXDIR=${TINYTEX_DIR:-/opt/basestack_consensus/.TinyTeX}@' \
       | sh
# Register the TinyTeX binaries with `tlmgr path add`, then install the LaTeX
# packages. The first ten packages are installed with a single tlmgr call
# (sorted, one per line). The `update --self` followed by the separate
# `multirow` install is kept in its original position.
RUN export PATH=/opt/basestack_consensus/.TinyTeX/bin/x86_64-linux:/opt/conda/bin:$PATH \
    && tlmgr path add \
    && tlmgr install \
        colortbl \
        environ \
        makecell \
        mnsymbol \
        pdflscape \
        tabu \
        threeparttable \
        threeparttablex \
        ulem \
        wrapfig \
    && tlmgr update --self \
    && tlmgr install multirow
# Mirror the SciServer directory layout to ease the transition: make sure the
# code directory exists and set the setgid bit on it, then work from there.
RUN mkdir -p /opt/basestack_consensus/code && chmod g+s /opt/basestack_consensus/code
WORKDIR /opt/basestack_consensus/code
# Create the conda environments shipped with artic-ncov2019 and pangolin.
# Strict channel priority is configured first.
RUN conda config --set channel_priority strict
# Steps, in order:
#   1. create the environment described by artic-ncov2019/environment.yml
#   2. patch pangolin/environment.yml from python=3.6 to python=3.7
#   3. create the pangolin environment and activate it
#   4. install pangolin from its checkout (done in a subshell so the working
#      directory of the remaining commands is unaffected)
#   5. purge conda's caches in the same layer
RUN conda env create -f artic-ncov2019/environment.yml \
    && sed -i 's/ - python=3.6/ - python=3.7/' pangolin/environment.yml \
    && conda env create -f pangolin/environment.yml \
    && conda activate pangolin \
    && ( cd pangolin && python setup.py install ) \
    && conda clean --all --yes
##################################################################
# configure IGV screenshots in report
# Install the packages used for the IGV build and screenshots (xvfb, libxtst6,
# zip, unzip, curl), then install SDKMAN into /opt/basestack_consensus/.sdkman
# and use it to install Gradle 6.8. curl is removed again afterwards and the
# apt caches/lists are cleaned in the same layer.
# The SDKMAN installer is fetched with `curl -f` inside a pipefail subshell so
# that an HTTP error or failed download aborts the build instead of piping
# nothing (or an error page) into bash. pipefail is confined to the subshell so
# it does not change how the sourced SDKMAN functions behave.
# NOTE(review): the installer is executed straight from the network without a
# checksum or pinned version.
# NOTE(review): the IGV build further down invokes ./gradlew; confirm whether
# the SDKMAN-provided Gradle is still needed.
WORKDIR /opt/basestack_consensus
RUN apt-get update -qq -y \
    && apt-get install -qq -y xvfb libxtst6 zip unzip curl \
    && export SDKMAN_DIR=/opt/basestack_consensus/.sdkman \
    && ( set -o pipefail; curl -fsSL "https://get.sdkman.io" | bash ) \
    && source "/opt/basestack_consensus/.sdkman/bin/sdkman-init.sh" \
    && sdk install gradle 6.8 \
    && apt-get -qq -y remove curl \
    && apt-get -qq -y autoremove \
    && apt-get autoclean \
    && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log
# install igv
# Clone IGV, patch its default preferences, and build a distribution with Gradle.
# NOTE(review): the clone is unpinned and lives in its own layer, so the full
# checkout stays in the image history even though most of it is deleted by the
# next RUN -- consider merging the two steps.
WORKDIR /opt/basestack_consensus/code
RUN git clone https://github.com/igvteam/igv
# Build steps:
#   1. download and unpack a temporary OpenJDK 11.0.2 for the build
#   2. rewrite entries of preferences.tab with sed, e.g. enable
#      SHOW_SEQUENCE_TRANSLATION, switch DEFAULT_GENOME_KEY from hg19 to ncov and
#      point the genome server URL at /opt/basestack_consensus/igv-genomes/genomes.txt
#   3. run `./gradlew createDist` with JAVA_HOME/PATH pointing at that JDK
#   4. delete the JDK archive and directory, then remove every top-level
#      directory of the igv checkout except `build`
# NOTE(review): several sed expressions below contain the literal text
# "[email protected]". That looks like residue of an e-mail obfuscation filter
# applied to this copy of the file, not a valid sed script -- restore these
# lines from the upstream repository before building.
RUN wget https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz \
&& tar -xzf openjdk-11.0.2_linux-x64_bin.tar.gz \
&& sed -i '[email protected]_BY\[email protected]_BY\tREAD_STRAND@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i '[email protected]_OPTION\[email protected]_OPTION\tSTRAND@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i '[email protected]_OPTION\[email protected]_OPTION\tSTRAND@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i 's@SHOW_SEQUENCE_TRANSLATION\tFALSE@SHOW_SEQUENCE_TRANSLATION\tTRUE@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i '[email protected]_CENTER_LINE\tShow center line\tboolean\[email protected]_CENTER_LINE\tShow center line\tboolean\tTRUE@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i '[email protected]\tGenome server URL\tstring\thttps://s3.amazonaws.com/igv.org.genomes/[email protected]\tGenome server URL\tstring\t/opt/basestack_consensus/igv-genomes/genomes.txt@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& sed -i 's@DEFAULT_GENOME_KEY\thg19@DEFAULT_GENOME_KEY\tncov@' ./igv/src/main/resources/org/broad/igv/prefs/preferences.tab \
&& cd /opt/basestack_consensus/code/igv \
&& export JAVA_HOME="/opt/basestack_consensus/code/jdk-11.0.2" \
&& export PATH=/opt/basestack_consensus/code/jdk-11.0.2/bin:$PATH \
&& ./gradlew createDist \
&& rm ../openjdk-11.0.2_linux-x64_bin.tar.gz \
&& rm -rf ../jdk-11.0.2 \
&& find /opt/basestack_consensus/code/igv -mindepth 1 -maxdepth 1 -type d ! -name "build" -exec rm -r {} \;
##################################################################
# Copy only environment.yml at this point, so the conda-environment layer below
# is rebuilt only when that file changes (handy for quick debugging rebuilds).
# The RUN below needs this file, so the COPY must stay.
COPY environment.yml /opt/basestack_consensus/code/ncov/
# Create the project's conda environment from the copied file and purge conda's
# caches in the same layer.
RUN conda env create --file /opt/basestack_consensus/code/ncov/environment.yml && conda clean --all --yes
# Make RUN commands use the new environment:
#SHELL ["conda", "run", "-n", "jhu-ncov", "/bin/bash"]
# Copy the whole build context (including environment.yml again) into the image.
COPY . /opt/basestack_consensus/code/ncov/
# Lay out the project files where they are expected:
#   - everything under covid19/ goes to /opt/basestack_consensus
#   - igv-genomes is symlinked into /opt/basestack_consensus
#   - the barcode config and masked barcode FASTA go into Guppy's barcoding data directory
RUN ncov=/opt/basestack_consensus/code/ncov \
    && cp -r "${ncov}"/covid19/* /opt/basestack_consensus \
    && ln -s "${ncov}/igv-genomes" /opt/basestack_consensus \
    && cp "${ncov}/barcode_arrs_nb96.cfg" "${ncov}/barcodes_masked.fasta" \
          /opt/basestack_consensus/code/ont-guppy-cpu/data/barcoding/
#################################################################
# Overwrite RAMPART's default genome.json (9 genes) with the project's 13-gene version.
RUN cp /opt/basestack_consensus/ncov_reference/genome.json \
       /opt/basestack_consensus/code/artic-ncov2019/rampart/genome.json
# Final environment setup: set mode 755 recursively on the pipeline scripts and
# make /bin/sh a symlink to bash.
RUN chmod -R 755 /opt/basestack_consensus/code/ncov/pipeline_scripts/ && ln -sf /bin/bash /bin/sh
WORKDIR /opt/basestack_consensus/code/ncov/additional_scripts
# Fetch the CoverageNormalization and VariantValidator repositories next to the
# project's additional scripts.
RUN git clone https://github.com/mkirsche/CoverageNormalization.git && git clone https://github.com/mkirsche/VariantValidator.git
# Prepend the tool directories to PATH in a single instruction. Search order,
# first to last: Guppy binaries, JDK 14, pipeline scripts, additional scripts,
# then the previous PATH.
ENV PATH="/opt/basestack_consensus/code/ont-guppy-cpu/bin:/opt/basestack_consensus/code/jdk-14/bin:/opt/basestack_consensus/code/ncov/pipeline_scripts:/opt/basestack_consensus/code/ncov/additional_scripts:${PATH}"
WORKDIR /opt/basestack_consensus
# Compile the Java helpers and assemble the primer-scheme directory.
#
# 1. Compile all *.java files of CoverageNormalization, VariantValidator and
#    vcfigv with the JDK 14 javac.
# 2. Index the artic nCoV-2019 V3 reference FASTA with `samtools faidx`.
# 3. Populate /opt/basestack_consensus/primer_schemes from artic-ncov2019 first
#    and from the project's own primer_schemes second, so project files
#    overwrite same-named artic files.
# 4. Loop over every non-README file inside a V<digits>/ directory whose path
#    ends in "bed" or "fasta". For each file:
#      protocol = first path component below primer_schemes/
#      name     = file name up to its first dot
#      exts     = file name after its first dot
#    - if name differs from protocol, copy the file to <protocol>.<exts> in the
#      same directory and replace every occurrence of name inside the copy with
#      protocol;
#    - if protocol is nCoV-2019, overwrite <protocol>.reference.fasta in that
#      directory with covid19/ncov_reference/sequence.fasta;
#    - if <protocol>.bed is missing or empty, create it from <protocol>.scheme.bed.
#
# NOTE(review): the loop uses `==` inside `[ ]` and unquoted expansions, so it
# relies on bash being the RUN shell and on paths without whitespace.
# NOTE(review): commands inside the loop are separated by `;`, so a failing cp
# or sed does not stop the build unless it is the last command of the final
# iteration.
# NOTE(review): the .fai created in step 2 is copied along in step 3; after the
# nCoV-2019 reference is overwritten in step 4 that index may no longer match
# the FASTA -- confirm.
RUN /opt/basestack_consensus/code/jdk-14/bin/javac "/opt/basestack_consensus/code/ncov/additional_scripts/CoverageNormalization/src"/*.java \
&& /opt/basestack_consensus/code/jdk-14/bin/javac "/opt/basestack_consensus/code/ncov/additional_scripts/VariantValidator/src"/*.java \
&& /opt/basestack_consensus/code/jdk-14/bin/javac "/opt/basestack_consensus/code/vcfigv/src"/*.java \
&& samtools faidx "/opt/basestack_consensus/code/artic-ncov2019/primer_schemes/nCoV-2019/V3/nCoV-2019.reference.fasta" \
&& mkdir -p /opt/basestack_consensus/primer_schemes \
&& cp -r /opt/basestack_consensus/code/artic-ncov2019/primer_schemes/* /opt/basestack_consensus/primer_schemes/ \
&& cp -r /opt/basestack_consensus/code/ncov/primer_schemes/* /opt/basestack_consensus/primer_schemes/ \
&& for file in $( find primer_schemes/* -regex ".*V[0-9]+\/.*" -type f \( -not -name "*README*" -regex ".*\(bed\)\|.*\(fasta\)" \) | cut -sd / -f2- ); do \
fullname=$(basename -- $file); \
exts=${fullname#*.}; name=${fullname%%.*}; \
protocol=${file#/}; protocol=${protocol%%/*}; \
if [ $protocol != $name ]; then \
output=primer_schemes/$(dirname $file)/$protocol.$exts; \
cp primer_schemes/$file $output; \
sed -Ei "s/$name/$protocol/g" $output; \
fi; \
if [ $protocol == 'nCoV-2019' ]; then \
cp /opt/basestack_consensus/code/ncov/covid19/ncov_reference/sequence.fasta primer_schemes/$(dirname $file)/${protocol}.reference.fasta; \
fi; \
if [ ! -s primer_schemes/$(dirname $file)/$protocol.bed ]; then \
cp primer_schemes/$(dirname $file)/${protocol}".scheme.bed" primer_schemes/$(dirname $file)/$protocol".bed"; \
fi; \
done
# ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "jhu-ncov"]
# RUN conda activate jhu-ncov && ldconfig /usr/local/lib