help:
	@cat Makefile
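# Example invocations (a sketch using the variables defined below; host paths are placeholders):
#   make build TAG=dev PYTHON_VERSION=3.8
#   make lab NOTEBOOKS="$(pwd)/notebooks" DATA="$(pwd)/data" GPU=all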
# Define build variables
STEM?=mmrl/dl
TAG?=latest
BASE_TAG?=latest
OS=ubuntu20.04
CUDA_VERSION?=11.0.3
CUDNN_VERSION?=8
PYTHON_VERSION?=3.8
TENSORFLOW_VERSION?=2.4.1
# 2020-10-20: TensorFlow >2.2,<=2.3.1 has a bug that crashes perturbation testing; 2.2.1 is fine.
TF_MODELS_VERSION?=v2.4.0
PYTORCH_VERSION?=1.7.1
UID?=1000
DOCKER_FILE=Dockerfile
# Define run variables
VOLUME?=deepnet
HOST_PORT?=8888
TB_HOST_PORTS?=6006-6015
TB_PORTS?=$(TB_HOST_PORTS)
GPU?=all
# Define directories within the image
CODE_PATH?="/work/code"
DATA_PATH?="/work/data"
LOGS_PATH?="/work/logs"
MODELS_PATH?="/work/models"
NOTEBOOKS_PATH?="/work/notebooks"
RESULTS_PATH?="/work/results"
SCRIPTS_PATH?="/work/scripts"
TEMP_PATH?="/work/temp"
TEST=tests/
# BUILD_DATE is a build-time variable with a default; set it in `docker build` as follows:
# --build-arg BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Add ORCID and DOI as labels
# ARG BUILD_DATE unspecified
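# A possible sketch (not wired into the rules below): pass the date at build time,
# assuming the Dockerfile declares `ARG BUILD_DATE` and records it in a LABEL:
#   docker build --build-arg BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -t mmrl/dl:latest .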
all: base build tensorflow pytorch
.PHONY: help all base build tensorflow pytorch prune nuke clean bash ipython lab notebook test tensorboard tabs push release info verbose
# build: IMAGE := $(STEM):$(TAG)
ifndef IMAGE
ifdef TAG
IMAGE := $(STEM):$(TAG)
else
IMAGE := $(STEM)
endif
endif
build:
	echo "Building $(IMAGE) image..."
	echo "PYTHON_VERSION=$(PYTHON_VERSION)"
	echo "CUDA_VERSION=$(CUDA_VERSION)"
	echo "CUDNN_VERSION=$(CUDNN_VERSION)"
	echo "TENSORFLOW_VERSION=$(TENSORFLOW_VERSION)"
	echo "PYTORCH_VERSION=$(PYTORCH_VERSION)"
	docker build -t $(IMAGE) \
		--build-arg OS=$(OS) \
		--build-arg CUDA_VERSION=$(CUDA_VERSION) \
		--build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
		--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
		--build-arg NB_UID=$(UID) \
		--build-arg TENSORFLOW_VERSION=$(TENSORFLOW_VERSION) \
		--build-arg TF_MODELS_VERSION=$(TF_MODELS_VERSION) \
		--build-arg PYTORCH_VERSION=$(PYTORCH_VERSION) \
		-f $(DOCKER_FILE) .
	docker tag $(IMAGE) $(STEM):latest
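# Example (hypothetical tag name; versions default to the values defined above):
#   make build TAG=tf2.4.1 TENSORFLOW_VERSION=2.4.1 CUDA_VERSION=11.0.3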
# base:
# echo "Building $@ image..."
# $(DOCKER) build -t mmrl/dl-base \
# --build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
# --build-arg CUDA_VERSION=$(CUDA_VERSION) \
# --build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
# --build-arg NB_UID=$(UID) \
# -f base/$(DOCKER_FILE) $@
# tensorflow pytorch: base
# echo "Building $@ image..."
# $(DOCKER) build -t mmrl/dl-$@ -f $@/$(DOCKER_FILE) $@
base: BUILD_ARGS := --build-arg OS=$(OS) \
	--build-arg CUDA_VERSION=$(CUDA_VERSION) \
	--build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
	--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
	--build-arg NB_UID=$(UID)
base: IMAGE := $(STEM)-base:$(CUDA_VERSION)
tensorflow: BUILD_ARGS := --build-arg TAG=$(BASE_TAG) \
	--build-arg TENSORFLOW_VERSION=$(TENSORFLOW_VERSION) \
	--build-arg TF_MODELS_VERSION=$(TF_MODELS_VERSION)
tensorflow: IMAGE := $(STEM)-tensorflow:$(TENSORFLOW_VERSION)
pytorch: BUILD_ARGS := --build-arg TAG=$(BASE_TAG) \
	--build-arg PYTORCH_VERSION=$(PYTORCH_VERSION)
pytorch: IMAGE := $(STEM)-pytorch:$(PYTORCH_VERSION)
tensorflow pytorch: base
base tensorflow pytorch:
echo "Building $@ image..."
docker build -t $(IMAGE) $(BUILD_ARGS) -f $@/$(DOCKER_FILE) .
# $(DOCKER) build -t $(IMAGE) $(BUILD_ARGS) -f $@/$(DOCKER_FILE) $@
# $(DOCKER) build -t mmrl/dl-$@ $(BUILD_ARGS) -f $@/$(DOCKER_FILE) $@
# $(DOCKER) build -t mmrl/dl-$@ \
# --build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
# --build-arg CUDA_VERSION=$(CUDA_VERSION) \
# --build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
# --build-arg NB_UID=$(UID) \
# -f $@/$(DOCKER_FILE) $@
prune:
	docker system prune -f
nuke:
	docker system prune --volumes
clean: prune
	git pull
	docker build -t $(IMAGE) \
		--no-cache \
		--build-arg OS=$(OS) \
		--build-arg CUDA_VERSION=$(CUDA_VERSION) \
		--build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
		--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
		--build-arg NB_UID=$(UID) \
		-f $(DOCKER_FILE) .
# Make /work a volume and mount any defined subdirectories
MOUNTS := -v $(VOLUME):/work
ifdef CODE
MOUNTS += -v $(CODE):$(CODE_PATH)
endif
ifdef DATA
MOUNTS += -v $(DATA):$(DATA_PATH)
endif
ifdef LOGS
MOUNTS += -v $(LOGS):$(LOGS_PATH)
endif
ifdef MODELS
MOUNTS += -v $(MODELS):$(MODELS_PATH)
endif
ifdef NOTEBOOKS
MOUNTS += -v $(NOTEBOOKS):$(NOTEBOOKS_PATH)
endif
ifdef RESULTS
MOUNTS += -v $(RESULTS):$(RESULTS_PATH)
endif
ifdef SCRIPTS
MOUNTS += -v $(SCRIPTS):$(SCRIPTS_PATH)
endif
ifdef TEMP
MOUNTS += -v $(TEMP):$(TEMP_PATH)
endif
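# Example (hypothetical host paths): mount local code, data and logs under /work:
#   make lab CODE="$(pwd)/code" DATA="$(pwd)/data" LOGS="$(pwd)/logs"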
# Define Jupyter port
PORTS := -p $(HOST_PORT):8888
run:
	@echo $(MOUNTS)
# bash ipython: PORTS += -p 0.0.0.0:$(TB_HOST_PORTS):$(TB_PORTS)
# $(PORTS)
bash ipython: build
	docker run -it --init --gpus=$(GPU) --name $(notdir $(STEM))-$@ $(MOUNTS) $(IMAGE) $@
# The flag --cap-add=CAP_SYS_ADMIN is needed to avoid CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
# when using the profiler (through TensorBoard); this should be resolved in CUDA 11 / TF 2.4.
# See: https://github.com/tensorflow/profiler/issues/63
# If that fails, try `--privileged=true`: https://github.com/tensorflow/tensorflow/issues/35860
# https://developer.nvidia.com/nvidia-development-tools-solutions-err-nvgpuctrperm-cupti
# See also: https://www.tensorflow.org/guide/profiler
# To disable the build dependency, use `make lab -o build ...`
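# Fallback sketch if CAP_SYS_ADMIN is still insufficient for CUPTI (assumes the default image,
# Jupyter port and GPU settings defined above):
#   docker run -it --init --gpus=all --privileged=true --rm -p 8888:8888 mmrl/dl:latest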
lab: PORTS += -p 0.0.0.0:$(TB_HOST_PORTS):$(TB_PORTS)
lab: build
	docker run -it --init --gpus=$(GPU) --rm --cap-add=CAP_SYS_ADMIN --name $(subst /,_,$(STEM))-lab $(MOUNTS) $(PORTS) $(IMAGE)
notebook: PORTS += -p 0.0.0.0:$(TB_HOST_PORTS):$(TB_PORTS)
notebook: build
	docker run -it --init --gpus=$(GPU) --cap-add=CAP_SYS_ADMIN --name $(subst /,_,$(STEM))-nb $(MOUNTS) $(PORTS) $(IMAGE) \
		jupyter notebook --port=8888 --ip=0.0.0.0 --notebook-dir=$(NOTEBOOKS_PATH)
test: build
	docker run -it --init --gpus=$(GPU) \
		-v $(SRC):/work/code \
		-v $(DATA):/work/data \
		-v $(RESULTS):/work/results \
		$(IMAGE) py.test $(TEST)
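# Example (hypothetical host paths): run the test suite against local code and data:
#   make test SRC="$(pwd)/code" DATA="$(pwd)/data" RESULTS="$(pwd)/results" TEST=tests/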
tensorboard: build
	docker run -it --init --gpus=$(GPU) $(MOUNTS) -p 0.0.0.0:$(TB_HOST_PORTS):$(TB_PORTS) $(IMAGE) tensorboard --logdir=$(LOGS_PATH)
tabs: build
# $(DOCKER) run -d --name $(subst /,_,$(STEM))-tbd $(MOUNTS) -p 0.0.0.0:6006:6006 $(IMAGE) tensorboard --logdir=$(LOGS_PATH)
	docker run -d --gpus=$(GPU) --name $(subst /,_,$(STEM))-tbd \
		-v $(LOGS):$(LOGS_PATH) \
		-p 0.0.0.0:6006:6006 \
		$(IMAGE) tensorboard --logdir=$(LOGS_PATH)
# $(LOGS) may need to be a volume to share between containers
	docker run -it --init --gpus=$(GPU) --cap-add=CAP_SYS_ADMIN --name $(subst /,_,$(STEM))-lab \
		-v $(LOGS):$(LOGS_PATH) \
		-v $(SRC):/work/code \
		-v $(DATA):/work/data \
		-v $(RESULTS):/work/results \
		-p $(HOST_PORT):8888 \
		$(IMAGE)
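# Sketch of sharing logs between the two containers via a named volume (hypothetical volume name):
#   docker volume create deepnet-logs
#   make tabs LOGS=deepnet-logs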
push: # build
# $(DOCKER) tag $(TAG) $(NEWTAG)
	docker push $(IMAGE)
release: build push
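# Example (hypothetical version tag; requires push access to the $(STEM) repository):
#   make release TAG=2.4.1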
info:
@echo "Mounts: $(MOUNTS)"
@echo "Ports: $(PORTS)"
lsb_release -a
docker -v
docker run -it --rm $(IMAGE) nvidia-smi
verbose: info
	docker system info