#!/bin/bash

# Defaults; each variable below is toggled by the CLI flags parsed in parse_inputs.
PRE_FLIGHT=0                   # -c confirms the pre-flight checks were completed
REMOTE=0                       # -r runs the experiments on Kubernetes
PROVISION=0                    # -s provisions a SurfSara cluster, then exits
SCALE_DOWN=0                   # -d scales experiments down to fit fewer resources
RUN_OVERHEAD=1                 # -f clears this to skip straight to failure experiments
DATA_GENERATOR_IPS=""          # -g supplies external data-generator hosts
BUILD_DOCKER_IMAGES_FROM_SRC=1 # -p clears this to use the pre-built images below
CLONOS_IMG="psylvan/clonos_repro_build"
FLINK_IMG="psylvan/flink_repro_build"
function usage() {
    echo "Clonos Reproducibility:"
    echo -e "\t -f \t\t\t - Run [f]ailure experiments only."
    echo -e "\t -p \t\t\t - Use [p]re-built images of Flink and Clonos, skipping the docker builds from artifact source (the results should be identical)."
    echo -e "\t -r \t\t\t - Run experiments [r]emotely on Kubernetes. ~/.kube/config needs to be set up."
    echo -e "\t -g [semicolon-separated list of user@IP hosts reachable over SSH] \t\t - Use the provided hosts as data-[g]enerators for synthetic tests."
    echo -e "\t\t\t\t\t\t Requires password-less SSH, and each host must have the kafka directory in its home. Most likely not needed."
    echo -e "\t -s [password] \t\t - Provision a cluster for experiments from [S]urfSara. The password must be requested from the authors. Exits after provisioning."
    echo -e "\t -d \t\t\t - Scale [d]own experiments so they can run on fewer resources. Edit experimental_parameters.sh for finer control."
    echo -e "\t -c \t\t\t - Confirm you have completed the pre-flight [c]hecks."
    echo -e "\t -h \t\t\t - Show this [h]elp."
}
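# Illustrative invocations, combining the flags documented above:
#   ./0_workflow.sh -c              # full local run, building images from source
#   ./0_workflow.sh -c -p -f -d     # pre-built images, failure experiments only, scaled down
#   ./0_workflow.sh -c -p -r        # remote run on an already-configured Kubernetes cluster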
function pre_flight_check() {
    echo -e "Pre-flight check flag (-c) was not passed, please read the following:"
    echo -e "Dependencies:"
    echo -e "\t\t\t General: java8, python3, gradle 4 or newer, pdflatex, bibtex"
    echo -e "\t\t\t If building containers from source (DEFAULT): git, maven 3.2.5"
    echo -e "\t\t\t If running local experiments (DEFAULT): docker, docker-compose"
    echo -e "\t\t\t If running remote experiments (-r): kubectl, helm"
    echo -e "Please install all required dependencies."
    echo -e ""
    echo -e "We have attempted to automate as much as possible, but one step is still required: 'docker login'"
    echo -e "\t This is needed because Docker now unfortunately rate-limits image downloads."
    echo -e "\t To avoid problems, consider creating a Docker account with sufficient daily image pulls (https://www.docker.com/pricing), as the free plan may hit its limits."
    echo -e "\t Once an account (free or not) is created, run 'docker login' on your local machine."
    echo -e "\t Our scripts will use this identity to generate docker images (if -p is omitted) and to perform cluster image pulls (if -r is passed)."
    echo -e ""
    echo -e "Call this script again with the -c flag to certify you have completed the pre-flight check."
}
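# A typical pre-flight session might look like this (illustrative):
#   docker login          # authenticate so image pushes/pulls are not rate-limited
#   ./0_workflow.sh -c    # re-run with -c once all dependencies are installed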
# Print an error (plus usage) to stderr, or an informational message to stdout.
echoerr() {
    echo -e "ERROR: $*" 1>&2
    usage
}
echoinfo() { echo -e "INFO: $*"; }
function parse_inputs() {
    optstring=":hfpdrs:g:c"
    while getopts ${optstring} arg; do
        case ${arg} in
        h)
            usage
            exit 0
            ;;
        p)
            BUILD_DOCKER_IMAGES_FROM_SRC=0
            echoinfo "-p supplied, using pre-built docker images."
            ;;
        f)
            RUN_OVERHEAD=0
            echoinfo "-f supplied, skipping overhead experiments and going straight to failure experiments."
            ;;
        d)
            SCALE_DOWN=1
            echoinfo "-d supplied, scaling down experiments."
            ;;
        r)
            REMOTE=1
            echoinfo "-r supplied, running experiments remotely."
            ;;
        g)
            DATA_GENERATOR_IPS="$OPTARG"
            echoinfo "-g supplied, using nodes \"$DATA_GENERATOR_IPS\" as data generators."
            ;;
        s)
            PROVISION=1
            REMOTE=1
            PASSWORD="$OPTARG"
            echoinfo "-s supplied, will provision cluster for experiments."
            ;;
        c)
            PRE_FLIGHT=1
            echoinfo "-c supplied, user has completed pre-flight checks."
            ;;
        :)
            echoerr "$0: Must supply an argument to -$OPTARG." >&2
            exit 1
            ;;
        ?)
            echoerr "Invalid option: -${OPTARG}."
            exit 2
            ;;
        esac
    done
}
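# Note: getopts stops at the first non-option argument, so all configuration
# must be passed via the flags above; trailing positional arguments are ignored.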
parse_inputs "$@" # Quoted so arguments containing spaces (e.g. the -s password) survive word splitting.
if [ "$PRE_FLIGHT" = "0" ]; then
pre_flight_check
exit 0
fi
if [ ! -d "./venv" ]; then
echoinfo "Setting up python venv."
python3 -m venv ./venv
source venv/bin/activate
pip3 install --upgrade pip >/dev/null 2>&1
pip3 install wheel >/dev/null 2>&1
pip3 install matplotlib numpy pandas confluent_kafka >/dev/null 2>&1
#Cant install from pypi repositories as it contains old version which is broken
pip3 install git+https://github.com/python-oca/python-oca >/dev/null 2>&1
else
echoinfo "Activating existing python venv."
source venv/bin/activate
fi
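# If the venv ever ends up half-initialized (e.g. an interrupted first run),
# deleting it forces a clean rebuild on the next invocation (illustrative):
#   rm -rf ./venv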
# Clone repositories & build
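# 1_build_artifacts.sh is sourced rather than executed, presumably so it can
# read the flags set above (e.g. BUILD_DOCKER_IMAGES_FROM_SRC, CLONOS_IMG,
# FLINK_IMG) and share any variables it sets with the steps below.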
. 1_build_artifacts.sh
if [ ! -d "./beam" ]; then
echoinfo "Cloning Clonos' Beam implementation for NEXMARK experiments"
git clone https://github.com/delftdata/beam
else
echoinfo "Skipping git clone of beam because directory already present."
fi
if [ "$PROVISION" = "1" ]; then
echoinfo "Provisioning cluster from SurfSara. This can take up to 15 minutes."
OUT=($(cd ./surf_sara_provision && python3 provision.py -pw $PASSWORD))
IP=${OUT[0]}
DATA_GENERATOR_IPS=${OUT[1]}
echoinfo "Copying Kubeconfig to local computer."
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@$IP:~/.kube/config ~/.kube/config >/dev/null 2>&1
echoinfo "Setting up local storage on the cluster. This will also take a few minutes."
sleep 60
pushd ./kubernetes >/dev/null 2>&1
python3 setup_local_storage.py
popd >/dev/null 2>&1
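    # The secret below wraps the local ~/.docker/config.json (written by 'docker login')
    # so the cluster can authenticate its image pulls; patching the default service
    # account then makes every pod use that secret automatically.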
    path_to_docker_config="$HOME/.docker/config.json"
    kubectl create secret generic pubregcred --from-file=.dockerconfigjson="$path_to_docker_config" --type=kubernetes.io/dockerconfigjson >/dev/null 2>&1
    # Needed for helm to function
    kubectl apply -f ./kubernetes/rbac-config.yaml >/dev/null 2>&1
    echoinfo "Creating Kubernetes service account."
    kubectl patch serviceaccount default -p '{"imagePullSecrets": [{"name": "pubregcred"}]}' >/dev/null 2>&1
    echoinfo "Done. You can now ssh into the machine at ubuntu@$IP."
    echoinfo "You can launch experiments by doing the following:"
    echoinfo "\t 1. ssh ubuntu@$IP"
    echoinfo "\t 2. cd ClonosReproducibility"
    echoinfo "\t 3. git pull # Ensure the VM has the latest version of the reproducibility scripts"
    echoinfo "\t 4. nohup ./0_workflow.sh -c -p -r -g \"$DATA_GENERATOR_IPS\" & # We use nohup to prevent hangups from SSH"
    echoinfo "\t 5. tail -f nohup.out # Follow the output of the script"
    echoinfo "\t You may omit -p to build new Clonos and Flink docker images from scratch, but this wastes the remaining cluster resources and the images will be identical anyway."
    echoinfo "Exiting..."
    exit
fi
if [ "$REMOTE" = "1" ]; then
# At this point we can go ahead and set-up the infrastructure (Kafka and Hadoop) if running on Kubernetes.
helm install hadoop ./kubernetes/charts/hadoop >/dev/null 2>&1
helm install confluent ./kubernetes/charts/cp-helm-charts >/dev/null 2>&1
echoinfo "Setting up HDFS and Kafka for experiments ahead of time."
fi
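# To verify the charts came up before the experiments start, something like the
# following can be used (illustrative):
#   helm list
#   kubectl get pods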
date=$(date +%Y-%m-%d_%H:%M)
path_prefix="./data/results-$date"
mkdir -p "$path_prefix/images"
. 2_run_experiments.sh "$path_prefix"
echoinfo "Experiments completed."
echoinfo "Generating experiment graphs in $path_prefix."
python3 generate_figures.py "$path_prefix" "$path_prefix/images"
cp "$path_prefix"/images/*.pdf ./paper_source/Figures
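# Recompiling the paper relies on the pdflatex/bibtex dependencies listed in the
# pre-flight check above.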
cd ./paper_source && make all
echoinfo "Finished. You can find the recompiled paper at ./paper_source/hastreaming.pdf. Experimental results are at $path_prefix and graphs are at $path_prefix/images."