Skip to content

Commit edab267

Browse files
committed
Refactoring clean up and fix
1 parent 3b85e49 commit edab267

8 files changed

+68
-70
lines changed

config.py

+22-26
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
11
"""
2-
2+
Configuration module of cSpark test benchmark
33
"""
44

5-
# import pprint
6-
75
# AWS
86
DATA_AMI = {"eu-west-1": {"ami": 'ami-d3225da0', "az": 'eu-west-1c', "keypair": "gazzettaEU",
97
"price": "0.3"},
108
"us-west-2": {"ami": 'ami-7f5ff81f', "snapid": "snap-4f38bf1c", "az": 'us-west-2c',
119
"keypair": "gazzetta",
1210
"price": "0.4"}}
1311

14-
CREDENTIAL_PROFILE = 'matteo'
12+
CREDENTIAL_PROFILE = 'default'
1513
REGION = "us-west-2"
16-
KEYPAIR_PATH = "C:\\Users\\Matteo\\Downloads\\" + DATA_AMI[REGION]["keypair"] + ".pem"
14+
KEY_PAIR_PATH = "C:\\Users\\Matteo\\Downloads\\" + DATA_AMI[REGION]["keypair"] + ".pem"
1715
SECURITY_GROUP = "spark-cluster"
1816
PRICE = DATA_AMI[REGION]["price"]
1917
INSTANCE_TYPE = "r3.4xlarge"
20-
NUMINSTANCE = 9
18+
NUM_INSTANCE = 0
2119
EBS_OPTIMIZED = True if "r3" not in INSTANCE_TYPE else False
2220
REBOOT = 0
2321
KILL_JAVA = 1
@@ -31,7 +29,7 @@
3129
}]
3230

3331
# HDFS
34-
HDFS_MASTER = "ec2-35-160-124-233.us-west-2.compute.amazonaws.com"
32+
HDFS_MASTER = ""
3533

3634
# Spark config
3735
SPARK_2 = "/opt/spark/"
@@ -56,11 +54,11 @@
5654
OFF_HEAP_BYTES = 30720000000
5755

5856
# Core Config
59-
COREVM = 8
60-
COREHTVM = 16
61-
DISABLEHT = 1
62-
if DISABLEHT:
63-
COREHTVM = COREVM
57+
CORE_VM = 8
58+
CORE_HT_VM = 16
59+
DISABLE_HT = 1
60+
if DISABLE_HT:
61+
CORE_HT_VM = CORE_VM
6462

6563
# CONTROL
6664
ALPHA = 0.95
@@ -77,13 +75,13 @@
7775
# 0% 209062
7876
# 20% 250874
7977
# 40% 284375
80-
MAXEXECUTOR = 8
81-
OVERSCALE = 2
78+
MAX_EXECUTOR = 8
79+
OVER_SCALE = 2
8280
K = 50
8381
TI = 12000
84-
TSAMPLE = 1000
85-
COREQUANTUM = 0.05
86-
COREMIN = 0.0
82+
T_SAMPLE = 1000
83+
CORE_QUANTUM = 0.05
84+
CORE_MIN = 0.0
8785
CPU_PERIOD = 100000
8886

8987
# BENCHMARK
@@ -200,17 +198,17 @@
200198
"Deadline": DEADLINE,
201199
"Control": {
202200
"Alpha": ALPHA,
203-
"OverScale": OVERSCALE,
204-
"MaxExecutor": MAXEXECUTOR,
205-
"CoreVM": COREVM,
201+
"OverScale": OVER_SCALE,
202+
"MaxExecutor": MAX_EXECUTOR,
203+
"CoreVM": CORE_VM,
206204
"K": K,
207205
"Ti": TI,
208-
"TSample": TSAMPLE,
209-
"CoreQuantum": COREQUANTUM
206+
"TSample": T_SAMPLE,
207+
"CoreQuantum": CORE_QUANTUM
210208
},
211209
"Aws": {
212210
"InstanceType": INSTANCE_TYPE,
213-
"HyperThreading": not DISABLEHT,
211+
"HyperThreading": not DISABLE_HT,
214212
"Price": PRICE,
215213
"AMI": DATA_AMI[REGION]["ami"],
216214
"Region": REGION,
@@ -221,7 +219,7 @@
221219
"SnapshotId": DATA_AMI[REGION]["snapid"]
222220
},
223221
"Spark": {
224-
"ExecutorCore": COREVM,
222+
"ExecutorCore": CORE_VM,
225223
"ExecutorMemory": RAM_EXEC,
226224
"ExternalShuffle": ENABLE_EXTERNAL_SHUFFLE,
227225
"LocalityWait": LOCALITY_WAIT,
@@ -242,5 +240,3 @@
242240
"scala-sort-by-key-int": ["240", "241"],
243241
"scala-count": ["243", "244"],
244242
"scala-count-w-fltr": ["246", "247"]}
245-
246-
# pprint.pprint(CONFIG_DICT)

launch.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""
2-
Launch the instance with spot request
2+
Handle the instance:
3+
* Launch new instance with spot request
4+
* Terminate instance
5+
* Check instance connectivity
36
"""
47

58
import socket
@@ -163,10 +166,8 @@ def check_spot_price(client, config):
163166

164167
spot_price_history_response = client.describe_spot_price_history(
165168
InstanceTypes=[config["Aws"]["InstanceType"]],
166-
ProductDescriptions=[
167-
'Linux/UNIX'],
168-
AvailabilityZone=
169-
config["Aws"]["AZ"])
169+
ProductDescriptions=['Linux/UNIX'],
170+
AvailabilityZone=config["Aws"]["AZ"])
170171
print(spot_price_history_response['SpotPriceHistory'][0])
171172
last_spot_price = [float(x['SpotPrice']) for x in
172173
spot_price_history_response['SpotPriceHistory'][:10]]

log.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def download_slave(i, output_folder, app_id, config):
9090

9191

9292
@timing
93-
def download(log_folder, instances, master_dns, output_folder):
93+
def download(log_folder, instances, master_dns, output_folder, config):
9494
""" Download the logs from the master and the worker nodes
9595
9696
:param log_folder: the log folder of the application
@@ -102,7 +102,7 @@ def download(log_folder, instances, master_dns, output_folder):
102102
# MASTER
103103
print("Downloading log from Master: " + master_dns)
104104
master_instance = [i for i in instances if i.public_dns_name == master_dns][0]
105-
output_folder, app_id = download_master(master_instance, output_folder, log_folder)
105+
output_folder, app_id = download_master(master_instance, output_folder, log_folder, config)
106106

107107
# SLAVE
108108
with ThreadPoolExecutor(multiprocessing.cpu_count()) as executor:
@@ -193,6 +193,7 @@ def load_worker_data(worker_log, cpu_log, config):
193193
194194
:param worker_log: the path of the log of the worker
195195
:param cpu_log: the path of the cpu monitoring tool log of the worker
196+
:param config: the configuration dictionary
196197
:return: worker_dict the dictionary of the worker's data
197198
"""
198199
print(worker_log)

main.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,22 @@
55

66
import launch
77
import run
8-
from config import NUMINSTANCE, REGION, TAG, REBOOT, CLUSTER_ID, TERMINATE, RUN, NUM_RUN, \
8+
from config import NUM_INSTANCE, REGION, TAG, REBOOT, CLUSTER_ID, TERMINATE, RUN, NUM_RUN, \
99
CREDENTIAL_PROFILE, CONFIG_DICT
1010

1111

1212
def main():
1313
""" Main function;
14-
- Launch spot request of NUMINSTANCE
15-
- Run Benchmark
16-
:return:
14+
* Launch spot request of NUMINSTANCE
15+
* Run Benchmark
16+
* Download Log
17+
* Plot data from log
1718
"""
1819
session = boto3.Session(profile_name=CREDENTIAL_PROFILE)
1920
client = session.client('ec2', region_name=REGION)
2021

21-
if NUMINSTANCE > 0:
22-
spot_request_ids = launch.launch(client, NUMINSTANCE, CONFIG_DICT)
22+
if NUM_INSTANCE > 0:
23+
spot_request_ids = launch.launch(client, NUM_INSTANCE, CONFIG_DICT)
2324

2425
print("CHECK SECURITY GROUP ALLOWED IP SETTINGS!!!")
2526

metrics.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ def compute_cpu_time(app_id, app_info, workers_dict, config, folder):
6565
index = min(range(len(time_cpu)), key=lambda i: abs(time_cpu[i] - time))
6666
# print(index)
6767
cpu_time_max += (config["Control"]["Tsample"] / 1000) * max(cpu, worker_dict[
68-
"cpu_real"][
69-
index + int(config["Control"]["Tsample"] / 1000)])
68+
"cpu_real"][index + int(config["Control"]["Tsample"] / 1000)])
7069
except KeyError:
7170
print(app_id + " not found")
7271
duration_s = app_info[app_id][max(list(app_info[app_id].keys()))]["end"].timestamp() - \
@@ -91,8 +90,8 @@ def compute_cpu_time(app_id, app_info, workers_dict, config, folder):
9190
throughput = float(num_task) / duration_s
9291
if cpu_time == 0:
9392
cpu_time = ((app_info[app_id][max(list(app_info[app_id].keys()))]["end"].timestamp() -
94-
app_info[app_id][PLOT_SID_STAGE]["start"].timestamp())) * config["Control"][
95-
"MaxExecutor"] * config["Control"]["CoreVM"]
93+
app_info[app_id][PLOT_SID_STAGE]["start"].timestamp())) * \
94+
config["Control"]["MaxExecutor"] * config["Control"]["CoreVM"]
9695
cpu_time_max = cpu_time
9796
cpu_time_max = math.floor(cpu_time_max)
9897
print("CPU_TIME: " + str(cpu_time))
@@ -210,7 +209,7 @@ def compute_metrics(folder):
210209
app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
211210
app_info = {}
212211
for app_log in sorted(app_logs):
213-
app_info = load_app_data(app_log, config)
212+
app_info = load_app_data(app_log)
214213

215214
for app_id in app_info:
216215
compute_errors(app_id, app_info[app_id], folder, config)

plot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ def plot(folder):
630630
app_logs = glob.glob(folder + "*.err") + glob.glob(folder + "*.dat")
631631
app_info = {}
632632
for app_log in sorted(app_logs):
633-
app_info = load_app_data(app_log, config)
633+
app_info = load_app_data(app_log)
634634

635635
for app_id in app_info:
636636
plot_app_overview(app_id, app_info[app_id], folder, config)

0 commit comments

Comments
 (0)