
Commit 9d67a03

fixes

1 parent b2ec301

8 files changed, 27 insertions(+), 41 deletions(-)

config.py (+10 -9)

@@ -5,20 +5,20 @@
 # AWS
 DATA_AMI = {"eu-west-1": {"ami": 'ami-d3225da0',
                           "az": 'eu-west-1c',
-                          "keypair": "gazzettaEU",
+                          "keypair": "giovanni2",
                           "price": "0.3"},
             "us-west-2": {"ami": 'ami-7f5ff81f',
                           "snapid": "snap-4f38bf1c",
                           "az": 'us-west-2c',
-                          "keypair": "gazzetta",
-                          "price": "0.25"}}
+                          "keypair": "giovanni2",
+                          "price": "0.3"}}
 """AMI id for region and availability zone"""

 CREDENTIAL_PROFILE = 'cspark'
 """Credential profile name of AWS"""
 REGION = "us-west-2"
 """Region of AWS to use"""
-KEY_PAIR_PATH = "/home/meteos/" + DATA_AMI[REGION]["keypair"] + ".pem"
+KEY_PAIR_PATH = "/Users/Giovanni/Desktop/" + DATA_AMI[REGION]["keypair"] + ".pem"
 """KeyPair path for the instance"""
 SECURITY_GROUP = "spark-cluster"
 """Secutiry group of the instance"""
@@ -35,7 +35,7 @@
 NUM_RUN = 1
 """Number of run to repeat the benchmark"""

-CLUSTER_ID = "1"
+CLUSTER_ID = "CSPARK"
 """Id of the cluster with the launched instances"""
 print("Cluster ID : " + str(CLUSTER_ID))
 TAG = [{
@@ -44,7 +44,7 @@
 }]

 # HDFS
-HDFS_MASTER = ""
+HDFS_MASTER = "ec2-35-161-111-116.us-west-2.compute.amazonaws.com"
 """Url of the HDFS NameNode if not set the cluster created is an HDFS Cluster"""
 # Spark config
 SPARK_2_HOME = "/opt/spark/"
@@ -81,7 +81,7 @@

 # CONTROL
 ALPHA = 0.95
-DEADLINE = 239474
+DEADLINE = 284375
 # SVM
 # 0% 217500
 # 20% 261000
@@ -94,7 +94,7 @@
 # 0% 209062
 # 20% 250874
 # 40% 284375
-MAX_EXECUTOR = 8
+MAX_EXECUTOR = 9
 OVER_SCALE = 2
 K = 50
 TI = 12000
@@ -159,7 +159,8 @@
     },
     "PageRank": {
         "NUM_OF_PARTITIONS": (3, 1000),
-        "numV": (2, 7000000),
+        "numV": (2, 2000000),
+        "mu": (4, 5.0),
         "MAX_ITERATION": (8, 1)
     },
     "KMeans": {

util/download_log.py → download_log.py (renamed, +2 -2)

@@ -12,10 +12,10 @@
 ])

 logfolder = "./spark-bench/num"
-master_dns = "ec2-35-161-226-18.us-west-2.compute.amazonaws.com"
+master_dns = "ec2-35-165-203-239.us-west-2.compute.amazonaws.com"
 # master_dns = "ec2-54-70-77-95.us-west-2.compute.amazonaws.com"
 output_folder = "./spark-bench/num/"
-output_folder = log.download(logfolder, instances, master_dns, output_folder)
+output_folder = log.download(logfolder, instances, master_dns, output_folder, CONFIG_DICT)

 if output_folder[-1] != "/":
     output_folder += "/"

log.py (+5 -1)

@@ -108,7 +108,7 @@ def download(log_folder, instances, master_dns, output_folder, config):
     with ThreadPoolExecutor(multiprocessing.cpu_count()) as executor:
         for i in instances:
             if i.public_dns_name != master_dns:
-                worker = executor.submit(download_slave, i, output_folder, app_id)
+                worker = executor.submit(download_slave, i, output_folder, app_id, config)
                 output_folder = worker.result()
     return output_folder

@@ -260,5 +260,9 @@ def load_worker_data(worker_log, cpu_log, config):
                 cpu_real = float(
                     '{0:.2f}'.format((float(line[2]) * config["Control"]["CoreVM"]) / 100))
                 worker_dict["cpu_real"].append(cpu_real)
+    for app_id in list(worker_dict):
+        print(app_id)
+        if not len(worker_dict[app_id]) > 0:
+            del worker_dict[app_id]
     print(list(worker_dict.keys()))
     return worker_dict
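
The loop added to load_worker_data prunes applications that ended up with no data. A minimal standalone sketch of the same pattern, with hypothetical worker_dict contents: list(worker_dict) snapshots the keys, so entries can be deleted while looping.

    # Hypothetical data; only the pruning pattern is taken from the diff.
    worker_dict = {"app-001": [0.42, 0.57], "app-002": []}

    for app_id in list(worker_dict):      # snapshot of keys -> safe to delete below
        print(app_id)
        if not len(worker_dict[app_id]) > 0:
            del worker_dict[app_id]       # drop apps with no recorded samples

    print(list(worker_dict.keys()))       # ['app-001']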

main.py (+3 -3)

@@ -43,9 +43,9 @@ def main():

     if REBOOT:
         print("Rebooting instances...")
-        instances = client.instances.filter(
-            Filters=[{'Name': 'instance-state-name', 'Values': ['running']},
-                     {'Name': 'tag:ClusterId', 'Values': [CLUSTER_ID]}])
+        session = boto3.Session(profile_name=CREDENTIAL_PROFILE)
+        ec2 = session.resource('ec2', region_name=REGION)
+        instances = ec2.instances.filter(Filters=[{'Name': 'instance-state-name', 'Values': ['running']}, {'Name': 'tag:ClusterId', 'Values': [CLUSTER_ID]}])
         instance_ids = [x.id for x in instances]
         client.reboot_instances(InstanceIds=instance_ids)
         launch.wait_ping(client, instance_ids, copy.deepcopy(instance_ids))
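
The old code called instances.filter on the low-level EC2 client, which exposes no instances collection; that attribute belongs to the boto3 resource API, so the fix builds a resource from a session first. A hedged standalone sketch of the client-versus-resource split, using the profile, region, and cluster id set in config.py:

    import boto3

    session = boto3.Session(profile_name='cspark')
    ec2 = session.resource('ec2', region_name='us-west-2')    # resource API: collections
    client = session.client('ec2', region_name='us-west-2')   # client API: raw actions

    running = ec2.instances.filter(
        Filters=[{'Name': 'instance-state-name', 'Values': ['running']},
                 {'Name': 'tag:ClusterId', 'Values': ['CSPARK']}])
    instance_ids = [i.id for i in running]
    if instance_ids:                       # skip the call when nothing is running
        client.reboot_instances(InstanceIds=instance_ids)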

plot.py (+3 -2)

@@ -33,7 +33,7 @@
 LABEL_SIZE = 20
 TQ_MICRO = 20
 TQ_KMEANS = 9
-PDF = 1
+PDF = 0

 PLOT_PARAMETERS = {
     'axes.labelsize': LABEL_SIZE,  # fontsize for x and y labels (was 10)
@@ -256,9 +256,10 @@ def plot_worker(app_id, app_info, worker_log, worker_dict, config, first_ts_work
     folder_split = worker_log.split("/")
     name = folder_split[-3].lower() + "-worker-" + folder_split[-2].replace("%", "") + "-" + \
            folder_split[-1].split("-")[-1].replace(".out", "")
-    folder = "/".join(worker_log.split("\\")[:-1])
+    folder = "/".join(worker_log.split("/")[:-1])
     labels = ax1.get_xticklabels()
     plt.setp(labels, rotation=45)
+    print(folder)
     if PDF:
         plt.savefig(folder + "/" + name + ".pdf", bbox_inches='tight', dpi=300)
     else:
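
The folder change in plot_worker is a path-separator bug: on Linux the log path contains no backslashes, so splitting on "\\" yields a single-element list, [:-1] drops it, and folder came out empty. A quick illustration with a hypothetical log path:

    worker_log = "spark-bench/PageRank/0/worker-1.out"   # hypothetical path

    # Old behaviour: no '\\' in the path -> one-element list -> empty folder.
    print("/".join(worker_log.split("\\")[:-1]))   # ''
    # Fixed behaviour: splitting on '/' keeps the real parent directory.
    print("/".join(worker_log.split("/")[:-1]))    # 'spark-bench/PageRank/0'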

requirements.txt (+1 -1)

@@ -1,4 +1,4 @@
-boto3>=1.4.1
+boto3==1.4.1
 matplotlib>=2.0.0b4
 numpy>=1.11.2
 pandas>=0.19.1

run.py (+3 -7)

@@ -91,9 +91,7 @@ def setup_slave(instance, master_dns):
     if UPDATE_SPARK:
         print(" Updating Spark...")
         ssh_client.run(
-            """cd /usr/local/spark && git pull && build/mvn -T 1C -Phive -Pnetlib-lgpl -Pyarn
-            -Phadoop-2.7 -Dhadoop.version=2.7.2 -Dscala-2.11 -DskipTests
-            -Dmaven.test.skip=true package""")
+            """cd /usr/local/spark && git pull && build/mvn clean && build/mvn -T 1C -Phive -Pnetlib-lgpl -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.2 -Dscala-2.11 -DskipTests -Dmaven.test.skip=true package""")

     # CLEAN UP EXECUTORS APP LOGS
     ssh_client.run("rm -r " + SPARK_HOME + "work/*")
@@ -166,9 +164,7 @@ def setup_master(instance):
     if UPDATE_SPARK_MASTER:
         print(" Updating Spark...")
         ssh_client.run(
-            """cd /usr/local/spark && git pull && build/mvn -T 1C -Phive -Pnetlib-lgpl -Pyarn
-            -Phadoop-2.7 -Dhadoop.version=2.7.2 -Dscala-2.11 -DskipTests
-            -Dmaven.test.skip=true package""")
+            """cd /usr/local/spark && git pull && build/mvn clean && build/mvn -T 1C -Phive -Pnetlib-lgpl -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.2 -Dscala-2.11 -DskipTests -Dmaven.test.skip=true package""")

     print(" Remove Logs")
     ssh_client.run("rm " + SPARK_HOME + "spark-events/*")

@@ -498,7 +494,7 @@ def run_benchmark():
     status = ssh_client.run('[ ! -e %s ]; echo $?' % (DATA_AMI[REGION]["keypair"] + ".pem"))
     if not int(status[1].decode('utf8').replace("\n", "")):
         ssh_client.put_file(KEY_PAIR_PATH, "/home/ubuntu/" + DATA_AMI[REGION]["keypair"] + ".pem")
-
+        ssh_client.run("chmod 400 "+ "/home/ubuntu/" + DATA_AMI[REGION]["keypair"] + ".pem")
     # LAUNCH BENCHMARK
     if HDFS == 0:
         if len(BENCHMARK_PERF) > 0:
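
The added chmod matters because ssh refuses a private key that other users can read, so the .pem copied to the master must be owner-read-only before the benchmark can connect through it. Done locally in Python rather than through the remote shell, the tightening would look like:

    import os
    import stat

    # Hypothetical local path; run.py performs the same step remotely
    # via "chmod 400" right after uploading the key.
    pem = "/home/ubuntu/giovanni2.pem"
    os.chmod(pem, stat.S_IRUSR)  # 0o400: owner read-only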

util/plot_stages.py (-16)

This file was deleted.
