
Commit 6663f0a

HOT FIX: fix MPI time measurement (#651)
1 parent d7e454d commit 6663f0a

File tree: 3 files changed, +367 -169 lines changed


.github/workflows/pages.yml

Lines changed: 6 additions & 0 deletions
@@ -84,7 +84,13 @@ jobs:
       - name: Extract performance data
         run: |
           mkdir -p build/perf_stat_dir
+          # The uploaded artifact contains a nested perf-stat.zip inside.
+          # First unzip extracts the inner archive; the second extracts perf_stat_dir/*.
           unzip -o perf-stat.zip -d .
+          if [ -f "perf-stat.zip" ]; then
+            mv -f perf-stat.zip perf-stat-inner.zip
+            unzip -o perf-stat-inner.zip -d .
+          fi
       - name: CMake configure
         run: |
           cmake -S . -B build -DUSE_SCOREBOARD=ON
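For reference, a rough Python equivalent of the extraction step above (illustration only; the workflow runs the shell commands shown in the diff, and the function name here is hypothetical):

import os
import zipfile

def extract_perf_artifact(workdir: str = ".") -> None:
    # Mirror of the shell step: the downloaded artifact may contain a nested
    # perf-stat.zip, so unzip once, then unzip the inner archive if present.
    os.makedirs(os.path.join(workdir, "build", "perf_stat_dir"), exist_ok=True)
    with zipfile.ZipFile(os.path.join(workdir, "perf-stat.zip")) as zf:
        zf.extractall(workdir)
    inner = os.path.join(workdir, "perf-stat.zip")
    if os.path.isfile(inner):
        renamed = os.path.join(workdir, "perf-stat-inner.zip")
        os.replace(inner, renamed)
        with zipfile.ZipFile(renamed) as zf:
            zf.extractall(workdir)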

scoreboard/main.py

Lines changed: 144 additions & 21 deletions
@@ -108,26 +108,48 @@ def discover_tasks(tasks_dir, task_types):
 directories, tasks_type_map = discover_tasks(tasks_dir, task_types)
 
 
-def load_performance_data(perf_stat_file_path):
-    """Load and parse performance statistics from CSV file."""
+def load_performance_data_threads(perf_stat_file_path: Path) -> dict:
+    """Load threads performance ratios (T_x/T_seq) from CSV.
+    Expected header: Task, SEQ, OMP, TBB, STL, ALL
+    """
+    perf_stats: dict[str, dict] = {}
+    if perf_stat_file_path.exists():
+        with open(perf_stat_file_path, "r", newline="") as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                task_name = row.get("Task")
+                if not task_name:
+                    continue
+                perf_stats[task_name] = {
+                    "seq": row.get("SEQ", "?"),
+                    "omp": row.get("OMP", "?"),
+                    "tbb": row.get("TBB", "?"),
+                    "stl": row.get("STL", "?"),
+                    "all": row.get("ALL", "?"),
+                }
+    else:
+        logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path)
+    return perf_stats
 
-    perf_stats = dict()
+
+def load_performance_data_processes(perf_stat_file_path: Path) -> dict:
+    """Load processes performance ratios (T_x/T_seq) from CSV.
+    Expected header: Task, SEQ, MPI
+    """
+    perf_stats: dict[str, dict] = {}
     if perf_stat_file_path.exists():
         with open(perf_stat_file_path, "r", newline="") as csvfile:
             reader = csv.DictReader(csvfile)
             for row in reader:
                 task_name = row.get("Task")
-                if task_name:
-                    perf_stats[task_name] = {
-                        "seq": row.get("SEQ", "?"),
-                        "omp": row.get("OMP", "?"),
-                        "tbb": row.get("TBB", "?"),
-                        "stl": row.get("STL", "?"),
-                        "all": row.get("ALL", "?"),
-                        "mpi": "N/A",
-                    }
+                if not task_name:
+                    continue
+                perf_stats[task_name] = {
+                    "seq": row.get("SEQ", "?"),
+                    "mpi": row.get("MPI", "?"),
+                }
     else:
-        logger.warning("Performance stats CSV not found at %s", perf_stat_file_path)
+        logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path)
     return perf_stats
 
 
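To make the two CSV layouts concrete, here is a small self-contained sketch of the data each loader expects and the dict it produces (the task names and numbers are made up for illustration):

import csv
import io

threads_csv = "Task,SEQ,OMP,TBB,STL,ALL\nexample_threads,1.00,0.45,0.47,0.50,0.40\n"
processes_csv = "Task,SEQ,MPI\nexample_processes_2,1.00,0.52\n"

# load_performance_data_threads builds, per Task, a dict keyed by lower-cased column names:
threads_stats = {
    row["Task"]: {
        "seq": row.get("SEQ", "?"),
        "omp": row.get("OMP", "?"),
        "tbb": row.get("TBB", "?"),
        "stl": row.get("STL", "?"),
        "all": row.get("ALL", "?"),
    }
    for row in csv.DictReader(io.StringIO(threads_csv))
}
# -> {"example_threads": {"seq": "1.00", "omp": "0.45", "tbb": "0.47", "stl": "0.50", "all": "0.40"}}

# load_performance_data_processes keeps only the SEQ and MPI columns:
processes_stats = {
    row["Task"]: {"seq": row.get("SEQ", "?"), "mpi": row.get("MPI", "?")}
    for row in csv.DictReader(io.StringIO(processes_csv))
}
# -> {"example_processes_2": {"seq": "1.00", "mpi": "0.52"}}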
@@ -652,15 +674,39 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]:
     ds = _evenly_spaced_dates(n_items, s, e)
     return ds
 
-# Locate perf CSV from CI or local runs
-candidates = [
+# Locate perf CSVs from CI or local runs (threads and processes)
+candidates_threads = [
+    script_dir.parent
+    / "build"
+    / "perf_stat_dir"
+    / "threads_task_run_perf_table.csv",
+    script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv",
+    # Fallback to old single-file name
     script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv",
     script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv",
 ]
-perf_stat_file_path = next((p for p in candidates if p.exists()), candidates[0])
+threads_csv = next(
+    (p for p in candidates_threads if p.exists()), candidates_threads[0]
+)
+
+candidates_processes = [
+    script_dir.parent
+    / "build"
+    / "perf_stat_dir"
+    / "processes_task_run_perf_table.csv",
+    script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv",
+]
+processes_csv = next(
+    (p for p in candidates_processes if p.exists()), candidates_processes[0]
+)
 
-# Read and parse performance statistics CSV
-perf_stats = load_performance_data(perf_stat_file_path)
+# Read and merge performance statistics CSVs (keys = CSV Task column)
+perf_stats_threads = load_performance_data_threads(threads_csv)
+perf_stats_processes = load_performance_data_processes(processes_csv)
+perf_stats_raw: dict[str, dict] = {}
+perf_stats_raw.update(perf_stats_threads)
+for k, v in perf_stats_processes.items():
+    perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v}
 
 # Partition tasks by tasks_type from settings.json
 threads_task_dirs = [
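The merge loop above keeps one entry per Task key; when the same key appears in both CSVs, the processes columns are merged into the existing entry rather than replacing it. A minimal sketch with hypothetical task names:

perf_stats_threads = {"example_threads": {"seq": "1.00", "omp": "0.45", "all": "0.40"}}
perf_stats_processes = {"example_processes_2": {"seq": "1.00", "mpi": "0.52"}}

perf_stats_raw: dict[str, dict] = {}
perf_stats_raw.update(perf_stats_threads)
for k, v in perf_stats_processes.items():
    # Dict unpacking: existing columns (if any) first, then the processes columns.
    perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v}
# -> {"example_threads": {...}, "example_processes_2": {"seq": "1.00", "mpi": "0.52"}}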
@@ -678,6 +724,73 @@ def _compute_display_deadlines_processes(n_items: int) -> list[date]:
 elif "processes" in name:
     processes_task_dirs.append(name)
 
+# Resolve performance stats keys (from CSV Task names) to actual task directories
+import re as _re
+
+def _family_from_name(name: str) -> tuple[str, int]:
+    # Infer family from CSV Task value, using only structural markers
+    # threads -> ("threads", 0); processes[_N] -> ("processes", N|1)
+    if "threads" in name:
+        return "threads", 0
+    if "processes" in name:
+        m = _re.search(r"processes(?:_(\d+))?", name)
+        if m:
+            try:
+                idx = int(m.group(1)) if m.group(1) else 1
+            except Exception:
+                idx = 1
+        else:
+            idx = 1
+        return "processes", idx
+    # Fallback: treat as threads family
+    return "threads", 0
+
+def _family_from_dir(dir_name: str) -> tuple[str, int]:
+    # Prefer explicit tasks_type from settings.json and task_number from info.json
+    kind_guess = tasks_type_map.get(dir_name) or (
+        "threads" if "threads" in dir_name else "processes"
+    )
+    idx = 0
+    if kind_guess == "processes":
+        # Lightweight reader to avoid dependency on later-scoped helpers
+        try:
+            import json as _json
+
+            info_path = tasks_dir / dir_name / "info.json"
+            if info_path.exists():
+                with open(info_path, "r") as _f:
+                    data = _json.load(_f)
+                s = data.get("student", {}) if isinstance(data, dict) else {}
+                try:
+                    idx = int(str(s.get("task_number", "0")))
+                except Exception:
+                    idx = 0
+        except Exception:
+            idx = 0
+    return kind_guess, idx
+
+# Build map family -> list of dir names in this repo
+family_to_dirs: dict[tuple[str, int], list[str]] = {}
+for d in sorted(directories.keys()):
+    fam = _family_from_dir(d)
+    family_to_dirs.setdefault(fam, []).append(d)
+
+# Aggregate perf by family (CSV keys may not match dir names)
+perf_by_family: dict[tuple[str, int], dict] = {}
+for key, vals in perf_stats_raw.items():
+    fam = _family_from_name(key)
+    perf_by_family[fam] = {**perf_by_family.get(fam, {}), **vals}
+
+# Project family perf onto actual directories (prefer exact one per family)
+perf_stats: dict[str, dict] = {}
+for fam, vals in perf_by_family.items():
+    dirs_for_family = family_to_dirs.get(fam, [])
+    if not dirs_for_family:
+        continue
+    # Assign same perf to all dirs in the family (usually one)
+    for d in dirs_for_family:
+        perf_stats[d] = vals.copy()
+
 # Build rows for each page
 threads_rows = _build_rows_for_task_types(
     task_types_threads,
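Concretely, _family_from_name collapses CSV Task keys onto (kind, index) family keys, and _family_from_dir does the same for repository directories, so a CSV row can reach a directory even when the names differ. A standalone sketch of the naming rules (the example names are hypothetical):

import re

def family_from_name(name: str) -> tuple[str, int]:
    # Same structural rules as _family_from_name in the diff above.
    if "threads" in name:
        return "threads", 0
    if "processes" in name:
        m = re.search(r"processes(?:_(\d+))?", name)
        idx = int(m.group(1)) if m and m.group(1) else 1
        return "processes", idx
    return "threads", 0

print(family_from_name("example_threads"))      # ('threads', 0)
print(family_from_name("example_processes"))    # ('processes', 1)
print(family_from_name("example_processes_2"))  # ('processes', 2)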
@@ -712,15 +825,15 @@ def _identity_key(student: dict) -> str:
     ]
 )
 
-def _build_cell(dir_name: str, ttype: str):
+def _build_cell(dir_name: str, ttype: str, perf_map: dict[str, dict]):
     status = directories[dir_name].get(ttype)
     sol_points, solution_style = get_solution_points_and_style(ttype, status, cfg)
     task_points = sol_points
     is_cheated, plagiarism_points = check_plagiarism_and_calculate_penalty(
         dir_name, ttype, sol_points, plagiarism_cfg, cfg, semester="processes"
     )
     task_points += plagiarism_points
-    perf_val = perf_stats.get(dir_name, {}).get(ttype, "?")
+    perf_val = perf_map.get(dir_name, {}).get(ttype, "?")
     acceleration, efficiency = calculate_performance_metrics(
         perf_val, eff_num_proc, ttype
     )
@@ -786,7 +899,7 @@ def _build_cell(dir_name: str, ttype: str):
     proc_group_headers.append({"type": "seq"})
     group_cells = []
     for ttype in ["mpi", "seq"]:
-        cell, _ = _build_cell(d, ttype)
+        cell, _ = _build_cell(d, ttype, perf_stats)
         group_cells.append(cell)
     # Override displayed points for processes: S under MPI/SEQ from points-info; A points under MPI only
     s_mpi, s_seq, a_mpi, r_max = _find_process_points(cfg, n)
@@ -902,6 +1015,16 @@ def _build_cell(dir_name: str, ttype: str):
     }
 ]
 
+# Rebuild threads rows with resolved perf stats
+threads_rows = _build_rows_for_task_types(
+    task_types_threads,
+    threads_task_dirs,
+    perf_stats,
+    cfg,
+    eff_num_proc,
+    deadlines_cfg,
+)
+
 parser = argparse.ArgumentParser(description="Generate HTML scoreboard.")
 parser.add_argument(
     "-o", "--output", type=str, required=True, help="Output directory path"
