Skip to content

Commit 4522da1

Browse files
committed
Update variable type for efficiency
1 parent 505c80e commit 4522da1

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

silnlp/nmt/exp_summary.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
key_word = ""
1919

2020

21-
def read_data(file_path: str, data: dict, chapters: list) -> None:
21+
def read_data(file_path: str, data: dict, chapters: set) -> None:
2222
global chap_num
2323
global all_books
2424
global key_word
@@ -40,7 +40,7 @@ def read_data(file_path: str, data: dict, chapters: list) -> None:
4040
if diff_pred_file:
4141
r = extract_data(diff_pred_file[0])
4242
data[lang_pair][int(m.group(1))] = r
43-
chapters.append(int(m.group(1)))
43+
chapters.add(int(m.group(1)))
4444
if int(m.group(1)) > chap_num:
4545
chap_num = int(m.group(1))
4646
else:
@@ -53,7 +53,13 @@ def extract_data(filename: str, header_row=5) -> dict:
5353
global target_book
5454

5555
metrics = [m.lower() for m in metrics]
56-
df = pd.read_excel(filename, header=header_row)
56+
try:
57+
df = pd.read_excel(filename, header=header_row)
58+
except ValueError as e:
59+
print(f"An error occurs in {filename}")
60+
print(e)
61+
return {}
62+
5763
df.columns = [col.strip().lower() for col in df.columns]
5864

5965
result = {}
@@ -105,7 +111,7 @@ def flatten_dict(data: dict, chapters: list, baseline={}) -> list:
105111
row[index_m] = data[lang_pair][res_chap][chap][m]
106112
if len(baseline) > 0:
107113
for m in range(len(metrics)):
108-
row[3 + m] = baseline[lang_pair][chap][m]
114+
row[3 + m] = baseline[lang_pair][chap][m] if lang_pair in baseline else None
109115
rows.append(row)
110116
else:
111117
for lang_pair in baseline:
@@ -156,7 +162,7 @@ def create_xlsx(rows: list, chapters: list, output_path: str) -> None:
156162
ws.cell(row=2, column=3 + i, value=baseline_header)
157163

158164
col = 3 + len(metrics) + 1
159-
for _ in range(len(groups) - 2):
165+
for _ in range(len(groups) - 3):
160166
for i, sub_header in enumerate(sub_headers):
161167
ws.cell(row=2, column=col + i, value=sub_header)
162168

@@ -280,10 +286,10 @@ def main() -> None:
280286
output_path = os.path.join(result_dir, "a_result_folder", f"{folder_name}.xlsx")
281287

282288
data = {}
283-
chapters = []
289+
chapters = set()
284290
if exp1_dir:
285291
read_data(exp1_dir, data, chapters)
286-
chapters = sorted(set(chapters))
292+
chapters = sorted(chapters)
287293

288294
baseline_data = {}
289295
if exp2_dir:

0 commit comments

Comments
 (0)