1818key_word = ""
1919
2020
21- def read_data (file_path : str , data : dict , chapters : list ) -> None :
21+ def read_data (file_path : str , data : dict , chapters : set ) -> None :
2222 global chap_num
2323 global all_books
2424 global key_word
@@ -40,7 +40,7 @@ def read_data(file_path: str, data: dict, chapters: list) -> None:
4040 if diff_pred_file :
4141 r = extract_data (diff_pred_file [0 ])
4242 data [lang_pair ][int (m .group (1 ))] = r
43- chapters .append (int (m .group (1 )))
43+ chapters .add (int (m .group (1 )))
4444 if int (m .group (1 )) > chap_num :
4545 chap_num = int (m .group (1 ))
4646 else :
@@ -53,7 +53,13 @@ def extract_data(filename: str, header_row=5) -> dict:
5353 global target_book
5454
5555 metrics = [m .lower () for m in metrics ]
56- df = pd .read_excel (filename , header = header_row )
56+ try :
57+ df = pd .read_excel (filename , header = header_row )
58+ except ValueError as e :
59+ print (f"An error occurs in { filename } " )
60+ print (e )
61+ return {}
62+
5763 df .columns = [col .strip ().lower () for col in df .columns ]
5864
5965 result = {}
@@ -105,7 +111,7 @@ def flatten_dict(data: dict, chapters: list, baseline={}) -> list:
105111 row [index_m ] = data [lang_pair ][res_chap ][chap ][m ]
106112 if len (baseline ) > 0 :
107113 for m in range (len (metrics )):
108- row [3 + m ] = baseline [lang_pair ][chap ][m ]
114+ row [3 + m ] = baseline [lang_pair ][chap ][m ] if lang_pair in baseline else None
109115 rows .append (row )
110116 else :
111117 for lang_pair in baseline :
@@ -156,7 +162,7 @@ def create_xlsx(rows: list, chapters: list, output_path: str) -> None:
156162 ws .cell (row = 2 , column = 3 + i , value = baseline_header )
157163
158164 col = 3 + len (metrics ) + 1
159- for _ in range (len (groups ) - 2 ):
165+ for _ in range (len (groups ) - 3 ):
160166 for i , sub_header in enumerate (sub_headers ):
161167 ws .cell (row = 2 , column = col + i , value = sub_header )
162168
@@ -280,10 +286,10 @@ def main() -> None:
280286 output_path = os .path .join (result_dir , "a_result_folder" , f"{ folder_name } .xlsx" )
281287
282288 data = {}
283- chapters = []
289+ chapters = set ()
284290 if exp1_dir :
285291 read_data (exp1_dir , data , chapters )
286- chapters = sorted (set ( chapters ) )
292+ chapters = sorted (chapters )
287293
288294 baseline_data = {}
289295 if exp2_dir :
0 commit comments