@@ -36,11 +36,13 @@ def generate_mirbench_files():
3636 f .write (dataset_desrciption [dataset_name ])
3737
3838 for dataset_name , splits in dataset_names_splits .items ():
39+ local_dset_path = repo_path / "datasets" / dataset_name
40+ download_path = repo_path / ".miRBench" / dataset_name
41+ os .makedirs (download_path , exist_ok = True )
3942 for split in splits :
40- download_path = repo_path / ".miRBench"
41- os .makedirs (download_path , exist_ok = True )
42- mirbench_download_dataset (dataset_name , download_path = download_path / 'miRBench' , split = split )
43- df = pd .read_csv (download_path / 'miRBench' , sep = "\t " )
43+ split_path = download_path / f"{ split } .tsv"
44+ mirbench_download_dataset (dataset_name , download_path = split_path , split = split )
45+ df = pd .read_csv (split_path , sep = "\t " )
4446 df = df .rename (columns = {"label" : class_col })
4547 # Keep original target column - 'numeric_label' will be created during preparation
4648 df .to_csv (f"{ local_dset_path } /{ split } .csv" , index = False )
@@ -93,4 +95,4 @@ def generate_dataset_files():
9395 generate_mirbench_files ()
9496
9597if __name__ == "__main__" :
96- generate_dataset_files ()
98+ generate_dataset_files ()
0 commit comments