Skip to content

Commit 4e79276

Browse files
committed
Fix miRBench dataset output paths
1 parent 4ea0c9d commit 4e79276

1 file changed

Lines changed: 7 additions & 5 deletions

File tree

src/agentomics/utils/create_datasets.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,13 @@ def generate_mirbench_files():
3636
f.write(dataset_desrciption[dataset_name])
3737

3838
for dataset_name, splits in dataset_names_splits.items():
39+
local_dset_path = repo_path / "datasets" / dataset_name
40+
download_path = repo_path / ".miRBench" / dataset_name
41+
os.makedirs(download_path, exist_ok=True)
3942
for split in splits:
40-
download_path = repo_path/".miRBench"
41-
os.makedirs(download_path, exist_ok=True)
42-
mirbench_download_dataset(dataset_name, download_path=download_path/'miRBench', split=split)
43-
df = pd.read_csv(download_path/'miRBench', sep="\t")
43+
split_path = download_path / f"{split}.tsv"
44+
mirbench_download_dataset(dataset_name, download_path=split_path, split=split)
45+
df = pd.read_csv(split_path, sep="\t")
4446
df = df.rename(columns={"label": class_col})
4547
# Keep original target column - 'numeric_label' will be created during preparation
4648
df.to_csv(f"{local_dset_path}/{split}.csv", index=False)
@@ -93,4 +95,4 @@ def generate_dataset_files():
9395
generate_mirbench_files()
9496

9597
if __name__ == "__main__":
96-
generate_dataset_files()
98+
generate_dataset_files()

0 commit comments

Comments (0)