diff --git a/chapters/en/chapter5/4.mdx b/chapters/en/chapter5/4.mdx
index 8e6415a3f..dd80dda24 100644
--- a/chapters/en/chapter5/4.mdx
+++ b/chapters/en/chapter5/4.mdx
@@ -30,7 +30,7 @@ Next, we can load the dataset using the method for remote files that we learned
 from datasets import load_dataset
 
 # This takes a few minutes to run, so go grab a tea or coffee while you wait :)
-data_files = "https://the-eye.eu/public/AI/pile_preliminary_components/PUBMED_title_abstracts_2019_baseline.jsonl.zst"
+data_files = "https://huggingface.co/datasets/casinca/PUBMED_title_abstracts_2019_baseline/resolve/main/PUBMED_title_abstracts_2019_baseline.jsonl.zst"
 pubmed_dataset = load_dataset("json", data_files=data_files, split="train")
 pubmed_dataset
 ```