feat: use promptsource templates

tianjianjiang · tianjianjiang · commit 42078198223c · 2021-08-31T00:07:20.000+09:00
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ tensorflow = "2.5.0"
 torch = "1.9.0"
 tqdm = "4.62.0"
 transformers = "4.9.1"
+promptsource = {git = "https://git@github.com/bigscience-workshop/promptsource.git", rev = "main"}
 
 [tool.poetry.dev-dependencies]
 isort = "^5.9.3"
diff --git a/requirements.txt b/requirements.txt
@@ -4,3 +4,4 @@ tensorflow==2.5.0
 torch==1.9.0
 tqdm==4.62.0
 transformers==4.9.1
+promptsource @ git+https://github.com/bigscience-workshop/promptsource.git@main
diff --git a/tests/test_tydiqa_secondary.py b/tests/test_tydiqa_secondary.py
@@ -1,3 +1,6 @@
+from datasets import load_dataset
+from promptsource.templates import TemplateCollection
+from promptsource.utils import removeHyphen
 from transformers import AutoTokenizer
 
 from evaluation.tasks.tydiqa_secondary.tydiqa_secondary import TyDiQADataset
@@ -15,3 +18,18 @@ def test_prompt():
         "such as wound, ostomy, and continence nursing and burn center care.\n"
     ) in prompt
     assert prompt.endswith("Answer:")
+
+
+def test_promptsource_template():
+    ds_key, sub_key = "tydiqa", "secondary_task"
+    tydiqa_sec_vld_ds = load_dataset(ds_key, sub_key, split="validation", streaming=True)
+    tydiqa_sec_vld_ds_en = filter(lambda x: x["id"].split("-")[0] == "english", tydiqa_sec_vld_ds)
+    template_collection = TemplateCollection()
+    tydiqa_sec_tmpls = template_collection.get_dataset(ds_key, sub_key)
+    tmpl = tydiqa_sec_tmpls["simple_question_reading_comp_2"]
+    prompt, _ = tmpl.apply(removeHyphen(next(tydiqa_sec_vld_ds_en)))
+    assert (
+        "Wound care encourages and speeds wound healing via cleaning and protection from reinjury or infection. "
+        "Depending on each patient's needs, it can range from the simplest first aid to entire nursing specialties "
+        "such as wound, ostomy, and continence nursing and burn center care.\n"
+    ) in prompt