Add basic test for classification

akx · akx · commit e68465c6a713 · 2023-05-22T17:57:03.000+03:00
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 *.py[cod]
 .*cache
+.valohai
diff --git a/README.md b/README.md
@@ -14,3 +14,11 @@ Linting/formatting happens via `pre-commit`. Install it with `pip install pre-co
 
 The linters run by `pre-commit` are `ruff`, `black`, and `prettier`;
 you can (should) set up your IDE to run them automatically too.
+
+### Tests
+
+You can run tests with `py.test`:
+
+```
+py.test -v .
+```
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,8 @@
+import pytest
+from valohai.internals import global_state
+
+
+@pytest.fixture()
+def valohai_utils_global_state():
+    global_state.flush_global_state()
+    return global_state
diff --git a/models/nlp/classification/huggingface/test_huggingface.py b/models/nlp/classification/huggingface/test_huggingface.py
@@ -0,0 +1,36 @@
+import csv
+
+import valohai
+
+# Snippet from https://valohai-ecosystem-datasets.s3.eu-west-1.amazonaws.com/yelp_reviews_batch_inference.txt
+EXAMPLE_DATA = """
+Old school.....traditional "mom 'n pop" quality and perfection.
+A great out of the way, non-corporate, vestige of Americana. You will love it.
+Good fish sandwich.
+I always feel like I am constantly bashing breweries for their food, but in my opinion, I feel the bar is raised for places like this.
+I called to complain, and the "manager" didn't even apologize!!! So frustrated. Never going back.  They seem overpriced, too.
+""".strip()
+
+
+def test_inference(valohai_utils_global_state, monkeypatch, tmp_path):
+    monkeypatch.setenv("VH_OUTPUTS_DIR", str(tmp_path))
+    input_path = tmp_path / "input.txt"
+    input_path.write_text(EXAMPLE_DATA)
+    valohai.prepare(
+        step="huggingface-classification-inference",
+        default_parameters={
+            "log_frequency": 1,
+            # This model isn't fine-tuned, so the results won't be very interesting.
+            "huggingface_repository": "distilbert-base-uncased",
+            "output_path": "test.csv",
+        },
+        default_inputs={
+            "data": str(input_path),
+        },
+    )
+    from models.nlp.classification.huggingface.inference import main
+
+    main()
+    with (tmp_path / "test.csv").open() as f:
+        results = list(csv.DictReader(f))
+    assert len(results) == 5
diff --git a/mypy.ini b/mypy.ini
@@ -11,3 +11,6 @@ ignore_missing_imports = True
 
 [mypy-datasets.*]
 ignore_missing_imports = True
+
+[mypy-pytest.*]
+ignore_missing_imports = True
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+# this is needed because models.nlp has its own utils which would conflict
+# with the utils in the root directory with the default `prepend` mode
+addopts = --import-mode=append
diff --git a/ruff.toml b/ruff.toml
@@ -20,3 +20,6 @@ ignore = [
     "T2",
     "TRY003",
 ]
+
+[per-file-ignores]
+"**/test*.py" = ["S101"]  # tests can use assertions

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`*.py[cod]`
`2`	`2`	`.*cache`
	`3`	`+.valohai`
Original file line number	Diff line number	Diff line change
`@@ -20,3 +20,6 @@ ignore = [`
`20`	`20`	`"T2",`
`21`	`21`	`"TRY003",`
`22`	`22`	`]`
	`23`	`+`
	`24`	`+[per-file-ignores]`
	`25`	`+"**/test*.py" = ["S101"]  # tests can use assertions`