add luna sources

martysai · Sep 9, 2023 · f47cb40 · f47cb40
commit f47cb40
Show file tree

Hide file tree

Showing 47 changed files with 7,983 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,133 @@
+# Files downloaded every time the Rouge-We metric is used
+embeddings/
+models/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,28 @@
+repos:
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v4.0.1
+      hooks:
+        - id: check-yaml
+        - id: check-json
+        - id: end-of-file-fixer
+        - id: trailing-whitespace
+        - id: check-added-large-files
+        - id: check-merge-conflict
+        - id: detect-private-key
+
+    - repo: local
+      hooks:
+        - id: black
+          files: luna
+          name: black
+          entry: black --config pyproject.toml
+          types: [ python ]
+          language: system
+
+    - repo: local
+      hooks:
+        - id: isort
+          name: isort
+          entry: isort --settings-path pyproject.toml
+          types: [ python ]
+          language: system
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Anonymous
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,119 @@
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/Moonlight-Syntax/LUNA/blob/main/.pre-commit-config.yaml)
+
+
+# LUNA: a Framework for Language Understanding and Naturalness Assessment
+
+The framework provides a set of well-known automated evaluation metrics for text generation tasks.
+
+The library includes the following metrics:
+
+- Blanc: [paper](https://aclanthology.org/2020.eval4nlp-1.2/)
+- Mover score: [paper](https://aclanthology.org/D19-1053/)
+- BLEU: [paper](https://aclanthology.org/P02-1040/)
+- METEOR: [paper](https://aclanthology.org/W05-0909/)
+- ROUGE: [paper](https://aclanthology.org/W04-1013/)
+- chrF: [paper](https://aclanthology.org/W15-3049/)
+- BERTScore: [paper](https://arxiv.org/abs/1904.09675)
+- BARTScore: [paper](https://arxiv.org/abs/2106.11520)
+- Data statistics metrics: [paper](https://aclanthology.org/N18-1065/)
+  - Compression
+  - Coverage
+  - Length
+  - Novelty
+  - Density
+  - Repetition
+- ROUGE-We: [paper](https://aclanthology.org/D15-1222/)
+- S3: [paper](https://aclanthology.org/W17-4510/)
+- BaryScore: [paper](https://arxiv.org/abs/2108.12463)
+- DepthScore: [paper](https://arxiv.org/abs/2103.12711)
+- InfoLM: [paper](https://arxiv.org/abs/2112.01589)
+
+## Installation
+
+### Installation from the source
+
+Clone the repository and install the library from the root:
+
+```bash
+git clone https://github.com/Moonlight-Syntax/LUNA.git
+cd LUNA
+pip install .
+```
+
+Another way is to use `poetry`. Then, run `poetry install` from the root.
+
+
+## Quick start
+
+The user can either trigger the `Calculator` to evaluate metrics or integrate the code itself.
+
+### Calculator
+
+The easiest way to evaluate NLG models is to execute the following snippet:
+
+```python
+from luna.calculate import Calculator
+
+# Choose to compute in a sequential or a parallel setting
+calculator = Calculator(execute_parallel=True)
+metrics_dict = calculator.calculate(
+  metrics=[depth_score, s3_metrics], # both are LUNA's metrics
+  candidates=candidates,
+  references=references
+)
+
+print(metrics_dict)
+>>> {"DepthScore": ..., "S3": ...}
+```
+
+### Integrate the evaluations
+
+All the metrics in the library follow the same interface:
+
+```python
+class Metrics:
+    def evaluate_batch(self, hypotheses: List[str], references: Optional[List[str]]) -> List[float]:
+        *some code here*
+
+    def evaluate_example(self, hypothesis: str, reference: Optional[str]) -> float:
+        *some code here*
+```
+
+Thus, to evaluate your examples run the following code:
+
+```python
+from luna import MetricName
+
+metric = MetricName()
+result = metric.evaluate_example("Generated bad model by example", "Gold example")
+results = metric.evaluate_batch(["Generated bad model by example 1", "Generated bad model by example 2"],
+                                 ["Gold example 1", "Gold example 2"])
+```
+
+
+## Development
+
+### Contribute to the library
+
+We are open to issues and pull requests. We hope that LUNA's functionality is broad enough, but we believe that it can always be extended and improved.
+
+### Pre-commit hooks
+
+We use [pre-commit hooks](https://pre-commit.com/) to check the code before committing.
+
+To install the hooks run the following:
+
+```bash
+pip install pre-commit
+pre-commit install
+```
+
+After that, every commit will trigger standard checks on code style, including `black`, `isort`, etc.
+
+### Tests
+
+Tests for `luna` are located in the `tests` directory. To run them, execute:
+
+```bash
+pytest tests
+```
diff --git a/luna/__init__.py b/luna/__init__.py
@@ -0,0 +1 @@
+__version__ = "1.0.0"  # version string of the `luna` package
diff --git a/luna/base.py b/luna/base.py
@@ -0,0 +1,57 @@
+import typing as tp
+from abc import ABC, abstractmethod
+
+from tqdm import tqdm
+
+from luna import utils
+
+
+class Metrics(ABC):
+    """
+    A base class to calculate metrics.
+    """
+
+    def __repr__(self) -> str:
+        raise NotImplementedError
+
+    def evaluate_batch(self, hyps: tp.List[str], refs: tp.Optional[tp.List[str]]) -> tp.List[float]:
+        """
+        Basic iteration over samples.
+        Compute metrics for a dataset.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+        list of float
+            A list of metrics values.
+        """
+        # the following loop can be parallelized
+        # however, as we assume the gpu-based inference, we can't wrap it into a simple joblib.Parallel etc.
+        utils.validate_batch(hyps, refs)
+
+        metrics_list = []
+        for i, hyp in tqdm(enumerate(hyps)):
+            ref = refs[i] if refs else None
+            metrics_list.append(self.evaluate_example(hyp, ref))
+        return metrics_list
+
+    @abstractmethod
+    def evaluate_example(self, hyp: str, ref: tp.Optional[str]) -> float:
+        """
+        Compute metrics for one sample.
+
+        Parameters
+        ----------
+        hyp: str
+            Hypothesis (generated sentence).
+        ref: str, optional
+            Reference (ground-truth sentence).
+
+        Returns
+        -------
+        float
+            Metrics calculated for the example.
+        """
+        raise NotImplementedError