forked from Moonlight-Syntax/LUNA
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Moonlight Syntax
committed
Sep 9, 2023
0 parents
commit f47cb40
Showing
47 changed files
with
7,983 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Files downloaded every time the Rouge-We metric is used | ||
embeddings/ | ||
models/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
repos: | ||
- repo: https://github.com/pre-commit/pre-commit-hooks | ||
rev: v4.0.1 | ||
hooks: | ||
- id: check-yaml | ||
- id: check-json | ||
- id: end-of-file-fixer | ||
- id: trailing-whitespace | ||
- id: check-added-large-files | ||
- id: check-merge-conflict | ||
- id: detect-private-key | ||
|
||
- repo: local | ||
hooks: | ||
- id: black | ||
files: luna | ||
name: black | ||
entry: black --config pyproject.toml | ||
types: [ python ] | ||
language: system | ||
|
||
- repo: local | ||
hooks: | ||
- id: isort | ||
name: isort | ||
entry: isort --settings-path pyproject.toml | ||
types: [ python ] | ||
language: system |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2023 Anonymous | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
[](https://github.com/psf/black) | ||
[](https://github.com/MoonlightSyntax/LUNA/-/blob/main/.pre-commit-config.yaml) | ||
|
||
|
||
# LUNA: a Framework for Language Understanding and Naturalness Assessment | ||
|
||
The framework provides a set of well-known automated evaluation metrics for text generation tasks. | ||
|
||
The library includes the following metrics: | ||
|
||
- Blanc: [paper](https://aclanthology.org/2020.eval4nlp-1.2/) | ||
- Mover score: [paper](https://aclanthology.org/D19-1053/) | ||
- BLEU: [paper](https://aclanthology.org/P02-1040/) | ||
- METEOR: [paper](https://aclanthology.org/W05-0909/) | ||
- ROUGE: [paper](https://aclanthology.org/W04-1013/) | ||
- chrF: [paper](https://aclanthology.org/W15-3049/) | ||
- BERTScore: [paper](https://arxiv.org/abs/1904.09675) | ||
- BARTScore: [paper](https://arxiv.org/abs/2106.11520) | ||
- Data statistics metrics: [paper](https://aclanthology.org/N18-1065/) | ||
- Compression | ||
- Coverage | ||
- Length | ||
- Novelty | ||
- Density | ||
- Repetition | ||
- ROUGE-We: [paper](https://aclanthology.org/D15-1222/) | ||
- S3: [paper](https://aclanthology.org/W17-4510/) | ||
- BaryScore: [paper](https://arxiv.org/abs/2108.12463) | ||
- DepthScore: [paper](https://arxiv.org/abs/2103.12711) | ||
- InfoLM: [paper](https://arxiv.org/abs/2112.01589) | ||
|
||
## Installation | ||
|
||
### Installation from the source | ||
|
||
Clone the repository and install the library from the root: | ||
|
||
```bash | ||
git clone https://github.com/Moonlight-Syntax/LUNA.git | ||
cd LUNA | ||
pip install . | ||
``` | ||
|
||
Another way is to use `poetry`. Then, run `poetry install` from the root. | ||
|
||
|
||
## Quick start | ||
|
||
You can either use the `Calculator` to evaluate several metrics at once or call the individual metric classes directly from your own code. | ||
|
||
### Calculator | ||
|
||
The easiest way to evaluate NLG models is to execute the following snippet: | ||
|
||
```python | ||
from luna.calculate import Calculator | ||
|
||
# Choose to compute in a sequential or a parallel setting | ||
calculator = Calculator(execute_parallel=True) | ||
metrics_dict = calculator.calculate( | ||
metrics=[depth_score, s3_metrics], # both are LUNA's metrics | ||
candidates=candidates, | ||
references=references | ||
) | ||
|
||
print(metrics_dict) | ||
>>> {"DepthScore": ..., "S3": ...} | ||
``` | ||
|
||
### Integrate the evaluations | ||
|
||
All the metrics in the library follow the same interface: | ||
|
||
```python | ||
class Metrics: | ||
def evaluate_batch(self, hypotheses: List[str], references: Optional[List[str]]) -> List[float]: | ||
*some code here* | ||
|
||
def evaluate_example(self, hypothesis: str, reference: Optional[str]) -> float: | ||
*some code here* | ||
``` | ||
|
||
Thus, to evaluate your examples run the following code: | ||
|
||
```python | ||
from luna import MetricName | ||
|
||
metric = MetricName() | ||
result = metric.evaluate_example("Generated bad model by example", "Gold example") | ||
results = metric.evaluate_batch(["Generated bad model by example 1", "Generated bad model by example 2"], | ||
["Gold example 1", "Gold example 2"]) | ||
``` | ||
|
||
|
||
## Development | ||
|
||
### Contribute to the library | ||
|
||
We welcome issues and pull requests. We hope that LUNA's functionality is broad enough, but we believe that it can always be extended and improved. | ||
|
||
### Pre-commit hooks | ||
|
||
We use [pre-commit hooks](https://pre-commit.com/) to check the code before committing. | ||
|
||
To install the hooks run the following: | ||
|
||
```bash | ||
pip install pre-commit | ||
pre-commit install | ||
``` | ||
|
||
After that, every commit will trigger standard code-style checks, including `black`, `isort`, etc. | ||
|
||
### Tests | ||
|
||
Tests for `luna` are located in the `tests` directory. To run them, execute: | ||
|
||
```bash | ||
pytest tests | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Version string defined by this module.
__version__ = "1.0.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import typing as tp | ||
from abc import ABC, abstractmethod | ||
|
||
from tqdm import tqdm | ||
|
||
from luna import utils | ||
|
||
|
||
class Metrics(ABC):
    """
    A base class to calculate metrics.

    Subclasses must implement ``evaluate_example``; ``evaluate_batch`` is
    provided here as a plain loop over the samples. ``__repr__`` has no
    default implementation and raises ``NotImplementedError`` unless overridden.
    """

    def __repr__(self) -> str:
        # Deliberately no default representation: a subclass has to provide its own.
        raise NotImplementedError

    def evaluate_batch(self, hyps: tp.List[str], refs: tp.Optional[tp.List[str]]) -> tp.List[float]:
        """
        Basic iteration over samples.
        Compute metrics for a dataset.

        Parameters
        ----------
        hyps: list of str
            Hypotheses (generated sentences).
        refs: list of str, optional
            References (ground-truth sentences), matched to ``hyps`` by index.
            When ``None`` or empty, every example is evaluated with ``ref=None``.

        Returns
        -------
        list of float
            A list of metrics values, one per hypothesis, in input order.
        """
        # the following loop can be parallelized
        # however, as we assume the gpu-based inference, we can't wrap it into a simple joblib.Parallel etc.
        # Input checking is delegated to the project helper before any metric is computed.
        utils.validate_batch(hyps, refs)

        metrics_list = []
        # enumerate() yields an iterator without a length, so the total is passed
        # explicitly; otherwise the progress bar cannot show completion or an ETA.
        for i, hyp in tqdm(enumerate(hyps), total=len(hyps)):
            ref = refs[i] if refs else None
            metrics_list.append(self.evaluate_example(hyp, ref))
        return metrics_list

    @abstractmethod
    def evaluate_example(self, hyp: str, ref: tp.Optional[str]) -> float:
        """
        Compute metrics for one sample.

        Parameters
        ----------
        hyp: str
            Hypothesis (generated sentence).
        ref: str, optional
            Reference (ground-truth sentence).

        Returns
        -------
        float
            Metrics calculated for the example.
        """
        raise NotImplementedError
Oops, something went wrong.