Skip to content

Commit

Permalink
adds pyproject files and tests (#1302)
Browse files Browse the repository at this point in the history
* adds pyproject files and tests

* formatting and add dev packages to dev req files

* improve req testing

---------

Co-authored-by: Quentin Anthony <[email protected]>
  • Loading branch information
LouisCastricato and Quentin-Anthony authored Nov 16, 2024
1 parent 50e74cd commit a8f7913
Show file tree
Hide file tree
Showing 14 changed files with 332 additions and 0 deletions.
14 changes: 14 additions & 0 deletions requirements/pyproject-apex-pip.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the "Apex pip" requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-apex-pip"
version = "0.1.0"
description = "Apex pip requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Exact pin of pip itself.
pip = "23.3.2"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-comet.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the Comet ML requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-comet"
version = "0.1.0"
description = "Comet ML requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Lower bound only; no upper cap.
comet_ml = ">=3.45.0"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-flashattention.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the Flash Attention requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-flashattention"
version = "0.1.0"
description = "Flash Attention requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Exact pin.
flash-attn = "2.5.6"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
16 changes: 16 additions & 0 deletions requirements/pyproject-mamba.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Poetry manifest for the Mamba requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-mamba"
version = "0.1.0"
description = "Mamba requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Lower bound only.
causal_conv1d = ">=1.1.0"
# "*" accepts any version.
einops = "*"
# Lower bound only; note the minimum is a post-release (1.2.0.post1).
mamba_ssm = ">=1.2.0.post1"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
23 changes: 23 additions & 0 deletions requirements/pyproject-neox-dev.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Poetry manifest for the development/test tooling of GPT-NeoX.
# Mirrors requirements/requirements-dev.txt.
#
# NOTE(review): tests/requirements/test_requirements.py derives the manifest
# name from the requirements file name (requirements-dev.txt ->
# pyproject-dev.toml), so this file (pyproject-neox-dev.toml) is not the one
# the test will look for - confirm the intended file name.
[tool.poetry]
name = "gpt-neox-dev"
version = "0.1.0"
description = "Development requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# All entries below are lower bounds unless noted otherwise.
autopep8 = ">=1.5.6"
clang-format = ">=13.0.1"
pre-commit = ">=2.17.0"
pytest = ">=6.2.3"
pytest-cov = ">=2.11.1"
pytest-forked = ">=1.3.0"
# Exact pin.
pytest-html = "4.1.1"
# "*" accepts any version.
pytest-xdist = "*"
# toml and packaging are imported by tests/requirements/test_requirements.py.
toml = ">=0.10.2"
packaging = ">=23.0"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-onebitadam.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the OneBitAdam requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-onebitadam"
version = "0.1.0"
description = "OneBitAdam requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Lower bound only.
cupy-cuda111 = ">=8.6.0"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
15 changes: 15 additions & 0 deletions requirements/pyproject-s3.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Poetry manifest for the S3 requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-s3"
version = "0.1.0"
description = "S3 requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# "*" accepts any version.
boto3 = "*"
# Lower bound only.
hf-transfer = ">=0.1.3"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-sparseattention.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the Sparse Attention requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-sparseattention"
version = "0.1.0"
description = "Sparse Attention requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Exact pin.
triton = "2.1.0"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-tensorboard.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the TensorBoard requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-tensorboard"
version = "0.1.0"
description = "TensorBoard requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Exact pin.
tensorboard = "2.13.0"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-transformerengine.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the Transformer Engine requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-transformerengine"
version = "0.1.0"
description = "Transformer Engine requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Installed from the upstream Git repository at the ref named "stable"
# (not a fixed commit, so the resolved code can change over time).
transformer-engine = {git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "stable"}

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions requirements/pyproject-wandb.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Poetry manifest for the Weights & Biases requirement set of GPT-NeoX.
[tool.poetry]
name = "gpt-neox-wandb"
version = "0.1.0"
description = "Weights & Biases requirements for GPT-NeoX"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Lower bound only.
wandb = ">=0.10.28"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
33 changes: 33 additions & 0 deletions requirements/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Poetry manifest for the core GPT-NeoX dependency set.
[tool.poetry]
name = "gpt-neox"
version = "2.0.0"
description = "An open-source library for training large-scale language models on GPUs"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"
# NOTE(review): Poetry resolves this path relative to the directory holding
# this file (requirements/) - confirm a README.md is reachable from there.
readme = "README.md"
homepage = "https://www.github.com/eleutherai/gpt-neox"
repository = "https://www.github.com/eleutherai/gpt-neox"
documentation = "https://www.github.com/eleutherai/gpt-neox"

[tool.poetry.dependencies]
# Caret constraint: any Python >=3.8,<4.0.
python = "^3.8"
# Git dependencies are pinned to exact commits for reproducibility.
deepspeed = {git = "https://github.com/EleutherAI/DeeperSpeed.git", rev = "02e2ebf7dee6aaab3d89094ed470a4609763c742"}
ftfy = "^6.0.1"
# A caret on a 0.x version only permits patch releases
# (^0.11.0 == >=0.11.0,<0.12.0). For huggingface_hub, tiktoken and tokenizers
# that cap is far tighter than a minimum-version requirement and, as far as
# can be told, cannot be resolved together with the transformers pin below,
# which declares newer minimums for huggingface-hub and tokenizers
# (verify against the transformers 4.38.0 metadata). They are therefore
# expressed as plain lower bounds, matching the sibling manifests.
huggingface_hub = ">=0.11.0"
jinja2 = "3.1.4"
lm_dataformat = {git = "https://github.com/EleutherAI/lm_dataformat.git", rev = "4eec05349977071bf67fc072290b95e31c8dd836"}
lm_eval = ">=0.4.0,<=0.4.1"
mpi4py = "^3.0.3"
# Stay on the NumPy 1.x series.
numpy = "<2.0"
pybind11 = "^2.6.2"
regex = "*"
sentencepiece = "*"
six = "*"
tiktoken = ">=0.1.2"
tokenizers = ">=0.12.1"
transformers = "4.38.0"
toml = "*"

# Built with poetry-core as the PEP 517 backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
2 changes: 2 additions & 0 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Development and test tooling for GPT-NeoX.
# packaging and toml are imported by tests/requirements/test_requirements.py.
autopep8>=1.5.6
clang-format>=13.0.1
packaging>=23.0
pre-commit>=2.17.0
pytest>=6.2.3
pytest-cov>=2.11.1
pytest-forked>=1.3.0
pytest-html==4.1.1
pytest-xdist
toml>=0.10.2
131 changes: 131 additions & 0 deletions tests/requirements/test_requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import pytest
import toml
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from packaging.version import parse as parse_version, Version
from dataclasses import dataclass


@dataclass
class Dependency:
    """A requirement reduced to a lower-cased name and an optional version.

    Only the version text is kept; the comparison operator that preceded it in
    the requirement line is discarded. Comparisons made through
    ``matches_version`` therefore check that both files name the same version
    number, not that the two constraints are semantically equivalent.
    """

    name: str
    version: Optional[str] = None

    @classmethod
    def from_requirement(cls, requirement: str) -> "Dependency":
        """Parse one requirements.txt line into a Dependency.

        Args:
            requirement: A single requirement such as ``"pytest>=6.2.3"``,
                ``"numpy<2.0"`` or a bare ``"six"``.

        Returns:
            A Dependency whose ``version`` is everything after the first
            version operator, or ``None`` when the line has no operator.
        """
        # Environment markers ("; python_version < '3.9'") are not part of the
        # version and would otherwise be mistaken for an operator.
        requirement = requirement.split(";", 1)[0]

        specifiers = ["==", ">=", "<=", "~=", "!=", ">", "<"]
        # Pick the operator that occurs FIRST in the line (longest wins on a
        # tie, so ">=" beats ">"). Choosing by list order instead would split
        # "pkg<2.0,>=1.0" at ">=" and leave "<2.0," glued to the name.
        hits = [
            (requirement.find(spec), -len(spec), spec)
            for spec in specifiers
            if spec in requirement
        ]
        if not hits:
            return cls(requirement.lower().strip(), None)

        _, _, first_spec = min(hits)
        name, version = requirement.split(first_spec, 1)
        return cls(name.lower().strip(), version.strip())

    def matches_version(self, other_version: str) -> bool:
        """Check whether this version agrees with a pyproject constraint.

        Args:
            other_version: The constraint string from pyproject.toml, e.g.
                ``">=3.45.0"``, ``"^6.0.1"``, ``"4.1.1"`` or ``"*"``.

        Returns:
            True when either side has no version, or when both name the same
            version once the leading operator of ``other_version`` is removed.
        """
        if not self.version or not other_version:
            return True

        # pyproject constraints still carry their operator (">=", "^", "~",
        # ...) whereas from_requirement() already dropped it on our side;
        # strip it so that like is compared with like.
        ours = self.version.replace("*", "0")
        theirs = other_version.strip().lstrip("^~=<>! ").replace("*", "0")
        if ours == theirs:
            return True

        try:
            # Semantic comparison so that e.g. "1.0" and "1.0.0" agree.
            return parse_version(self.version) == parse_version(theirs)
        except ValueError:
            # Unparseable (e.g. compound constraints); the string comparison
            # above has already failed, so the versions differ.
            return False


class DependencyValidator:
    """Cross-check a requirements file against its Poetry pyproject manifest."""

    def __init__(self, requirements_dir: Path):
        """Remember the directory that holds the requirement files."""
        self.requirements_dir = requirements_dir

    @staticmethod
    def _clean_requirement_line(raw_line: str) -> str:
        """Return the requirement on ``raw_line``, or "" for blanks/comments."""
        line = raw_line.strip()
        if line.startswith("#"):
            return ""
        # pip treats "#" as a comment start only when preceded by whitespace,
        # so URL fragments such as "#egg=name" are left untouched.
        for marker in (" #", "\t#"):
            cut = line.find(marker)
            if cut != -1:
                line = line[:cut].rstrip()
        return line

    def parse_requirements(self, file_path: Path) -> "List[Dependency]":
        """Parse a requirements.txt file into a list of Dependencies.

        Blank lines, full-line comments (indented or not) and trailing inline
        comments are ignored.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be read or parsed.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                cleaned = [self._clean_requirement_line(raw) for raw in f]
            return [Dependency.from_requirement(line) for line in cleaned if line]
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Requirements file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing requirements file {file_path}: {str(e)}"
            ) from e

    def parse_pyproject(self, file_path: Path) -> Dict[str, str]:
        """Extract ``[tool.poetry.dependencies]`` from a pyproject file.

        Returns:
            A mapping of lower-cased package name to its constraint rendered
            with ``str()`` (table-valued entries such as git dependencies
            become the string form of their dict). The ``python`` entry is
            excluded.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be parsed or lacks the section.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                pyproject_data = toml.load(f)
            dependencies = pyproject_data["tool"]["poetry"]["dependencies"]
            return {
                name.lower(): str(version)
                for name, version in dependencies.items()
                if name.lower() != "python"  # Exclude Python version
            }
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"pyproject.toml file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing pyproject.toml {file_path}: {str(e)}"
            ) from e

    def compare_dependencies(
        self, req_deps: "List[Dependency]", pyproject_deps: Dict[str, str]
    ) -> Tuple[bool, List[str]]:
        """Compare requirements.txt entries with pyproject.toml entries.

        Every requirement must be present in ``pyproject_deps`` and agree on
        version. Entries that exist only in pyproject.toml are not reported.

        Returns:
            ``(ok, mismatches)`` where ``ok`` is True when ``mismatches`` is
            empty and ``mismatches`` holds one message per problem found.
        """
        # typing.Tuple rather than tuple[...]: the built-in generic form is
        # evaluated at definition time and fails on Python 3.8.
        mismatches = []

        for req in req_deps:
            if req.name not in pyproject_deps:
                mismatches.append(
                    f"Dependency '{req.name}' not found in pyproject.toml"
                )
                continue

            if not req.matches_version(pyproject_deps[req.name]):
                mismatches.append(
                    f"Version mismatch for '{req.name}': "
                    f"requirements.txt={req.version}, "
                    f"pyproject.toml={pyproject_deps[req.name]}"
                )

        return len(mismatches) == 0, mismatches


def get_corresponding_pyproject(req_file: Path) -> Path:
    """Return the pyproject file that mirrors a requirements file.

    ``requirements-<env>.txt`` maps to ``pyproject-<env>.toml`` in the same
    directory, and a plain ``requirements.txt`` maps to ``pyproject.toml``.

    Args:
        req_file: Path to a requirements file.

    Returns:
        The sibling pyproject path; its existence is not checked here.
    """
    # Split only at the FIRST hyphen so multi-part environment names such as
    # "apex-pip" are kept whole instead of being truncated to "apex".
    _, separator, env_name = req_file.stem.partition("-")
    if not separator:
        return req_file.parent / "pyproject.toml"
    return req_file.parent / f"pyproject-{env_name}.toml"


# The requirements directory is located relative to this file
# (tests/requirements/test_requirements.py -> <repo root>/requirements) rather
# than the current working directory; a CWD-relative glob silently yields no
# test cases when pytest is started from anywhere but the repository root.
# Sorting keeps collection order and test IDs stable across runs.
@pytest.mark.parametrize(
    "req_file",
    sorted(
        (Path(__file__).resolve().parents[2] / "requirements").glob(
            "requirements-*.txt"
        )
    ),
    ids=lambda path: path.name,
)
def test_pyproject_matches_requirements(req_file: Path):
    """Test that requirements.txt dependencies match pyproject.toml dependencies.

    Runs once per ``requirements-*.txt`` file and fails with a list of every
    missing dependency or version mismatch found in the matching pyproject.
    """
    validator = DependencyValidator(req_file.parent)
    pyproject_file = get_corresponding_pyproject(req_file)

    # Parse both dependency files
    req_deps = validator.parse_requirements(req_file)
    pyproject_deps = validator.parse_pyproject(pyproject_file)

    # Compare dependencies and get detailed mismatches
    is_match, mismatches = validator.compare_dependencies(req_deps, pyproject_deps)

    # Create detailed error message if there are mismatches
    if not is_match:
        error_msg = "\n".join(
            [
                f"\nDependency mismatches found between {req_file} and {pyproject_file}:",
                *[f"- {msg}" for msg in mismatches],
            ]
        )
        pytest.fail(error_msg)

0 comments on commit a8f7913

Please sign in to comment.