-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adds pyproject files and tests (#1302)
* adds pyproject files and tests * formatting and add dev packages to dev req files * improve req testing --------- Co-authored-by: Quentin Anthony <[email protected]>
- Loading branch information
1 parent
50e74cd
commit a8f7913
Showing
14 changed files
with
332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-apex-pip" | ||
version = "0.1.0" | ||
description = "Apex pip requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
pip = "23.3.2" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-comet" | ||
version = "0.1.0" | ||
description = "Comet ML requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
comet_ml = ">=3.45.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-flashattention" | ||
version = "0.1.0" | ||
description = "Flash Attention requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
flash-attn = "2.5.6" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-mamba" | ||
version = "0.1.0" | ||
description = "Mamba requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
causal_conv1d = ">=1.1.0" | ||
einops = "*" | ||
mamba_ssm = ">=1.2.0.post1" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-dev" | ||
version = "0.1.0" | ||
description = "Development requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
autopep8 = ">=1.5.6" | ||
clang-format = ">=13.0.1" | ||
pre-commit = ">=2.17.0" | ||
pytest = ">=6.2.3" | ||
pytest-cov = ">=2.11.1" | ||
pytest-forked = ">=1.3.0" | ||
pytest-html = "4.1.1" | ||
pytest-xdist = "*" | ||
toml = ">=0.10.2" | ||
packaging = ">=23.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-onebitadam" | ||
version = "0.1.0" | ||
description = "OneBitAdam requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
cupy-cuda111 = ">=8.6.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-s3" | ||
version = "0.1.0" | ||
description = "S3 requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
boto3 = "*" | ||
hf-transfer = ">=0.1.3" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-sparseattention" | ||
version = "0.1.0" | ||
description = "Sparse Attention requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
triton = "2.1.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-tensorboard" | ||
version = "0.1.0" | ||
description = "TensorBoard requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
tensorboard = "2.13.0" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-transformerengine" | ||
version = "0.1.0" | ||
description = "Transformer Engine requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
transformer-engine = {git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "stable"} | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[tool.poetry] | ||
name = "gpt-neox-wandb" | ||
version = "0.1.0" | ||
description = "Weights & Biases requirements for GPT-NeoX" | ||
authors = ["EleutherAI <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
||
[tool.poetry.dependencies] | ||
python = "^3.8" | ||
wandb = ">=0.10.28" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Poetry manifest for the main GPT-NeoX package.
[tool.poetry]
name = "gpt-neox"
version = "2.0.0"
description = "An open-source library for training large-scale language models on GPUs"
authors = ["EleutherAI <[email protected]>"]
license = "Apache-2.0"
readme = "README.md"
homepage = "https://www.github.com/eleutherai/gpt-neox"
repository = "https://www.github.com/eleutherai/gpt-neox"
documentation = "https://www.github.com/eleutherai/gpt-neox"

[tool.poetry.dependencies]
python = "^3.8"
# Git dependencies are pinned to exact commits for reproducibility.
deepspeed = {git = "https://github.com/EleutherAI/DeeperSpeed.git", rev = "02e2ebf7dee6aaab3d89094ed470a4609763c742"}
ftfy = "^6.0.1"
# For 0.x releases a caret constraint only allows the same minor series
# (e.g. "^0.11.0" means ">=0.11.0,<0.12.0"), so 0.x packages use a plain
# lower bound instead.
# NOTE(review): transformers 4.38.0 is understood to require
# huggingface-hub>=0.19.3 and tokenizers>=0.14, which the previous caret
# caps could not satisfy -- confirm against the transformers release metadata.
huggingface_hub = ">=0.11.0"
jinja2 = "3.1.4"
lm_dataformat = {git = "https://github.com/EleutherAI/lm_dataformat.git", rev = "4eec05349977071bf67fc072290b95e31c8dd836"}
lm_eval = ">=0.4.0,<=0.4.1"
mpi4py = "^3.0.3"
numpy = "<2.0"
pybind11 = "^2.6.2"
regex = "*"
sentencepiece = "*"
six = "*"
tiktoken = ">=0.1.2"
tokenizers = ">=0.12.1"
transformers = "4.38.0"
toml = "*"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
autopep8>=1.5.6 | ||
clang-format>=13.0.1 | ||
packaging>=23.0 | ||
pre-commit>=2.17.0 | ||
pytest>=6.2.3 | ||
pytest-cov>=2.11.1 | ||
pytest-forked>=1.3.0 | ||
pytest-html==4.1.1 | ||
pytest-xdist | ||
toml>=0.10.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import pytest | ||
import toml | ||
from pathlib import Path | ||
from typing import Dict, List, Optional | ||
from packaging.version import parse as parse_version, Version | ||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class Dependency:
    """A single requirement: a lower-cased package name and an optional version.

    ``version`` holds the text that follows the first version operator of the
    requirement string (the operator itself is discarded). It is ``None``
    when the requirement carries no version operator.
    """

    name: str
    version: Optional[str] = None

    @classmethod
    def from_requirement(cls, requirement: str) -> "Dependency":
        """Parse a requirement string into a Dependency object.

        The string is split at the version operator that occurs *first* in
        it, so compound specifiers such as ``pkg<2,>=1`` keep a clean name
        (``pkg``) and the remainder (``2,>=1``) becomes the version text.

        Args:
            requirement: One requirement line, e.g. ``"pytest>=6.2.3"``.

        Returns:
            A Dependency with a lower-cased, stripped name and the stripped
            version text, or ``None`` as version when no operator is present.
        """
        # Longer operators are listed before their own prefixes so that a tie
        # at the same position resolves to the longest one (">=" over ">").
        specifiers = ["===", "==", "~=", "!=", ">=", "<=", ">", "<"]
        hits = [
            (requirement.index(spec), spec)
            for spec in specifiers
            if spec in requirement
        ]
        if not hits:
            return cls(requirement.lower().strip(), None)

        # min() returns the first minimal element, which preserves the
        # longest-operator-first ordering of ``specifiers`` on ties.
        _, first_spec = min(hits, key=lambda hit: hit[0])
        name, version = requirement.split(first_spec, 1)
        return cls(name.lower().strip(), version.strip())

    @staticmethod
    def _bare_version(spec: str) -> str:
        """Return ``spec`` without surrounding whitespace or a leading operator."""
        return spec.strip().lstrip("=<>!~^").strip()

    def matches_version(self, other_version: str) -> bool:
        """Check if this dependency's version matches another version string.

        Only the version numbers are compared; a leading operator on either
        side (``>=``, ``==``, ``^``, ``~`` ...) is ignored, because
        ``from_requirement`` already discards the operator on this side.

        Args:
            other_version: Constraint text from the other manifest, e.g.
                ``">=1.5.6"``, ``"^6.0.1"``, ``"4.1.1"`` or ``"*"``.

        Returns:
            True when either side has no version, when either side is the
            bare wildcard ``"*"``, or when both version numbers are equal.
        """
        if not self.version or not other_version:
            return True

        ours = self._bare_version(self.version)
        theirs = self._bare_version(other_version)

        # A bare "*" accepts any version, so there is nothing to compare.
        if "*" in (ours, theirs):
            return True

        # Partial wildcards ("1.*") are compared as if the wildcard were 0.
        ours = ours.replace("*", "0")
        theirs = theirs.replace("*", "0")
        if ours == theirs:
            return True

        try:
            # Parsed comparison catches equal versions spelled differently
            # (e.g. "1.0" vs "1.0.0").
            return parse_version(ours) == parse_version(theirs)
        except ValueError:
            # Unparseable text that was not textually equal cannot match.
            return False
|
||
|
||
class DependencyValidator:
    """Cross-check a requirements file against a Poetry pyproject manifest."""

    def __init__(self, requirements_dir: Path):
        # Kept for callers; the methods of this class take explicit file paths
        # and do not read this attribute themselves.
        self.requirements_dir = requirements_dir

    def parse_requirements(self, file_path: Path) -> List["Dependency"]:
        """Parse requirements.txt file into a list of Dependencies.

        Blank lines and comment lines (including indented ones) are skipped.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be read or parsed for another reason.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                stripped_lines = (line.strip() for line in f)
                # The comment check runs on the stripped text so that an
                # indented "# ..." line is not mistaken for a requirement.
                lines = [
                    line
                    for line in stripped_lines
                    if line and not line.startswith("#")
                ]
            return [Dependency.from_requirement(line) for line in lines]
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Requirements file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing requirements file {file_path}: {str(e)}"
            ) from e

    @staticmethod
    def _constraint_text(spec) -> str:
        """Return the version constraint of one Poetry dependency entry.

        Table-valued entries (git sources, ``{version = ..., extras = ...}``)
        yield their ``version`` key, or an empty string when they have none,
        instead of the repr of the whole table.
        """
        if isinstance(spec, dict):
            return str(spec.get("version", ""))
        return str(spec)

    def parse_pyproject(self, file_path: Path) -> Dict[str, str]:
        """Parse pyproject.toml file and extract dependencies.

        Returns:
            Mapping of lower-cased dependency name to its version constraint
            text, read from ``[tool.poetry.dependencies]``. The ``python``
            entry is excluded. Entries without a version constraint map to
            an empty string.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be parsed or lacks the
                ``tool.poetry.dependencies`` table.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                pyproject_data = toml.load(f)
            dependencies = pyproject_data["tool"]["poetry"]["dependencies"]
            return {
                name.lower(): self._constraint_text(spec)
                for name, spec in dependencies.items()
                if name.lower() != "python"  # Exclude Python version
            }
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"pyproject.toml file not found: {file_path}"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Error parsing pyproject.toml {file_path}: {str(e)}"
            ) from e

    # The return annotation is quoted so that ``tuple[...]`` is not evaluated
    # at definition time, which fails on Python 3.8.
    def compare_dependencies(
        self, req_deps: List["Dependency"], pyproject_deps: Dict[str, str]
    ) -> "tuple[bool, List[str]]":
        """Compare dependencies between requirements.txt and pyproject.toml.

        The check is one-directional: every requirement must be present in
        ``pyproject_deps`` with a matching version. Entries that exist only
        in ``pyproject_deps`` are not reported.

        Returns:
            ``(is_match, mismatches)`` where ``mismatches`` lists one
            human-readable message per problem, in requirement order.
        """
        mismatches = []

        for req in req_deps:
            if req.name not in pyproject_deps:
                mismatches.append(
                    f"Dependency '{req.name}' not found in pyproject.toml"
                )
                continue

            if not req.matches_version(pyproject_deps[req.name]):
                mismatches.append(
                    f"Version mismatch for '{req.name}': "
                    f"requirements.txt={req.version}, "
                    f"pyproject.toml={pyproject_deps[req.name]}"
                )

        return len(mismatches) == 0, mismatches
|
||
|
||
def get_corresponding_pyproject(req_file: Path) -> Path:
    """Get the corresponding pyproject.toml file for a requirements file.

    ``requirements-<env>.txt`` maps to ``pyproject-<env>.toml`` in the same
    directory. Everything after the first hyphen is the environment name, so
    multi-part names such as ``apex-pip`` are preserved.

    Raises:
        IndexError: If the file stem contains no hyphen.
    """
    # maxsplit=1 keeps hyphens that belong to the environment name itself.
    env_name = req_file.stem.split("-", 1)[1]
    return req_file.parent / f"pyproject-{env_name}.toml"
|
||
|
||
# The glob result is sorted so the parametrized cases are generated in the
# same order on every run, and each case is labelled with its file path
# rather than an auto-generated index.
@pytest.mark.parametrize(
    "req_file",
    sorted(Path("requirements").glob("requirements-*.txt")),
    ids=str,
)
def test_pyproject_matches_requirements(req_file: Path):
    """Test that requirements.txt dependencies match pyproject.toml dependencies.

    For each ``requirements-*.txt`` found under ``requirements/`` (relative
    to the working directory at collection time), the sibling pyproject file
    is parsed and every requirement is checked against it. The test fails
    with one bullet line per mismatch.
    """
    validator = DependencyValidator(req_file.parent)
    pyproject_file = get_corresponding_pyproject(req_file)

    # Parse both dependency files
    req_deps = validator.parse_requirements(req_file)
    pyproject_deps = validator.parse_pyproject(pyproject_file)

    # Compare dependencies and get detailed mismatches
    is_match, mismatches = validator.compare_dependencies(req_deps, pyproject_deps)

    # Create detailed error message if there are mismatches
    if not is_match:
        header = (
            f"\nDependency mismatches found between {req_file} and {pyproject_file}:"
        )
        details = [f"- {msg}" for msg in mismatches]
        pytest.fail("\n".join([header, *details]))