Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,24 @@ jobs:

- name: Validate prompts
run: python scripts/validate_prompts.py

# Bare `pip install -e .` (no extras) + import the runtime deps, catching the
# #3/#4 regression the extras-based `test` job can't.
base-install:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip

- name: Bare editable install (no extras)
run: |
python -m pip install --upgrade pip
pip install -e .

- name: Verify runtime dependencies were installed
run: python -c "import pandas, scipy, matplotlib, click, krippendorff, refusalbench"
22 changes: 12 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,12 @@ keywords = ["benchmark", "refusal", "alignment", "llm-evaluation", "protein-desi
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]

[project.urls]
Homepage = "https://github.com/AppliedScientific/refusalbench"
Repository = "https://github.com/AppliedScientific/refusalbench"
"Bug Tracker" = "https://github.com/AppliedScientific/refusalbench/issues"
Dataset = "https://huggingface.co/datasets/AppliedScientific/refusalbench"
Space = "https://huggingface.co/spaces/AppliedScientific/refusalbench"

dependencies = [
"pandas>=2.0,<3",
"scipy>=1.11",
Expand All @@ -39,6 +30,13 @@ dependencies = [
"krippendorff>=0.6",
]

[project.urls]
Homepage = "https://github.com/AppliedScientific/refusalbench"
Repository = "https://github.com/AppliedScientific/refusalbench"
"Bug Tracker" = "https://github.com/AppliedScientific/refusalbench/issues"
Dataset = "https://huggingface.co/datasets/AppliedScientific/refusalbench"
Space = "https://huggingface.co/spaces/AppliedScientific/refusalbench"

[project.optional-dependencies]
providers = [
# Real model API clients. Stubs in src/refusalbench/providers/ raise
Expand Down Expand Up @@ -89,6 +87,10 @@ branch = true
omit = [
# CLI entrypoint glue; exercised by pilot scripts not unit tests.
"src/refusalbench/scripts_entry.py",
# Provider clients need live API creds — excluded per the report note below.
"src/refusalbench/providers/anthropic.py",
"src/refusalbench/providers/bedrock.py",
"src/refusalbench/providers/openrouter.py",
]

[tool.coverage.report]
Expand Down Expand Up @@ -116,7 +118,7 @@ select = [
]

[tool.ruff.lint.per-file-ignores]
"src/refusalbench/analysis/figures.py" = ["RUF001", "RUF002"]
"src/refusalbench/analysis/figures.py" = ["RUF001", "RUF002", "RUF003"]
"tests/*" = ["E402"] # tests sometimes import after fixture setup

[tool.mypy]
Expand Down
Loading
Loading