Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llm_tts/evaluation/grader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
from typing import Union

import regex
from latex2sympy2 import latex2sympy
from sympy import N, simplify
from sympy.parsing.latex import parse_latex
from sympy.parsing.sympy_parser import parse_expr

from llm_tts.evaluation.latex2sympy import latex2sympy

log = logging.getLogger(__name__)

# Timeout for symbolic comparison (seconds)
Expand Down
3 changes: 2 additions & 1 deletion llm_tts/evaluation/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from typing import Any, Dict

import regex
from latex2sympy2 import latex2sympy
from word2number import w2n

from llm_tts.evaluation.latex2sympy import latex2sympy

# from utils import *


Expand Down
25 changes: 7 additions & 18 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ name = "thinkbooster"
version = "0.1.0"
description = "ThinkBooster: a unified framework for test-time compute scaling of LLM reasoning"
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license = {text = "MIT"}
authors = [
{name = "Your Name", email = "your.email@example.com"},
{name = "List of contributors: https://github.com/IINemo/thinkbooster/graphs/contributors", email = "artemshelmanov@gmail.com"},
]
keywords = ["llm", "reasoning", "test-time-scaling", "best-of-n", "reasoning-evaluation"]
classifiers = [
Expand All @@ -27,22 +27,21 @@ dependencies = [
"torch>=1.9.0",
"transformers>=4.56.0", # vLLM 0.12.0+ requires ALLOWED_LAYER_TYPES from transformers
"datasets>=2.14.0", # HuggingFace datasets for loading benchmarks
"numpy>=2.0.0,<2.3.0", # vLLM requires >=2.0; numba 0.61.2 (vLLM dep) requires <2.3
"thinc>=8.3.0", # 8.3+ required for numpy 2.x binary compatibility
"numpy>=1.23.5",
"tqdm>=4.64.0",
"parse>=1.19.0",
"hydra-core>=1.2.0",
"omegaconf>=2.2.0",
"python-dotenv>=0.19.0",
# "lm-polygraph>=0.0.1", # Installed separately via setup.sh (dev branch)
"lm-polygraph>=0.6.0",
"pylatexenc>=2.10",
"sympy>=1.12", # For Game of 24 answer validation and LaTeX parsing
# latex2sympy2 installed separately with --no-deps due to antlr4 version conflict with hydra
"regex>=2023.0.0", # For advanced regex in grader
"plotly>=5.0.0", # For interactive reasoning visualization
"word2number>=1.1", # For converting word numbers to digits in math normalization
"wandb>=0.15.0", # Weights & Biases for experiment tracking
"evalplus>=0.3.1", # EvalPlus for MBPP+/HumanEval+ dataset loading and evaluation
"vllm>=0.12.0,<0.13.0", # vLLM backend for fast inference with PagedAttention
]

[project.optional-dependencies]
Expand Down Expand Up @@ -72,34 +71,24 @@ service = [
"python-json-logger>=2.0.7",
]

vllm = [
# vLLM backend for fast inference with PagedAttention
# Pin to 0.12.x - vLLM 0.13.0+ requires numpy>=2.0 which breaks thinc/spacy
"vllm>=0.12.0,<0.13.0",
]

[project.urls]
Homepage = "https://github.com/IINemo/thinkbooster"
Repository = "https://github.com/IINemo/thinkbooster"
Issues = "https://github.com/IINemo/thinkbooster/issues"

[project.scripts]
run-tts-eval = "scripts.run_tts_eval:main"

[tool.setuptools.packages.find]
where = ["."]
include = ["llm_tts*", "service_app*"]

[tool.setuptools.package-data]
llm_tts = [
"config/**/*.yaml",
"config/**/*.txt",
"datasets/kernelact/kb_prompts/*.toml",
]

[tool.black]
line-length = 88
target-version = ['py38', 'py39', 'py310', 'py311']
target-version = ['py310', 'py311', 'py312']
include = '\.pyi?$'
extend-exclude = '''
/(
Expand All @@ -124,7 +113,7 @@ known_first_party = ["llm_tts"]
skip = ["llm_tts/evaluation/latex2sympy"]

[tool.mypy]
python_version = "3.8"
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
Expand Down
21 changes: 4 additions & 17 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,18 @@ pip_install() {
}

install_lm_polygraph() {
echo -e "${YELLOW}Setting up lm-polygraph dev branch...${NC}"
echo -e "${YELLOW}Setting up lm-polygraph...${NC}"

if [ -d "$LM_POLYGRAPH_DIR" ]; then
echo -e " Pulling latest changes..."
cd "$LM_POLYGRAPH_DIR"
git pull origin dev 2>&1 | grep -E "(Already|Updating)" || true
git pull origin main 2>&1 | grep -E "(Already|Updating)" || true
cd "$SCRIPT_DIR"
else
echo -e " Cloning lm-polygraph dev branch..."
git clone -b dev https://github.com/IINemo/lm-polygraph.git
echo -e " Cloning lm-polygraph..."
git clone https://github.com/IINemo/lm-polygraph.git
fi

# Patch lm-polygraph requirements to allow newer transformers and spacy (needed for vLLM/numpy 2.x compatibility)
echo -e " Patching version constraints..."
sed -i 's/transformers>=4.48.0,<4.52.0/transformers>=4.48.0/' "$LM_POLYGRAPH_DIR/requirements.txt"
sed -i 's/spacy>=3.4.0,<3.8.0/spacy>=3.8.0/' "$LM_POLYGRAPH_DIR/requirements.txt"
sed -i '/unbabel-comet/d' "$LM_POLYGRAPH_DIR/requirements.txt"

echo -e " Installing lm-polygraph..."
pip_install -e "$LM_POLYGRAPH_DIR"
echo -e "${GREEN}✓ lm-polygraph installed${NC}"
Expand Down Expand Up @@ -127,13 +121,6 @@ install_kernelact
# Install llm-uncertainty-head (luh) for UHead scorer
install_luh

# Pin numpy and fix thinc/spacy AFTER all installs
# (lm-polygraph deps downgrade numpy to 1.x; vLLM needs >=2.0; numba requires <2.3)
echo -e "${YELLOW}Pinning numpy and fixing thinc/spacy for numpy 2.x compatibility...${NC}"
pip_install "numpy>=2.0.0,<2.3.0" # pin after vLLM (vLLM pulls 2.4+; numba 0.61.2 requires <2.3)
pip_install "thinc>=8.3.0" "spacy>=3.8.0"
echo -e "${GREEN}✓ Dependencies pinned${NC}"

echo -e "\n${GREEN}✅ Setup complete!${NC}"
echo -e "\nNext: Copy .env.example to .env and add your API keys"
echo -e "Update dependencies: ${BLUE}./setup.sh --update${NC}"
Loading