diff --git a/llm_tts/evaluation/grader.py b/llm_tts/evaluation/grader.py index 7ef2dd62..4d9423e0 100644 --- a/llm_tts/evaluation/grader.py +++ b/llm_tts/evaluation/grader.py @@ -12,11 +12,12 @@ from typing import Union import regex -from latex2sympy2 import latex2sympy from sympy import N, simplify from sympy.parsing.latex import parse_latex from sympy.parsing.sympy_parser import parse_expr +from llm_tts.evaluation.latex2sympy import latex2sympy + log = logging.getLogger(__name__) # Timeout for symbolic comparison (seconds) diff --git a/llm_tts/evaluation/parser.py b/llm_tts/evaluation/parser.py index 8becb60b..2608b688 100755 --- a/llm_tts/evaluation/parser.py +++ b/llm_tts/evaluation/parser.py @@ -2,9 +2,10 @@ from typing import Any, Dict import regex -from latex2sympy2 import latex2sympy from word2number import w2n +from llm_tts.evaluation.latex2sympy import latex2sympy + # from utils import * diff --git a/pyproject.toml b/pyproject.toml index 870f5fcf..89043278 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,10 +7,10 @@ name = "thinkbooster" version = "0.1.0" description = "ThinkBooster: a unified framework for test-time compute scaling of LLM reasoning" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" license = {text = "MIT"} authors = [ - {name = "Your Name", email = "your.email@example.com"}, + {name = "List of contributors: https://github.com/IINemo/thinkbooster/graphs/contributors", email = "artemshelmanov@gmail.com"}, ] keywords = ["llm", "reasoning", "test-time-scaling", "best-of-n", "reasoning-evaluation"] classifiers = [ @@ -27,22 +27,21 @@ dependencies = [ "torch>=1.9.0", "transformers>=4.56.0", # vLLM 0.12.0+ requires ALLOWED_LAYER_TYPES from transformers "datasets>=2.14.0", # HuggingFace datasets for loading benchmarks - "numpy>=2.0.0,<2.3.0", # vLLM requires >=2.0; numba 0.61.2 (vLLM dep) requires <2.3 - "thinc>=8.3.0", # 8.3+ required for numpy 2.x binary compatibility + "numpy>=1.23.5", "tqdm>=4.64.0", "parse>=1.19.0", "hydra-core>=1.2.0", "omegaconf>=2.2.0", "python-dotenv>=0.19.0", - # "lm-polygraph>=0.0.1", # Installed separately via setup.sh (dev branch) + "lm-polygraph>=0.6.0", "pylatexenc>=2.10", "sympy>=1.12", # For Game of 24 answer validation and LaTeX parsing - # latex2sympy2 installed separately with --no-deps due to antlr4 version conflict with hydra "regex>=2023.0.0", # For advanced regex in grader "plotly>=5.0.0", # For interactive reasoning visualization "word2number>=1.1", # For converting word numbers to digits in math normalization "wandb>=0.15.0", # Weights & Biases for experiment tracking "evalplus>=0.3.1", # EvalPlus for MBPP+/HumanEval+ dataset loading and evaluation + "vllm>=0.12.0,<0.13.0", # vLLM backend for fast inference with PagedAttention ] [project.optional-dependencies] @@ -72,34 +71,24 @@ service = [ "python-json-logger>=2.0.7", ] -vllm = [ - # vLLM backend for fast inference with PagedAttention - # Pin to 0.12.x - vLLM 0.13.0+ requires numpy>=2.0 which breaks thinc/spacy - "vllm>=0.12.0,<0.13.0", -] [project.urls] Homepage = "https://github.com/IINemo/thinkbooster" Repository = "https://github.com/IINemo/thinkbooster" Issues = "https://github.com/IINemo/thinkbooster/issues" -[project.scripts] -run-tts-eval = "scripts.run_tts_eval:main" - [tool.setuptools.packages.find] where = ["."] include = ["llm_tts*", "service_app*"] [tool.setuptools.package-data] llm_tts = [ - "config/**/*.yaml", - "config/**/*.txt", "datasets/kernelact/kb_prompts/*.toml", ] [tool.black] line-length = 88 -target-version = ['py38', 'py39', 'py310', 'py311'] +target-version = ['py310', 'py311', 'py312'] include = '\.pyi?$' extend-exclude = ''' /( @@ -124,7 +113,7 @@ known_first_party = ["llm_tts"] skip = ["llm_tts/evaluation/latex2sympy"] [tool.mypy] -python_version = "3.8" +python_version = "3.10" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true diff --git a/setup.sh b/setup.sh index 6536abec..d2bf06d6 100755 --- a/setup.sh +++ b/setup.sh @@ -35,24 +35,18 @@ pip_install() { } install_lm_polygraph() { - echo -e "${YELLOW}Setting up lm-polygraph dev branch...${NC}" + echo -e "${YELLOW}Setting up lm-polygraph...${NC}" if [ -d "$LM_POLYGRAPH_DIR" ]; then echo -e " Pulling latest changes..." cd "$LM_POLYGRAPH_DIR" - git pull origin dev 2>&1 | grep -E "(Already|Updating)" || true + git pull origin main 2>&1 | grep -E "(Already|Updating)" || true cd "$SCRIPT_DIR" else - echo -e " Cloning lm-polygraph dev branch..." - git clone -b dev https://github.com/IINemo/lm-polygraph.git + echo -e " Cloning lm-polygraph..." + git clone https://github.com/IINemo/lm-polygraph.git fi - # Patch lm-polygraph requirements to allow newer transformers and spacy (needed for vLLM/numpy 2.x compatibility) - echo -e " Patching version constraints..." - sed -i 's/transformers>=4.48.0,<4.52.0/transformers>=4.48.0/' "$LM_POLYGRAPH_DIR/requirements.txt" - sed -i 's/spacy>=3.4.0,<3.8.0/spacy>=3.8.0/' "$LM_POLYGRAPH_DIR/requirements.txt" - sed -i '/unbabel-comet/d' "$LM_POLYGRAPH_DIR/requirements.txt" - echo -e " Installing lm-polygraph..." pip_install -e "$LM_POLYGRAPH_DIR" echo -e "${GREEN}✓ lm-polygraph installed${NC}" @@ -127,13 +121,6 @@ install_kernelact # Install llm-uncertainty-head (luh) for UHead scorer install_luh -# Pin numpy and fix thinc/spacy AFTER all installs -# (lm-polygraph deps downgrade numpy to 1.x; vLLM needs >=2.0; numba requires <2.3) -echo -e "${YELLOW}Pinning numpy and fixing thinc/spacy for numpy 2.x compatibility...${NC}" -pip_install "numpy>=2.0.0,<2.3.0" # pin after vLLM (vLLM pulls 2.4+; numba 0.61.2 requires <2.3) -pip_install "thinc>=8.3.0" "spacy>=3.8.0" -echo -e "${GREEN}✓ Dependencies pinned${NC}" - echo -e "\n${GREEN}✅ Setup complete!${NC}" echo -e "\nNext: Copy .env.example to .env and add your API keys" echo -e "Update dependencies: ${BLUE}./setup.sh --update${NC}"