IINemo · smirnovlad · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.flake8 b/.flake8
@@ -10,7 +10,7 @@ exclude =
     outputs,
     .venv,
     venv,
-    llm_tts/evaluation/latex2sympy
+    thinkbooster/evaluation/latex2sympy
 per-file-ignores =
     __init__.py:F401
     tests/deepconf/test_deepconf_accurate.py:E402

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -23,10 +23,10 @@ jobs:
           pip install black isort flake8
 
       - name: Check formatting with black
-        run: black --check llm_tts scripts service_app
+        run: black --check thinkbooster scripts service_app
 
       - name: Check import sorting with isort
-        run: isort --check-only --profile black llm_tts scripts service_app
+        run: isort --check-only --profile black thinkbooster scripts service_app
 
       - name: Lint with flake8
-        run: flake8 llm_tts scripts service_app
+        run: flake8 thinkbooster scripts service_app
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -26,17 +26,16 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: recursive
+    - name: Free disk space
+      run: |
+        sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc
+        df -h /
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
           python-version: '3.11'
           cache: 'pip'
-    - name: Run setup.sh
-      run: |
-        ./setup.sh --verbose
-    - name: Install dev dependencies
+    - name: Install package and dev dependencies
       run: |
         pip install -e ".[dev]"
     - name: Validate strategy registry
@@ -54,17 +53,16 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: recursive
+    - name: Free disk space
+      run: |
+        sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc
+        df -h /
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
           python-version: '3.11'
           cache: 'pip'
-    - name: Run setup.sh
-      run: |
-        ./setup.sh --verbose
-    - name: Install dev dependencies
+    - name: Install package and dev dependencies
       run: |
         pip install -e ".[dev]"
     - name: Run integration tests

diff --git a/.gitignore b/.gitignore
@@ -187,7 +187,7 @@ workdir/
 # deepconf/
 # tree-of-thought-llm/
 lm-polygraph/
-llm_tts/datasets/KernelAct/
+thinkbooster/datasets/KernelAct/
 
 # External Qwen repositories
 # Qwen2.5-Math/

diff --git a/Makefile b/Makefile
@@ -24,13 +24,13 @@ hooks:
 
 lint:
 	@echo "Running flake8..."
-	@flake8 llm_tts scripts service_app
+	@flake8 thinkbooster scripts service_app
 
 format:
 	@echo "Formatting with black..."
-	@black llm_tts scripts service_app
+	@black thinkbooster scripts service_app
 	@echo "Sorting imports with isort..."
-	@isort llm_tts scripts service_app
+	@isort thinkbooster scripts service_app
 	@echo "✓ Code formatted"
 
 fix:

diff --git a/README.md b/README.md
@@ -4,7 +4,8 @@
 </div>
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-3110/)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![PyPI](https://img.shields.io/pypi/v/thinkbooster)](https://pypi.org/project/thinkbooster/)
 [![arXiv](https://img.shields.io/badge/arXiv-b31b1b.svg)](https://thinkbooster.s3.us-east-1.amazonaws.com/thinkbooster.pdf)
 
 [Quick Start](#quick-start) | [Key Features](#key-features) | [Strategies](#supported-strategies) | [Visual Debugger](#visual-debugger) | [Documentation](#documentation)
@@ -28,25 +29,54 @@ ThinkBooster is an open-source framework for **test-time compute scaling** of la
 ### Installation
 
 ```bash
-# Clone the repository
+pip install thinkbooster
+```
+
+Or install from source for development:
+
+```bash
 git clone https://github.com/IINemo/thinkbooster.git
 cd thinkbooster
+pip install -e ".[dev]"
+```
 
-# Create conda environment
-conda create -n thinkbooster python=3.11 -y
-conda activate thinkbooster
+<details>
+<summary>Optional: additional scorers (UHead, KernelAct)</summary>
+
+Some advanced scorers require GitHub-only dependencies. From a source checkout, run `setup.sh` after pip install:
 
-# Install dependencies
+```bash
 ./setup.sh
+```
+
+This installs `llm-uncertainty-head`, `vllm-speculators`, and `KernelAct`. Core functionality (all strategies, PRM/entropy/probability scorers, evaluation) works without these.
 
-# Configure API keys
+</details>
+
+```bash
+# Configure API keys (optional, for LLM judge and OpenRouter): copy the template, then add your OPENROUTER_API_KEY to .env
 cp .env.example .env
-# Edit .env and add your OPENROUTER_API_KEY
+```
+
+### Python API
+
+```python
+# Strategies
+from thinkbooster.strategies.strategy_baseline import StrategyBaseline
+from thinkbooster.strategies.strategy_self_consistency import StrategySelfConsistency
+from thinkbooster.strategies.strategy_beam_search import StrategyBeamSearch
+from thinkbooster.strategies.strategy_offline_best_of_n import StrategyOfflineBestOfN
+
+# Evaluation utilities
+from thinkbooster.evaluation.grader import math_equal
+from thinkbooster.evaluation.parser import extract_answer
 ```
 
 ### REST API
 
 ```bash
+git clone https://github.com/IINemo/thinkbooster.git
+cd thinkbooster
 pip install -e ".[service]"
 python service_app/main.py   # starts on http://localhost:8001
 ```
@@ -140,7 +170,7 @@ See [service_app/README.md](service_app/README.md) for details on cached example
 
 ```
 thinkbooster/
-├── llm_tts/              # Core library
+├── thinkbooster/         # Core library (pip install thinkbooster)
 │   ├── strategies/       # TTS strategy implementations
 │   ├── models/           # Model wrappers (vLLM, HuggingFace, API)
 │   ├── scorers/          # Step scoring (PRM, uncertainty, voting)
@@ -151,7 +181,7 @@ thinkbooster/
 ├── service_app/          # REST API service + visual debugger
 ├── tests/                # Test suite with strategy registry
 ├── docs/                 # Documentation
-└── lm-polygraph/         # Submodule: uncertainty estimation
+└── setup.sh              # Optional: install GitHub-only deps (UHead, KernelAct)
 ```
 
 See [Project Structure](docs/getting_started/project_structure.md) for a detailed architecture overview.

diff --git a/config/dataset/human_eval_plus.yaml b/config/dataset/human_eval_plus.yaml
@@ -18,7 +18,7 @@ question_field: "question"  # EvalPlus loader uses "question" field
 answer_field: "answer"  # EvalPlus loader uses "answer" field
 data_name: "human_eval_plus"  # Used for evaluation routing and EvalPlus API loading
 
-# Fields from EvalPlus API loader (llm_tts/datasets/human_eval_plus.py):
+# Fields from EvalPlus API loader (thinkbooster/datasets/human_eval_plus.py):
 # - question: Problem prompt with docstring and example (correct format!)
 # - answer: Canonical solution code
 # - task_id: Unique identifier (e.g., "HumanEval/0")

diff --git a/config/dataset/kernelbench.yaml b/config/dataset/kernelbench.yaml
@@ -17,12 +17,12 @@ question_field: "question"  # KernelAct loader uses "question" field with genera
 answer_field: "answer"  # KernelAct loader uses "answer" field (reference code)
 data_name: "kernelbench"  # Used for evaluation routing and KernelAct loader
 
-# KernelBench specific settings (used by llm_tts/datasets/kernelbench.py)
+# KernelBench specific settings (used by thinkbooster/datasets/kernelbench.py)
 level: 1  # Dataset level (1, 2, or 3)
 prompt_type: "improve"  # Prompt type: "improve", "kernelbench", "normal"
 trial: 1  # Trial number (affects prompt generation for TTS iterations)
 
-# Fields from KernelAct loader (llm_tts/datasets/kernelbench.py):
+# Fields from KernelAct loader (thinkbooster/datasets/kernelbench.py):
 # - question: Generated prompt using KernelAct's choose_prompt()
 # - answer: Reference PyTorch implementation
 # - problem_id: Unique identifier (e.g., 1, 2, 3, ...)

diff --git a/config/dataset/mbpp_plus.yaml b/config/dataset/mbpp_plus.yaml
@@ -18,7 +18,7 @@ question_field: "question"  # EvalPlus loader uses "question" field
 answer_field: "answer"  # EvalPlus loader uses "answer" field
 data_name: "mbpp_plus"  # Used for evaluation routing and EvalPlus API loading
 
-# Fields from EvalPlus API loader (llm_tts/datasets/mbpp_plus.py):
+# Fields from EvalPlus API loader (thinkbooster/datasets/mbpp_plus.py):
 # - question: Problem prompt with docstring and example assertion (correct format!)
 # - answer: Canonical solution code
 # - task_id: Unique identifier (e.g., "Mbpp/2")

diff --git a/config/scorer/uncertainty_entropy.py b/config/scorer/uncertainty_entropy.py
@@ -4,7 +4,7 @@
 from lm_polygraph.utils.causal_lm_with_uncertainty import CausalLMWithUncertainty
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from llm_tts.utils import get_torch_dtype
+from thinkbooster.utils import get_torch_dtype
 
 # ===============================================
 

diff --git a/config/scorer/uncertainty_pd.py b/config/scorer/uncertainty_pd.py
@@ -2,8 +2,8 @@
 from lm_polygraph.utils.causal_lm_with_uncertainty import CausalLMWithUncertainty
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from llm_tts.scorers.estimator_uncertainty_pd import PDGap
-from llm_tts.utils import get_torch_dtype
+from thinkbooster.scorers.estimator_uncertainty_pd import PDGap
+from thinkbooster.utils import get_torch_dtype
 
 
 def create_uncertainty_model(config):

diff --git a/config/scorer/uncertainty_perplexity.py b/config/scorer/uncertainty_perplexity.py
@@ -4,7 +4,7 @@
 from lm_polygraph.utils.causal_lm_with_uncertainty import CausalLMWithUncertainty
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from llm_tts.utils import get_torch_dtype
+from thinkbooster.utils import get_torch_dtype
 
 # ===============================================
 

diff --git a/docs/core/architecture.md b/docs/core/architecture.md
@@ -296,18 +296,18 @@ if selected_candidate.is_trajectory_complete:
 ## File References
 
 ### Offline Strategies
-- Self-Consistency: `llm_tts/strategies/strategy_self_consistency.py`
-- DeepConf: `llm_tts/strategies/deepconf/strategy.py`
-- Chain of Thought: `llm_tts/strategies/strategy_chain_of_thought.py`
+- Self-Consistency: `thinkbooster/strategies/strategy_self_consistency.py`
+- DeepConf: `thinkbooster/strategies/deepconf/strategy.py`
+- Chain of Thought: `thinkbooster/strategies/strategy_chain_of_thought.py`
 
 ### Online Strategies
-- Strategy base: `llm_tts/strategies/strategy_base.py`
-- Online Best-of-N: `llm_tts/strategies/strategy_online_best_of_n.py`
-- Phi Decoding: `llm_tts/strategies/phi.py`
-- Adaptive Scaling: `llm_tts/strategies/adaptive_scaling_best_of_n.py`
-- Beam Search: `llm_tts/strategies/strategy_beam_search.py`
+- Strategy base: `thinkbooster/strategies/strategy_base.py`
+- Online Best-of-N: `thinkbooster/strategies/strategy_online_best_of_n.py`
+- Phi Decoding: `thinkbooster/strategies/phi.py`
+- Adaptive Scaling: `thinkbooster/strategies/adaptive_scaling_best_of_n.py`
+- Beam Search: `thinkbooster/strategies/strategy_beam_search.py`
 
 ### Shared Components
-- Step generators: `llm_tts/generators/`
-- Step boundary detectors: `llm_tts/step_boundary_detectors/`
-- Scorers: `llm_tts/scorers/`
+- Step generators: `thinkbooster/generators/`
+- Step boundary detectors: `thinkbooster/step_boundary_detectors/`
+- Scorers: `thinkbooster/scorers/`
diff --git a/docs/core/step_boundary_detectors.md b/docs/core/step_boundary_detectors.md
@@ -189,7 +189,7 @@ Comparing where detectors place step boundaries:
 - Zero API cost
 
 ```python
-from llm_tts.step_boundary_detectors import ThinkingMarkerDetector
+from thinkbooster.step_boundary_detectors import ThinkingMarkerDetector
 
 detector = ThinkingMarkerDetector(
     use_sequence=True,
@@ -217,7 +217,7 @@ steps = detector.detect_steps(thinking_content)
 
 ## Files
 
-- **Detectors implementation**: [`llm_tts/step_boundary_detectors/`](../llm_tts/step_boundary_detectors/)
+- **Detectors implementation**: [`thinkbooster/step_boundary_detectors/`](../../thinkbooster/step_boundary_detectors/)
   - `base.py` - Abstract base class (`StepBoundaryDetectorBase`)
   - `non_thinking/` - Detectors for non-thinking mode (structured responses with explicit markers)
     - `structured.py` - `StructuredStepDetector` for "- Step 1:", "- Step 2:" formats