From 1094d1b91eb996a56f850f0cde9c02896d998d7a Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 10 Mar 2026 10:45:56 -0700 Subject: [PATCH 01/13] Downgrade protobuf --- setup/evals/requirements-colab.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup/evals/requirements-colab.txt b/setup/evals/requirements-colab.txt index fc92e9ff..3aac9c1b 100644 --- a/setup/evals/requirements-colab.txt +++ b/setup/evals/requirements-colab.txt @@ -45,5 +45,5 @@ flask-cors>=5.0.1 # Logging loguru - -numpy==2.0.1 \ No newline at end of file +numpy==2.0.1 +protobuf<6.0.0 \ No newline at end of file From c67b50c0c873c88a87812a54153d29569bd2e9f8 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 10 Mar 2026 16:27:08 -0700 Subject: [PATCH 02/13] Upgrade flag for flashinfer --- rapidfireai/cli.py | 4 ++-- rapidfireai/utils/doctor.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rapidfireai/cli.py b/rapidfireai/cli.py index 0edf4c8c..0f861440 100644 --- a/rapidfireai/cli.py +++ b/rapidfireai/cli.py @@ -242,8 +242,8 @@ def install_packages(evals: bool = False, init_packages: list[str] | None = None packages.append({"package": f"torchaudio=={torchaudio_version}", "extra_args": ["--upgrade", "--index-url", f"https://download.pytorch.org/whl/{torch_cuda}"]}) if evals: packages.append({"package": f"vllm=={vllm_version}", "extra_args": ["--upgrade"]}) - packages.append({"package": "flashinfer-python", "extra_args": []}) - packages.append({"package": "flashinfer-cubin", "extra_args": []}) + packages.append({"package": "flashinfer-python", "extra_args": ["--upgrade"]}) + packages.append({"package": "flashinfer-cubin", "extra_args": ["--upgrade"]}) if cuda_major + (cuda_minor / 10.0) >= 12.8: packages.append({"package": "flashinfer-jit-cache", "extra_args": ["--upgrade","--index-url", f"https://flashinfer.ai/whl/{flash_cuda}"]}) if get_compute_capability() >= 8.0: diff --git a/rapidfireai/utils/doctor.py b/rapidfireai/utils/doctor.py 
index c90b6612..a676cebc 100644 --- a/rapidfireai/utils/doctor.py +++ b/rapidfireai/utils/doctor.py @@ -62,6 +62,7 @@ def get_doctor_info(log_lines: int = 10): "mlflow", "torch", "transformers", + "protobuf", "flask", "gunicorn", "peft", @@ -78,6 +79,8 @@ def get_doctor_info(log_lines: int = 10): "langchain-openai", "langchain-huggingface", "langchain-classic", + "langchain-pinecone", + "langchain-postgres", "unstructured", "waitress", "vllm", From d6a82b01857ed033000a53edef0587ee098c1320 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 10 Mar 2026 17:39:50 -0700 Subject: [PATCH 03/13] Converge fix --- setup/start.sh | 51 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/setup/start.sh b/setup/start.sh index d838f5ab..44bfb131 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -182,6 +182,9 @@ cleanup() { pkill -f "gunicorn.*rapidfireai.$RAPIDFIRE_MODE.dispatcher" 2>/dev/null || true # Only kill Flask server if we're not in Colab (frontend doesn't run in Colab) pkill -f "python.*rapidfireai/frontend/server.py" 2>/dev/null || true + # Stop Converge if it was running + pkill -f "converge start" 2>/dev/null || true + pkill -f "uvicorn.*main:app" 2>/dev/null || true fi print_success "All services stopped" @@ -575,6 +578,38 @@ start_frontend() { return 0 } +# Function to start Converge (backend + frontend) via converge CLI +start_converge() { + print_status "Starting Converge services..." + + # converge start runs in the foreground with its own monitor loop, + # so we launch it in the background and track it like other services. + print_status "Converge logs will be written to: $RF_LOG_PATH/converge.log" + + if command -v setsid &> /dev/null; then + setsid converge start --force > "$RF_LOG_PATH/converge.log" 2>&1 & + else + nohup converge start --force > "$RF_LOG_PATH/converge.log" 2>&1 & + fi + + local converge_pid=$! 
+ echo "$converge_pid Converge" >> "$RF_PID_FILE" + + # Wait for the Converge frontend to be ready on the frontend port + if wait_for_service $RF_FRONTEND_HOST $RF_FRONTEND_PORT "Converge frontend" $RF_TIMEOUT_TIME; then + print_success "Converge started (PID: $converge_pid)" + return 0 + else + print_error "Converge failed to start. Checking logs..." + if [[ -f "$RF_LOG_PATH/converge.log" ]]; then + echo "=== Last 30 lines of converge.log ===" + tail -30 "$RF_LOG_PATH/converge.log" + echo "=== End of logs ===" + fi + return 1 + fi +} + # Function to conditionally start frontend based on mode start_frontend_if_needed() { # In Colab mode, always skip frontend @@ -727,10 +762,18 @@ start_services() { # Start frontend server (conditionally) if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then - if start_frontend; then - ((services_started++)) + if command -v converge &> /dev/null; then + if start_converge; then + ((services_started++)) + else + print_error "Failed to start Converge" + fi else - print_error "Failed to start frontend server" + if start_frontend; then + ((services_started++)) + else + print_error "Failed to start frontend server" + fi fi else print_status "⊗ Skipping frontend (use TensorBoard if in Colab mode)" @@ -794,7 +837,7 @@ main() { # Show summary of all log files for debugging print_status "=== Startup Failure Summary ===" - for log_file in "mlflow.log" "api.log" "frontend.log"; do + for log_file in "mlflow.log" "api.log" "frontend.log" "converge.log"; do if [[ -f "$RF_LOG_PATH/$log_file" ]]; then echo "" print_status "=== $log_file ===" From a2fea8109b841068f2d89138c070041008fb63bb Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Wed, 11 Mar 2026 16:03:53 -0700 Subject: [PATCH 04/13] start.sh --- setup/start.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/setup/start.sh b/setup/start.sh index 44bfb131..4714c8bb 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -4,6 +4,7 @@ # This script starts MLflow server, API server, and frontend tracking 
server # Used for pip-installed package mode + set -e # Exit on any error # Configuration From 5b0e59de4e49893473dc0975ae51a56cca5da1f4 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Thu, 12 Mar 2026 10:54:58 -0700 Subject: [PATCH 05/13] converge argument --- rapidfireai/cli.py | 10 +++++++ setup/start.sh | 75 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/rapidfireai/cli.py b/rapidfireai/cli.py index 0f861440..6c2ea154 100644 --- a/rapidfireai/cli.py +++ b/rapidfireai/cli.py @@ -456,6 +456,13 @@ def main(): parser.add_argument("--log-lines", type=int, default=10, help="Number of lines to log to the console") + parser.add_argument( + "--converge", + choices=["all", "none", "backend", "frontend"], + default="all", + help="Converge mode: all (default, start converge backend+frontend), none (use original frontend, do not start converge), backend (only converge backend), frontend (only converge frontend)", + ) + args = parser.parse_args() # Set environment variables from CLI args @@ -481,6 +488,9 @@ def main(): if args.force: os.environ["RF_FORCE"] = "true" + # Converge mode (all|none|backend|frontend) for start script + os.environ["RF_CONVERGE_MODE"] = args.converge + # Handle doctor command separately if args.command == "doctor": return run_doctor(args.log_lines) diff --git a/setup/start.sh b/setup/start.sh index 4714c8bb..6242cd90 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -30,6 +30,16 @@ RF_LOG_PATH="${RF_LOG_PATH:=$RF_HOME/logs}" RF_TIMEOUT_TIME=${RF_TIMEOUT_TIME:=30} +# Converge mode: all (backend+frontend), none (original frontend only), backend, frontend +RF_CONVERGE_MODE=${RF_CONVERGE_MODE:=all} +case "$RF_CONVERGE_MODE" in + all|none|backend|frontend) ;; + *) + echo "Invalid RF_CONVERGE_MODE=$RF_CONVERGE_MODE (expected: all, none, backend, frontend)" + exit 1 + ;; +esac + # Colab mode configuration if [ -z "${COLAB_GPU+x}" ]; then RF_MLFLOW_ENABLED=${RF_MLFLOW_ENABLED:=true} @@ 
-579,24 +589,38 @@ start_frontend() { return 0 } -# Function to start Converge (backend + frontend) via converge CLI +# Function to start Converge via converge CLI (mode: all | backend | frontend) start_converge() { - print_status "Starting Converge services..." + local mode="${1:-$RF_CONVERGE_MODE}" + print_status "Starting Converge ($mode)..." # converge start runs in the foreground with its own monitor loop, # so we launch it in the background and track it like other services. print_status "Converge logs will be written to: $RF_LOG_PATH/converge.log" + local converge_args="--force" + case "$mode" in + all) ;; + backend) converge_args="$converge_args backend" ;; + frontend) converge_args="$converge_args frontend" ;; + *) converge_args="$converge_args" ;; + esac + if command -v setsid &> /dev/null; then - setsid converge start --force > "$RF_LOG_PATH/converge.log" 2>&1 & + setsid converge start $converge_args > "$RF_LOG_PATH/converge.log" 2>&1 & else - nohup converge start --force > "$RF_LOG_PATH/converge.log" 2>&1 & + nohup converge start $converge_args > "$RF_LOG_PATH/converge.log" 2>&1 & fi local converge_pid=$! 
echo "$converge_pid Converge" >> "$RF_PID_FILE" - # Wait for the Converge frontend to be ready on the frontend port + # When starting full stack or frontend, wait for frontend port; backend-only may not serve it + if [[ "$mode" == "backend" ]]; then + print_success "Converge backend started (PID: $converge_pid)" + return 0 + fi + if wait_for_service $RF_FRONTEND_HOST $RF_FRONTEND_PORT "Converge frontend" $RF_TIMEOUT_TIME; then print_success "Converge started (PID: $converge_pid)" return 0 @@ -763,19 +787,34 @@ start_services() { # Start frontend server (conditionally) if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then - if command -v converge &> /dev/null; then - if start_converge; then - ((services_started++)) - else - print_error "Failed to start Converge" - fi - else - if start_frontend; then - ((services_started++)) - else - print_error "Failed to start frontend server" - fi - fi + case "$RF_CONVERGE_MODE" in + none) + if start_frontend; then + ((services_started++)) + else + print_error "Failed to start frontend server" + fi + ;; + backend|frontend|all) + if command -v converge &> /dev/null; then + if start_converge; then + ((services_started++)) + else + print_error "Failed to start Converge" + fi + else + if [[ "$RF_CONVERGE_MODE" == "all" ]]; then + if start_frontend; then + ((services_started++)) + else + print_error "Failed to start frontend server" + fi + else + print_error "Converge not found in PATH (required for --converge=$RF_CONVERGE_MODE)" + fi + fi + ;; + esac else print_status "⊗ Skipping frontend (use TensorBoard if in Colab mode)" fi From c4143bcdc198d6cf535053bf9fbc1aba70449aef Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Thu, 12 Mar 2026 11:06:36 -0700 Subject: [PATCH 06/13] Change converge check --- setup/start.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup/start.sh b/setup/start.sh index 6242cd90..671633db 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -107,6 +107,11 @@ print_warning() { echo -e 
"${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1" } +# Return 0 if rapidfireai-pro pip package is installed +has_rapidfireai_pro() { + ${RF_PIP_EXECUTABLE} show rapidfireai-pro >/dev/null 2>&1 +} + # Function to setup Python environment setup_python_env() { print_status "Setting up Python environment..." @@ -796,7 +801,7 @@ start_services() { fi ;; backend|frontend|all) - if command -v converge &> /dev/null; then + if has_rapidfireai_pro; then if start_converge; then ((services_started++)) else @@ -810,7 +815,7 @@ start_services() { print_error "Failed to start frontend server" fi else - print_error "Converge not found in PATH (required for --converge=$RF_CONVERGE_MODE)" + print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)" fi fi ;; From ee15c9c65a22b1b3d5c1cad422cb211945f3fad9 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Thu, 12 Mar 2026 12:41:27 -0700 Subject: [PATCH 07/13] Update numpy pin --- README.md | 1 + setup/evals/requirements-local.txt | 3 +++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index d6c3d2fa..473102a1 100644 --- a/README.md +++ b/README.md @@ -356,6 +356,7 @@ used to overwrite the defaults. 
- `RF_PID_FILE` - File to store process ids of started services (default: ${RF_HOME}/rapidfire_pids.txt) - `RF_PYTHON_EXECUTABLE` - Python executable (default: python3 falls back to python if not found) - `RF_PIP_EXECUTABLE` - pip executable (default: pip3 falls back to pip if not found) +- `RF_CONVERGE_MODE` - Which Rapidfire AI Converge services to start: `all` (backend and frontend; falls back to the original frontend when `rapidfireai-pro` is not installed), `none` (original frontend only), `backend`, or `frontend` (default: all) ## Community & Governance diff --git a/setup/evals/requirements-local.txt b/setup/evals/requirements-local.txt index 771b6993..fa21cc4b 100644 --- a/setup/evals/requirements-local.txt +++ b/setup/evals/requirements-local.txt @@ -41,3 +41,6 @@ mlflow>=3.2.0 gunicorn>=23.0.0 flask-cors>=5.0.1 loguru + +numpy==2.0.1 +protobuf<6.0.0 \ No newline at end of file From 958ecf3ba4719c81caede6fd6812a1c473606cda Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Thu, 12 Mar 2026 15:21:40 -0700 Subject: [PATCH 08/13] allow os variable for converge --- rapidfireai/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rapidfireai/cli.py b/rapidfireai/cli.py index 6c2ea154..647eafe6 100644 --- a/rapidfireai/cli.py +++ b/rapidfireai/cli.py @@ -23,6 +23,7 @@ from .version import __version__ +RF_CONVERGE_MODE = os.getenv("RF_CONVERGE_MODE", "all") def get_script_path(): """Get the path to the start.sh script. 
@@ -459,7 +460,7 @@ def main(): parser.add_argument( "--converge", choices=["all", "none", "backend", "frontend"], - default="all", + default=RF_CONVERGE_MODE, help="Converge mode: all (default, start converge backend+frontend), none (use original frontend, do not start converge), backend (only converge backend), frontend (only converge frontend)", ) From c851e11bc585be7f9e6c397efc3ec35d4e237173 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 17 Mar 2026 11:06:15 -0700 Subject: [PATCH 09/13] Better converge startup checks --- setup/start.sh | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/setup/start.sh b/setup/start.sh index 671633db..9526e874 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -39,6 +39,10 @@ case "$RF_CONVERGE_MODE" in exit 1 ;; esac +RF_CONVERGE_BACKEND_HOST=${RF_CONVERGE_BACKEND_HOST:=0.0.0.0} +RF_CONVERGE_BACKEND_PORT=${RF_CONVERGE_BACKEND_PORT:=8860} +RF_CONVERGE_FRONTEND_HOST=${RF_CONVERGE_FRONTEND_HOST:=$RF_FRONTEND_HOST} +RF_CONVERGE_FRONTEND_PORT=${RF_CONVERGE_FRONTEND_PORT:=$RF_FRONTEND_PORT} # Colab mode configuration if [ -z "${COLAB_GPU+x}" ]; then @@ -621,23 +625,35 @@ start_converge() { echo "$converge_pid Converge" >> "$RF_PID_FILE" # When starting full stack or frontend, wait for frontend port; backend-only may not serve it - if [[ "$mode" == "backend" ]]; then - print_success "Converge backend started (PID: $converge_pid)" - return 0 + if [[ "$mode" == "backend" ]] || [[ "$mode" == "all" ]]; then + if wait_for_service $RF_CONVERGE_BACKEND_HOST $RF_CONVERGE_BACKEND_PORT "Converge backend" $RF_TIMEOUT_TIME; then + print_success "Converge backend started (PID: $converge_pid)" + else + print_error "Converge backend failed to start. Checking logs..." 
+ if [[ -f "$RF_LOG_PATH/converge.log" ]]; then + echo "=== Last 30 lines of converge.log ===" + tail -30 "$RF_LOG_PATH/converge.log" + tail -30 "$RF_LOG_PATH/converge_backend.log" + echo "=== End of logs ===" + return 1 + fi fi - if wait_for_service $RF_FRONTEND_HOST $RF_FRONTEND_PORT "Converge frontend" $RF_TIMEOUT_TIME; then - print_success "Converge started (PID: $converge_pid)" - return 0 - else - print_error "Converge failed to start. Checking logs..." - if [[ -f "$RF_LOG_PATH/converge.log" ]]; then - echo "=== Last 30 lines of converge.log ===" - tail -30 "$RF_LOG_PATH/converge.log" - echo "=== End of logs ===" + if [[ "$mode" == "frontend" ]] || [[ "$mode" == "all" ]]; then + if wait_for_service $RF_CONVERGE_FRONTEND_HOST $RF_CONVERGE_FRONTEND_PORT "Converge frontend" $RF_TIMEOUT_TIME; then + print_success "Converge frontend started (PID: $converge_pid)" + else + print_error "Converge frontend failed to start. Checking logs..." + if [[ -f "$RF_LOG_PATH/converge.log" ]]; then + echo "=== Last 30 lines of converge.log ===" + tail -30 "$RF_LOG_PATH/converge.log" + tail -30 "$RF_LOG_PATH/converge_frontend.log" + echo "=== End of logs ===" + return 1 + fi fi - return 1 fi + return 0 } # Function to conditionally start frontend based on mode From 71058a1e31ac0d51ed3822238915b238e8ccd7dd Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 17 Mar 2026 11:08:17 -0700 Subject: [PATCH 10/13] Resolve duplicate numpy --- setup/evals/requirements-local.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/setup/evals/requirements-local.txt b/setup/evals/requirements-local.txt index fa21cc4b..b64fa275 100644 --- a/setup/evals/requirements-local.txt +++ b/setup/evals/requirements-local.txt @@ -27,7 +27,6 @@ langchain-postgres>=0.0.17 # Data Manipulation & Display unstructured>=0.18.15 -numpy>=1.26.4,<2.3 # Other requests==2.32.5 From 43759cf39b4f4cded3f21a04df11715a6158f670 Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 17 Mar 2026 11:19:24 -0700 Subject: 
[PATCH 11/13] Fix fi --- setup/start.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/setup/start.sh b/setup/start.sh index 9526e874..8697badb 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -637,6 +637,7 @@ start_converge() { echo "=== End of logs ===" return 1 fi + fi fi if [[ "$mode" == "frontend" ]] || [[ "$mode" == "all" ]]; then From d4ff6048fd3d219862f484d1d46f86fc15f106da Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 17 Mar 2026 11:38:25 -0700 Subject: [PATCH 12/13] Check on log file names --- setup/start.sh | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/setup/start.sh b/setup/start.sh index 8697badb..91d5d0b0 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -633,8 +633,14 @@ start_converge() { if [[ -f "$RF_LOG_PATH/converge.log" ]]; then echo "=== Last 30 lines of converge.log ===" tail -30 "$RF_LOG_PATH/converge.log" - tail -30 "$RF_LOG_PATH/converge_backend.log" - echo "=== End of logs ===" + echo "=== End of log ===" + if [[ -f "$RF_LOG_PATH/converge_backend.log" ]]; then + echo "=== Last 30 lines of converge_backend.log ===" + tail -30 "$RF_LOG_PATH/converge_backend.log" + echo "=== End of log ===" + else + echo "No converge_backend.log file found" + fi return 1 fi fi @@ -648,8 +654,14 @@ start_converge() { if [[ -f "$RF_LOG_PATH/converge.log" ]]; then echo "=== Last 30 lines of converge.log ===" tail -30 "$RF_LOG_PATH/converge.log" - tail -30 "$RF_LOG_PATH/converge_frontend.log" - echo "=== End of logs ===" + echo "=== End of log ===" + if [[ -f "$RF_LOG_PATH/converge_frontend.log" ]]; then + echo "=== Last 30 lines of converge_frontend.log ===" + tail -30 "$RF_LOG_PATH/converge_frontend.log" + echo "=== End of log ===" + else + echo "No converge_frontend.log file found" + fi return 1 fi fi From 9c8d15d7db8a460c0f7e790bd08388fa8c1bf1af Mon Sep 17 00:00:00 2001 From: David Matsalla Date: Tue, 17 Mar 2026 12:59:28 -0700 Subject: [PATCH 13/13] Log file warning --- setup/start.sh | 8 ++++++-- 
1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup/start.sh b/setup/start.sh index 91d5d0b0..7fdeecea 100755 --- a/setup/start.sh +++ b/setup/start.sh @@ -641,8 +641,10 @@ start_converge() { else echo "No converge_backend.log file found" fi - return 1 + else + echo "No converge.log file found" fi + return 1 fi fi @@ -662,8 +664,10 @@ start_converge() { else echo "No converge_frontend.log file found" fi - return 1 + else + echo "No converge.log file found" fi + return 1 fi fi return 0