Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ used to overwrite the defaults.
- `RF_PYTHON_EXECUTABLE` - Python executable (default: python3 falls back to python if not found)
- `RF_PIP_EXECUTABLE` - pip executable (default: pip3 falls back to pip if not found)
- `RF_CONVERGE_MODE` - Whether to use Rapidfire AI Converge frontend and backend if available (default: all)
- `RF_START_FRONTEND` - Whether to start the frontend (default: true); set to `false`, or pass `--no-frontend`, to skip starting it

## Community & Governance

Expand Down
10 changes: 10 additions & 0 deletions rapidfireai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,13 @@ def main():
help="Run in Colab mode (skips frontend, conditionally starts MLflow based on tracking backend)",
)

parser.add_argument(
"--no-frontend",
action="store_true",
help="Do not start the dashboard (Flask on RF_FRONTEND_PORT); MLflow and the API still start when enabled. "
"With Converge, only the backend is started when --converge=all.",
)

parser.add_argument(
"--test-notebooks",
action="store_true",
Expand Down Expand Up @@ -494,6 +501,9 @@ def main():
os.environ["RF_COLAB_MODE"] = "true"
elif ColabConfig.ON_COLAB and os.getenv("RF_COLAB_MODE") is None:
os.environ["RF_COLAB_MODE"] = "true"

if args.no_frontend:
os.environ["RF_START_FRONTEND"] = "false"

# Handle force command separately
if args.force:
Expand Down
187 changes: 137 additions & 50 deletions setup/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,24 @@ RF_DB_PATH="${RF_DB_PATH:=$RF_HOME/db}"
RF_LOG_PATH="${RF_LOG_PATH:=$RF_HOME/logs}"

RF_TIMEOUT_TIME=${RF_TIMEOUT_TIME:=30}
# Resolve the Python and pip executables: keep any value supplied by the caller,
# otherwise default to python3/pip3, and fall back to python/pip when the
# selected name cannot be found by `command -v`.
: "${RF_PYTHON_EXECUTABLE:=python3}"
: "${RF_PIP_EXECUTABLE:=pip3}"

command -v $RF_PYTHON_EXECUTABLE &> /dev/null || RF_PYTHON_EXECUTABLE=python

command -v $RF_PIP_EXECUTABLE &> /dev/null || RF_PIP_EXECUTABLE=pip

# Converge mode: all (backend+frontend), none (original frontend only), backend, frontend
: "${RF_CONVERGE_MODE:=all}"

# CONVERGE_FOUND is the exit status of `pip show rapidfireai-pro` (0 when the
# package is installed). Any non-zero status forces Converge mode to "none".
CONVERGE_FOUND=0
${RF_PIP_EXECUTABLE} show rapidfireai-pro >/dev/null 2>&1 || CONVERGE_FOUND=$?
[[ "$CONVERGE_FOUND" == "0" ]] || RF_CONVERGE_MODE="none"

case "$RF_CONVERGE_MODE" in
all|none|backend|frontend) ;;
*)
Expand All @@ -58,6 +73,10 @@ else
RF_COLAB_MODE=${RF_COLAB_MODE:=true}
fi

# When false, do not start the RapidFire dashboard (Flask) or Converge frontend; MLflow + API still run when enabled.
RF_START_FRONTEND=${RF_START_FRONTEND:=true}


# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
Expand All @@ -74,26 +93,11 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RAPIDFIRE_DIR="$SCRIPT_DIR/../rapidfireai"
RAPIDFIRE_FIT_DIR="$RAPIDFIRE_DIR/fit"
RAPIDFIRE_EVALS_DIR="$RAPIDFIRE_DIR/evals"
if [[ -d "$RAPIDFIRE_DIR/frontend_pro" ]]; then
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend_pro"
else
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend"
fi
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend"

RAPIDFIRE_MODE=$(cat $RF_HOME/rf_mode.txt 2>/dev/null || echo "fit")
DISPATCHER_DIR="$RAPIDFIRE_DIR/$RAPIDFIRE_MODE/dispatcher"

RF_PYTHON_EXECUTABLE=${RF_PYTHON_EXECUTABLE:-python3}
RF_PIP_EXECUTABLE=${RF_PIP_EXECUTABLE:-pip3}

if ! command -v $RF_PYTHON_EXECUTABLE &> /dev/null; then
RF_PYTHON_EXECUTABLE=python
fi

if ! command -v $RF_PIP_EXECUTABLE &> /dev/null; then
RF_PIP_EXECUTABLE=pip
fi


# Function to print colored output
print_status() {
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
Expand All @@ -116,6 +120,17 @@ has_rapidfireai_pro() {
${RF_PIP_EXECUTABLE} show rapidfireai-pro >/dev/null 2>&1
}

# Any Converge mode other than "none" needs both MLflow and TensorBoard, so each
# one that is not already enabled is switched on here with a status notice.
case "$RF_CONVERGE_MODE" in
  none)
    ;;
  *)
    if [[ "$RF_MLFLOW_ENABLED" != "true" ]]; then
      print_status "MLflow is not enabled, Converge requires MLflow, enabling MLflow"
      RF_MLFLOW_ENABLED="true"
    fi
    if [[ "$RF_TENSORBOARD_ENABLED" != "true" ]]; then
      print_status "TensorBoard is not enabled, Converge requires TensorBoard, enabling TensorBoard"
      RF_TENSORBOARD_ENABLED="true"
    fi
    ;;
esac

# Function to setup Python environment
setup_python_env() {
print_status "Setting up Python environment..."
Expand Down Expand Up @@ -680,6 +695,10 @@ start_frontend_if_needed() {
print_status "⊗ Skipping frontend (using TensorBoard in Colab mode)"
return 0
fi
if [[ "$RF_START_FRONTEND" != "true" ]]; then
print_status "⊗ Skipping frontend (RF_START_FRONTEND=false or --no-frontend)"
return 0
fi
Comment thread
cursor[bot] marked this conversation as resolved.
Comment thread
david-rfai marked this conversation as resolved.

# Otherwise start frontend
start_frontend
Expand Down Expand Up @@ -722,15 +741,15 @@ show_status() {
print_status " %tensorboard --logdir $RF_HOME/rapidfire_experiments/tensorboard_logs/{experiment_name}"
fi
else
# if [[ "$rf_mode" == "fit" ]]; then
if ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
print_success "🚀 RapidFire Frontend is ready!"
print_status "👉 Open your browser and navigate to: http://$RF_FRONTEND_HOST:$RF_FRONTEND_PORT"
print_status " (Click the link above or copy/paste the URL into your browser)"
else
print_error "🚨 RapidFire Frontend is not ready!"
fi
# fi
if [[ "$RF_START_FRONTEND" != "true" ]]; then
print_status "⊗ Frontend not started (RF_START_FRONTEND=false or rapidfireai start --no-frontend)"
elif ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
print_success "🚀 RapidFire Frontend is ready!"
print_status "👉 Open your browser and navigate to: http://$RF_FRONTEND_HOST:$RF_FRONTEND_PORT"
print_status " (Click the link above or copy/paste the URL into your browser)"
else
print_error "🚨 RapidFire Frontend is not ready!"
fi
fi
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
if ping_port $RF_MLFLOW_HOST $RF_MLFLOW_PORT; then
Expand Down Expand Up @@ -775,14 +794,41 @@ show_status() {
fi

# Only check frontend.log if frontend is running
if [[ "$RF_COLAB_MODE" != "true" ]]; then
if [[ "$RF_COLAB_MODE" != "true" ]] && [[ "$RF_CONVERGE_MODE" == "none" ]] && [[ "$RF_START_FRONTEND" == "true" ]]; then
if [[ -f "$RF_LOG_PATH/frontend.log" ]]; then
local size=$(du -h "$RF_LOG_PATH/frontend.log" | cut -f1)
print_status "- $RF_LOG_PATH/frontend.log: $size"
else
print_warning "- $RF_LOG_PATH/frontend.log: not found"
fi
fi

if [[ "$RF_CONVERGE_MODE" != "none" ]]; then
if [[ -f "$RF_LOG_PATH/converge.log" ]]; then
local size=$(du -h "$RF_LOG_PATH/converge.log" | cut -f1)
print_status "- $RF_LOG_PATH/converge.log: $size"
else
print_warning "- $RF_LOG_PATH/converge.log: not found"
fi
fi

# Report converge_frontend.log only when a Converge frontend is expected to run:
# the frontend is enabled and the Converge mode is "all" or "frontend".
# The previous test used `!= "frontend"`, which matched none/backend/all and
# skipped the check in frontend-only mode.
if [[ "$RF_START_FRONTEND" == "true" ]] && { [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "frontend" ]]; }; then
  if [[ -f "$RF_LOG_PATH/converge_frontend.log" ]]; then
    # Human-readable size: first field of `du -h` output.
    local size=$(du -h "$RF_LOG_PATH/converge_frontend.log" | cut -f1)
    print_status "- $RF_LOG_PATH/converge_frontend.log: $size"
  else
    print_warning "- $RF_LOG_PATH/converge_frontend.log: not found"
  fi
fi

# Report converge_backend.log only when a Converge backend is expected to run,
# i.e. the Converge mode is "all" or "backend".
# The previous test used `!= "backend"`, which matched none/frontend/all and
# skipped the check in backend-only mode.
if [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "backend" ]]; then
  if [[ -f "$RF_LOG_PATH/converge_backend.log" ]]; then
    # Human-readable size: first field of `du -h` output.
    local size=$(du -h "$RF_LOG_PATH/converge_backend.log" | cut -f1)
    print_status "- $RF_LOG_PATH/converge_backend.log: $size"
  else
    print_warning "- $RF_LOG_PATH/converge_backend.log: not found"
  fi
fi
}

# Function to start services based on mode
Expand All @@ -792,10 +838,20 @@ start_services() {

# Calculate total services based on mode
# MLflow runs unless tensorboard-only in Colab
# Frontend runs if MLflow runs
# Third service: UI (classic frontend, full Converge, or Converge backend-only when --no-frontend)
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
((total_services++))
((total_services++))
local ui_slot=0
if [[ "$RF_START_FRONTEND" == "true" ]]; then
ui_slot=1
elif [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "backend" ]]; then
if has_rapidfireai_pro; then
ui_slot=1
fi
fi
if [[ "$ui_slot" -eq 1 ]]; then
((total_services++))
fi
fi

if [[ ! -d "$RF_LOG_PATH" ]]; then
Expand Down Expand Up @@ -825,34 +881,65 @@ start_services() {

# Start frontend server (conditionally)
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
case "$RF_CONVERGE_MODE" in
none)
if start_frontend; then
((services_started++))
else
print_error "Failed to start frontend server"
fi
;;
backend|frontend|all)
if has_rapidfireai_pro; then
if start_converge; then
if [[ "$RF_START_FRONTEND" != "true" ]]; then
print_status "⊗ Skipping frontend (RF_START_FRONTEND=false or --no-frontend)"
case "$RF_CONVERGE_MODE" in
none)
;;
all)
if has_rapidfireai_pro; then
if start_converge backend; then
((services_started++))
else
print_error "Failed to start Converge backend"
fi
fi
;;
backend)
if has_rapidfireai_pro; then
if start_converge backend; then
((services_started++))
else
print_error "Failed to start Converge backend"
fi
else
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
fi
;;
frontend)
print_status "⊗ Skipping Converge frontend (--no-frontend)"
;;
esac
else
case "$RF_CONVERGE_MODE" in
none)
if start_frontend; then
((services_started++))
else
print_error "Failed to start Converge"
print_error "Failed to start frontend server"
fi
else
if [[ "$RF_CONVERGE_MODE" == "all" ]]; then
if start_frontend; then
;;
backend|frontend|all)
if has_rapidfireai_pro; then
if start_converge; then
((services_started++))
else
print_error "Failed to start frontend server"
print_error "Failed to start Converge"
fi
else
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
if [[ "$RF_CONVERGE_MODE" == "all" ]]; then
if start_frontend; then
((services_started++))
else
print_error "Failed to start frontend server"
fi
else
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
fi
fi
fi
;;
esac
;;
esac
fi
else
print_status "⊗ Skipping frontend (use TensorBoard if in Colab mode)"
fi
Expand Down Expand Up @@ -915,7 +1002,7 @@ main() {

# Show summary of all log files for debugging
print_status "=== Startup Failure Summary ==="
for log_file in "mlflow.log" "api.log" "frontend.log" "converge.log"; do
for log_file in "mlflow.log" "api.log" "frontend.log" "converge.log" "converge_frontend.log" "converge_backend.log"; do
if [[ -f "$RF_LOG_PATH/$log_file" ]]; then
echo ""
print_status "=== $log_file ==="
Expand Down