Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# ==============================================================================
# FinanceGPT All-in-One Configuration
# ==============================================================================
# Copy this file to .env and customize as needed.
# Most settings have sensible defaults - you only need to set what you want to change.
#
# Quick start: Just run ./run.sh without any .env file!

# ==============================================================================
# AUTHENTICATION (Required for production)
# ==============================================================================

# JWT Secret Key - Auto-generated if not set, but set this for production!
# Generate with: openssl rand -hex 32
SECRET_KEY=

# Auth Type: LOCAL (email/password) or GOOGLE (OAuth)
AUTH_TYPE=LOCAL

# Google OAuth (only if AUTH_TYPE=GOOGLE)
# GOOGLE_OAUTH_CLIENT_ID=
# GOOGLE_OAUTH_CLIENT_SECRET=

# Allow new user registration
REGISTRATION_ENABLED=TRUE

# ==============================================================================
# FINANCIAL DATA - PLAID (Optional)
# ==============================================================================
# Connect bank/brokerage accounts. Get keys from: https://dashboard.plaid.com/team/keys

# PLAID_CLIENT_ID=
# PLAID_SECRET=
# PLAID_ENV=sandbox

# ==============================================================================
# AI/ML CONFIGURATION
# ==============================================================================

# Embedding model for semantic search
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Rerankers for improved search (requires additional setup)
RERANKERS_ENABLED=FALSE
# RERANKERS_MODEL_NAME=ms-marco-MiniLM-L-12-v2
# RERANKERS_MODEL_TYPE=flashrank

# ==============================================================================
# DOCUMENT PROCESSING
# ==============================================================================

# Parser: DOCLING (local, default) | UNSTRUCTURED (API) | LLAMACLOUD (API)
ETL_SERVICE=DOCLING

# API keys (only if using cloud services)
# UNSTRUCTURED_API_KEY=
# LLAMA_CLOUD_API_KEY=

# ==============================================================================
# VOICE SERVICES (Optional - for podcasts)
# ==============================================================================

# Text-to-Speech: local/kokoro (default) or cloud provider
TTS_SERVICE=local/kokoro
# TTS_SERVICE_API_KEY=

# Speech-to-Text: local/base, local/small, local/medium, local/large
STT_SERVICE=local/base
# STT_SERVICE_API_KEY=

# ==============================================================================
# WEB CRAWLING (Optional)
# ==============================================================================

# Firecrawl for advanced web scraping
# FIRECRAWL_API_KEY=

# ==============================================================================
# OBSERVABILITY (Optional)
# ==============================================================================

# LangSmith for LLM tracing and debugging
# (LANGSMITH_ENDPOINT is also passed through to the container if you set it)
# LANGSMITH_TRACING=false
# LANGSMITH_API_KEY=
# LANGSMITH_PROJECT=financegpt

# ==============================================================================
# ADVANCED (Usually don't need to change)
# ==============================================================================

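# Host-side ports published by Docker (container ports are fixed at 3000 and 8000).
# ELECTRIC_PORT (default 5133) can be overridden the same way.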
FRONTEND_PORT=3000
BACKEND_PORT=8000

# Task scheduler interval
SCHEDULE_CHECKER_INTERVAL=5m

# Max pages per user (0 = unlimited).
# If unset, docker-compose.quickstart.yml falls back to 999999999.
# PAGES_LIMIT=500
16 changes: 16 additions & 0 deletions .github/workflows/docker_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ jobs:
build_amd64:
runs-on: ubuntu-latest
needs: tag_release
timeout-minutes: 90
permissions:
packages: write
contents: read
Expand All @@ -106,6 +107,10 @@ jobs:

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host

- name: Free up disk space
run: |
Expand All @@ -114,6 +119,7 @@ jobs:
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
docker system prune -af
df -h

- name: Build and push AMD64 image
id: build
Expand All @@ -127,11 +133,14 @@ jobs:
cache-from: type=gha,scope=amd64
cache-to: type=gha,mode=max,scope=amd64
provenance: false
build-args: |
BUILDKIT_INLINE_CACHE=1

# Build for ARM64 on native arm64 runner (no QEMU emulation!)
build_arm64:
runs-on: ubuntu-24.04-arm
needs: tag_release
timeout-minutes: 120
permissions:
packages: write
contents: read
Expand All @@ -156,6 +165,10 @@ jobs:

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host

- name: Free up disk space
run: |
Expand All @@ -164,6 +177,7 @@ jobs:
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
docker system prune -af
df -h

- name: Build and push ARM64 image
id: build
Expand All @@ -177,6 +191,8 @@ jobs:
cache-from: type=gha,scope=arm64
cache-to: type=gha,mode=max,scope=arm64
provenance: false
build-args: |
BUILDKIT_INLINE_CACHE=1

# Create multi-arch manifest combining both platform images
create_manifest:
Expand Down
38 changes: 24 additions & 14 deletions Dockerfile.allinone
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,24 @@ WORKDIR /app
# Install pnpm
RUN corepack enable pnpm

# Copy package files
# Copy package files first for better caching
COPY financegpt_web/package.json financegpt_web/pnpm-lock.yaml* ./

# Install dependencies in a separate layer (most cacheable).
# Raise pnpm's per-request timeout so slow networks don't abort the install.
# The pnpm setting is `fetch-timeout` (milliseconds); `network-timeout` is a
# Yarn option that pnpm does not read, so setting it had no effect.
RUN pnpm config set fetch-timeout 300000 \
    && pnpm install --frozen-lockfile --ignore-scripts

# Copy config files needed for postinstall
COPY financegpt_web/source.config.ts ./
COPY financegpt_web/content ./content

# Install dependencies (skip postinstall which requires all source files)
RUN pnpm install --frozen-lockfile --ignore-scripts
# Run fumadocs-mdx postinstall
RUN pnpm fumadocs-mdx

# Copy source
# Copy source (after dependencies are cached)
COPY financegpt_web/ ./

# Run fumadocs-mdx postinstall now that source files are available
RUN pnpm fumadocs-mdx

# Build with localhost URLs (all services run in same container)
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
Expand Down Expand Up @@ -184,15 +188,17 @@ COPY --from=electric-builder /app /app/electric-release
# ====================
WORKDIR /app/backend

# Copy backend dependency files
# Copy backend dependency files first (for better caching)
COPY financegpt_backend/pyproject.toml financegpt_backend/uv.lock ./

# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed)
# Install PyTorch CPU-only first (large layer, good to cache separately)
RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu

# Install python dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv \
&& uv pip install --system --no-cache-dir -e .
# Install uv and base dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv

# Install python dependencies (separate layer for caching)
RUN uv pip install --system --no-cache-dir -e .

# Set SSL environment variables
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
Expand All @@ -202,12 +208,12 @@ RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py
# GPU support will be added in a future :cuda tagged image

# Install Playwright browsers
# Install Playwright browsers (separate layer)
RUN pip install --no-cache-dir playwright \
&& playwright install chromium \
&& rm -rf /root/.cache/ms-playwright/ffmpeg*

# Copy backend source
# Copy backend source last (changes most frequently)
COPY financegpt_backend/ ./

# ====================
Expand All @@ -226,6 +232,10 @@ RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh

# Electric SQL initialization script (same as used in local docker-compose)
COPY scripts/docker/init-electric-user.sh /app/init-electric-user.sh
RUN dos2unix /app/init-electric-user.sh && chmod +x /app/init-electric-user.sh

# Clean up build dependencies to reduce image size
RUN apt-get purge -y build-essential postgresql-server-dev-14 git \
&& apt-get autoremove -y \
Expand Down
56 changes: 27 additions & 29 deletions docker-compose.quickstart.yml
Original file line number Diff line number Diff line change
@@ -1,74 +1,72 @@
# FinanceGPT Quick Start Docker Compose
#
# This is a simplified docker-compose for quick local deployment using pre-built images.
# For production or customized deployments, use the main docker-compose.yml
#
# Usage:
# 1. (Optional) Create a .env file with your configuration
# 2. Run: docker compose -f docker-compose.quickstart.yml up -d
# 3. Access FinanceGPT at http://localhost:3000
# ./run.sh # Easiest way - uses this file automatically
# ./run.sh start # Start FinanceGPT
# ./run.sh logs # View logs
# ./run.sh stop # Stop FinanceGPT
#
# All Environment Variables are Optional:
# - SECRET_KEY: JWT secret key (auto-generated and persisted if not set)
# - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2)
# - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING)
# - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro)
# - STT_SERVICE: Speech-to-text service with model size (default: local/base)
# - FIRECRAWL_API_KEY: For web crawling features

version: "3.8"
# Or manually:
# docker compose -f docker-compose.quickstart.yml up -d
#
# Configuration:
# Copy .env.example to .env and customize as needed.
# All settings have sensible defaults - no .env required for basic usage.

services:
# All-in-one FinanceGPT container
financegpt:
image: ghcr.io/manojag115/financegpt:latest
container_name: financegpt
ports:
- "${FRONTEND_PORT:-3000}:3000"
- "${BACKEND_PORT:-8000}:8000"
- "${ELECTRIC_PORT:-5133}:5133"
volumes:
- financegpt-data:/data
environment:
# Authentication (auto-generated if not set)
# === Authentication ===
- SECRET_KEY=${SECRET_KEY:-}

# Auth Configuration
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
- REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}

# AI/ML Configuration
# === Financial Data (Plaid) ===
- PLAID_CLIENT_ID=${PLAID_CLIENT_ID:-}
- PLAID_SECRET=${PLAID_SECRET:-}
- PLAID_ENV=${PLAID_ENV:-sandbox}

# === AI/ML ===
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
- RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE}
- RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-}
- RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-}

# Document Processing
# === Document Processing ===
- ETL_SERVICE=${ETL_SERVICE:-DOCLING}
- UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
- LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-}
- PAGES_LIMIT=${PAGES_LIMIT:-999999999}

# Audio Services
# === Voice Services ===
- TTS_SERVICE=${TTS_SERVICE:-local/kokoro}
- TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-}
- STT_SERVICE=${STT_SERVICE:-local/base}
- STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-}

# Web Crawling
# === Web Crawling ===
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-}

# Optional Features
- REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
- SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m}
# === Scheduler ===
- SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-5m}

# LangSmith Observability (optional)
# === Observability (Optional) ===
- LANGSMITH_TRACING=${LANGSMITH_TRACING:-false}
- LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-}
- LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-}
- LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-}
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000", "&&", "curl", "-f", "http://localhost:8000/docs"]
test: ["CMD", "curl", "-f", "http://localhost:3000"]
interval: 30s
timeout: 10s
retries: 3
Expand Down
Loading
Loading