diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..398abd6
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,99 @@
+# ==============================================================================
+# FinanceGPT All-in-One Configuration
+# ==============================================================================
+# Copy this file to .env and customize as needed.
+# Most settings have sensible defaults - you only need to set what you want to change.
+#
+# Quick start: Just run ./run.sh without any .env file!
+
+# ==============================================================================
+# AUTHENTICATION (Required for production)
+# ==============================================================================
+
+# JWT Secret Key - Auto-generated if not set, but set this for production!
+# Generate with: openssl rand -hex 32
+SECRET_KEY=
+
+# Auth Type: LOCAL (email/password) or GOOGLE (OAuth)
+AUTH_TYPE=LOCAL
+
+# Google OAuth (only if AUTH_TYPE=GOOGLE)
+# GOOGLE_OAUTH_CLIENT_ID=
+# GOOGLE_OAUTH_CLIENT_SECRET=
+
+# Allow new user registration
+REGISTRATION_ENABLED=TRUE
+
+# ==============================================================================
+# FINANCIAL DATA - PLAID (Optional)
+# ==============================================================================
+# Connect bank/brokerage accounts. Get keys from: https://dashboard.plaid.com/team/keys
+
+# PLAID_CLIENT_ID=
+# PLAID_SECRET=
+# PLAID_ENV=sandbox
+
+# ==============================================================================
+# AI/ML CONFIGURATION
+# ==============================================================================
+
+# Embedding model for semantic search
+EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+
+# Rerankers for improved search (requires additional setup)
+RERANKERS_ENABLED=FALSE
+# RERANKERS_MODEL_NAME=ms-marco-MiniLM-L-12-v2
+# RERANKERS_MODEL_TYPE=flashrank
+
+# ==============================================================================
+# DOCUMENT PROCESSING
+# ==============================================================================
+
+# Parser: DOCLING (local, default) | UNSTRUCTURED (API) | LLAMACLOUD (API)
+ETL_SERVICE=DOCLING
+
+# API keys (only if using cloud services)
+# UNSTRUCTURED_API_KEY=
+# LLAMA_CLOUD_API_KEY=
+
+# ==============================================================================
+# VOICE SERVICES (Optional - for podcasts)
+# ==============================================================================
+
+# Text-to-Speech: local/kokoro (default) or cloud provider
+TTS_SERVICE=local/kokoro
+# TTS_SERVICE_API_KEY=
+
+# Speech-to-Text: local/base, local/small, local/medium, local/large
+STT_SERVICE=local/base
+# STT_SERVICE_API_KEY=
+
+# ==============================================================================
+# WEB CRAWLING (Optional)
+# ==============================================================================
+
+# Firecrawl for advanced web scraping
+# FIRECRAWL_API_KEY=
+
+# ==============================================================================
+# OBSERVABILITY (Optional)
+# ==============================================================================
+
+# LangSmith for LLM tracing and debugging
+# LANGSMITH_TRACING=false
+# LANGSMITH_API_KEY=
+# LANGSMITH_PROJECT=financegpt
+
+# ==============================================================================
+# ADVANCED (Usually don't need to change)
+# ==============================================================================
+
+# Host ports to publish (frontend maps to container port 3000, backend to container port 8000)
+FRONTEND_PORT=3000
+BACKEND_PORT=8000
+
+# Task scheduler interval
+SCHEDULE_CHECKER_INTERVAL=5m
+
+# Max pages per user (0 = unlimited); docker-compose.quickstart.yml falls back to 999999999 when this is unset
+# PAGES_LIMIT=500
diff --git a/.github/workflows/docker_build.yaml b/.github/workflows/docker_build.yaml
index 44aec4d..b648fcd 100644
--- a/.github/workflows/docker_build.yaml
+++ b/.github/workflows/docker_build.yaml
@@ -82,6 +82,7 @@ jobs:
   build_amd64:
     runs-on: ubuntu-latest
     needs: tag_release
+    timeout-minutes: 90
     permissions:
       packages: write
       contents: read
@@ -106,6 +107,10 @@ jobs:
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
 
       - name: Free up disk space
         run: |
@@ -114,6 +119,7 @@ jobs:
           sudo rm -rf /usr/local/share/boost
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
           docker system prune -af
+          df -h
 
       - name: Build and push AMD64 image
         id: build
@@ -127,11 +133,14 @@ jobs:
           cache-from: type=gha,scope=amd64
           cache-to: type=gha,mode=max,scope=amd64
           provenance: false
+          build-args: |
+            BUILDKIT_INLINE_CACHE=1
 
   # Build for ARM64 on native arm64 runner (no QEMU emulation!)
   build_arm64:
     runs-on: ubuntu-24.04-arm
     needs: tag_release
+    timeout-minutes: 120
     permissions:
       packages: write
       contents: read
@@ -156,6 +165,10 @@ jobs:
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
 
       - name: Free up disk space
         run: |
@@ -164,6 +177,7 @@ jobs:
           sudo rm -rf /usr/local/share/boost
           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
           docker system prune -af
+          df -h
 
       - name: Build and push ARM64 image
         id: build
@@ -177,6 +191,8 @@ jobs:
           cache-from: type=gha,scope=arm64
           cache-to: type=gha,mode=max,scope=arm64
           provenance: false
+          build-args: |
+            BUILDKIT_INLINE_CACHE=1
 
   # Create multi-arch manifest combining both platform images
   create_manifest:
diff --git a/Dockerfile.allinone b/Dockerfile.allinone
index 1f1e451..bdf2b10 100644
--- a/Dockerfile.allinone
+++ b/Dockerfile.allinone
@@ -29,20 +29,24 @@ WORKDIR /app
 # Install pnpm
 RUN corepack enable pnpm
 
-# Copy package files
+# Copy package files first for better caching
 COPY financegpt_web/package.json financegpt_web/pnpm-lock.yaml* ./
+
+# Install dependencies in a separate layer (most cacheable)
+# Raise pnpm's fetch timeout to 5 min for slow networks (pnpm's key is fetch-timeout; network-timeout is Yarn's name)
+RUN pnpm config set fetch-timeout 300000 \
+    && pnpm install --frozen-lockfile --ignore-scripts
+
+# Copy config files needed for postinstall
 COPY financegpt_web/source.config.ts ./
 COPY financegpt_web/content ./content
 
-# Install dependencies (skip postinstall which requires all source files)
-RUN pnpm install --frozen-lockfile --ignore-scripts
+# Run fumadocs-mdx postinstall
+RUN pnpm fumadocs-mdx
 
-# Copy source
+# Copy source (after dependencies are cached)
 COPY financegpt_web/ ./
 
-# Run fumadocs-mdx postinstall now that source files are available
-RUN pnpm fumadocs-mdx
-
 # Build with localhost URLs (all services run in same container)
 ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
 ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
@@ -184,15 +188,17 @@ COPY --from=electric-builder /app /app/electric-release
 # ====================
 WORKDIR /app/backend
 
-# Copy backend dependency files
+# Copy backend dependency files first (for better caching)
 COPY financegpt_backend/pyproject.toml financegpt_backend/uv.lock ./
 
-# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed)
+# Install PyTorch CPU-only first (Docling needs it; OCR is disabled, so no GPU build is required). Large layer, so cache it separately
 RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
 
-# Install python dependencies
-RUN pip install --no-cache-dir certifi pip-system-certs uv \
-    && uv pip install --system --no-cache-dir -e .
+# Install uv and base dependencies
+RUN pip install --no-cache-dir certifi pip-system-certs uv
+
+# Install python dependencies (separate layer for caching)
+RUN uv pip install --system --no-cache-dir -e .
 
 # Set SSL environment variables
 RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
@@ -202,12 +208,12 @@ RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
 # Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py
 # GPU support will be added in a future :cuda tagged image
 
-# Install Playwright browsers
+# Install Playwright browsers (separate layer)
 RUN pip install --no-cache-dir playwright \
     && playwright install chromium \
     && rm -rf /root/.cache/ms-playwright/ffmpeg* 
 
-# Copy backend source
+# Copy backend source last (changes most frequently)
 COPY financegpt_backend/ ./
 
 # ====================
@@ -226,6 +232,10 @@ RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh
 COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
 RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh
 
+# Electric SQL initialization script (same as used in local docker-compose)
+COPY scripts/docker/init-electric-user.sh /app/init-electric-user.sh
+RUN dos2unix /app/init-electric-user.sh && chmod +x /app/init-electric-user.sh
+
 # Clean up build dependencies to reduce image size
 RUN apt-get purge -y build-essential postgresql-server-dev-14 git \
     && apt-get autoremove -y \
diff --git a/docker-compose.quickstart.yml b/docker-compose.quickstart.yml
index bd838ee..e39a80b 100644
--- a/docker-compose.quickstart.yml
+++ b/docker-compose.quickstart.yml
@@ -1,74 +1,72 @@
 # FinanceGPT Quick Start Docker Compose
 # 
-# This is a simplified docker-compose for quick local deployment using pre-built images.
-# For production or customized deployments, use the main docker-compose.yml
-#
 # Usage:
-#   1. (Optional) Create a .env file with your configuration
-#   2. Run: docker compose -f docker-compose.quickstart.yml up -d
-#   3. Access FinanceGPT at http://localhost:3000
+#   ./run.sh              # Easiest way - uses this file automatically
+#   ./run.sh start        # Start FinanceGPT
+#   ./run.sh logs         # View logs
+#   ./run.sh stop         # Stop FinanceGPT
 #
-# All Environment Variables are Optional:
-#   - SECRET_KEY: JWT secret key (auto-generated and persisted if not set)
-#   - EMBEDDING_MODEL: Embedding model to use (default: sentence-transformers/all-MiniLM-L6-v2)
-#   - ETL_SERVICE: Document parsing service - DOCLING, UNSTRUCTURED, or LLAMACLOUD (default: DOCLING)
-#   - TTS_SERVICE: Text-to-speech service for podcasts (default: local/kokoro)
-#   - STT_SERVICE: Speech-to-text service with model size (default: local/base)
-#   - FIRECRAWL_API_KEY: For web crawling features
-
-version: "3.8"
+# Or manually:
+#   docker compose -f docker-compose.quickstart.yml up -d
+#
+# Configuration:
+#   Copy .env.example to .env and customize as needed.
+#   All settings have sensible defaults - no .env required for basic usage.
 
 services:
-  # All-in-one FinanceGPT container
   financegpt:
     image: ghcr.io/manojag115/financegpt:latest
     container_name: financegpt
     ports:
       - "${FRONTEND_PORT:-3000}:3000"
       - "${BACKEND_PORT:-8000}:8000"
+      - "${ELECTRIC_PORT:-5133}:5133"
     volumes:
       - financegpt-data:/data
     environment:
-      # Authentication (auto-generated if not set)
+      # === Authentication ===
       - SECRET_KEY=${SECRET_KEY:-}
-      
-      # Auth Configuration
       - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
+      - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
       - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
       - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
       
-      # AI/ML Configuration
+      # === Financial Data (Plaid) ===
+      - PLAID_CLIENT_ID=${PLAID_CLIENT_ID:-}
+      - PLAID_SECRET=${PLAID_SECRET:-}
+      - PLAID_ENV=${PLAID_ENV:-sandbox}
+      
+      # === AI/ML ===
       - EMBEDDING_MODEL=${EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
       - RERANKERS_ENABLED=${RERANKERS_ENABLED:-FALSE}
       - RERANKERS_MODEL_NAME=${RERANKERS_MODEL_NAME:-}
       - RERANKERS_MODEL_TYPE=${RERANKERS_MODEL_TYPE:-}
       
-      # Document Processing
+      # === Document Processing ===
       - ETL_SERVICE=${ETL_SERVICE:-DOCLING}
       - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
       - LLAMA_CLOUD_API_KEY=${LLAMA_CLOUD_API_KEY:-}
+      - PAGES_LIMIT=${PAGES_LIMIT:-999999999}
       
-      # Audio Services
+      # === Voice Services ===
       - TTS_SERVICE=${TTS_SERVICE:-local/kokoro}
       - TTS_SERVICE_API_KEY=${TTS_SERVICE_API_KEY:-}
       - STT_SERVICE=${STT_SERVICE:-local/base}
       - STT_SERVICE_API_KEY=${STT_SERVICE_API_KEY:-}
       
-      # Web Crawling
+      # === Web Crawling ===
       - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY:-}
       
-      # Optional Features
-      - REGISTRATION_ENABLED=${REGISTRATION_ENABLED:-TRUE}
-      - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-1m}
+      # === Scheduler ===
+      - SCHEDULE_CHECKER_INTERVAL=${SCHEDULE_CHECKER_INTERVAL:-5m}
       
-      # LangSmith Observability (optional)
+      # === Observability (Optional) ===
       - LANGSMITH_TRACING=${LANGSMITH_TRACING:-false}
-      - LANGSMITH_ENDPOINT=${LANGSMITH_ENDPOINT:-}
       - LANGSMITH_API_KEY=${LANGSMITH_API_KEY:-}
       - LANGSMITH_PROJECT=${LANGSMITH_PROJECT:-}
     restart: unless-stopped
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:3000", "&&", "curl", "-f", "http://localhost:8000/docs"]
+      test: ["CMD", "curl", "-f", "http://localhost:3000"]
       interval: 30s
       timeout: 10s
       retries: 3
diff --git a/financegpt_backend/alembic/versions/2_add_tax_forms_tables.py b/financegpt_backend/alembic/versions/2_add_tax_forms_tables.py
new file mode 100644
index 0000000..791bb84
--- /dev/null
+++ b/financegpt_backend/alembic/versions/2_add_tax_forms_tables.py
@@ -0,0 +1,251 @@
+"""add_tax_forms_tables
+
+Revision ID: 2
+Revises: 1
+Create Date: 2026-01-30 00:00:00.000000
+
+"""
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import UUID, JSONB
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = '2'
+down_revision: Union[str, None] = '1'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create tax form tables."""
+    
+    # Base tax forms table: one row per uploaded form; the per-form-type tables below reference it via tax_form_id
+    op.create_table(
+        'tax_forms',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('user_id', UUID(as_uuid=True), sa.ForeignKey('user.id', ondelete='CASCADE'), nullable=False),
+        sa.Column('search_space_id', sa.Integer, sa.ForeignKey('searchspaces.id', ondelete='CASCADE'), nullable=False),
+        sa.Column('form_type', sa.String(20), nullable=False),  # W2, 1099-MISC, 1099-INT, etc.
+        sa.Column('tax_year', sa.Integer, nullable=False),
+        sa.Column('document_id', sa.Integer, sa.ForeignKey('documents.id', ondelete='SET NULL'), nullable=True),
+        sa.Column('uploaded_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('processed_at', sa.TIMESTAMP(timezone=True), nullable=True),
+        sa.Column('processing_status', sa.String(20), server_default='pending', nullable=False),  # pending, processing, completed, failed, needs_review
+        sa.Column('extraction_method', sa.String(50), nullable=True),  # structured_pdf, unstructured, ocr, llm_assisted
+        sa.Column('confidence_score', sa.Numeric(3, 2), nullable=True),  # 0.00 to 1.00
+        sa.Column('needs_review', sa.Boolean, server_default='false', nullable=False),
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),  # NOTE(review): onupdate is applied by SQLAlchemy on UPDATE, not emitted as DDL - confirm the ORM models set it too
+    )
+    op.create_index('ix_tax_forms_user_id', 'tax_forms', ['user_id'])
+    op.create_index('ix_tax_forms_tax_year', 'tax_forms', ['tax_year'])
+    op.create_index('ix_tax_forms_form_type', 'tax_forms', ['form_type'])
+    op.create_index('ix_tax_forms_search_space_id', 'tax_forms', ['search_space_id'])
+    
+    # W2 forms table
+    op.create_table(
+        'w2_forms',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('tax_form_id', UUID(as_uuid=True), sa.ForeignKey('tax_forms.id', ondelete='CASCADE'), nullable=False, unique=True),  # unique=True: at most one W2 row per tax_forms row
+        
+        # Employer Information (EIN is stored as a hash; NOTE(review): confirm whether name/address are masked before storage)
+        sa.Column('employer_name', sa.String(255), nullable=True),
+        sa.Column('employer_ein_hash', sa.String(64), nullable=True),  # SHA256 hashed
+        sa.Column('employer_address', sa.Text, nullable=True),
+        
+        # Employee Information (SSN stored as a hash, name stored as a masked placeholder)
+        sa.Column('employee_ssn_hash', sa.String(64), nullable=True),  # SHA256 hashed, never plain text
+        sa.Column('employee_name_masked', sa.String(255), nullable=True),  # [EMPLOYEE_NAME] for UI
+        
+        # Wage Information - Box 1-8
+        sa.Column('wages_tips_compensation', sa.Numeric(12, 2), nullable=True),  # Box 1
+        sa.Column('federal_income_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 2
+        sa.Column('social_security_wages', sa.Numeric(12, 2), nullable=True),  # Box 3
+        sa.Column('social_security_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 4
+        sa.Column('medicare_wages', sa.Numeric(12, 2), nullable=True),  # Box 5
+        sa.Column('medicare_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 6
+        sa.Column('social_security_tips', sa.Numeric(12, 2), nullable=True),  # Box 7
+        sa.Column('allocated_tips', sa.Numeric(12, 2), nullable=True),  # Box 8
+        
+        # Other Compensation - Box 10-11
+        sa.Column('dependent_care_benefits', sa.Numeric(12, 2), nullable=True),  # Box 10
+        sa.Column('nonqualified_plans', sa.Numeric(12, 2), nullable=True),  # Box 11
+        
+        # Box 12 codes (multiple entries)
+        sa.Column('box_12_codes', JSONB, nullable=True),  # [{code: 'D', amount: 5000.00}, ...]
+        
+        # Box 13 checkboxes
+        sa.Column('statutory_employee', sa.Boolean, server_default='false', nullable=False),
+        sa.Column('retirement_plan', sa.Boolean, server_default='false', nullable=False),
+        sa.Column('third_party_sick_pay', sa.Boolean, server_default='false', nullable=False),
+        
+        # State/Local Tax - Box 15-20
+        sa.Column('state_code', sa.String(2), nullable=True),  # Box 15
+        sa.Column('state_wages', sa.Numeric(12, 2), nullable=True),  # Box 16
+        sa.Column('state_income_tax', sa.Numeric(12, 2), nullable=True),  # Box 17
+        sa.Column('local_wages', sa.Numeric(12, 2), nullable=True),  # Box 18
+        sa.Column('local_income_tax', sa.Numeric(12, 2), nullable=True),  # Box 19
+        sa.Column('locality_name', sa.String(100), nullable=True),  # Box 20
+        
+        # Field-level confidence scores
+        sa.Column('field_confidence_scores', JSONB, nullable=True),  # {wages: 0.95, federal_tax: 0.88, ...}
+        
+        # Raw OCR/extraction data (for debugging/re-processing)
+        sa.Column('raw_extraction_data', JSONB, nullable=True),
+        
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_w2_forms_tax_form_id', 'w2_forms', ['tax_form_id'])  # NOTE(review): tax_form_id is already unique=True, which PostgreSQL backs with its own index - this one (and the matching *_tax_form_id indexes below) looks redundant
+    
+    # 1099-MISC forms table
+    op.create_table(
+        'form_1099_misc',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('tax_form_id', UUID(as_uuid=True), sa.ForeignKey('tax_forms.id', ondelete='CASCADE'), nullable=False, unique=True),
+        
+        # Payer Information
+        sa.Column('payer_name', sa.String(255), nullable=True),
+        sa.Column('payer_tin_hash', sa.String(64), nullable=True),
+        sa.Column('payer_address', sa.Text, nullable=True),
+        
+        # Recipient (TIN stored as a hash)
+        sa.Column('recipient_tin_hash', sa.String(64), nullable=True),
+        
+        # Income Boxes
+        sa.Column('rents', sa.Numeric(12, 2), nullable=True),  # Box 1
+        sa.Column('royalties', sa.Numeric(12, 2), nullable=True),  # Box 2
+        sa.Column('other_income', sa.Numeric(12, 2), nullable=True),  # Box 3
+        sa.Column('federal_income_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 4
+        sa.Column('fishing_boat_proceeds', sa.Numeric(12, 2), nullable=True),  # Box 5
+        sa.Column('medical_health_payments', sa.Numeric(12, 2), nullable=True),  # Box 6
+        sa.Column('substitute_payments', sa.Numeric(12, 2), nullable=True),  # Box 8
+        sa.Column('crop_insurance_proceeds', sa.Numeric(12, 2), nullable=True),  # Box 10
+        sa.Column('gross_proceeds_attorney', sa.Numeric(12, 2), nullable=True),  # Box 14
+        sa.Column('section_409a_deferrals', sa.Numeric(12, 2), nullable=True),  # Box 15
+        sa.Column('state_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 16
+        sa.Column('state_payer_number', sa.String(50), nullable=True),
+        sa.Column('state_income', sa.Numeric(12, 2), nullable=True),  # Box 18
+        
+        # Field confidence scores
+        sa.Column('field_confidence_scores', JSONB, nullable=True),
+        sa.Column('raw_extraction_data', JSONB, nullable=True),
+        
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_1099_misc_tax_form_id', 'form_1099_misc', ['tax_form_id'])
+    
+    # 1099-INT (Interest Income) forms table
+    op.create_table(
+        'form_1099_int',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('tax_form_id', UUID(as_uuid=True), sa.ForeignKey('tax_forms.id', ondelete='CASCADE'), nullable=False, unique=True),
+        
+        # Payer Information
+        sa.Column('payer_name', sa.String(255), nullable=True),
+        sa.Column('payer_tin_hash', sa.String(64), nullable=True),
+        
+        # Interest Income
+        sa.Column('interest_income', sa.Numeric(12, 2), nullable=True),  # Box 1
+        sa.Column('early_withdrawal_penalty', sa.Numeric(12, 2), nullable=True),  # Box 2
+        sa.Column('interest_us_savings_bonds', sa.Numeric(12, 2), nullable=True),  # Box 3
+        sa.Column('federal_income_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 4
+        sa.Column('investment_expenses', sa.Numeric(12, 2), nullable=True),  # Box 5
+        sa.Column('foreign_tax_paid', sa.Numeric(12, 2), nullable=True),  # Box 6
+        sa.Column('foreign_country', sa.String(100), nullable=True),  # Box 7
+        sa.Column('tax_exempt_interest', sa.Numeric(12, 2), nullable=True),  # Box 8
+        sa.Column('specified_private_activity_bond_interest', sa.Numeric(12, 2), nullable=True),  # Box 9
+        sa.Column('market_discount', sa.Numeric(12, 2), nullable=True),  # Box 10
+        sa.Column('bond_premium', sa.Numeric(12, 2), nullable=True),  # Box 11
+        sa.Column('bond_premium_treasury', sa.Numeric(12, 2), nullable=True),  # Box 12
+        sa.Column('tax_exempt_bond_premium', sa.Numeric(12, 2), nullable=True),  # Box 13
+        
+        sa.Column('field_confidence_scores', JSONB, nullable=True),
+        sa.Column('raw_extraction_data', JSONB, nullable=True),
+        
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_1099_int_tax_form_id', 'form_1099_int', ['tax_form_id'])
+    
+    # 1099-DIV (Dividends) forms table
+    op.create_table(
+        'form_1099_div',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('tax_form_id', UUID(as_uuid=True), sa.ForeignKey('tax_forms.id', ondelete='CASCADE'), nullable=False, unique=True),
+        
+        # Payer Information
+        sa.Column('payer_name', sa.String(255), nullable=True),
+        sa.Column('payer_tin_hash', sa.String(64), nullable=True),
+        
+        # Dividend Income
+        sa.Column('total_ordinary_dividends', sa.Numeric(12, 2), nullable=True),  # Box 1a
+        sa.Column('qualified_dividends', sa.Numeric(12, 2), nullable=True),  # Box 1b
+        sa.Column('total_capital_gain_distributions', sa.Numeric(12, 2), nullable=True),  # Box 2a
+        sa.Column('unrecaptured_section_1250_gain', sa.Numeric(12, 2), nullable=True),  # Box 2b
+        sa.Column('section_1202_gain', sa.Numeric(12, 2), nullable=True),  # Box 2c
+        sa.Column('collectibles_gain', sa.Numeric(12, 2), nullable=True),  # Box 2d
+        sa.Column('nondividend_distributions', sa.Numeric(12, 2), nullable=True),  # Box 3
+        sa.Column('federal_income_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 4
+        sa.Column('section_199a_dividends', sa.Numeric(12, 2), nullable=True),  # Box 5
+        sa.Column('investment_expenses', sa.Numeric(12, 2), nullable=True),  # Box 6
+        sa.Column('foreign_tax_paid', sa.Numeric(12, 2), nullable=True),  # Box 7
+        sa.Column('foreign_country', sa.String(100), nullable=True),  # Box 8
+        sa.Column('cash_liquidation_distributions', sa.Numeric(12, 2), nullable=True),  # Box 9
+        sa.Column('noncash_liquidation_distributions', sa.Numeric(12, 2), nullable=True),  # Box 10
+        
+        sa.Column('field_confidence_scores', JSONB, nullable=True),
+        sa.Column('raw_extraction_data', JSONB, nullable=True),
+        
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_1099_div_tax_form_id', 'form_1099_div', ['tax_form_id'])
+    
+    # 1099-B (Brokerage Transactions) forms table
+    op.create_table(
+        'form_1099_b',
+        sa.Column('id', UUID(as_uuid=True), primary_key=True, server_default=sa.text('gen_random_uuid()')),
+        sa.Column('tax_form_id', UUID(as_uuid=True), sa.ForeignKey('tax_forms.id', ondelete='CASCADE'), nullable=False, unique=True),
+        
+        # Payer Information
+        sa.Column('payer_name', sa.String(255), nullable=True),
+        sa.Column('payer_tin_hash', sa.String(64), nullable=True),
+        
+        # Transaction Details (NOTE(review): verify the box labels below against the IRS 1099-B layout for the supported tax years)
+        sa.Column('description_of_property', sa.Text, nullable=True),  # Box 1a (stock name, quantity)
+        sa.Column('date_acquired', sa.Date, nullable=True),  # Box 1b
+        sa.Column('date_sold', sa.Date, nullable=True),  # Box 1c
+        sa.Column('proceeds', sa.Numeric(12, 2), nullable=True),  # Box 1d
+        sa.Column('cost_basis', sa.Numeric(12, 2), nullable=True),  # Box 1e
+        sa.Column('adjustments_to_basis', sa.Numeric(12, 2), nullable=True),  # Box 1f
+        sa.Column('realized_gain_loss', sa.Numeric(12, 2), nullable=True),  # Box 1g (calculated)
+        
+        sa.Column('federal_income_tax_withheld', sa.Numeric(12, 2), nullable=True),  # Box 4
+        
+        # Form Characteristics
+        sa.Column('short_term', sa.Boolean, nullable=True),  # Box 2
+        sa.Column('long_term', sa.Boolean, nullable=True),
+        sa.Column('basis_reported_to_irs', sa.Boolean, nullable=True),  # Box 3
+        sa.Column('noncovered_security', sa.Boolean, nullable=True),  # Box 5
+        
+        sa.Column('field_confidence_scores', JSONB, nullable=True),
+        sa.Column('raw_extraction_data', JSONB, nullable=True),
+        
+        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_1099_b_tax_form_id', 'form_1099_b', ['tax_form_id'])
+
+
+def downgrade() -> None:
+    """Drop tax form tables."""
+    op.drop_table('form_1099_b')  # per-form detail tables go first: each holds a foreign key to tax_forms.id
+    op.drop_table('form_1099_div')
+    op.drop_table('form_1099_int')
+    op.drop_table('form_1099_misc')
+    op.drop_table('w2_forms')
+    op.drop_table('tax_forms')  # parent table last, once nothing references it
diff --git a/financegpt_backend/app/agents/new_chat/system_prompt.py b/financegpt_backend/app/agents/new_chat/system_prompt.py
index 4126e46..d96b625 100644
--- a/financegpt_backend/app/agents/new_chat/system_prompt.py
+++ b/financegpt_backend/app/agents/new_chat/system_prompt.py
@@ -23,6 +23,7 @@
 - Plan for major financial goals (retirement, home purchase, education)
 - Understand tax implications and opportunities
 - Make smarter financial decisions with confidence
+- Organize and analyze tax documents (W2s, 1099s) for tax preparation
 
 Today's date (UTC): {resolved_today}
 
@@ -515,6 +516,31 @@
   - IMPORTANT: This tool fetches real-time credit card rewards data from the internet,
     so it works with ANY credit card the user has (no manual configuration needed).
 
+11. analyze_tax_data: Query uploaded and processed tax forms to answer tax questions.
+  - **USE THIS TOOL** when users ask about:
+    * Tax form data: "How much did I earn?", "What were my wages?"
+    * Tax withholdings: "How much federal tax was withheld?"
+    * Interest income: "Did I have interest income?", "1099-INT summary?"
+    * Dividend income: "What dividends did I receive?"
+    * Capital gains: "Stock sale gains/losses?", "1099-B summary?"
+    * W2 employment: "Where did I work?", "Wages by employer?"
+    * Tax year summaries: "2024 tax summary", "Total income 2024"
+  - IMPORTANT: This tool queries ONLY uploaded tax forms (W2, 1099-MISC, 1099-INT, 1099-DIV, 1099-B)
+  - Does NOT calculate estimates or current year projections - only historical data from uploaded forms
+  - Args:
+    - query_type: Type of analysis (required). Options:
+      * "income_summary": Total income across all sources
+      * "tax_summary": Total taxes withheld (federal, state, SS, Medicare)
+      * "interest_income": Interest from 1099-INT forms
+      * "dividends_income": Dividends from 1099-DIV forms
+      * "capital_gains": Capital gains from 1099-B forms
+      * "w2_summary": W2 employment wages and withholdings
+      * "all_forms": List all uploaded tax forms
+    - tax_year: Specific tax year (e.g., 2024) or omit for all years
+    - form_types: Optional filter by form types (e.g., ["W2", "1099-INT"])
+  - Returns: Structured tax data with totals, breakdowns, and per-form details
+  - Privacy: All PII (SSN, EIN) is hashed - never exposed in responses
+
 </tools>
 <tool_call_examples>
 FINANCIAL DATA QUERIES:
@@ -590,6 +616,43 @@
   - List top optimization opportunities
   - Provide specific card recommendations per category
 
+TAX FORM ANALYSIS:
+
+- User: "How much did I earn in 2024?"
+  - Call: `analyze_tax_data(query_type="income_summary", tax_year=2024)`
+  - Returns: Total W2 wages + 1099-MISC income + interest + dividends + capital gains
+  - Provide breakdown by source
+
+- User: "What was my total federal tax withheld last year?"
+  - Call: `analyze_tax_data(query_type="tax_summary", tax_year=2025)` (use the year before today's date; 2025 assumes today is in 2026)
+  - Returns: Federal, state, Social Security, Medicare withholdings
+  - Show grand total and breakdown
+
+- User: "Did I have any interest income?"
+  - Call: `analyze_tax_data(query_type="interest_income")`
+  - Returns: 1099-INT forms with interest amounts by payer
+  - Mention if none found
+
+- User: "Show me my dividend income from last year"
+  - Call: `analyze_tax_data(query_type="dividends_income", tax_year=2025)` (use the year before today's date; 2025 assumes today is in 2026)
+  - Returns: Ordinary and qualified dividends by payer
+  - Explain tax implications (qualified vs ordinary)
+
+- User: "What were my stock sale gains?"
+  - Call: `analyze_tax_data(query_type="capital_gains")`
+  - Returns: Short-term and long-term gains from 1099-B
+  - Break down by transaction and holding period
+
+- User: "Which companies did I work for in 2024?"
+  - Call: `analyze_tax_data(query_type="w2_summary", tax_year=2024)`
+  - Returns: W2 forms with employers, wages, and withholdings
+  - Summarize total wages and tax withheld
+
+- User: "List all my uploaded tax forms"
+  - Call: `analyze_tax_data(query_type="all_forms")`
+  - Returns: All tax forms with types, years, and processing status
+  - Note which forms need review (low confidence extractions)
+
 - User: "How much more am I spending this month compared to last month?"
   - First call: `search_knowledge_base(query="transactions spending", start_date="2025-12-01", end_date="2025-12-31")` (Dec)
   - Second call: `search_knowledge_base(query="transactions spending", start_date="2026-01-01", end_date="2026-01-26")` (Jan)
diff --git a/financegpt_backend/app/agents/new_chat/tools/registry.py b/financegpt_backend/app/agents/new_chat/tools/registry.py
index 9257729..8ad479f 100644
--- a/financegpt_backend/app/agents/new_chat/tools/registry.py
+++ b/financegpt_backend/app/agents/new_chat/tools/registry.py
@@ -58,6 +58,7 @@ async def my_tool(param: str) -> dict:
 from .portfolio_performance import create_portfolio_performance_tool
 from .search_financegpt_docs import create_search_financegpt_docs_tool
 from .search_transactions import create_search_transactions_tool
+from .tax_analysis import create_tax_analysis_tool
 from .user_memory import create_recall_memory_tool, create_save_memory_tool
 
 # =============================================================================
@@ -254,6 +255,20 @@ class ToolDefinition:
         requires=["search_space_id", "db_session", "connector_service"],
     ),
     # =========================================================================
+    # TAX ANALYSIS TOOLS - UPLOADED TAX FORMS
+    # =========================================================================
+    # Tax analysis tool - queries structured tax form data (W2, 1099s)
+    ToolDefinition(
+        name="analyze_tax_data",
+        description="Query uploaded and processed tax forms (W2, 1099s) to answer tax-related questions",
+        factory=lambda deps: create_tax_analysis_tool(
+            user_id=deps["user_id"],
+            search_space_id=deps["search_space_id"],
+            db_session=deps["db_session"],
+        ),
+        requires=["user_id", "search_space_id", "db_session"],
+    ),
+    # =========================================================================
     # ADD YOUR CUSTOM TOOLS BELOW
     # =========================================================================
     # Example:
diff --git a/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py b/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py
new file mode 100644
index 0000000..0f4413b
--- /dev/null
+++ b/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py
@@ -0,0 +1,222 @@
+"""Tax analysis tool for the agent.
+
+This tool allows the agent to query structured tax form data to answer questions like:
+- "How much did I earn in 2024?"
+- "What was my total federal tax withheld?"
+- "Did I have any interest income?"
+- "What were my capital gains from stock sales?"
+"""
+
+import logging
+from datetime import datetime
+from decimal import Decimal
+from typing import Any
+
+from langchain_core.tools import tool
+from sqlalchemy import and_, desc, func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.schemas.tax_forms import TaxFormWithDetails
+
+logger = logging.getLogger(__name__)
+
+
+def create_tax_analysis_tool(user_id: str, search_space_id: int, db_session: AsyncSession):
+    """Build the ``analyze_tax_data`` agent tool bound to one user and search space.
+    
+    Args:
+        user_id: User ID (UUID string), captured by the tool closure
+        search_space_id: Search space ID, captured by the tool closure
+        db_session: Database session (NOTE(review): accepted but not forwarded to the implementation yet)
+        
+    Returns:
+        The ``@tool``-decorated ``analyze_tax_data`` coroutine
+    """
+    # NOTE(review): the inner docstring is presumably the description the agent sees - edit with care.
+    @tool
+    async def analyze_tax_data(
+        query_type: str,
+        tax_year: int | None = None,
+        form_types: list[str] | None = None,
+    ) -> dict[str, Any]:
+        """Query uploaded tax forms to answer tax-related questions.
+        
+        Use this tool when users ask about income, taxes withheld, interest, dividends,
+        capital gains, or W2 employment information from their uploaded tax documents.
+        
+        Args:
+            query_type: Type of tax analysis - "income_summary", "tax_summary", 
+                       "interest_income", "dividends_income", "capital_gains", 
+                       "w2_summary", or "all_forms"
+            tax_year: Specific tax year (e.g., 2024) or None for all years
+            form_types: Optional list of form types to filter (e.g., ["W2", "1099-INT"])
+            
+        Returns:
+            Dictionary with analysis results including totals, breakdowns, and details
+        """
+        return await _analyze_tax_data_impl(
+            user_id=user_id,
+            search_space_id=search_space_id,
+            query_type=query_type,
+            tax_year=tax_year,
+            form_types=form_types,
+        )
+    
+    return analyze_tax_data
+
+
+async def _analyze_tax_data_impl(
+    user_id: str,
+    search_space_id: int,
+    query_type: str,
+    tax_year: int | None = None,
+    form_types: list[str] | None = None,
+) -> dict[str, Any]:
+    """Dispatch ``query_type`` to its helper; unknown types return ``{"error": ...}``."""
+    # NOTE: the helpers are placeholders that return zeroed totals without touching the database.
+    # ``form_types`` is forwarded only for "all_forms"; the other query types ignore it.
+    
+    if query_type == "income_summary":
+        return await _get_income_summary(user_id, search_space_id, tax_year)
+    elif query_type == "tax_summary":
+        return await _get_tax_summary(user_id, search_space_id, tax_year)
+    elif query_type == "interest_income":
+        return await _get_interest_income(user_id, search_space_id, tax_year)
+    elif query_type == "dividends_income":
+        return await _get_dividends_income(user_id, search_space_id, tax_year)
+    elif query_type == "capital_gains":
+        return await _get_capital_gains(user_id, search_space_id, tax_year)
+    elif query_type == "w2_summary":
+        return await _get_w2_summary(user_id, search_space_id, tax_year)
+    elif query_type == "all_forms":
+        return await _get_all_forms(user_id, search_space_id, tax_year, form_types)
+    else:
+        return {"error": f"Unknown query type: {query_type}"}
+
+
+async def _get_income_summary(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the income summary payload (stub: every total is a zero Decimal)."""
+    # TODO: Implement actual database queries (sum W2 and 1099 income into the totals below).
+    # NOTE(review): until then the "No tax forms uploaded yet" message is returned unconditionally.
+    return {
+        "query_type": "income_summary",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "total_w2_wages": Decimal("0.00"),
+        "total_1099_misc_income": Decimal("0.00"),
+        "total_interest_income": Decimal("0.00"),
+        "total_dividend_income": Decimal("0.00"),
+        "total_capital_gains": Decimal("0.00"),
+        "grand_total_income": Decimal("0.00"),
+        "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see income summary.",
+    }
+
+
+async def _get_tax_summary(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the withholding summary payload (stub: every total is a zero Decimal)."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "tax_summary",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "total_federal_withheld": Decimal("0.00"),
+        "total_social_security_withheld": Decimal("0.00"),
+        "total_medicare_withheld": Decimal("0.00"),
+        "total_state_withheld": Decimal("0.00"),
+        "grand_total_withheld": Decimal("0.00"),
+        "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see tax withholdings.",
+    }
+
+
+async def _get_interest_income(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the 1099-INT interest payload (stub: zero total and an empty source list)."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "interest_income",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "total_interest": Decimal("0.00"),
+        "sources": [],
+        "message": "No 1099-INT forms found. Upload your interest income statements to see details.",
+    }
+
+
+async def _get_dividends_income(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the 1099-DIV dividend payload (stub: zero totals and an empty source list)."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "dividends_income",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "total_ordinary_dividends": Decimal("0.00"),
+        "total_qualified_dividends": Decimal("0.00"),
+        "sources": [],
+        "message": "No 1099-DIV forms found. Upload your dividend income statements to see details.",
+    }
+
+
+async def _get_capital_gains(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the 1099-B capital gains payload (stub: zero totals and no transactions)."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "capital_gains",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "total_short_term_gains": Decimal("0.00"),
+        "total_long_term_gains": Decimal("0.00"),
+        "total_realized_gains": Decimal("0.00"),
+        "transactions": [],
+        "message": "No 1099-B forms found. Upload your brokerage statements to see capital gains.",
+    }
+
+
+async def _get_w2_summary(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Return the W2 summary payload (stub: zero totals and an empty employer list)."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "w2_summary",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "employers": [],
+        "total_wages": Decimal("0.00"),
+        "total_federal_withheld": Decimal("0.00"),
+        "total_social_security_withheld": Decimal("0.00"),
+        "total_medicare_withheld": Decimal("0.00"),
+        "message": "No W2 forms found. Upload your W2s to see employment income and withholdings.",
+    }
+
+
+async def _get_all_forms(
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+    form_types: list[str] | None,
+) -> dict[str, Any]:
+    """Return the form listing payload (stub: always empty); echoes the requested form_types filter."""
+    # TODO: Implement actual database queries; the "no forms" message is currently unconditional.
+    return {
+        "query_type": "all_forms",
+        "tax_year": tax_year or "all years",  # None is reported as "all years"
+        "form_types_filter": form_types,
+        "forms": [],
+        "total_forms": 0,
+        "message": "No tax forms uploaded yet. Upload W2s and 1099s to get started.",
+    }
diff --git a/financegpt_backend/app/db.py b/financegpt_backend/app/db.py
index 56f6687..3932bf1 100644
--- a/financegpt_backend/app/db.py
+++ b/financegpt_backend/app/db.py
@@ -961,6 +961,361 @@ class PortfolioAllocationTarget(BaseModel, TimestampMixin):
     metadata_ = Column("metadata", JSONB, nullable=True)
 
 
+# ============================================================================
+# Tax Forms Models
+# ============================================================================
+
+
+class TaxForm(BaseModel):
+    """Parent row for one uploaded tax document; form-specific fields live in a one-to-one child table."""
+
+    __tablename__ = "tax_forms"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    user_id = Column(
+        UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False, index=True
+    )
+    search_space_id = Column(
+        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False, index=True
+    )
+    form_type = Column(String(20), nullable=False, index=True)  # W2, 1099-MISC, 1099-INT, etc.
+    tax_year = Column(Integer, nullable=False, index=True)
+    document_id = Column(
+        Integer, ForeignKey("documents.id", ondelete="SET NULL"), nullable=True
+    )  # becomes NULL when the source document is deleted (ondelete="SET NULL")
+    uploaded_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    processed_at = Column(TIMESTAMP(timezone=True), nullable=True)
+    processing_status = Column(
+        String(20), server_default='pending', nullable=False
+    )  # pending, processing, completed, failed, needs_review
+    extraction_method = Column(
+        String(50), nullable=True
+    )  # structured_pdf, unstructured, ocr, llm_assisted
+    confidence_score = Column(Numeric(3, 2), nullable=True)  # 0.00 to 1.00
+    needs_review = Column(Boolean, server_default='false', nullable=False)  # NOTE(review): overlaps with processing_status 'needs_review' - confirm which is authoritative
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: at most one child row per form type (uselist=False); children are deleted with the parent
+    w2_form = relationship("W2Form", back_populates="tax_form", uselist=False, cascade="all, delete-orphan")
+    form_1099_misc = relationship("Form1099Misc", back_populates="tax_form", uselist=False, cascade="all, delete-orphan")
+    form_1099_int = relationship("Form1099Int", back_populates="tax_form", uselist=False, cascade="all, delete-orphan")
+    form_1099_div = relationship("Form1099Div", back_populates="tax_form", uselist=False, cascade="all, delete-orphan")
+    form_1099_b = relationship("Form1099B", back_populates="tax_form", uselist=False, cascade="all, delete-orphan")
+
+
+class W2Form(BaseModel):
+    """W2 wage and tax statement details, stored one-to-one with a TaxForm row."""
+
+    __tablename__ = "w2_forms"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    tax_form_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("tax_forms.id", ondelete="CASCADE"),
+        nullable=False,
+        unique=True,
+        index=True,
+    )
+
+    # Employer Information (only the EIN is stored as a hash)
+    employer_name = Column(String(255), nullable=True)
+    employer_ein_hash = Column(String(64), nullable=True)  # SHA256 hashed
+    employer_address = Column(Text, nullable=True)
+
+    # Employee Information (masked)
+    employee_ssn_hash = Column(String(64), nullable=True)  # SHA256 hashed, never plain text
+    employee_name_masked = Column(String(255), nullable=True)  # [EMPLOYEE_NAME] for UI
+
+    # Wage Information - Box 1-8
+    wages_tips_compensation = Column(Numeric(12, 2), nullable=True)  # Box 1
+    federal_income_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 2
+    social_security_wages = Column(Numeric(12, 2), nullable=True)  # Box 3
+    social_security_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 4
+    medicare_wages = Column(Numeric(12, 2), nullable=True)  # Box 5
+    medicare_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 6
+    social_security_tips = Column(Numeric(12, 2), nullable=True)  # Box 7
+    allocated_tips = Column(Numeric(12, 2), nullable=True)  # Box 8
+
+    # Other Compensation - Box 10-11
+    dependent_care_benefits = Column(Numeric(12, 2), nullable=True)  # Box 10
+    nonqualified_plans = Column(Numeric(12, 2), nullable=True)  # Box 11
+
+    # Box 12 codes (multiple entries)
+    box_12_codes = Column(JSONB, nullable=True)  # [{code: 'D', amount: 5000.00}, ...]
+
+    # Box 13 checkboxes
+    statutory_employee = Column(Boolean, server_default='false', nullable=False)
+    retirement_plan = Column(Boolean, server_default='false', nullable=False)
+    third_party_sick_pay = Column(Boolean, server_default='false', nullable=False)
+
+    # State/Local Tax - Box 15-20
+    state_code = Column(String(2), nullable=True)  # Box 15
+    state_wages = Column(Numeric(12, 2), nullable=True)  # Box 16
+    state_income_tax = Column(Numeric(12, 2), nullable=True)  # Box 17
+    local_wages = Column(Numeric(12, 2), nullable=True)  # Box 18
+    local_income_tax = Column(Numeric(12, 2), nullable=True)  # Box 19
+    locality_name = Column(String(100), nullable=True)  # Box 20
+
+    # Field-level confidence scores
+    field_confidence_scores = Column(JSONB, nullable=True)  # {wages: 0.95, federal_tax: 0.88, ...}
+
+    # Raw OCR/extraction data (for debugging/re-processing)
+    raw_extraction_data = Column(JSONB, nullable=True)
+
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: back-reference to the owning TaxForm row
+    tax_form = relationship("TaxForm", back_populates="w2_form")
+
+
+class Form1099Misc(BaseModel):
+    """1099-MISC miscellaneous income details, stored one-to-one with a TaxForm row."""
+
+    __tablename__ = "form_1099_misc"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    tax_form_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("tax_forms.id", ondelete="CASCADE"),
+        nullable=False,
+        unique=True,
+        index=True,
+    )
+
+    # Payer Information
+    payer_name = Column(String(255), nullable=True)
+    payer_tin_hash = Column(String(64), nullable=True)
+    payer_address = Column(Text, nullable=True)
+
+    # Recipient (TIN stored only as a hash)
+    recipient_tin_hash = Column(String(64), nullable=True)
+
+    # Income Boxes - NOTE(review): box numbers appear to follow an older 1099-MISC revision; confirm per tax year
+    rents = Column(Numeric(12, 2), nullable=True)  # Box 1
+    royalties = Column(Numeric(12, 2), nullable=True)  # Box 2
+    other_income = Column(Numeric(12, 2), nullable=True)  # Box 3
+    federal_income_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 4
+    fishing_boat_proceeds = Column(Numeric(12, 2), nullable=True)  # Box 5
+    medical_health_payments = Column(Numeric(12, 2), nullable=True)  # Box 6
+    substitute_payments = Column(Numeric(12, 2), nullable=True)  # Box 8
+    crop_insurance_proceeds = Column(Numeric(12, 2), nullable=True)  # Box 10
+    gross_proceeds_attorney = Column(Numeric(12, 2), nullable=True)  # Box 14
+    section_409a_deferrals = Column(Numeric(12, 2), nullable=True)  # Box 15
+    state_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 16
+    state_payer_number = Column(String(50), nullable=True)
+    state_income = Column(Numeric(12, 2), nullable=True)  # Box 18
+
+    # Field confidence scores and raw extraction output (both free-form JSONB)
+    field_confidence_scores = Column(JSONB, nullable=True)
+    raw_extraction_data = Column(JSONB, nullable=True)
+
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: back-reference to the owning TaxForm row
+    tax_form = relationship("TaxForm", back_populates="form_1099_misc")
+
+
+class Form1099Int(BaseModel):
+    """1099-INT interest income details, stored one-to-one with a TaxForm row."""
+
+    __tablename__ = "form_1099_int"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    tax_form_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("tax_forms.id", ondelete="CASCADE"),
+        nullable=False,
+        unique=True,
+        index=True,
+    )
+
+    # Payer Information
+    payer_name = Column(String(255), nullable=True)
+    payer_tin_hash = Column(String(64), nullable=True)
+    payer_address = Column(Text, nullable=True)
+
+    # Recipient (TIN stored only as a hash)
+    recipient_tin_hash = Column(String(64), nullable=True)
+
+    # Interest Income Boxes
+    interest_income = Column(Numeric(12, 2), nullable=True)  # Box 1
+    early_withdrawal_penalty = Column(Numeric(12, 2), nullable=True)  # Box 2
+    interest_on_us_savings_bonds = Column(Numeric(12, 2), nullable=True)  # Box 3
+    federal_income_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 4
+    investment_expenses = Column(Numeric(12, 2), nullable=True)  # Box 5
+    foreign_tax_paid = Column(Numeric(12, 2), nullable=True)  # Box 6
+    foreign_country = Column(String(100), nullable=True)  # Box 7
+    tax_exempt_interest = Column(Numeric(12, 2), nullable=True)  # Box 8
+    specified_private_activity_bond_interest = Column(Numeric(12, 2), nullable=True)  # Box 9
+    market_discount = Column(Numeric(12, 2), nullable=True)  # Box 10
+    bond_premium = Column(Numeric(12, 2), nullable=True)  # Box 11
+    bond_premium_on_treasury = Column(Numeric(12, 2), nullable=True)  # Box 12
+    bond_premium_on_tax_exempt = Column(Numeric(12, 2), nullable=True)  # Box 13
+    state_code = Column(String(2), nullable=True)  # Box 15
+    state_id = Column(String(50), nullable=True)  # Box 16
+    state_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 17
+
+    # Field confidence scores and raw extraction output (both free-form JSONB)
+    field_confidence_scores = Column(JSONB, nullable=True)
+    raw_extraction_data = Column(JSONB, nullable=True)
+
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: back-reference to the owning TaxForm row
+    tax_form = relationship("TaxForm", back_populates="form_1099_int")
+
+
+class Form1099Div(BaseModel):
+    """1099-DIV dividend income details, stored one-to-one with a TaxForm row."""
+
+    __tablename__ = "form_1099_div"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    tax_form_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("tax_forms.id", ondelete="CASCADE"),
+        nullable=False,
+        unique=True,
+        index=True,
+    )
+
+    # Payer Information
+    payer_name = Column(String(255), nullable=True)
+    payer_tin_hash = Column(String(64), nullable=True)
+    payer_address = Column(Text, nullable=True)
+
+    # Recipient (TIN stored only as a hash)
+    recipient_tin_hash = Column(String(64), nullable=True)
+
+    # Dividend Income Boxes
+    total_ordinary_dividends = Column(Numeric(12, 2), nullable=True)  # Box 1a
+    qualified_dividends = Column(Numeric(12, 2), nullable=True)  # Box 1b
+    total_capital_gain_distributions = Column(Numeric(12, 2), nullable=True)  # Box 2a
+    unrecaptured_section_1250_gain = Column(Numeric(12, 2), nullable=True)  # Box 2b
+    section_1202_gain = Column(Numeric(12, 2), nullable=True)  # Box 2c
+    collectibles_28_gain = Column(Numeric(12, 2), nullable=True)  # Box 2d
+    section_897_ordinary_dividends = Column(Numeric(12, 2), nullable=True)  # Box 2e
+    section_897_capital_gain = Column(Numeric(12, 2), nullable=True)  # Box 2f
+    nondividend_distributions = Column(Numeric(12, 2), nullable=True)  # Box 3
+    federal_income_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 4
+    section_199a_dividends = Column(Numeric(12, 2), nullable=True)  # Box 5
+    investment_expenses = Column(Numeric(12, 2), nullable=True)  # Box 6
+    foreign_tax_paid = Column(Numeric(12, 2), nullable=True)  # Box 7
+    foreign_country = Column(String(100), nullable=True)  # Box 8
+    cash_liquidation_distributions = Column(Numeric(12, 2), nullable=True)  # Box 9
+    noncash_liquidation_distributions = Column(Numeric(12, 2), nullable=True)  # Box 10
+    exempt_interest_dividends = Column(Numeric(12, 2), nullable=True)  # Box 11
+    specified_private_activity_bond_interest_dividends = Column(Numeric(12, 2), nullable=True)  # Box 12
+    state_code = Column(String(2), nullable=True)  # Box 14
+    state_id = Column(String(50), nullable=True)  # Box 15
+    state_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 16
+
+    # Field confidence scores and raw extraction output (both free-form JSONB)
+    field_confidence_scores = Column(JSONB, nullable=True)
+    raw_extraction_data = Column(JSONB, nullable=True)
+
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: back-reference to the owning TaxForm row
+    tax_form = relationship("TaxForm", back_populates="form_1099_div")
+
+
+class Form1099B(BaseModel):
+    """1099-B broker transaction details, stored one-to-one with a TaxForm row."""
+
+    __tablename__ = "form_1099_b"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=text('gen_random_uuid()'), index=True)
+    tax_form_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("tax_forms.id", ondelete="CASCADE"),
+        nullable=False,
+        unique=True,  # NOTE(review): allows only one sale row per TaxForm - confirm multi-transaction 1099-Bs are handled
+        index=True,
+    )
+
+    # Payer/Broker Information
+    payer_name = Column(String(255), nullable=True)
+    payer_tin_hash = Column(String(64), nullable=True)
+    payer_address = Column(Text, nullable=True)
+
+    # Recipient (TIN stored only as a hash)
+    recipient_tin_hash = Column(String(64), nullable=True)
+
+    # Transaction Details (dates are kept as strings, not DATE columns)
+    description_of_property = Column(Text, nullable=True)  # Box 1a
+    date_acquired = Column(String(50), nullable=True)  # Box 1b (can be "VARIOUS")
+    date_sold = Column(String(50), nullable=True)  # Box 1c
+    proceeds = Column(Numeric(12, 2), nullable=True)  # Box 1d
+    cost_basis = Column(Numeric(12, 2), nullable=True)  # Box 1e
+    accrued_market_discount = Column(Numeric(12, 2), nullable=True)  # Box 1f
+    wash_sale_loss_disallowed = Column(Numeric(12, 2), nullable=True)  # Box 1g
+    federal_income_tax_withheld = Column(Numeric(12, 2), nullable=True)  # Box 4
+    
+    # Form 8949 checkboxes (A-C short-term, D-F long-term)
+    short_term_box_a = Column(Boolean, server_default='false', nullable=False)
+    short_term_box_b = Column(Boolean, server_default='false', nullable=False)
+    short_term_box_c = Column(Boolean, server_default='false', nullable=False)
+    long_term_box_d = Column(Boolean, server_default='false', nullable=False)
+    long_term_box_e = Column(Boolean, server_default='false', nullable=False)
+    long_term_box_f = Column(Boolean, server_default='false', nullable=False)
+    
+    # Other checkboxes on the form
+    loss_not_allowed = Column(Boolean, server_default='false', nullable=False)
+    noncovered_security = Column(Boolean, server_default='false', nullable=False)
+    basis_reported_to_irs = Column(Boolean, server_default='false', nullable=False)
+    
+    # State tax information
+    state_code = Column(String(2), nullable=True)
+    state_id = Column(String(50), nullable=True)
+    state_tax_withheld = Column(Numeric(12, 2), nullable=True)
+
+    # Field confidence scores and raw extraction output (both free-form JSONB)
+    field_confidence_scores = Column(JSONB, nullable=True)
+    raw_extraction_data = Column(JSONB, nullable=True)
+
+    created_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
+    updated_at = Column(
+        TIMESTAMP(timezone=True),
+        server_default=text('now()'),
+        onupdate=text('now()'),
+        nullable=False,
+    )
+
+    # Relationships: back-reference to the owning TaxForm row
+    tax_form = relationship("TaxForm", back_populates="form_1099_b")
+
+
 class NewLLMConfig(BaseModel, TimestampMixin):
     """
     New LLM configuration table that combines model settings with prompt configuration.
diff --git a/financegpt_backend/app/parsers/tax_form_parser.py b/financegpt_backend/app/parsers/tax_form_parser.py
new file mode 100644
index 0000000..57f9fcd
--- /dev/null
+++ b/financegpt_backend/app/parsers/tax_form_parser.py
@@ -0,0 +1,450 @@
+"""Tiered tax form parser with hybrid extraction strategy.
+
+Extraction Priority:
+1. Structured PDF extraction (pdfplumber) - best for text-based PDFs
+2. Unstructured library - handles more complex layouts
+3. OCR with pattern matching - for scanned documents
+4. LLM-assisted extraction - last resort, with PII masked
+
+Each tier returns per-field confidence scores. If the average confidence is below 0.85, escalate to the next tier.
+"""
+
+import logging
+import re
+from decimal import Decimal
+from pathlib import Path
+from typing import Any, Literal
+
+import pdfplumber
+from unstructured.partition.pdf import partition_pdf
+
+from app.utils.pii_masking import mask_tax_form_for_llm, validate_confidence_threshold
+
+logger = logging.getLogger(__name__)
+
+
+class TaxFormParser:
+    """Hybrid tax form parser with tiered extraction (see parse_tax_form for the tier order)."""
+    # Minimum average per-field confidence for a tier's result to be accepted.
+    CONFIDENCE_THRESHOLD = 0.85
+    
+    # Regex source strings for common field formats; only "ssn" and "ein" are used by the parsing methods in this class.
+    PATTERNS = {
+        "ssn": r"\b\d{3}-\d{2}-\d{4}\b",  # NNN-NN-NNNN
+        "ein": r"\b\d{2}-\d{7}\b",  # NN-NNNNNNN
+        "money": r"\$?\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?",  # optional "$", comma-grouped digits, optional cents
+        "date": r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b",  # three numeric parts separated by "/" or "-"
+        "percentage": r"\d+(?:\.\d+)?%",  # integer or decimal followed by "%"
+    }
+    
+    def __init__(self):
+        """Create a parser whose extraction history starts out empty."""
+        self.extraction_history: list[dict[str, Any]] = list()
+    
+    async def parse_tax_form(
+        self,
+        file_path: str | Path,
+        form_type: Literal["W2", "1099-MISC", "1099-INT", "1099-DIV", "1099-B"],
+        tax_year: int,
+    ) -> dict[str, Any]:
+        """Parse a tax form by walking the extraction tiers in priority order.
+
+        Tiers 1-3 (structured PDF, unstructured library, OCR) run in turn and the
+        first result whose average field confidence reaches CONFIDENCE_THRESHOLD
+        is returned immediately. If none qualifies, the best-scoring result seen
+        so far is handed to the LLM-assisted tier, so a partial low-confidence
+        extraction is kept for review instead of being replaced by the output of
+        whichever tier happened to run last.
+
+        Args:
+            file_path: Path to PDF file
+            form_type: Type of tax form
+            tax_year: Tax year for the form
+
+        Returns:
+            Dictionary containing:
+                - extracted_data: Parsed form fields
+                - confidence_scores: Per-field confidence
+                - extraction_method: Method used (structured_pdf, unstructured, ocr, llm_assisted)
+                - needs_review: True if confidence < threshold
+                - raw_extraction_data: Full extraction details
+
+        Raises:
+            FileNotFoundError: If file_path does not exist.
+        """
+        file_path = Path(file_path)
+
+        if not file_path.exists():
+            raise FileNotFoundError(f"Tax form file not found: {file_path}")
+
+        # (tier number, label used in log messages, extractor coroutine function)
+        tiers = (
+            (1, "structured PDF", self._extract_structured_pdf),
+            (2, "unstructured library", self._extract_unstructured),
+            (3, "OCR", self._extract_ocr),
+        )
+
+        result: dict[str, Any] | None = None
+        best_result: dict[str, Any] | None = None
+        best_confidence = -1.0
+
+        for tier, label, extract in tiers:
+            logger.info(f"Tier {tier}: Attempting {label} extraction for {form_type}")
+            result = await extract(file_path, form_type, tax_year)
+            # A tier that extracted nothing has no scores to average; skip it.
+            if not result or not result["confidence_scores"]:
+                continue
+            scores = result["confidence_scores"]
+            avg_confidence = sum(scores.values()) / len(scores)
+            logger.info(f"Tier {tier} average confidence: {avg_confidence:.2f}")
+
+            if avg_confidence >= self.CONFIDENCE_THRESHOLD:
+                logger.info(f"Tier {tier} succeeded with {avg_confidence:.2f} confidence")
+                return result
+
+            if avg_confidence > best_confidence:
+                best_result, best_confidence = result, avg_confidence
+
+        # Tier 4: LLM-assisted extraction (last resort, with PII masked)
+        logger.warning(f"Tier 4: Escalating to LLM-assisted extraction for {form_type}")
+        return await self._extract_llm_assisted(file_path, form_type, tax_year, previous_result=best_result or result)
+    
+    async def _extract_structured_pdf(
+        self,
+        file_path: Path,
+        form_type: str,
+        tax_year: int,
+    ) -> dict[str, Any]:
+        """Extract data using pdfplumber (best for text-based PDFs).
+
+        Never raises: failures (including an unsupported form_type) are logged and
+        returned as an empty result with needs_review=True and the error message."""
+        try:
+            with pdfplumber.open(file_path) as pdf:
+                # extract_text() can yield None for a page without a text layer
+                # (e.g. a scanned image) in some pdfplumber versions; treat it as
+                # empty text so one such page does not fail the whole document.
+                full_text = "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
+                
+                # Extract based on form type
+                if form_type == "W2":
+                    extracted_data = self._parse_w2_text(full_text)
+                elif form_type == "1099-MISC":
+                    extracted_data = self._parse_1099_misc_text(full_text)
+                elif form_type == "1099-INT":
+                    extracted_data = self._parse_1099_int_text(full_text)
+                elif form_type == "1099-DIV":
+                    extracted_data = self._parse_1099_div_text(full_text)
+                elif form_type == "1099-B":
+                    extracted_data = self._parse_1099_b_text(full_text)
+                else:
+                    raise ValueError(f"Unsupported form type: {form_type}")
+                
+                # Calculate confidence scores based on field population
+                confidence_scores = self._calculate_confidence_scores(extracted_data)
+                
+                return {
+                    "extracted_data": extracted_data,
+                    "confidence_scores": confidence_scores,
+                    "extraction_method": "structured_pdf",
+                    "needs_review": not self._meets_confidence_threshold(confidence_scores),
+                    "raw_extraction_data": {"full_text": full_text},
+                }
+        
+        except Exception as e:
+            logger.error(f"Structured PDF extraction failed: {e}")
+            return {
+                "extracted_data": {},
+                "confidence_scores": {},
+                "extraction_method": "structured_pdf",
+                "needs_review": True,
+                "raw_extraction_data": {"error": str(e)},
+            }
+    
+    async def _extract_unstructured(
+        self,
+        file_path: Path,
+        form_type: str,
+        tax_year: int,
+    ) -> dict[str, Any]:
+        """Tier 2: partition the PDF with the unstructured library, then parse.
+
+        Intended for PDFs with complex layouts, tables, or multiple columns.
+        The joined element text goes through the same per-form text parsers
+        as the pdfplumber tier. Any exception raised along the way (including
+        an unsupported form_type) is logged and reported as an empty result
+        that is flagged for review.
+        """
+        # Text parsers keyed by form type (the same ones the pdfplumber tier uses).
+        text_parsers = {
+            "W2": self._parse_w2_text,
+            "1099-MISC": self._parse_1099_misc_text,
+            "1099-INT": self._parse_1099_int_text,
+            "1099-DIV": self._parse_1099_div_text,
+            "1099-B": self._parse_1099_b_text,
+        }
+
+        try:
+            # Partition the PDF into elements and join their string forms.
+            elements = partition_pdf(str(file_path), strategy="hi_res")
+            full_text = "\n".join(str(element) for element in elements)
+
+            parse_text = text_parsers.get(form_type)
+            if parse_text is None:
+                raise ValueError(f"Unsupported form type: {form_type}")
+
+            extracted_data = parse_text(full_text)
+            confidence_scores = self._calculate_confidence_scores(extracted_data)
+            raw_details = {"full_text": full_text, "num_elements": len(elements)}
+
+            return {
+                "extracted_data": extracted_data,
+                "confidence_scores": confidence_scores,
+                "extraction_method": "unstructured",
+                "needs_review": not self._meets_confidence_threshold(confidence_scores),
+                "raw_extraction_data": raw_details,
+            }
+
+        except Exception as exc:
+            logger.error(f"Unstructured extraction failed: {exc}")
+            return {
+                "extracted_data": {},
+                "confidence_scores": {},
+                "extraction_method": "unstructured",
+                "needs_review": True,
+                "raw_extraction_data": {"error": str(exc)},
+            }
+    
+    async def _extract_ocr(
+        self,
+        file_path: Path,
+        form_type: str,
+        tax_year: int,
+    ) -> dict[str, Any]:
+        """Tier 3 placeholder for OCR-based extraction of scanned forms.
+
+        No OCR runs yet (pytesseract or similar is intended); the empty result
+        has no confidence scores, so parse_tax_form escalates to the next tier.
+        """
+        # TODO: Implement OCR extraction with pytesseract
+        logger.warning("OCR extraction not yet implemented")
+        not_implemented: dict[str, Any] = {
+            "extracted_data": {},
+            "confidence_scores": {},
+            "extraction_method": "ocr",
+            "needs_review": True,
+            "raw_extraction_data": {"status": "not_implemented"},
+        }
+        return not_implemented
+    
+    async def _extract_llm_assisted(
+        self,
+        file_path: Path,
+        form_type: str,
+        tax_year: int,
+        previous_result: dict[str, Any] | None = None,
+    ) -> dict[str, Any]:
+        """Tier 4 placeholder for LLM-based extraction/verification.
+
+        No LLM is called yet. A non-empty previous_result is relabelled in place
+        (extraction_method="llm_assisted", needs_review=True) and returned as the
+        same object; otherwise an empty needs-review result is returned.
+        IMPORTANT: the planned implementation must mask all PII before any LLM call.
+        """
+        # TODO: Implement LLM-assisted extraction using instructor
+        # This would:
+        # 1. Convert PDF to image or extract text
+        # 2. Mask any PII found in previous extraction
+        # 3. Send to LLM with structured output schema
+        # 4. Return verified/extracted data
+        logger.warning("LLM-assisted extraction not yet implemented")
+
+        if not previous_result:
+            # Nothing to carry forward from earlier tiers.
+            return {
+                "extracted_data": {},
+                "confidence_scores": {},
+                "extraction_method": "llm_assisted",
+                "needs_review": True,
+                "raw_extraction_data": {"status": "not_implemented"},
+            }
+
+        previous_result.update(extraction_method="llm_assisted", needs_review=True)
+        return previous_result
+    
+    def _parse_w2_text(self, text: str) -> dict[str, Any]:
+        """Parse W2 form from extracted text.
+
+        Each box amount is the first number following its label on the same line.
+        Fields that are not found are omitted; retirement_plan is always present.
+
+        Args:
+            text: Plain text extracted from the form.
+
+        Returns:
+            Dict of Decimal box amounts plus, when found, the raw employee_ssn and
+            employer_ein strings (intended to be hashed before storage).
+        """
+        data: dict[str, Any] = {}
+
+        # Amounts with thousands separators ("12,345.67") or without ("12345.67").
+        # The unseparated alternative keeps "50000.00" from being cut to "500".
+        amount = r"(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d+(?:\.\d{2})?)"
+
+        # Field name -> label pattern. "Box N" ends with \b so that, for example,
+        # "Box 1" cannot match the start of "Box 12" or "Box 13".
+        box_labels = {
+            "wages_tips_compensation": r"(?:Wages|Box 1\b)",  # Box 1
+            "federal_income_tax_withheld": r"(?:Federal.*?tax.*?withheld|Box 2\b)",  # Box 2
+            "social_security_wages": r"(?:Social security wages|Box 3\b)",  # Box 3
+            "social_security_tax_withheld": r"(?:Social security tax|Box 4\b)",  # Box 4
+            "medicare_wages": r"(?:Medicare wages|Box 5\b)",  # Box 5
+            "medicare_tax_withheld": r"(?:Medicare tax|Box 6\b)",  # Box 6
+        }
+        for field, label in box_labels.items():
+            match = re.search(label + r".*?" + amount, text, re.IGNORECASE)
+            if match:
+                data[field] = self._parse_money(match.group(1))
+
+        # Extract SSN (will be hashed before storage)
+        ssn_match = re.search(self.PATTERNS["ssn"], text)
+        if ssn_match:
+            data["employee_ssn"] = ssn_match.group(0)
+
+        # Extract EIN
+        ein_match = re.search(self.PATTERNS["ein"], text)
+        if ein_match:
+            data["employer_ein"] = ein_match.group(0)
+
+        # Box 13: a standalone X or a check mark after the label (not an "x" inside a word such as "box")
+        data["retirement_plan"] = bool(re.search(r"Retirement plan.*?(?:\b[Xx]\b|✓)", text, re.IGNORECASE))
+
+        return data
+    
+    def _parse_1099_misc_text(self, text: str) -> dict[str, Any]:
+        """Parse 1099-MISC form from extracted text.
+
+        Returns a dict of Decimal amounts for the boxes that were found; each is
+        the first number following its label on the same line.
+        """
+        # Amounts with thousands separators ("1,234.56") or without ("1234.56");
+        # the unseparated alternative keeps "1234.56" from being cut to "123".
+        amount = r"(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d+(?:\.\d{2})?)"
+
+        # "Box N" ends with \b so "Box 1" cannot match the start of "Box 10".
+        box_labels = {
+            "rents": r"(?:Rents|Box 1\b)",  # Box 1
+            "royalties": r"(?:Royalties|Box 2\b)",  # Box 2
+            "other_income": r"(?:Other income|Box 3\b)",  # Box 3
+            "federal_income_tax_withheld": r"(?:Federal.*?tax.*?withheld|Box 4\b)",  # Box 4
+        }
+
+        data: dict[str, Any] = {}
+        for field, label in box_labels.items():
+            match = re.search(label + r".*?" + amount, text, re.IGNORECASE)
+            if match:
+                data[field] = self._parse_money(match.group(1))
+
+        return data
+    
+    def _parse_1099_int_text(self, text: str) -> dict[str, Any]:
+        """Parse 1099-INT form from extracted text.
+
+        Returns a dict of Decimal amounts for the boxes found (first number after the label).
+        """
+        # With or without thousands separators, so "1234.56" is not cut to "123".
+        amount = r"(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d+(?:\.\d{2})?)"
+        # "Box N" ends with \b so "Box 1" cannot match the start of "Box 10".
+        box_labels = {
+            "interest_income": r"(?:Interest income|Box 1\b)",  # Box 1
+            "early_withdrawal_penalty": r"(?:Early withdrawal|Box 2\b)",  # Box 2
+            "federal_income_tax_withheld": r"(?:Federal.*?tax.*?withheld|Box 4\b)",  # Box 4
+        }
+
+        data: dict[str, Any] = {}
+        for field, label in box_labels.items():
+            match = re.search(label + r".*?" + amount, text, re.IGNORECASE)
+            if match:
+                data[field] = self._parse_money(match.group(1))
+        return data
+    
+    def _parse_1099_div_text(self, text: str) -> dict[str, Any]:
+        """Parse 1099-DIV form from extracted text.
+
+        Returns a dict of Decimal amounts for the boxes found (first number after the label).
+        """
+        # With or without thousands separators, so "1234.56" is not cut to "123".
+        amount = r"(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d+(?:\.\d{2})?)"
+        # Field name -> label pattern for each supported box.
+        box_labels = {
+            "total_ordinary_dividends": r"(?:Total ordinary dividends|Box 1a)",  # Box 1a
+            "qualified_dividends": r"(?:Qualified dividends|Box 1b)",  # Box 1b
+            "total_capital_gain_distributions": r"(?:Total capital gain|Box 2a)",  # Box 2a
+        }
+
+        data: dict[str, Any] = {}
+        for field, label in box_labels.items():
+            match = re.search(label + r".*?" + amount, text, re.IGNORECASE)
+            if match:
+                data[field] = self._parse_money(match.group(1))
+        return data
+    
+    def _parse_1099_b_text(self, text: str) -> dict[str, Any]:
+        """Parse 1099-B form from extracted text.
+
+        Returns found Decimal amounts plus short_term/long_term flags (always present).
+        """
+        amount = r"(\d{1,3}(?:,\d{3})+(?:\.\d{2})?|\d+(?:\.\d{2})?)"  # also matches "1234.56" without separators
+        box_labels = {
+            "proceeds": r"(?:Proceeds|Box 1d)",  # Box 1d
+            "cost_basis": r"(?:Cost.*?basis|Box 1e)",  # Box 1e
+        }
+        data: dict[str, Any] = {}
+        for field, label in box_labels.items():
+            match = re.search(label + r".*?" + amount, text, re.IGNORECASE)
+            if match:
+                data[field] = self._parse_money(match.group(1))
+        # Short-term vs long-term: set when the phrase appears anywhere in the text.
+        data["short_term"] = bool(re.search(r"short.?term", text, re.IGNORECASE))
+        data["long_term"] = bool(re.search(r"long.?term", text, re.IGNORECASE))
+        return data
+    
+    def _parse_money(self, money_str: str) -> Decimal:
+        """Convert a currency string such as "$1,234.56" into a Decimal."""
+        # Drop dollar signs, thousands separators and any whitespace first.
+        digits = "".join(ch for ch in money_str if ch not in "$," and not ch.isspace())
+        return Decimal(digits)
+    
+    def _calculate_confidence_scores(self, data: dict[str, Any]) -> dict[str, float]:
+        """Assign a heuristic confidence to every extracted field.
+
+        Scores depend only on the type and truthiness of each value:
+        - None: 0.0
+        - Decimal: 0.95 when positive, otherwise 0.5
+        - str: 0.90 when non-empty, otherwise 0.0
+        - bool: 0.75
+        - anything else: 0.85
+        Only keys present in data are scored, so a field that was never
+        extracted does not appear in the result.
+        """
+
+        def score(value: Any) -> float:
+            if value is None:
+                return 0.0
+            if isinstance(value, Decimal):
+                return 0.95 if value > 0 else 0.5
+            if isinstance(value, str):
+                return 0.90 if value else 0.0
+            if isinstance(value, bool):
+                return 0.75
+            return 0.85
+
+        return {field: score(value) for field, value in data.items()}
+    
+    def _meets_confidence_threshold(self, scores: dict[str, float]) -> bool:
+        """Return True when the mean of scores is at least CONFIDENCE_THRESHOLD.
+
+        An empty mapping never meets the threshold.
+        """
+        mean_score = sum(scores.values()) / len(scores) if scores else None
+        return mean_score is not None and mean_score >= self.CONFIDENCE_THRESHOLD
diff --git a/financegpt_backend/app/schemas/tax_forms.py b/financegpt_backend/app/schemas/tax_forms.py
new file mode 100644
index 0000000..1dd3138
--- /dev/null
+++ b/financegpt_backend/app/schemas/tax_forms.py
@@ -0,0 +1,308 @@
+"""Pydantic schemas for tax forms."""
+
+from datetime import date, datetime
+from decimal import Decimal
+from typing import Any, Literal, Optional
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+
+
+# Base Tax Form Schema
+class TaxFormBase(BaseModel):
+    """Fields shared by every tax form schema: the form type and its tax year."""
+
+    form_type: Literal["W2", "1099-MISC", "1099-INT", "1099-DIV", "1099-B"]
+    tax_year: int = Field(ge=1900, le=2100)
+
+    @field_validator("tax_year")
+    @classmethod
+    def validate_tax_year(cls, v: int) -> int:
+        """Reject tax years more than one year past the current calendar year."""
+        latest_allowed = datetime.now().year + 1
+        if v <= latest_allowed:
+            return v
+        raise ValueError(f"Tax year cannot be more than {latest_allowed}")
+
+
+class TaxFormCreate(TaxFormBase):
+    """Schema for creating a tax form: the base fields plus its search space and an optional document link."""
+    
+    search_space_id: int
+    document_id: Optional[int] = None
+
+
+class TaxFormResponse(TaxFormBase):
+    """Response schema for a tax form record, including processing state and review flag."""
+
+    id: UUID
+    user_id: UUID
+    search_space_id: int
+    document_id: Optional[int] = None
+    uploaded_at: datetime
+    processed_at: Optional[datetime] = None
+    processing_status: Literal["pending", "processing", "completed", "failed", "needs_review"]
+    extraction_method: Optional[str] = None
+    confidence_score: Optional[Decimal] = None
+    needs_review: bool = False
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# W2 Form Schemas
+class W2Box12Code(BaseModel):
+    """One Box 12 entry on a W2: a code of at most two characters and a non-negative amount."""
+    
+    code: str = Field(max_length=2)
+    amount: Decimal = Field(ge=0, decimal_places=2)
+
+
+class W2FormBase(BaseModel):
+    """Common W2 fields; every amount is optional, non-negative, with at most two decimal places."""
+
+    # Employer identification (the EIN is carried as a hash field)
+    employer_name: Optional[str] = None
+    employer_ein_hash: Optional[str] = None
+    employer_address: Optional[str] = None
+
+    # Wages and withholding
+    wages_tips_compensation: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    federal_income_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    social_security_wages: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    social_security_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    medicare_wages: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    medicare_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    social_security_tips: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    allocated_tips: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+
+    # Additional compensation amounts
+    dependent_care_benefits: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    nonqualified_plans: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+
+    # Box 12: list of code/amount entries
+    box_12_codes: Optional[list[W2Box12Code]] = None
+
+    # Box 13: checkboxes, unchecked by default
+    statutory_employee: bool = False
+    retirement_plan: bool = False
+    third_party_sick_pay: bool = False
+
+    # State and local tax details
+    state_code: Optional[str] = Field(default=None, max_length=2)
+    state_wages: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    state_income_tax: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    local_wages: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    local_income_tax: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    locality_name: Optional[str] = Field(default=None, max_length=100)
+
+
+class W2FormCreate(W2FormBase):
+    """Schema for creating W2 form: base fields plus the parent tax_form_id and optional extraction metadata."""
+    
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    raw_extraction_data: Optional[dict[str, Any]] = None
+
+
+class W2FormResponse(W2FormBase):
+    """Response schema for W2 form; employee identity is exposed only as a masked name and an SSN hash field."""
+
+    id: UUID
+    tax_form_id: UUID
+    employee_name_masked: Optional[str] = None
+    employee_ssn_hash: Optional[str] = None
+    field_confidence_scores: Optional[dict[str, float]] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# 1099-MISC Form Schemas
+class Form1099MiscBase(BaseModel):
+    """Common 1099-MISC fields: payer details and the reported amounts."""
+
+    payer_name: Optional[str] = None
+    payer_tin_hash: Optional[str] = None
+    payer_address: Optional[str] = None
+    # Reported amounts: all optional, non-negative, at most two decimal places
+    rents: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    royalties: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    other_income: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    federal_income_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    fishing_boat_proceeds: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    medical_health_payments: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    substitute_payments: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    crop_insurance_proceeds: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    gross_proceeds_attorney: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    section_409a_deferrals: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    state_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    state_payer_number: Optional[str] = Field(default=None, max_length=50)
+    state_income: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+
+
+class Form1099MiscCreate(Form1099MiscBase):
+    """Schema for creating 1099-MISC form: base fields plus the parent tax_form_id and optional extraction metadata."""
+    
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    raw_extraction_data: Optional[dict[str, Any]] = None
+
+
+class Form1099MiscResponse(Form1099MiscBase):
+    """Response schema for 1099-MISC form; the recipient TIN is exposed only as a hash field."""
+
+    id: UUID
+    tax_form_id: UUID
+    recipient_tin_hash: Optional[str] = None
+    field_confidence_scores: Optional[dict[str, float]] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# 1099-INT Form Schemas
+class Form1099IntBase(BaseModel):
+    """Common 1099-INT fields: payer details and the reported amounts."""
+
+    payer_name: Optional[str] = None
+    payer_tin_hash: Optional[str] = None
+    # Reported amounts: all optional, non-negative, at most two decimal places
+    interest_income: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    early_withdrawal_penalty: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    interest_us_savings_bonds: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    federal_income_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    investment_expenses: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    foreign_tax_paid: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    foreign_country: Optional[str] = Field(default=None, max_length=100)
+    tax_exempt_interest: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    specified_private_activity_bond_interest: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    market_discount: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    bond_premium: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    bond_premium_treasury: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    tax_exempt_bond_premium: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+
+
+class Form1099IntCreate(Form1099IntBase):
+    """Schema for creating 1099-INT form: base fields plus the parent tax_form_id and optional extraction metadata."""
+    
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    raw_extraction_data: Optional[dict[str, Any]] = None
+
+
+class Form1099IntResponse(Form1099IntBase):
+    """Response schema for 1099-INT form: base fields plus identifiers, confidence scores and timestamps."""
+
+    id: UUID
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# 1099-DIV Form Schemas
+class Form1099DivBase(BaseModel):
+    """Common 1099-DIV fields: payer details and the reported amounts."""
+
+    payer_name: Optional[str] = None
+    payer_tin_hash: Optional[str] = None
+    # Reported amounts: all optional, non-negative, at most two decimal places
+    total_ordinary_dividends: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    qualified_dividends: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    total_capital_gain_distributions: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    unrecaptured_section_1250_gain: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    section_1202_gain: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    collectibles_gain: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    nondividend_distributions: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    federal_income_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    section_199a_dividends: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    investment_expenses: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    foreign_tax_paid: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    foreign_country: Optional[str] = Field(default=None, max_length=100)
+    cash_liquidation_distributions: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    noncash_liquidation_distributions: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+
+
+class Form1099DivCreate(Form1099DivBase):
+    """Schema for creating 1099-DIV form: base fields plus the parent tax_form_id and optional extraction metadata."""
+    
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    raw_extraction_data: Optional[dict[str, Any]] = None
+
+
+class Form1099DivResponse(Form1099DivBase):
+    """Response schema for 1099-DIV form: base fields plus identifiers, confidence scores and timestamps."""
+
+    id: UUID
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# 1099-B Form Schemas
+class Form1099BBase(BaseModel):
+    """Common 1099-B fields: payer details, the transaction, and its classification flags."""
+
+    payer_name: Optional[str] = None
+    payer_tin_hash: Optional[str] = None
+    # Transaction details; only the withholding amount carries a non-negative constraint
+    description_of_property: Optional[str] = None
+    date_acquired: Optional[date] = None
+    date_sold: Optional[date] = None
+    proceeds: Optional[Decimal] = Field(default=None, decimal_places=2)
+    cost_basis: Optional[Decimal] = Field(default=None, decimal_places=2)
+    adjustments_to_basis: Optional[Decimal] = Field(default=None, decimal_places=2)
+    realized_gain_loss: Optional[Decimal] = Field(default=None, decimal_places=2)
+    federal_income_tax_withheld: Optional[Decimal] = Field(default=None, ge=0, decimal_places=2)
+    # Classification flags; None means the value was not provided
+    short_term: Optional[bool] = None
+    long_term: Optional[bool] = None
+    basis_reported_to_irs: Optional[bool] = None
+    noncovered_security: Optional[bool] = None
+
+
+class Form1099BCreate(Form1099BBase):
+    """Schema for creating 1099-B form: base fields plus the parent tax_form_id and optional extraction metadata."""
+    
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    raw_extraction_data: Optional[dict[str, Any]] = None
+
+
+class Form1099BResponse(Form1099BBase):
+    """Response schema for 1099-B form: base fields plus identifiers, confidence scores and timestamps."""
+
+    id: UUID
+    tax_form_id: UUID
+    field_confidence_scores: Optional[dict[str, float]] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic v2 configuration (replaces the deprecated class-based Config): read fields from object attributes.
+    model_config = ConfigDict(from_attributes=True)
+
+
+# Combined response with form details
+class TaxFormWithDetails(TaxFormResponse):
+    """Tax form response with one optional nested detail object per supported form type."""
+    
+    w2_form: Optional[W2FormResponse] = None
+    form_1099_misc: Optional[Form1099MiscResponse] = None
+    form_1099_int: Optional[Form1099IntResponse] = None
+    form_1099_div: Optional[Form1099DivResponse] = None
+    form_1099_b: Optional[Form1099BResponse] = None
diff --git a/financegpt_backend/app/tasks/document_processors/file_processors.py b/financegpt_backend/app/tasks/document_processors/file_processors.py
index f7eecb4..bbc1d7b 100644
--- a/financegpt_backend/app/tasks/document_processors/file_processors.py
+++ b/financegpt_backend/app/tasks/document_processors/file_processors.py
@@ -52,6 +52,114 @@
 )
 
 
+def _detect_tax_form(filename: str) -> tuple[str, int] | None:
+    """Guess ``(form_type, tax_year)`` from a filename; None if not a tax-form PDF.
+
+    form_type is 'W2', '1099-MISC', '1099-INT', '1099-DIV' or '1099-B'; a 1099
+    with no recognisable suffix falls back to '1099-MISC'.
+    """
+    import re
+    from datetime import date
+
+    name = filename.lower()
+    if not name.endswith('.pdf'):  # Only process PDFs
+        return None
+
+    # NOTE(review): plain substring test, so e.g. "new2024.pdf" also counts as W2.
+    if 'w2' in name or 'w-2' in name:
+        form_type = 'W2'
+    elif '1099' in name:
+        # Prefer a suffix written directly after "1099" ("1099-B", "1099int").
+        suffix = re.search(r'1099[\s_.-]*(misc|int|div|b(?![a-z]))', name)
+        if suffix:
+            form_type = '1099-' + suffix.group(1).upper()
+        elif 'misc' in name:
+            form_type = '1099-MISC'
+        elif 'int' in name:
+            form_type = '1099-INT'
+        elif 'div' in name:
+            form_type = '1099-DIV'
+        elif re.search(r'(?<![a-z])b(?![a-z])', name):
+            # "b" must stand alone; a bare `'b' in name` made "schwab_1099.pdf" a 1099-B.
+            form_type = '1099-B'
+        else:
+            # Generic 1099, try to detect from content later
+            form_type = '1099-MISC'  # Default
+    else:
+        return None
+
+    # Extract year from filename (e.g., "w2_2024.pdf", "2024_w2.pdf")
+    year_match = re.search(r'(20\d{2})', filename)
+    if year_match:
+        return form_type, int(year_match.group(1))
+    # No year in the name: assume the most recently completed calendar year
+    # rather than a hard-coded 2024 that silently goes stale.
+    return form_type, date.today().year - 1
+
+
+async def _process_tax_form_if_applicable(
+    session: AsyncSession,
+    document: Document,
+    filename: str,
+    user_id: str,
+    search_space_id: int,
+) -> None:
+    """
+    Check if uploaded document is a tax form and register it for parsing.
+
+    Detection is filename-based (see ``_detect_tax_form``). For a detected form
+    a ``TaxForm`` row is committed with status 'pending'; no parser is wired in
+    yet. Failures are logged and rolled back instead of being raised.
+
+    Args:
+        session: Database session
+        document: The uploaded document
+        filename: Original filename
+        user_id: User ID (passed to ``UUID()``)
+        search_space_id: Search space ID
+    """
+    try:
+        detected = _detect_tax_form(filename)
+        if detected is None:
+            return
+        form_type, tax_year = detected
+
+        # Imported only after detection and inside the try, so an import
+        # failure in the tax code cannot break unrelated uploads.
+        from uuid import UUID
+        from app.db import TaxForm
+
+        logger.info(f"Detected tax form: {form_type} for year {tax_year} in file {filename}")
+
+        tax_form = TaxForm(
+            user_id=UUID(user_id),
+            search_space_id=search_space_id,
+            form_type=form_type,
+            tax_year=tax_year,
+            document_id=document.id,
+            processing_status='pending',
+        )
+        session.add(tax_form)
+        await session.commit()
+        await session.refresh(tax_form)
+
+        # TODO: Trigger async parsing task (a Celery task in production).
+        # Until then the row stays 'pending': marking it 'processing' with no
+        # worker attached would strand it in that state.
+        logger.info(
+            f"Created tax form record with ID {tax_form.id} for {form_type}; "
+            f"parser integration pending."
+        )
+    except Exception as e:
+        logger.error(f"Error processing tax form for {filename}: {e}")
+        # Don't fail the document upload if tax parsing fails
+        await session.rollback()
+        try:  # rollback() expires loaded instances, including the document the caller returns
+            await session.refresh(document)
+        except Exception as refresh_error:
+            logger.error(f"Could not reload document after rollback: {refresh_error}")
+
+
 async def _save_investment_holdings(
     session: AsyncSession,
     user_id: str,
@@ -522,6 +630,11 @@ async def add_received_file_document_using_unstructured(
             await session.commit()
             await session.refresh(document)
 
+        # After successful document creation, check if this is a tax form
+        await _process_tax_form_if_applicable(
+            session, document, file_name, user_id, search_space_id
+        )
+
         return document
     except SQLAlchemyError as db_error:
         await session.rollback()
@@ -661,6 +774,11 @@ async def add_received_file_document_using_llamacloud(
             await session.commit()
             await session.refresh(document)
 
+        # After successful document creation, check if this is a tax form
+        await _process_tax_form_if_applicable(
+            session, document, file_name, user_id, search_space_id
+        )
+
         return document
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/financegpt_backend/app/utils/pii_masking.py b/financegpt_backend/app/utils/pii_masking.py
new file mode 100644
index 0000000..97f8286
--- /dev/null
+++ b/financegpt_backend/app/utils/pii_masking.py
@@ -0,0 +1,287 @@
+"""PII masking utilities for tax forms.
+
+This module provides functions to mask personally identifiable information (PII)
+before sending tax form data to LLMs or external services.
+"""
+
+import hashlib
+import re
+from typing import Any
+
+
+def mask_ssn(ssn: str | None, keep_last: int = 4) -> str:
+    """Mask SSN, keeping only the last N digits.
+
+    Args:
+        ssn: Social Security Number (any format: 123-45-6789, 123456789, etc.)
+        keep_last: Number of trailing digits to keep unmasked (default 4).
+            Clamped to 0..9, so zero or a negative value reveals nothing.
+
+    Returns:
+        Masked SSN like "***-**-6789" (dashed input) or "*****6789" (any other
+        input), or "[SSN_REDACTED]" unless the input has exactly 9 digits
+
+    Examples:
+        >>> mask_ssn("123-45-6789")
+        '***-**-6789'
+        >>> mask_ssn("123456789")
+        '*****6789'
+        >>> mask_ssn("123-45-6789", keep_last=0)
+        '***-**-****'
+        >>> mask_ssn("invalid")
+        '[SSN_REDACTED]'
+    """
+    if not ssn:
+        return "[SSN_REDACTED]"
+
+    # SSN must be exactly 9 digits once every non-digit is stripped
+    digits_only = re.sub(r'\D', '', ssn)
+    if len(digits_only) != 9:
+        return "[SSN_REDACTED]"
+
+    # Slice from the front: digits_only[-0:] is the WHOLE string, so a
+    # negative-index slice would leak the full SSN for keep_last <= 0.
+    visible = max(0, min(keep_last, 9))
+    masked = '*' * (9 - visible) + digits_only[9 - visible:]
+    if '-' in ssn:
+        # Re-insert the dashes at the 3-2-4 positions of the dashed layout
+        return f"{masked[:3]}-{masked[3:5]}-{masked[5:]}"
+    return masked
+
+
+def hash_tin(tin: str | None) -> str:
+    """Hash Tax Identification Number (SSN or EIN) using SHA-256.
+
+    NOTE(review): the digest is unsalted and a 9-digit TIN has only 10**9
+    possible values, so it is brute-forceable; treat it as still sensitive.
+
+    Args:
+        tin: SSN or EIN to hash (only its digits are hashed)
+
+    Returns:
+        SHA-256 hex digest (64 hex chars), or "" when ``tin`` has no digits
+
+    Examples:
+        >>> hash_tin("123-45-6789") == hash_tin("123456789")
+        True
+    """
+    # Strip non-digits for consistent hashing; "n/a" must not become sha256("").
+    digits_only = re.sub(r'\D', '', tin or '')
+    if not digits_only:
+        return ""
+    return hashlib.sha256(digits_only.encode('utf-8')).hexdigest()
+
+
+def mask_ein(ein: str | None) -> str:
+    """Return the hashed form of an Employer Identification Number.
+
+    Despite the name nothing is starred out: the EIN is handed to
+    ``hash_tin`` and that function's result is returned unchanged.
+
+    Args:
+        ein: EIN in format XX-XXXXXXX
+
+    Returns:
+        Whatever ``hash_tin(ein)`` returns (never the plaintext EIN)
+    """
+    hashed_ein = hash_tin(ein)
+    return hashed_ein
+
+
+def mask_name(name: str | None, replacement: str = "[NAME_REDACTED]") -> str:
+    """Replace a person's name with a placeholder.
+
+    The name itself is never inspected: empty, None and real names all
+    produce the same placeholder.
+
+    Args:
+        name: Full name to mask (ignored)
+        replacement: Placeholder to return (default: "[NAME_REDACTED]")
+
+    Returns:
+        The ``replacement`` text, unchanged
+
+    Examples:
+        >>> mask_name("John Smith")
+        '[NAME_REDACTED]'
+        >>> mask_name("John Smith", "[EMPLOYEE]")
+        '[EMPLOYEE]'
+    """
+    return replacement
+
+
+def mask_address(address: str | None, replacement: str = "[ADDRESS_REDACTED]") -> str:
+    """Replace a full address with a placeholder.
+
+    The address is never inspected; every input yields ``replacement``.
+
+    Args:
+        address: Full address to mask (ignored)
+        replacement: Placeholder to return (default: "[ADDRESS_REDACTED]")
+
+    Returns:
+        The ``replacement`` text, unchanged
+
+    Examples:
+        >>> mask_address("123 Main St, New York, NY 10001")
+        '[ADDRESS_REDACTED]'
+    """
+    masked = replacement
+    return masked
+
+
+def mask_tax_form_for_llm(form_data: dict[str, Any], form_type: str) -> dict[str, Any]:
+    """Mask all PII in a tax form before sending to LLM.
+
+    This function removes or masks:
+    - SSN (keep last 4 for display)
+    - EIN/TIN (hash completely; the value moves to a ``<field>_hash`` key)
+    - Employee and recipient names (replace with placeholders)
+    - Addresses (replace with placeholders)
+
+    Employer and payer names are deliberately kept (they give the LLM useful
+    context), and financial data (wages, taxes, etc.) is NOT masked.
+
+    Args:
+        form_data: Dictionary containing tax form data (left unmodified)
+        form_type: Type of form (W2, 1099-MISC, etc.). Currently unused: the
+            same field list is applied whatever the form type.
+
+    Returns:
+        Shallow copy of ``form_data`` with PII masked; nested values are shared
+        with the input, not copied
+
+    Examples:
+        >>> w2_data = {
+        ...     "employee_ssn": "123-45-6789",
+        ...     "employer_ein": "12-3456789",
+        ...     "employee_name": "John Smith",
+        ...     "wages": 75000.00,
+        ... }
+        >>> masked = mask_tax_form_for_llm(w2_data, "W2")
+        >>> masked["employee_ssn"]
+        '***-**-6789'
+        >>> masked["employee_name"]
+        '[EMPLOYEE_NAME]'
+        >>> "employer_ein" in masked
+        False
+        >>> masked["wages"]
+        75000.0
+    """
+    masked_data = form_data.copy()
+
+    # Mask SSNs (keep last 4 for context)
+    for key in ("employee_ssn", "recipient_ssn"):
+        if key in masked_data:
+            masked_data[key] = mask_ssn(masked_data[key])
+
+    # Hash EINs/TINs (never show plaintext): "<key>" is replaced by "<key>_hash"
+    for key in ("employer_ein", "payer_tin", "recipient_tin"):
+        if key in masked_data:
+            masked_data[f"{key}_hash"] = hash_tin(masked_data.pop(key))
+
+    # Mask personal names. The 1099 recipient is an individual just like the
+    # W-2 employee, so recipient_name must not reach the LLM either.
+    # NOTE(review): "recipient_name"/"recipient_address" are assumed to follow
+    # the recipient_ssn / recipient_tin key naming — confirm against the parser.
+    name_placeholders = {
+        "employee_name": "[EMPLOYEE_NAME]",
+        "recipient_name": "[RECIPIENT_NAME]",
+    }
+    for key, placeholder in name_placeholders.items():
+        if key in masked_data:
+            masked_data[key] = mask_name(masked_data[key], placeholder)
+
+    # employer_name / payer_name (e.g., "Vanguard", "Chase Bank") are kept:
+    # they help the LLM understand the employment or account context.
+
+    # Mask addresses
+    address_keys = (
+        "employee_address",
+        "recipient_address",
+        "employer_address",
+        "payer_address",
+    )
+    for key in address_keys:
+        if key in masked_data:
+            masked_data[key] = mask_address(masked_data[key])
+
+    # Financial data is NOT masked - it's needed for analysis
+    # This includes: wages, taxes withheld, interest income, dividends, etc.
+
+    return masked_data
+
+
+def prepare_tax_form_for_storage(form_data: dict[str, Any]) -> dict[str, Any]:
+    """Prepare tax form data for database storage with proper hashing.
+
+    This function:
+    - Hashes SSNs/EINs/TINs (stores hash only, never plaintext): each such
+      key is replaced by ``<key>_hash``
+    - Replaces ``employee_name`` with ``employee_name_masked``
+    - Keeps employer/payer names (useful for queries) and financial data
+
+    Args:
+        form_data: Dictionary containing raw tax form data (left unmodified)
+
+    Returns:
+        Shallow copy of ``form_data`` ready for database insertion
+
+    Examples:
+        >>> stored = prepare_tax_form_for_storage({"recipient_ssn": "123-45-6789"})
+        >>> sorted(stored)
+        ['recipient_ssn_hash']
+    """
+    storage_data = form_data.copy()
+
+    # Hash all TINs for storage (never store plaintext).
+    # recipient_ssn is included because mask_tax_form_for_llm treats it as an
+    # SSN field; left out here it would pass through to storage in plaintext.
+    # NOTE(review): confirm the table has a recipient_ssn_hash column.
+    tin_keys = (
+        "employee_ssn",
+        "recipient_ssn",
+        "employer_ein",
+        "payer_tin",
+        "recipient_tin",
+    )
+    for key in tin_keys:
+        if key in storage_data:
+            storage_data[f"{key}_hash"] = hash_tin(storage_data.pop(key))
+
+    # Mask employee name for storage
+    if "employee_name" in storage_data:
+        del storage_data["employee_name"]
+        storage_data["employee_name_masked"] = "[EMPLOYEE_NAME]"
+    return storage_data
+
+
+def validate_confidence_threshold(
+    confidence_scores: dict[str, float],
+    threshold: float = 0.85
+) -> tuple[bool, list[str]]:
+    """Report which fields fall below a confidence threshold.
+
+    A field fails only when its score is strictly less than ``threshold``;
+    failed fields are listed in the dictionary's iteration order.
+
+    Args:
+        confidence_scores: Dictionary of field -> confidence score
+        threshold: Minimum acceptable confidence (default 0.85)
+
+    Returns:
+        Tuple of (all_passed, list_of_failed_fields)
+
+    Examples:
+        >>> scores = {"wages": 0.95, "federal_tax": 0.80, "ssn": 0.90}
+        >>> validate_confidence_threshold(scores, 0.85)
+        (False, ['federal_tax'])
+        >>> validate_confidence_threshold({})
+        (True, [])
+    """
+    below_threshold = [
+        name
+        for name, confidence in confidence_scores.items()
+        if confidence < threshold
+    ]
+    return not below_threshold, below_threshold
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..2e36183
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# ==============================================================================
+# FinanceGPT Quick Start Script
+# ==============================================================================
+# Usage: ./run.sh [command]
+#
+# Commands:
+#   start   - Start FinanceGPT (default)
+#   stop    - Stop FinanceGPT
+#   restart - Restart FinanceGPT
+#   logs    - Show logs (follow mode)
+#   status  - Show container status
+#   update  - Pull latest image and restart
+#   clean   - Stop and remove all data (DESTRUCTIVE!)
+
+set -e
+# Use the compose file in the current directory if present, else the one next to this script.
+COMPOSE_FILE="docker-compose.quickstart.yml"
+CONTAINER_NAME="financegpt"
+[ -f "$COMPOSE_FILE" ] || COMPOSE_FILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$COMPOSE_FILE"
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+print_banner() { # Print the boxed product title, wrapped in the BLUE / NC color codes.
+    printf '%b\n' "${BLUE}"
+    printf '%s\n' "╔═══════════════════════════════════════════╗" \
+        "║         FinanceGPT All-in-One             ║" \
+        "╚═══════════════════════════════════════════╝"
+    printf '%b\n' "${NC}"
+}
+
+print_status() { # Success line: green check mark, then the message in $1 (escapes expanded).
+    printf '%b\n' "${GREEN}✓${NC} $1"
+}
+
+print_warning() { # Warning line: yellow warning sign, then the message in $1 (escapes expanded).
+    printf '%b\n' "${YELLOW}⚠${NC} $1"
+}
+
+print_error() { # Error line: red cross, then $1. Written to stderr so it survives stdout redirection.
+    echo -e "${RED}✗${NC} $1" >&2
+}
+
+check_docker() { # Exit 1 unless a docker command exists and `docker info` succeeds.
+    if ! command -v docker >/dev/null 2>&1; then
+        print_error "Docker is not installed. Please install Docker first."
+        echo "  → https://docs.docker.com/get-docker/"
+        exit 1
+    fi
+    # Second probe: the command being present does not mean `docker info` works.
+    docker info >/dev/null 2>&1 || {
+        print_error "Docker is not running. Please start Docker first."
+        exit 1
+    }
+}
+
+start() { # Pull the image, start the stack detached and print where to reach it.
+    print_banner
+    check_docker
+
+    echo "Starting FinanceGPT..."
+
+    # Ports for the URLs below: shell environment first, then .env, then defaults.
+    # NOTE(review): assumes the compose file maps FRONTEND_PORT/BACKEND_PORT — confirm.
+    local frontend_port="${FRONTEND_PORT:-}" backend_port="${BACKEND_PORT:-}"
+    if [ -f ".env" ]; then
+        print_status "Using configuration from .env"
+        [ -n "$frontend_port" ] || frontend_port="$(sed -n 's/^FRONTEND_PORT=\([0-9][0-9]*\).*/\1/p' .env | tail -n 1)"
+        [ -n "$backend_port" ] || backend_port="$(sed -n 's/^BACKEND_PORT=\([0-9][0-9]*\).*/\1/p' .env | tail -n 1)"
+    else
+        print_warning "No .env file found. Using defaults."
+        echo "  → Copy .env.example to .env to customize settings"
+    fi
+
+    printf '\n%s\n' "Pulling latest image (if needed)..."
+    docker compose -f "$COMPOSE_FILE" pull
+
+    printf '\n%s\n' "Starting container..."
+    docker compose -f "$COMPOSE_FILE" up -d
+
+    printf '\n'
+    print_status "FinanceGPT is starting!"
+    echo ""
+    echo "  Frontend:  http://localhost:${frontend_port:-3000}"
+    echo "  Backend:   http://localhost:${backend_port:-8000}"
+    echo "  API Docs:  http://localhost:${backend_port:-8000}/docs"
+    echo ""
+    echo "  Note: First startup takes ~2 minutes to initialize databases."
+    echo "  Run './run.sh logs' to watch startup progress."
+}
+
+stop() { # Show the banner, run `compose down`, then report success.
+    print_banner
+    printf '%s\n' "Stopping FinanceGPT..."
+    docker compose -f "${COMPOSE_FILE}" down
+    print_status "FinanceGPT stopped."
+}
+
+restart() { # Show the banner, run `compose restart`, then report success.
+    print_banner
+    printf '%s\n' "Restarting FinanceGPT..."
+    docker compose -f "${COMPOSE_FILE}" restart
+    print_status "FinanceGPT restarted."
+}
+
+logs() { # Run `compose logs -f` (follow mode) for the stack in $COMPOSE_FILE.
+    docker compose -f "$COMPOSE_FILE" logs -f
+}
+
+status() { # Show `compose ps`, then the health of $CONTAINER_NAME if it is running.
+    # health is local so the function does not leak a global into the script.
+    local health
+    print_banner
+    echo "Container Status:"
+    echo ""
+    docker compose -f "$COMPOSE_FILE" ps
+    echo ""
+
+    # Anchored match: only a container named exactly $CONTAINER_NAME counts.
+    if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
+        health=$(docker inspect --format='{{.State.Health.Status}}' "$CONTAINER_NAME" 2>/dev/null || echo "unknown")
+        echo "Health: $health"
+
+        case "$health" in
+            healthy)  print_status "FinanceGPT is running and healthy!" ;;
+            starting) print_warning "FinanceGPT is still starting up..." ;;
+            *)        print_warning "Health status: $health" ;;
+        esac
+    else
+        print_warning "FinanceGPT is not running."
+    fi
+}
+
+update() { # Pull the latest image, then bring the stack up detached.
+    local -a compose=(docker compose -f "$COMPOSE_FILE")
+    print_banner
+    printf '%s\n' "Updating FinanceGPT..."
+
+    printf '%s\n' "Pulling latest image..."
+    "${compose[@]}" pull
+    printf '%s\n' "Restarting with new image..."
+    "${compose[@]}" up -d
+
+    print_status "FinanceGPT updated!"
+}
+
+clean() { # After a typed "yes", remove the containers, their volumes and the data volume.
+    # read -r keeps backslashes literal: without it "y\es" is read as "yes" and confirms the wipe.
+    local confirm
+    print_banner
+    print_warning "This will DELETE all FinanceGPT data!"
+    echo ""
+    read -r -p "Are you sure? Type 'yes' to confirm: " confirm
+
+    if [ "$confirm" != "yes" ]; then
+        echo "Cancelled."
+        return 0
+    fi
+    echo "Stopping and removing containers..."
+    docker compose -f "$COMPOSE_FILE" down -v
+    echo "Removing data volume..."
+    docker volume rm financegpt-data 2>/dev/null || true  # best effort: ignore a missing volume
+    print_status "All FinanceGPT data has been removed."
+}
+
+# Main command handler
+#
+# The first argument names the command; with no argument the script acts
+# like `./run.sh start`. An unrecognised command prints the usage text and
+# exits with status 1.
+
+usage() {
+    cat <<EOF
+Usage: $0 {start|stop|restart|logs|status|update|clean}
+
+Commands:
+  start   - Start FinanceGPT (default)
+  stop    - Stop FinanceGPT
+  restart - Restart FinanceGPT
+  logs    - Show logs (follow mode)
+  status  - Show container status
+  update  - Pull latest image and restart
+  clean   - Stop and remove all data (DESTRUCTIVE!)
+EOF
+}
+
+# Each known command maps to the function of the same name defined above.
+requested="${1:-start}"
+case "$requested" in
+    start)   start ;;
+    stop)    stop ;;
+    restart) restart ;;
+    logs)    logs ;;
+    status)  status ;;
+    update)  update ;;
+    clean)   clean ;;
+    *)
+        # Anything else is a usage error.
+        usage
+        exit 1
+        ;;
+esac
diff --git a/scripts/docker/init-postgres.sh b/scripts/docker/init-postgres.sh
index 4d9c66d..bd8acb1 100644
--- a/scripts/docker/init-postgres.sh
+++ b/scripts/docker/init-postgres.sh
@@ -59,17 +59,16 @@ CREATE USER $POSTGRES_USER WITH PASSWORD '$POSTGRES_PASSWORD' SUPERUSER;
 CREATE DATABASE $POSTGRES_DB OWNER $POSTGRES_USER;
 \c $POSTGRES_DB
 CREATE EXTENSION IF NOT EXISTS vector;
-
--- Create Electric SQL replication user
-CREATE USER $ELECTRIC_DB_USER WITH REPLICATION PASSWORD '$ELECTRIC_DB_PASSWORD';
-GRANT CONNECT ON DATABASE $POSTGRES_DB TO $ELECTRIC_DB_USER;
-GRANT USAGE ON SCHEMA public TO $ELECTRIC_DB_USER;
-GRANT SELECT ON ALL TABLES IN SCHEMA public TO $ELECTRIC_DB_USER;
-GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO $ELECTRIC_DB_USER;
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO $ELECTRIC_DB_USER;
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO $ELECTRIC_DB_USER;
 EOF
 
+# Run the same Electric SQL setup script used by local docker-compose
+# This ensures both setups are identical
+export POSTGRES_USER
+export POSTGRES_DB
+export ELECTRIC_DB_USER
+export ELECTRIC_DB_PASSWORD
+/app/init-electric-user.sh
+
 echo "PostgreSQL initialized successfully."
 
 # Stop PostgreSQL (supervisor will start it)
diff --git a/start-financegpt.sh b/start-financegpt.sh
deleted file mode 100755
index 94b8b44..0000000
--- a/start-financegpt.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-# FinanceGPT - Quick Start Script
-
-echo "=================================="
-echo "FinanceGPT - Quick Start"
-echo "=================================="
-echo ""
-
-# Check if Docker is running
-if ! docker info > /dev/null 2>&1; then
-    echo "❌ Docker is not running. Please start Docker Desktop first."
-    exit 1
-fi
-
-echo "✓ Docker is running"
-echo ""
-
-# Check if we're in the right directory
-if [ ! -f "docker-compose.quickstart.yml" ]; then
-    echo "❌ Error: docker-compose.quickstart.yml not found"
-    echo "   Please run this script from the FinanceGPT root directory"
-    exit 1
-fi
-
-echo "Starting FinanceGPT with Docker Compose..."
-echo ""
-
-# Pull latest images
-echo "📥 Pulling latest images..."
-docker compose -f docker-compose.quickstart.yml pull
-
-echo ""
-echo "🚀 Starting services..."
-docker compose -f docker-compose.quickstart.yml up -d
-
-echo ""
-echo "⏳ Waiting for services to be ready..."
-sleep 5
-
-# Check if containers are running
-if docker ps | grep -q financegpt; then
-    echo ""
-    echo "=================================="
-    echo "✅ FinanceGPT is running!"
-    echo "=================================="
-    echo ""
-    echo "🌐 Frontend: http://localhost:3000"
-    echo "🔧 Backend API: http://localhost:8000"
-    echo "📊 API Docs: http://localhost:8000/docs"
-    echo ""
-    echo "📝 To upload financial statements:"
-    echo "   1. Go to http://localhost:3000"
-    echo "   2. Create an account / Log in"
-    echo "   3. Upload your CSV/OFX files from Chase, Fidelity, etc."
-    echo "   4. Ask questions about your finances!"
-    echo ""
-    echo "🛑 To stop: docker compose -f docker-compose.quickstart.yml down"
-    echo "📋 View logs: docker compose -f docker-compose.quickstart.yml logs -f"
-    echo ""
-else
-    echo ""
-    echo "❌ Failed to start FinanceGPT"
-    echo "   Check logs with: docker compose -f docker-compose.quickstart.yml logs"
-    exit 1
-fi