diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..bfe7b37
Binary files /dev/null and b/.DS_Store differ
diff --git a/Dockerfile.allinone b/Dockerfile.allinone
index f12dd1a..6cb376b 100644
--- a/Dockerfile.allinone
+++ b/Dockerfile.allinone
@@ -255,14 +255,22 @@ ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0
 ENV PYTHONPATH=/app/backend
 ENV NEXT_FRONTEND_URL=http://localhost:3000
 ENV AUTH_TYPE=LOCAL
-ENV ETL_SERVICE=DOCLING
+ENV ETL_SERVICE=UNSTRUCTURED
 ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 
+# Asyncio loop configuration (required for unstructured/uvicorn compatibility)
+ENV UVICORN_LOOP=asyncio
+ENV UNSTRUCTURED_HAS_PATCHED_LOOP=1
+
+# Disable LangSmith tracing by default (avoids SSL errors with corporate proxies)
+ENV LANGCHAIN_TRACING_V2=false
+ENV LANGSMITH_TRACING=false
+
 # Frontend configuration (can be overridden at runtime)
 # These are injected into the Next.js build at container startup
 ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
 ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
-ENV NEXT_PUBLIC_ETL_SERVICE=DOCLING
+ENV NEXT_PUBLIC_ETL_SERVICE=UNSTRUCTURED
 
 # Electric SQL configuration (ELECTRIC_DATABASE_URL is built dynamically by entrypoint from these values)
 ENV ELECTRIC_DB_USER=electric
diff --git a/README.md b/README.md
index 6e69f8c..5d8fb2e 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,11 @@
 
 <div align="center">
 
-**Your AI-Powered Personal Finance Assistant**
+<img src="financegpt.png" alt="FinanceGPT Logo" width="180" />
 
-An intelligent financial management platform that helps you track spending, optimize rewards, analyze investments, and make smarter money decisions using AI.
+### Your AI-Powered Personal CPA
+
+**Connect bank accounts • Upload tax forms • Get instant insights**
 
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
 [![TypeScript](https://img.shields.io/badge/TypeScript-007ACC?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
@@ -12,354 +14,179 @@ An intelligent financial management platform that helps you track spending, opti
 [![FastAPI](https://img.shields.io/badge/FastAPI-009688?logo=fastapi&logoColor=white)](https://fastapi.tiangolo.com/)
 [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=white)](https://www.python.org/)
 
-[Features](#features) • [Quick Start](#quick-start) • [Architecture](#architecture) • [Documentation](#documentation) • [Contributing](#contributing)
+[Quick Start](#-quick-start) • [Features](#-features) • [Example Prompts](#-example-prompts) • [Privacy](#-privacy-first-design) • [Contributing](#-contributing)
 
 </div>
 
 ---
 
-## 🌟 Features
+## 🚀 Quick Start
 
-### 🤖 AI-Powered Financial Advisor
-- **Smart Transaction Search**: Search your financial history using natural language queries
-  - "How much did I spend on restaurants last year?"
-  - "Show me all charges from United Airlines"
-  - "Find my recurring subscriptions"
-- **Personalized Recommendations**: Get AI-driven suggestions for saving money and optimizing rewards
-- **Natural Language Queries**: Ask questions about your finances in plain English
-- **Predictive Analytics**: Forecast future spending and identify potential savings
+Choose your preferred setup method:
 
-### 💳 Smart Credit Card Optimization
-- **Rewards Analysis**: Analyze your spending patterns to find the best credit cards
-  - "Which credit card should I use for restaurant purchases?"
-  - "Am I using the right credit card for my spending?"
-- **Category-Based Optimization**: Get recommendations based on merchants and categories
-- **Multi-Card Strategy**: Optimize rewards across multiple credit cards
+### Option 1: All-in-One Docker (Easiest)
 
-### 📈 Investment Portfolio Management
-- **Real-Time Performance Tracking**: Monitor your investment returns with live market data
-  - "How are my stocks performing today?"
-  - "What's my portfolio return over the last year?"
-  - "Show my month-over-month performance"
-- **Yahoo Finance Integration**: Fetches real-time and historical stock prices
-- **Cost Basis Tracking**: Calculate unrealized gains/losses across all holdings
-- **Time-Based Analysis**: Week-over-week, month-over-month, quarterly, and yearly performance
-
-### 🎯 Portfolio Allocation & Rebalancing
-- **Asset Allocation Analysis**: Understand your portfolio composition (stocks/bonds/cash)
-  - "Is my portfolio allocation correct?"
-  - "How should I rebalance according to Bogleheads philosophy?"
-- **Geographic Diversification**: Track US vs international exposure
-- **Investment Philosophy Comparison**: Compare against established strategies
-  - Bogleheads Conservative (40/50/10)
-  - Bogleheads Moderate (60/35/5)
-  - Bogleheads Aggressive (90/10/0)
-  - Three-Fund Portfolio
-- **Specific Rebalancing Recommendations**: Get dollar amounts for buying/selling
-- **Alignment Score**: See how well your portfolio matches your target allocation (0-100)
-
-### 💰 Tax Optimization
-- **Tax Loss Harvesting**: Identify opportunities to reduce your tax liability
-  - "Can I harvest any tax losses?"
-  - "What positions should I sell for tax savings?"
-- **Loss Identification**: Finds holdings with unrealized losses
-- **Tax Savings Calculator**: Estimates tax benefits based on your tax bracket
-- **Replacement Suggestions**: Recommends similar securities to avoid wash sales
-- **Wash Sale Warnings**: Alerts about IRS rules and compliance
-
-### 💳 Transaction & Spending Analysis
-- **Multi-Account Aggregation**: Connect bank accounts, credit cards, and investment platforms via Plaid
-- **Real-Time Tracking**: Monitor balances, transactions, and net worth in real-time
-- **Subscription Detection**: Identify and track recurring payments automatically
-  - "Check if I have any recurring subscriptions"
-  - "What am I paying monthly?"
-- **Category-Based Search**: Find transactions by category (restaurants, travel, etc.)
-- **Merchant Search**: Search by merchant name with fuzzy matching
-
-### 📊 Analytics & Reporting
-- **Interactive Dashboards**: Visualize spending trends, income, and investments
-- **Custom Reports**: Generate detailed financial reports and summaries
-- **Budget Management**: Set and monitor budgets with smart alerts
-- **Historical Comparisons**: Compare spending and performance across time periods
-
-### 🔒 Security & Privacy
-- **Bank-Level Encryption**: 256-bit SSL encryption for all data
-- **Secure Authentication**: OAuth 2.0 and Google Sign-In support
-- **Data Privacy**: Your financial data stays private and secure
-
-### 🔗 Integrations
-- **100+ Financial Institutions**: Banks, credit cards, investment platforms, crypto exchanges
-- **Plaid Integration**: Secure connection to financial accounts
-- **Yahoo Finance**: Real-time stock prices and historical market data
-- **Real-Time Sync**: Automatic transaction updates
-- **Manual Uploads**: Support for CSV files (bank statements, Fidelity positions, etc.)
-- **Export Options**: Download your data anytime
+```bash
+# Clone and run with a single container
+git clone https://github.com/yourusername/FinanceGPT.git
+cd FinanceGPT
+
+# Copy environment file and add your API keys
+cp .env.example .env
+# Edit .env with your OPENAI_API_KEY, PLAID_CLIENT_ID, PLAID_SECRET
+
+# Start FinanceGPT
+docker compose -f docker-compose.quickstart.yml up -d
+```
+
+🎉 **Open http://localhost:3000** — You're done!
 
 ---
 
-## 💬 Example Prompts
+### Option 2: Local Development (macOS)
 
-### Transaction Search & Analysis
-```
-"How much did I spend on restaurants last year?"
-"Show me all United Airlines charges"
-"Find transactions over $100 in the last month"
-"What did I spend on groceries this week?"
-"Show my Amazon purchases"
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/FinanceGPT.git
+cd FinanceGPT
+
+# Start infrastructure (PostgreSQL, Redis)
+docker compose up -d db redis electric
+
+# Run the dev script (opens 3 terminal tabs automatically)
+chmod +x dev.sh
+./dev.sh
 ```
 
-### Credit Card Optimization
+This starts:
+- 🔧 **Backend API** on http://localhost:8000
+- 🔄 **Celery Worker** for background tasks
+- 🌐 **Frontend** on http://localhost:3000
+
+---
+
+### Option 3: Full Docker Stack
+
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/FinanceGPT.git
+cd FinanceGPT
+
+# Configure environment
+cp financegpt_backend/.env.example financegpt_backend/.env
+# Edit .env with your API keys
+
+# Build and run all services
+docker compose up -d --build
+
+# View logs
+docker compose logs -f
 ```
-"Which credit card should I use for restaurants?"
-"Am I using the right credit card for gas purchases?"
-"What's the best card for my travel spending?"
-"Optimize my credit card usage"
+
+---
+
+## 🔐 Privacy-First Design
+
+Your financial data is sensitive. FinanceGPT is built with privacy as a core principle:
+
+| Feature | How It Protects You |
+|---------|---------------------|
+| **🔒 PII Masking** | SSN and EIN are **masked before any LLM call** (`123-45-6789` → `XXX-XX-XXXX`). Your tax forms never expose sensitive IDs. |
+| **🏠 Self-Hostable** | Run entirely on your own hardware. Your data never leaves your machine. |
+| **🤖 BYO Model** | Use your own LLM (OpenAI, Anthropic, or **local Ollama**). No vendor lock-in. |
+| **🔐 Local Processing** | Sensitive field extraction (SSN, EIN) happens locally—not via cloud APIs. |
+| **🗄️ Your Database** | All data stored in your PostgreSQL instance. Export or delete anytime. |
+| **🚫 No Telemetry** | Zero tracking, zero analytics, zero data collection. |
+
+```python
+# Example: How we handle your W2
+raw_text = "SSN: 123-45-6789, Wages: $183,000"
+masked_text = mask_pii_in_text(raw_text)  
+# → "SSN: XXX-XX-XXXX, Wages: $183,000"
+# Only masked_text is sent to the LLM
 ```
 
-### Investment Performance
+---
+
+## 💬 Example Prompts
+
+Just ask questions in plain English. FinanceGPT understands context.
+
+### 💰 Income & Tax Questions
 ```
-"How are my stocks performing today?"
-"What's my portfolio return over the last year?"
-"Show my month-over-month investment performance"
-"How much have my investments grown this quarter?"
-"What's my total portfolio value?"
+"How much did I earn in 2024?"
+"What was my total federal tax withheld?"
+"Will I get a tax refund this year?"
+"Show me my W2 summary"
+"What state taxes did I pay?"
 ```
 
-### Portfolio Allocation & Rebalancing
+### 📊 Spending Analysis
 ```
-"Is my portfolio allocation correct?"
-"How should I rebalance according to Bogleheads philosophy?"
-"What's my exposure to international stocks?"
-"Am I too heavily invested in US stocks?"
-"Should I buy more bonds or stocks?"
-"Compare my portfolio to a three-fund strategy"
+"How much did I spend on restaurants last month?"
+"What are my recurring subscriptions?"
+"Find all Amazon purchases over $100"
+"What's my biggest expense category?"
+"Show spending trends for the last 3 months"
 ```
 
-### Tax Loss Harvesting
+### 💳 Credit Card Optimization
 ```
-"Can I harvest any tax losses?"
-"What stocks should I sell for tax losses?"
-"How much can I save in taxes by tax loss harvesting?"
-"Are there any positions with unrealized losses?"
-"Show me tax optimization opportunities"
+"Which card should I use for groceries?"
+"Am I using the right credit card for travel?"
+"How much rewards am I missing out on?"
+"Optimize my credit card usage"
 ```
 
-### Subscriptions & Recurring Payments
+### 📈 Investment Portfolio
 ```
-"Check if I have any recurring subscriptions"
-"What subscriptions am I paying for?"
-"Find all my monthly recurring charges"
-"Which services am I subscribed to?"
+"How are my stocks performing today?"
+"What's my portfolio return this year?"
+"Is my allocation correct for my age?"
+"Should I rebalance according to Bogleheads?"
+"Can I harvest any tax losses?"
 ```
 
-### Financial Planning
+### 🏦 Account Overview
 ```
 "What's my net worth?"
-"How much am I saving each month?"
-"Show my spending trends over the last 3 months"
-"What's my biggest expense category?"
+"Show all my account balances"
+"How much do I have in savings?"
+"What's my monthly cash flow?"
 ```
 
 ---
 
-## �️ AI Tools & Capabilities
-
-FinanceGPT uses specialized AI tools to analyze your financial data and provide actionable insights:
-
-### 1. **Transaction Search** (`search_transactions`)
-- Searches through all your financial transactions using keywords and categories
-- Supports both Plaid-connected accounts and manual CSV uploads
-- Fuzzy merchant name matching for accurate results
-- Date range filtering and category-based filtering
-- Returns transaction summaries with totals and breakdowns
-
-### 2. **Credit Card Optimizer** (`optimize_credit_card_usage`)
-- Analyzes spending patterns to recommend optimal credit cards
-- Matches merchant categories to card rewards programs
-- Compares rewards rates across multiple cards
-- Provides specific recommendations per spending category
-- Supports both manual uploads and Plaid data
-
-### 3. **Portfolio Performance** (`calculate_portfolio_performance`)
-- Fetches real-time stock prices from Yahoo Finance
-- Calculates returns over custom time periods (day, week, month, quarter, year)
-- Compares current prices to historical prices for accurate performance
-- Shows individual holding performance and total portfolio returns
-- Supports both snapshot comparisons and live price lookups
-
-### 4. **Portfolio Allocation Analyzer** (`analyze_portfolio_allocation`)
-- Analyzes asset class distribution (stocks/bonds/cash)
-- Calculates geographic exposure (US vs international)
-- Compares portfolio to investment philosophies (Bogleheads, Three-Fund)
-- Provides specific rebalancing recommendations with dollar amounts
-- Gives alignment score (0-100) showing how close you are to target
-
-### 5. **Tax Loss Harvesting** (`analyze_tax_loss_harvesting`)
-- Identifies positions with unrealized losses
-- Calculates potential tax savings based on your tax bracket
-- Suggests replacement securities to avoid wash sales
-- Provides sell/buy recommendations for tax optimization
-- Warns about wash sale rules and compliance
-
-### 6. **Subscription Finder** (`find_subscriptions`)
-- Automatically detects recurring charges in your transaction history
-- Identifies monthly, quarterly, and annual subscriptions
-- Calculates total subscription costs
-- Helps you find forgotten or unused subscriptions
-
-### 7. **Knowledge Base Search** (`search_knowledge_base`)
-- Searches across all your uploaded financial documents
-- Vector-based semantic search for accurate results
-- Supports PDFs, CSVs, bank statements, and investment reports
-- Context-aware retrieval for answering complex questions
-
----
-
-## �🚀 Quick Start
-
-### Option 1: Quick Start (Recommended for Testing)
-
-The fastest way to try FinanceGPT:
-
-```bash
-# 1. Clone the repository
-git clone https://github.com/yourusername/FinanceGPT.git
-cd FinanceGPT
-
-# 2. Run the quick start script
-chmod +x start-financegpt.sh
-./start-financegpt.sh
-```
-
-This uses pre-built Docker images and requires minimal configuration. Access the app at **http://localhost:3000**
-
-### Option 2: Development Setup
-
-For development with full customization:
-
-#### Prerequisites
-
-- **Docker** and **Docker Compose** (required)
-- **Node.js** 18+ and **pnpm** (for frontend development)
-- **Python** 3.11+ (for backend development)
-
-#### Required Environment Variables
-
-Before starting, you **must** configure these essential variables:
-
-1. **Create backend environment file:**
-   ```bash
-   cp financegpt_backend/.env.example financegpt_backend/.env
-   ```
-
-2. **Edit `financegpt_backend/.env` and set these required variables:**
-
-   ```env
-   # === REQUIRED: LLM API Key ===
-   # You MUST configure at least one LLM provider for FinanceGPT to work
-   # Get your API key from the respective provider:
-   
-   # OpenAI (recommended for best results)
-   OPENAI_API_KEY=sk-...
-   
-   # OR Anthropic Claude
-   ANTHROPIC_API_KEY=sk-ant-...
-   
-   # OR Google Gemini (free tier available)
-   GOOGLE_API_KEY=AIza...
-   
-   # === REQUIRED: Plaid (for bank account connections) ===
-   # Sign up at https://dashboard.plaid.com/team/keys
-   PLAID_CLIENT_ID=your_plaid_client_id
-   PLAID_SECRET=your_plaid_secret
-   PLAID_ENV=sandbox  # Use 'sandbox' for testing
-   
-   # === REQUIRED: Security ===
-   SECRET_KEY=your-random-secret-key-change-this-in-production
-   
-   # === Optional: Document Processing ===
-   # Only needed if you want to parse PDFs/documents
-   # Get free API key from https://unstructured.io/
-   # UNSTRUCTURED_API_KEY=your_key_here
-   ```
-
-3. **Create frontend environment file:**
-   ```bash
-   cp financegpt_web/.env.example financegpt_web/.env.local
-   ```
-
-   Edit `financegpt_web/.env.local`:
-   ```env
-   # Backend API URL
-   NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
-   
-   # Auth type (LOCAL or GOOGLE)
-   NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
-   
-   # Document parsing service
-   NEXT_PUBLIC_ETL_SERVICE=DOCLING
-   ```
-
-#### Installation Steps
-
-1. **Start infrastructure services (PostgreSQL, Redis, etc.)**
-   ```bash
-   docker-compose up -d db redis electric pgadmin
-   ```
-
-2. **Install and run backend**
-   ```bash
-   cd financegpt_backend
-   
-   # Create virtual environment
-   python -m venv .venv
-   source .venv/bin/activate  # On Windows: .venv\Scripts\activate
-   
-   # Install dependencies
-   pip install -e .
-   
-   # Run database migrations
-   alembic upgrade head
-   
-   # Start the backend server
-   uvicorn main:app --reload --host 0.0.0.0 --port 8000
-   ```
-
-3. **In a new terminal, install and run frontend**
-   ```bash
-   cd financegpt_web
-   
-   # Install dependencies
-   pnpm install
-   
-   # Start development server
-   pnpm dev
-   ```
-
-4. **Access the application**
-   - 🌐 **Frontend**: http://localhost:3000
-   - 🔧 **Backend API**: http://localhost:8000
-   - 📚 **API Documentation**: http://localhost:8000/docs
-   - 🗄️ **pgAdmin** (Database UI): http://localhost:5050
-     - Email: `admin@financegpt.com`
-     - Password: `financegpt`
-
-### Option 3: Full Docker Deployment
-
-Build and run everything with Docker:
-
-```bash
-# Build and start all services
-docker-compose up -d
+## 🌟 Features
 
-# View logs
-docker-compose logs -f
+### 🤖 AI-Powered Financial Advisor
+- **Natural Language Queries**: Ask questions about your finances in plain English
+- **Smart Transaction Search**: "How much did I spend on restaurants last year?"
+- **Personalized Recommendations**: AI-driven suggestions for saving money
+- **Tax Form Analysis**: Upload W2s, 1099s and get instant summaries
 
-# Stop all services
-docker-compose down
-```
+### 💳 Smart Credit Card Optimization
+- **Rewards Maximization**: Get the best card for each purchase category
+- **Spending Pattern Analysis**: Identify where you're leaving money on the table
+- **Multi-Card Strategy**: Optimize rewards across all your cards
 
-Access at **http://localhost:3000**
+### 📈 Investment Portfolio Management
+- **Real-Time Performance**: Track returns with live Yahoo Finance data
+- **Time-Based Analysis**: WoW, MoM, QoQ, YoY performance tracking
+- **Tax Loss Harvesting**: Find opportunities to reduce your tax bill
+- **Rebalancing Recommendations**: Compare to Bogleheads, Three-Fund Portfolio
+
+### 📋 Tax Document Processing
+- **Supported Forms**: W2, 1099-INT, 1099-DIV, 1099-B, 1099-MISC, 1095-C
+- **LLM-Powered Extraction**: Accurate parsing with structured output
+- **Tax Estimate**: Calculate potential refund or amount owed
+- **State Tax Support**: Extracts state wages and withholdings
+
+### 💰 Transaction & Spending Analysis
+- **100+ Financial Institutions**: Connect via Plaid
+- **Subscription Detection**: Find forgotten recurring charges
+- **Category Analysis**: Understand where your money goes
+- **Historical Comparisons**: Compare spending across time periods
 
 ---
 
@@ -367,93 +194,68 @@ Access at **http://localhost:3000**
 
 ### Tech Stack
 
-#### Frontend (`financegpt_web/`)
-- **Framework**: Next.js 15 with App Router
-- **Language**: TypeScript
-- **Styling**: Tailwind CSS
-- **UI Components**: Shadcn/ui, Radix UI
-- **State Management**: Jotai
-- **Animations**: Framer Motion
-- **Real-Time**: ElectricSQL
-
-#### Backend (`financegpt_backend/`)
-- **Framework**: FastAPI
-- **Language**: Python 3.11+
-- **AI/ML**: LangChain, OpenAI GPT-4
-- **Database**: PostgreSQL with SQLAlchemy
-- **Task Queue**: Celery with Redis
-- **Financial Data**: Plaid API
-- **Authentication**: OAuth 2.0
-
-#### Infrastructure
-- **Database**: PostgreSQL 15
-- **Cache**: Redis
-- **Message Broker**: Redis (for Celery)
-- **Container**: Docker & Docker Compose
+| Layer | Technology | Purpose |
+|-------|------------|---------|
+| **Frontend** | Next.js 15, TypeScript, Tailwind | Modern web UI with server components |
+| **Backend** | FastAPI, Python 3.11+ | Async API with auto-generated docs |
+| **Database** | PostgreSQL + pgvector | Relational + vector search |
+| **Task Queue** | Celery + Redis | Background document processing |
+| **AI** | LiteLLM | Provider-agnostic (OpenAI, Anthropic, Ollama) |
+| **Banking** | Plaid API | 100+ financial institution connections |
+| **Auth** | Better Auth | OAuth 2.0, Google Sign-In |
 
 ### Project Structure
 
 ```
 FinanceGPT/
-├── financegpt_web/          # Next.js frontend application
+├── financegpt_web/          # Next.js frontend
 │   ├── app/                 # App router pages
 │   ├── components/          # React components
-│   ├── lib/                 # Utilities and helpers
-│   └── public/              # Static assets
-├── financegpt_backend/      # FastAPI backend application
+│   └── lib/                 # Utilities
+├── financegpt_backend/      # FastAPI backend
 │   ├── app/
 │   │   ├── agents/          # AI agents and tools
+│   │   ├── parsers/         # Tax form parsers
 │   │   ├── routes/          # API endpoints
-│   │   ├── services/        # Business logic
-│   │   ├── tasks/           # Celery tasks
-│   │   └── utils/           # Utilities
+│   │   └── tasks/           # Celery tasks
 │   └── alembic/             # Database migrations
-├── financegpt_browser_extension/  # Browser extension
-├── docker-compose.yml       # Docker services configuration
-└── README.md               # This file
+├── docker-compose.yml       # Full stack deployment
+├── docker-compose.quickstart.yml  # All-in-one container
+└── dev.sh                   # Local development script
 ```
 
 ---
 
-## 📖 Documentation
-
-### Configuration
+## 📖 Configuration
 
-#### Plaid API Setup
-1. Sign up for a [Plaid account](https://plaid.com/)
-2. Get your API keys (Client ID and Secret)
-3. Add to `financegpt_backend/.env`:
-   ```env
-   PLAID_CLIENT_ID=your_client_id
-   PLAID_SECRET=your_secret
-   PLAID_ENV=sandbox  # or development/production
-   ```
+### Required Environment Variables
 
-#### OpenAI API Setup
-1. Get your API key from [OpenAI](https://platform.openai.com/)
-2. Add to `financegpt_backend/.env`:
-   ```env
-   OPENAI_API_KEY=your_api_key
-   ```
-
-#### Database Configuration
 ```env
-DATABASE_URL=postgresql://user:password@localhost:5432/financegpt
+# LLM Provider (choose one)
+OPENAI_API_KEY=sk-...
+# or ANTHROPIC_API_KEY=sk-ant-...
+# or GOOGLE_API_KEY=AIza...
+
+# Plaid (for bank connections)
+PLAID_CLIENT_ID=your_client_id
+PLAID_SECRET=your_secret
+PLAID_ENV=sandbox
+
+# Security
+SECRET_KEY=your-random-secret-key
 ```
 
-### API Endpoints
+### Optional Configuration
 
-#### Financial Data
-- `GET /api/accounts` - List all connected accounts
-- `GET /api/transactions` - Get transactions
-- `POST /api/plaid/link-token` - Create Plaid Link token
-- `POST /api/plaid/exchange-token` - Exchange public token
+```env
+# Document Processing
+UNSTRUCTURED_API_KEY=...     # For PDF parsing
+ETL_SERVICE=DOCLING          # Or UNSTRUCTURED
 
-#### AI Features
-- `POST /api/chat` - Chat with AI assistant
-- `POST /api/analyze/spending` - Analyze spending patterns
-- `POST /api/optimize/credit-card` - Get credit card recommendations
-- `GET /api/insights` - Get personalized insights
+# Voice Features
+TTS_SERVICE=local/kokoro
+STT_SERVICE=local/base
+```
 
 ---
 
@@ -461,42 +263,19 @@ DATABASE_URL=postgresql://user:password@localhost:5432/financegpt
 
 ### Running Tests
 
-**Frontend:**
-```bash
-cd financegpt_web
-pnpm test
-```
-
-**Backend:**
-```bash
-cd financegpt_backend
-pytest
-```
-
-### Code Quality
-
-**Frontend:**
 ```bash
-pnpm lint
-pnpm type-check
-```
+# Backend
+cd financegpt_backend && pytest
 
-**Backend:**
-```bash
-ruff check .
-mypy .
+# Frontend  
+cd financegpt_web && pnpm test
 ```
 
 ### Database Migrations
 
-**Create a new migration:**
 ```bash
 cd financegpt_backend
 alembic revision --autogenerate -m "Description"
-```
-
-**Apply migrations:**
-```bash
 alembic upgrade head
 ```
 
@@ -504,7 +283,7 @@ alembic upgrade head
 
 ## 🤝 Contributing
 
-We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
+We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md).
 
 1. Fork the repository
 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
@@ -522,18 +301,16 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
 
 ## 🙏 Acknowledgments
 
-- Built with [Next.js](https://nextjs.org/), [FastAPI](https://fastapi.tiangolo.com/), and [LangChain](https://langchain.com/)
+- Built on [SurfSense](https://github.com/MODSetter/SurfSense), an open-source NotebookLM alternative
 - Financial data powered by [Plaid](https://plaid.com/)
-- AI capabilities powered by [OpenAI](https://openai.com/)
+- AI capabilities via [LiteLLM](https://github.com/BerriAI/litellm)
 
 ---
 
-## 📧 Contact
+<div align="center">
 
-For questions or support, please open an issue or contact us at support@financegpt.com
+Made with ❤️ for anyone who's ever stared at a W2 wondering what it all means.
 
----
+**[⬆ Back to Top](#financegpt-)**
 
-<div align="center">
-Made with ❤️ by the FinanceGPT Team
 </div>
diff --git a/dev.sh b/dev.sh
new file mode 100755
index 0000000..5686066
--- /dev/null
+++ b/dev.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# FinanceGPT Local Development Script
+# Runs all services in separate terminal tabs (macOS)
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BACKEND_DIR="$SCRIPT_DIR/financegpt_backend"
+WEB_DIR="$SCRIPT_DIR/financegpt_web"
+
+echo "🚀 Starting FinanceGPT development environment..."
+echo "   Backend: $BACKEND_DIR"
+echo "   Web:     $WEB_DIR"
+
+# Check if running on macOS
+if [[ "$OSTYPE" != "darwin"* ]]; then
+    echo "❌ This script is designed for macOS. For Linux, use tmux or run manually."
+    echo ""
+    echo "Commands to run manually:"
+    echo "  Terminal 1 (Backend API):"
+    echo "    cd $BACKEND_DIR && source .venv/bin/activate && uv pip install -e . && python main.py --reload"
+    echo ""
+    echo "  Terminal 2 (Celery Worker):"
+    echo "    cd $BACKEND_DIR && source .venv/bin/activate && celery -A celery_worker.celery_app worker --loglevel=info --concurrency=1 --pool=solo"
+    echo ""
+    echo "  Terminal 3 (Frontend):"
+    echo "    cd $WEB_DIR && pnpm run dev"
+    exit 1
+fi
+
+# Create venv if it doesn't exist
+if [ ! -d "$BACKEND_DIR/.venv" ]; then
+    echo "📦 Creating virtual environment..."
+    cd "$BACKEND_DIR"
+    python3 -m venv .venv
+fi
+
+# Open Terminal tabs using AppleScript
+osascript <<EOF
+tell application "Terminal"
+    activate
+    
+    -- Tab 1: Backend API
+    do script "cd '$BACKEND_DIR' && source .venv/bin/activate && echo '📦 Installing dependencies...' && pip install -e . && echo '🚀 Starting Backend API...' && python main.py --reload"
+    
+    -- Tab 2: Celery Worker
+    delay 1
+    tell application "System Events" to keystroke "t" using command down
+    delay 0.5
+    do script "cd '$BACKEND_DIR' && source .venv/bin/activate && echo '⏳ Waiting for dependencies...' && sleep 5 && echo '🔄 Starting Celery Worker...' && celery -A celery_worker.celery_app worker --loglevel=info --concurrency=1 --pool=solo" in front window
+    
+    -- Tab 3: Frontend
+    delay 1
+    tell application "System Events" to keystroke "t" using command down
+    delay 0.5
+    do script "cd '$WEB_DIR' && echo '🌐 Starting Frontend...' && pnpm run dev" in front window
+    
+end tell
+EOF
+
+echo ""
+echo "✅ Development environment started in Terminal tabs!"
+echo ""
+echo "Services:"
+echo "  📡 Backend API:    http://localhost:8000"
+echo "  📚 API Docs:       http://localhost:8000/docs"
+echo "  🌐 Frontend:       http://localhost:3000"
+echo ""
+echo "Press Ctrl+C in each tab to stop the services."
diff --git a/financegpt.png b/financegpt.png
new file mode 100644
index 0000000..b1665e9
Binary files /dev/null and b/financegpt.png differ
diff --git a/financegpt_backend/alembic/versions/3_change_tax_forms_document_fk_to_cascade.py b/financegpt_backend/alembic/versions/3_change_tax_forms_document_fk_to_cascade.py
new file mode 100644
index 0000000..b4a363e
--- /dev/null
+++ b/financegpt_backend/alembic/versions/3_change_tax_forms_document_fk_to_cascade.py
@@ -0,0 +1,48 @@
+"""change_tax_forms_document_fk_to_cascade
+
+Revision ID: 3
+Revises: 2
+Create Date: 2026-02-02 20:43:14.342128
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '3'
+down_revision: Union[str, None] = '2'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Change tax_forms.document_id FK from SET NULL to CASCADE."""
+    # Drop the existing foreign key constraint
+    op.drop_constraint('tax_forms_document_id_fkey', 'tax_forms', type_='foreignkey')
+    
+    # Recreate with CASCADE on delete
+    op.create_foreign_key(
+        'tax_forms_document_id_fkey',
+        'tax_forms',
+        'documents',
+        ['document_id'],
+        ['id'],
+        ondelete='CASCADE'
+    )
+
+
+def downgrade() -> None:
+    """Revert to SET NULL behavior."""
+    op.drop_constraint('tax_forms_document_id_fkey', 'tax_forms', type_='foreignkey')
+    
+    op.create_foreign_key(
+        'tax_forms_document_id_fkey',
+        'tax_forms',
+        'documents',
+        ['document_id'],
+        ['id'],
+        ondelete='SET NULL'
+    )
diff --git a/financegpt_backend/app/agents/new_chat/system_prompt.py b/financegpt_backend/app/agents/new_chat/system_prompt.py
index d96b625..a088979 100644
--- a/financegpt_backend/app/agents/new_chat/system_prompt.py
+++ b/financegpt_backend/app/agents/new_chat/system_prompt.py
@@ -519,14 +519,15 @@
 11. analyze_tax_data: Query uploaded and processed tax forms to answer tax questions.
   - **USE THIS TOOL** when users ask about:
     * Tax form data: "How much did I earn?", "What were my wages?"
-    * Tax withholdings: "How much federal tax was withheld?"
+    * Tax withholdings: "How much federal tax was withheld?", "State taxes withheld?"
     * Interest income: "Did I have interest income?", "1099-INT summary?"
     * Dividend income: "What dividends did I receive?"
     * Capital gains: "Stock sale gains/losses?", "1099-B summary?"
-    * W2 employment: "Where did I work?", "Wages by employer?"
+    * W2 employment: "Where did I work?", "Wages by employer?", "State wages?"
     * Tax year summaries: "2024 tax summary", "Total income 2024"
+    * Tax estimates/refunds: "Will I get a refund?", "How much do I owe?", "Estimate my taxes"
   - IMPORTANT: This tool queries ONLY uploaded tax forms (W2, 1099-MISC, 1099-INT, 1099-DIV, 1099-B)
-  - Does NOT calculate estimates or current year projections - only historical data from uploaded forms
+  - W2 forms include state tax info (state code, state wages, state income tax withheld)
   - Args:
     - query_type: Type of analysis (required). Options:
       * "income_summary": Total income across all sources
@@ -534,12 +535,13 @@
       * "interest_income": Interest from 1099-INT forms
       * "dividends_income": Dividends from 1099-DIV forms
       * "capital_gains": Capital gains from 1099-B forms
-      * "w2_summary": W2 employment wages and withholdings
+      * "w2_summary": W2 employment wages and withholdings (includes state tax info)
       * "all_forms": List all uploaded tax forms
+      * "tax_estimate": Estimate federal tax liability and potential refund/amount owed
     - tax_year: Specific tax year (e.g., 2024) or omit for all years
     - form_types: Optional filter by form types (e.g., ["W2", "1099-INT"])
   - Returns: Structured tax data with totals, breakdowns, and per-form details
-  - Privacy: All PII (SSN, EIN) is hashed - never exposed in responses
+  - Privacy: All PII (SSN, EIN) is masked - never exposed in responses
 
 </tools>
 <tool_call_examples>
@@ -645,14 +647,25 @@
 
 - User: "Which companies did I work for in 2024?"
   - Call: `analyze_tax_data(query_type="w2_summary", tax_year=2024)`
-  - Returns: W2 forms with employers, wages, and withholdings
-  - Summarize total wages and tax withheld
+  - Returns: W2 forms with employers, wages, withholdings, and state tax info
+  - Summarize total wages and federal/state tax withheld
 
 - User: "List all my uploaded tax forms"
   - Call: `analyze_tax_data(query_type="all_forms")`
   - Returns: All tax forms with types, years, and processing status
   - Note which forms need review (low confidence extractions)
 
+- User: "Will I get a tax refund?" or "How much do I owe?"
+  - Call: `analyze_tax_data(query_type="tax_estimate", tax_year=2024)`
+  - Returns: Estimated federal tax, total withheld, refund or amount owed
+  - Note: Simplified estimate using single filer status and standard deduction
+  - Always recommend consulting a tax professional for accuracy
+
+- User: "What state income tax did I pay?"
+  - Call: `analyze_tax_data(query_type="w2_summary", tax_year=2024)`
+  - Returns: W2 data including state_code, state_wages, and state_income_tax withheld
+  - Summarize state taxes by state if multiple W2s from different states
+
 - User: "How much more am I spending this month compared to last month?"
   - First call: `search_knowledge_base(query="transactions spending", start_date="2025-12-01", end_date="2025-12-31")` (Dec)
   - Second call: `search_knowledge_base(query="transactions spending", start_date="2026-01-01", end_date="2026-01-26")` (Jan)
diff --git a/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py b/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py
index 0f4413b..3e58169 100644
--- a/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py
+++ b/financegpt_backend/app/agents/new_chat/tools/tax_analysis.py
@@ -5,22 +5,47 @@
 - "What was my total federal tax withheld?"
 - "Did I have any interest income?"
 - "What were my capital gains from stock sales?"
+- "Can you estimate my tax refund?"
 """
 
 import logging
 from datetime import datetime
 from decimal import Decimal
 from typing import Any
+from uuid import UUID
 
 from langchain_core.tools import tool
 from sqlalchemy import and_, desc, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
 
-from app.schemas.tax_forms import TaxFormWithDetails
+from app.db import (
+    TaxForm,
+    W2Form,
+    Form1099Misc,
+    Form1099Int,
+    Form1099Div,
+    Form1099B,
+)
 
 logger = logging.getLogger(__name__)
 
 
+# 2024 Federal Tax Brackets (Single Filers)
+TAX_BRACKETS_2024_SINGLE = [
+    (11600, Decimal("0.10")),    # 10% up to $11,600
+    (47150, Decimal("0.12")),    # 12% up to $47,150
+    (100525, Decimal("0.22")),   # 22% up to $100,525
+    (191950, Decimal("0.24")),   # 24% up to $191,950
+    (243725, Decimal("0.32")),   # 32% up to $243,725
+    (609350, Decimal("0.35")),   # 35% up to $609,350
+    (float('inf'), Decimal("0.37")),  # 37% above $609,350
+]
+
+# Standard Deduction 2024
+STANDARD_DEDUCTION_2024 = Decimal("14600")
+
+
 def create_tax_analysis_tool(user_id: str, search_space_id: int, db_session: AsyncSession):
     """Create the tax analysis tool for the agent.
     
@@ -42,12 +67,19 @@ async def analyze_tax_data(
         """Query uploaded tax forms to answer tax-related questions.
         
         Use this tool when users ask about income, taxes withheld, interest, dividends,
-        capital gains, or W2 employment information from their uploaded tax documents.
+        capital gains, W2 employment information, or tax estimates/refunds from their 
+        uploaded tax documents.
         
         Args:
-            query_type: Type of tax analysis - "income_summary", "tax_summary", 
-                       "interest_income", "dividends_income", "capital_gains", 
-                       "w2_summary", or "all_forms"
+            query_type: Type of tax analysis:
+                - "income_summary": Total income across all sources
+                - "tax_summary": Total taxes withheld from all sources
+                - "interest_income": Interest income from 1099-INT forms
+                - "dividends_income": Dividend income from 1099-DIV forms
+                - "capital_gains": Capital gains from 1099-B forms
+                - "w2_summary": W2 wage and withholding summary
+                - "all_forms": List all tax forms
+                - "tax_estimate": Estimate tax liability and potential refund
             tax_year: Specific tax year (e.g., 2024) or None for all years
             form_types: Optional list of form types to filter (e.g., ["W2", "1099-INT"])
             
@@ -55,6 +87,7 @@ async def analyze_tax_data(
             Dictionary with analysis results including totals, breakdowns, and details
         """
         return await _analyze_tax_data_impl(
+            db_session=db_session,
             user_id=user_id,
             search_space_id=search_space_id,
             query_type=query_type,
@@ -66,6 +99,7 @@ async def analyze_tax_data(
 
 
 async def _analyze_tax_data_impl(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     query_type: str,
@@ -73,150 +107,662 @@ async def _analyze_tax_data_impl(
     form_types: list[str] | None = None,
 ) -> dict[str, Any]:
     """Implementation of tax data analysis."""
-    # Note: Actual database queries would go here
-    # For now, returning placeholder structure
     
     if query_type == "income_summary":
-        return await _get_income_summary(user_id, search_space_id, tax_year)
+        return await _get_income_summary(db_session, user_id, search_space_id, tax_year)
     elif query_type == "tax_summary":
-        return await _get_tax_summary(user_id, search_space_id, tax_year)
+        return await _get_tax_summary(db_session, user_id, search_space_id, tax_year)
     elif query_type == "interest_income":
-        return await _get_interest_income(user_id, search_space_id, tax_year)
+        return await _get_interest_income(db_session, user_id, search_space_id, tax_year)
     elif query_type == "dividends_income":
-        return await _get_dividends_income(user_id, search_space_id, tax_year)
+        return await _get_dividends_income(db_session, user_id, search_space_id, tax_year)
     elif query_type == "capital_gains":
-        return await _get_capital_gains(user_id, search_space_id, tax_year)
+        return await _get_capital_gains(db_session, user_id, search_space_id, tax_year)
     elif query_type == "w2_summary":
-        return await _get_w2_summary(user_id, search_space_id, tax_year)
+        return await _get_w2_summary(db_session, user_id, search_space_id, tax_year)
     elif query_type == "all_forms":
-        return await _get_all_forms(user_id, search_space_id, tax_year, form_types)
+        return await _get_all_forms(db_session, user_id, search_space_id, tax_year, form_types)
+    elif query_type == "tax_estimate":
+        return await _get_tax_estimate(db_session, user_id, search_space_id, tax_year)
     else:
         return {"error": f"Unknown query type: {query_type}"}
 
 
+async def _get_w2_summary(
+    db_session: AsyncSession,
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Get W2 summary with actual database queries."""
+    try:
+        # Build query for tax forms with W2 data
+        query = (
+            select(TaxForm)
+            .options(selectinload(TaxForm.w2_form))
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                    TaxForm.form_type == "W2",
+                )
+            )
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        query = query.order_by(TaxForm.tax_year.desc())
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "w2_summary",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No W2 forms found. Upload your W2s to see employment income and withholdings.",
+            }
+        
+        # Aggregate data
+        employers = []
+        total_wages = Decimal("0.00")
+        total_federal_withheld = Decimal("0.00")
+        total_ss_withheld = Decimal("0.00")
+        total_medicare_withheld = Decimal("0.00")
+        total_state_withheld = Decimal("0.00")
+        
+        for form in tax_forms:
+            w2 = form.w2_form
+            if w2:
+                employer_info = {
+                    "employer_name": w2.employer_name or "Unknown Employer",
+                    "tax_year": form.tax_year,
+                    "wages": float(w2.wages_tips_compensation or 0),
+                    "federal_withheld": float(w2.federal_income_tax_withheld or 0),
+                    "social_security_withheld": float(w2.social_security_tax_withheld or 0),
+                    "medicare_withheld": float(w2.medicare_tax_withheld or 0),
+                    "state_withheld": float(w2.state_income_tax or 0),
+                    "retirement_plan": w2.retirement_plan,
+                    "processing_status": form.processing_status,
+                    "needs_review": form.needs_review,
+                }
+                employers.append(employer_info)
+                
+                total_wages += w2.wages_tips_compensation or Decimal("0.00")
+                total_federal_withheld += w2.federal_income_tax_withheld or Decimal("0.00")
+                total_ss_withheld += w2.social_security_tax_withheld or Decimal("0.00")
+                total_medicare_withheld += w2.medicare_tax_withheld or Decimal("0.00")
+                total_state_withheld += w2.state_income_tax or Decimal("0.00")
+        
+        return {
+            "query_type": "w2_summary",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "employers": employers,
+            "total_wages": float(total_wages),
+            "total_federal_withheld": float(total_federal_withheld),
+            "total_social_security_withheld": float(total_ss_withheld),
+            "total_medicare_withheld": float(total_medicare_withheld),
+            "total_state_withheld": float(total_state_withheld),
+            "total_w2_forms": len(employers),
+        }
+    except Exception as e:
+        logger.exception("Error getting W2 summary")
+        return {
+            "query_type": "w2_summary",
+            "error": str(e),
+        }
+
+
 async def _get_income_summary(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
     """Get total income across all sources."""
-    # TODO: Implement actual database queries
-    # This would query W2s and 1099s to sum total income
-    return {
-        "query_type": "income_summary",
-        "tax_year": tax_year or "all years",
-        "total_w2_wages": Decimal("0.00"),
-        "total_1099_misc_income": Decimal("0.00"),
-        "total_interest_income": Decimal("0.00"),
-        "total_dividend_income": Decimal("0.00"),
-        "total_capital_gains": Decimal("0.00"),
-        "grand_total_income": Decimal("0.00"),
-        "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see income summary.",
-    }
+    try:
+        w2_data = await _get_w2_summary(db_session, user_id, search_space_id, tax_year)
+        interest_data = await _get_interest_income(db_session, user_id, search_space_id, tax_year)
+        dividend_data = await _get_dividends_income(db_session, user_id, search_space_id, tax_year)
+        capital_gains_data = await _get_capital_gains(db_session, user_id, search_space_id, tax_year)
+        misc_data = await _get_misc_income(db_session, user_id, search_space_id, tax_year)
+        
+        total_w2_wages = w2_data.get("total_wages", 0) if w2_data.get("has_data") else 0
+        total_interest = interest_data.get("total_interest", 0) if interest_data.get("has_data") else 0
+        total_dividends = dividend_data.get("total_ordinary_dividends", 0) if dividend_data.get("has_data") else 0
+        total_capital_gains = capital_gains_data.get("net_capital_gains", 0) if capital_gains_data.get("has_data") else 0
+        total_misc = misc_data.get("total_misc_income", 0) if misc_data.get("has_data") else 0
+        
+        grand_total = total_w2_wages + total_interest + total_dividends + total_capital_gains + total_misc
+        
+        has_any_data = any([
+            w2_data.get("has_data"),
+            interest_data.get("has_data"),
+            dividend_data.get("has_data"),
+            capital_gains_data.get("has_data"),
+            misc_data.get("has_data"),
+        ])
+        
+        if not has_any_data:
+            return {
+                "query_type": "income_summary",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see income summary.",
+            }
+        
+        return {
+            "query_type": "income_summary",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_w2_wages": total_w2_wages,
+            "total_interest_income": total_interest,
+            "total_dividend_income": total_dividends,
+            "total_capital_gains": total_capital_gains,
+            "total_misc_income": total_misc,
+            "grand_total_income": grand_total,
+            "breakdown": {
+                "w2": w2_data,
+                "interest": interest_data,
+                "dividends": dividend_data,
+                "capital_gains": capital_gains_data,
+                "misc": misc_data,
+            },
+        }
+    except Exception as e:
+        logger.exception("Error getting income summary")
+        return {"query_type": "income_summary", "error": str(e)}
 
 
 async def _get_tax_summary(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
     """Get total taxes withheld across all sources."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "tax_summary",
-        "tax_year": tax_year or "all years",
-        "total_federal_withheld": Decimal("0.00"),
-        "total_social_security_withheld": Decimal("0.00"),
-        "total_medicare_withheld": Decimal("0.00"),
-        "total_state_withheld": Decimal("0.00"),
-        "grand_total_withheld": Decimal("0.00"),
-        "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see tax withholdings.",
-    }
+    try:
+        w2_data = await _get_w2_summary(db_session, user_id, search_space_id, tax_year)
+        interest_data = await _get_interest_income(db_session, user_id, search_space_id, tax_year)
+        dividend_data = await _get_dividends_income(db_session, user_id, search_space_id, tax_year)
+        
+        total_federal = w2_data.get("total_federal_withheld", 0) if w2_data.get("has_data") else 0
+        total_federal += interest_data.get("total_federal_withheld", 0) if interest_data.get("has_data") else 0
+        total_federal += dividend_data.get("total_federal_withheld", 0) if dividend_data.get("has_data") else 0
+        
+        total_ss = w2_data.get("total_social_security_withheld", 0) if w2_data.get("has_data") else 0
+        total_medicare = w2_data.get("total_medicare_withheld", 0) if w2_data.get("has_data") else 0
+        total_state = w2_data.get("total_state_withheld", 0) if w2_data.get("has_data") else 0
+        
+        has_data = w2_data.get("has_data") or interest_data.get("has_data") or dividend_data.get("has_data")
+        
+        if not has_data:
+            return {
+                "query_type": "tax_summary",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to see tax withholdings.",
+            }
+        
+        return {
+            "query_type": "tax_summary",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_federal_withheld": total_federal,
+            "total_social_security_withheld": total_ss,
+            "total_medicare_withheld": total_medicare,
+            "total_state_withheld": total_state,
+            "grand_total_withheld": total_federal + total_ss + total_medicare + total_state,
+        }
+    except Exception as e:
+        logger.exception("Error getting tax summary")
+        return {"query_type": "tax_summary", "error": str(e)}
 
 
 async def _get_interest_income(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
     """Get interest income from 1099-INT forms."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "interest_income",
-        "tax_year": tax_year or "all years",
-        "total_interest": Decimal("0.00"),
-        "sources": [],
-        "message": "No 1099-INT forms found. Upload your interest income statements to see details.",
-    }
+    try:
+        query = (
+            select(TaxForm)
+            .options(selectinload(TaxForm.form_1099_int))
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                    TaxForm.form_type == "1099-INT",
+                )
+            )
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "interest_income",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No 1099-INT forms found. Upload your interest income statements to see details.",
+            }
+        
+        sources = []
+        total_interest = Decimal("0.00")
+        total_federal_withheld = Decimal("0.00")
+        
+        for form in tax_forms:
+            int_form = form.form_1099_int
+            if int_form:
+                source_info = {
+                    "payer_name": int_form.payer_name or "Unknown Payer",
+                    "tax_year": form.tax_year,
+                    "interest_income": float(int_form.interest_income or 0),
+                    "federal_withheld": float(int_form.federal_income_tax_withheld or 0),
+                }
+                sources.append(source_info)
+                
+                total_interest += int_form.interest_income or Decimal("0.00")
+                total_federal_withheld += int_form.federal_income_tax_withheld or Decimal("0.00")
+        
+        return {
+            "query_type": "interest_income",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_interest": float(total_interest),
+            "total_federal_withheld": float(total_federal_withheld),
+            "sources": sources,
+        }
+    except Exception as e:
+        logger.exception("Error getting interest income")
+        return {"query_type": "interest_income", "error": str(e)}
 
 
 async def _get_dividends_income(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
     """Get dividend income from 1099-DIV forms."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "dividends_income",
-        "tax_year": tax_year or "all years",
-        "total_ordinary_dividends": Decimal("0.00"),
-        "total_qualified_dividends": Decimal("0.00"),
-        "sources": [],
-        "message": "No 1099-DIV forms found. Upload your dividend income statements to see details.",
-    }
+    try:
+        query = (
+            select(TaxForm)
+            .options(selectinload(TaxForm.form_1099_div))
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                    TaxForm.form_type == "1099-DIV",
+                )
+            )
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "dividends_income",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No 1099-DIV forms found. Upload your dividend income statements to see details.",
+            }
+        
+        sources = []
+        total_ordinary = Decimal("0.00")
+        total_qualified = Decimal("0.00")
+        total_capital_gain_dist = Decimal("0.00")
+        total_federal_withheld = Decimal("0.00")
+        
+        for form in tax_forms:
+            div_form = form.form_1099_div
+            if div_form:
+                source_info = {
+                    "payer_name": div_form.payer_name or "Unknown Payer",
+                    "tax_year": form.tax_year,
+                    "ordinary_dividends": float(div_form.total_ordinary_dividends or 0),
+                    "qualified_dividends": float(div_form.qualified_dividends or 0),
+                    "capital_gain_distributions": float(div_form.total_capital_gain_distributions or 0),
+                    "federal_withheld": float(div_form.federal_income_tax_withheld or 0),
+                }
+                sources.append(source_info)
+                
+                total_ordinary += div_form.total_ordinary_dividends or Decimal("0.00")
+                total_qualified += div_form.qualified_dividends or Decimal("0.00")
+                total_capital_gain_dist += div_form.total_capital_gain_distributions or Decimal("0.00")
+                total_federal_withheld += div_form.federal_income_tax_withheld or Decimal("0.00")
+        
+        return {
+            "query_type": "dividends_income",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_ordinary_dividends": float(total_ordinary),
+            "total_qualified_dividends": float(total_qualified),
+            "total_capital_gain_distributions": float(total_capital_gain_dist),
+            "total_federal_withheld": float(total_federal_withheld),
+            "sources": sources,
+        }
+    except Exception as e:
+        logger.exception("Error getting dividends income")
+        return {"query_type": "dividends_income", "error": str(e)}
 
 
 async def _get_capital_gains(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
     """Get capital gains from 1099-B forms."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "capital_gains",
-        "tax_year": tax_year or "all years",
-        "total_short_term_gains": Decimal("0.00"),
-        "total_long_term_gains": Decimal("0.00"),
-        "total_realized_gains": Decimal("0.00"),
-        "transactions": [],
-        "message": "No 1099-B forms found. Upload your brokerage statements to see capital gains.",
-    }
+    try:
+        query = (
+            select(TaxForm)
+            .options(selectinload(TaxForm.form_1099_b))
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                    TaxForm.form_type == "1099-B",
+                )
+            )
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "capital_gains",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No 1099-B forms found. Upload your brokerage statements to see capital gains.",
+            }
+        
+        sources = []
+        total_proceeds = Decimal("0.00")
+        total_cost_basis = Decimal("0.00")
+        
+        for form in tax_forms:
+            b_form = form.form_1099_b
+            if b_form:
+                proceeds = b_form.proceeds or Decimal("0.00")
+                cost_basis = b_form.cost_basis or Decimal("0.00")
+                gain_loss = proceeds - cost_basis
+                
+                source_info = {
+                    "broker_name": b_form.payer_name or "Unknown Broker",
+                    "tax_year": form.tax_year,
+                    "proceeds": float(proceeds),
+                    "cost_basis": float(cost_basis),
+                    "gain_loss": float(gain_loss),
+                }
+                sources.append(source_info)
+                
+                total_proceeds += proceeds
+                total_cost_basis += cost_basis
+        
+        net_gains = total_proceeds - total_cost_basis
+        
+        return {
+            "query_type": "capital_gains",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_proceeds": float(total_proceeds),
+            "total_cost_basis": float(total_cost_basis),
+            "net_capital_gains": float(net_gains),
+            "sources": sources,
+        }
+    except Exception as e:
+        logger.exception("Error getting capital gains")
+        return {"query_type": "capital_gains", "error": str(e)}
 
 
-async def _get_w2_summary(
+async def _get_misc_income(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
 ) -> dict[str, Any]:
-    """Get W2 summary."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "w2_summary",
-        "tax_year": tax_year or "all years",
-        "employers": [],
-        "total_wages": Decimal("0.00"),
-        "total_federal_withheld": Decimal("0.00"),
-        "total_social_security_withheld": Decimal("0.00"),
-        "total_medicare_withheld": Decimal("0.00"),
-        "message": "No W2 forms found. Upload your W2s to see employment income and withholdings.",
-    }
+    """Get miscellaneous income from 1099-MISC forms."""
+    try:
+        query = (
+            select(TaxForm)
+            .options(selectinload(TaxForm.form_1099_misc))
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                    TaxForm.form_type == "1099-MISC",
+                )
+            )
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "misc_income",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+            }
+        
+        sources = []
+        total_misc = Decimal("0.00")
+        
+        for form in tax_forms:
+            misc_form = form.form_1099_misc
+            if misc_form:
+                misc_income = (
+                    (misc_form.rents or Decimal("0.00")) +
+                    (misc_form.royalties or Decimal("0.00")) +
+                    (misc_form.other_income or Decimal("0.00"))
+                )
+                
+                source_info = {
+                    "payer_name": misc_form.payer_name or "Unknown Payer",
+                    "tax_year": form.tax_year,
+                    "rents": float(misc_form.rents or 0),
+                    "royalties": float(misc_form.royalties or 0),
+                    "other_income": float(misc_form.other_income or 0),
+                    "total": float(misc_income),
+                }
+                sources.append(source_info)
+                total_misc += misc_income
+        
+        return {
+            "query_type": "misc_income",
+            "tax_year": tax_year or "all years",
+            "has_data": True,
+            "total_misc_income": float(total_misc),
+            "sources": sources,
+        }
+    except Exception as e:
+        logger.exception("Error getting misc income")
+        return {"query_type": "misc_income", "error": str(e)}
 
 
 async def _get_all_forms(
+    db_session: AsyncSession,
     user_id: str,
     search_space_id: int,
     tax_year: int | None,
     form_types: list[str] | None,
 ) -> dict[str, Any]:
     """Get all tax forms with optional filters."""
-    # TODO: Implement actual database queries
-    return {
-        "query_type": "all_forms",
-        "tax_year": tax_year or "all years",
-        "form_types_filter": form_types,
-        "forms": [],
-        "total_forms": 0,
-        "message": "No tax forms uploaded yet. Upload W2s and 1099s to get started.",
-    }
+    try:
+        query = (
+            select(TaxForm)
+            .where(
+                and_(
+                    TaxForm.user_id == UUID(user_id),
+                    TaxForm.search_space_id == search_space_id,
+                )
+            )
+            .order_by(TaxForm.tax_year.desc(), TaxForm.form_type)
+        )
+        
+        if tax_year:
+            query = query.where(TaxForm.tax_year == tax_year)
+        
+        if form_types:
+            query = query.where(TaxForm.form_type.in_(form_types))
+        
+        result = await db_session.execute(query)
+        tax_forms = result.scalars().all()
+        
+        if not tax_forms:
+            return {
+                "query_type": "all_forms",
+                "tax_year": tax_year or "all years",
+                "form_types_filter": form_types,
+                "has_data": False,
+                "total_forms": 0,
+                "message": "No tax forms uploaded yet. Upload W2s and 1099s to get started.",
+            }
+        
+        forms_list = [
+            {
+                "id": str(form.id),
+                "form_type": form.form_type,
+                "tax_year": form.tax_year,
+                "processing_status": form.processing_status,
+                "extraction_method": form.extraction_method,
+                "needs_review": form.needs_review,
+                "uploaded_at": form.uploaded_at.isoformat() if form.uploaded_at else None,
+            }
+            for form in tax_forms
+        ]
+        
+        return {
+            "query_type": "all_forms",
+            "tax_year": tax_year or "all years",
+            "form_types_filter": form_types,
+            "has_data": True,
+            "forms": forms_list,
+            "total_forms": len(forms_list),
+        }
+    except Exception as e:
+        logger.exception("Error getting all forms")
+        return {"query_type": "all_forms", "error": str(e)}
+
+
+async def _get_tax_estimate(
+    db_session: AsyncSession,
+    user_id: str,
+    search_space_id: int,
+    tax_year: int | None,
+) -> dict[str, Any]:
+    """Estimate tax liability and potential refund.
+    
+    This provides a simplified estimate based on single filer status.
+    For accurate tax calculations, users should consult a tax professional.
+    """
+    try:
+        # Get income summary first
+        income_data = await _get_income_summary(db_session, user_id, search_space_id, tax_year)
+        
+        if not income_data.get("has_data"):
+            return {
+                "query_type": "tax_estimate",
+                "tax_year": tax_year or "all years",
+                "has_data": False,
+                "message": "No tax forms uploaded yet. Please upload your W2 and 1099 forms to get a tax estimate.",
+            }
+        
+        # Get withholdings
+        tax_data = await _get_tax_summary(db_session, user_id, search_space_id, tax_year)
+        
+        # Calculate estimated tax
+        gross_income = Decimal(str(income_data.get("grand_total_income", 0)))
+        
+        # Apply standard deduction
+        taxable_income = max(Decimal("0.00"), gross_income - STANDARD_DEDUCTION_2024)
+        
+        # Calculate tax using brackets
+        estimated_tax = _calculate_tax_from_brackets(taxable_income)
+        
+        # Get total withholdings
+        total_withheld = Decimal(str(tax_data.get("total_federal_withheld", 0)))
+        
+        # Calculate refund/owed
+        difference = total_withheld - estimated_tax
+        
+        if difference > 0:
+            refund_or_owed = "refund"
+            amount = float(difference)
+        else:
+            refund_or_owed = "owed"
+            amount = float(abs(difference))
+        
+        return {
+            "query_type": "tax_estimate",
+            "tax_year": tax_year or 2024,
+            "has_data": True,
+            "disclaimer": "This is a simplified estimate assuming single filer status with standard deduction. For accurate calculations, please consult a tax professional.",
+            "gross_income": float(gross_income),
+            "standard_deduction": float(STANDARD_DEDUCTION_2024),
+            "taxable_income": float(taxable_income),
+            "estimated_federal_tax": float(estimated_tax),
+            "total_federal_withheld": float(total_withheld),
+            "estimate_result": refund_or_owed,
+            "estimate_amount": amount,
+            "breakdown": {
+                "income": {
+                    "w2_wages": income_data.get("total_w2_wages", 0),
+                    "interest": income_data.get("total_interest_income", 0),
+                    "dividends": income_data.get("total_dividend_income", 0),
+                    "capital_gains": income_data.get("total_capital_gains", 0),
+                    "misc_income": income_data.get("total_misc_income", 0),
+                },
+                "withholdings": {
+                    "federal": tax_data.get("total_federal_withheld", 0),
+                    "social_security": tax_data.get("total_social_security_withheld", 0),
+                    "medicare": tax_data.get("total_medicare_withheld", 0),
+                    "state": tax_data.get("total_state_withheld", 0),
+                },
+            },
+        }
+    except Exception as e:
+        logger.exception("Error calculating tax estimate")
+        return {"query_type": "tax_estimate", "error": str(e)}
+
+
+def _calculate_tax_from_brackets(taxable_income: Decimal) -> Decimal:
+    """Calculate federal tax using progressive brackets."""
+    if taxable_income <= 0:
+        return Decimal("0.00")
+    
+    tax = Decimal("0.00")
+    prev_bracket = Decimal("0")
+    
+    for bracket_max, rate in TAX_BRACKETS_2024_SINGLE:
+        bracket_max_decimal = Decimal(str(bracket_max))
+        
+        if taxable_income <= bracket_max_decimal:
+            tax += (taxable_income - prev_bracket) * rate
+            break
+        else:
+            tax += (bracket_max_decimal - prev_bracket) * rate
+            prev_bracket = bracket_max_decimal
+    
+    return tax.quantize(Decimal("0.01"))
diff --git a/financegpt_backend/app/celery_app.py b/financegpt_backend/app/celery_app.py
index 36b0b64..f711d13 100644
--- a/financegpt_backend/app/celery_app.py
+++ b/financegpt_backend/app/celery_app.py
@@ -64,6 +64,7 @@ def parse_schedule_interval(interval: str) -> dict:
         "app.tasks.celery_tasks.schedule_checker_task",
         "app.tasks.celery_tasks.blocknote_migration_tasks",
         "app.tasks.celery_tasks.document_reindex_tasks",
+        "app.tasks.celery_tasks.tax_form_tasks",
     ],
 )
 
diff --git a/financegpt_backend/app/db.py b/financegpt_backend/app/db.py
index 3932bf1..5688f05 100644
--- a/financegpt_backend/app/db.py
+++ b/financegpt_backend/app/db.py
@@ -981,7 +981,7 @@ class TaxForm(BaseModel):
     form_type = Column(String(20), nullable=False, index=True)  # W2, 1099-MISC, 1099-INT, etc.
     tax_year = Column(Integer, nullable=False, index=True)
     document_id = Column(
-        Integer, ForeignKey("documents.id", ondelete="SET NULL"), nullable=True
+        Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=True
     )
     uploaded_at = Column(TIMESTAMP(timezone=True), server_default=text('now()'), nullable=False)
     processed_at = Column(TIMESTAMP(timezone=True), nullable=True)
diff --git a/financegpt_backend/app/parsers/llm_csv_parser.py b/financegpt_backend/app/parsers/llm_csv_parser.py
index 563752d..6136c8d 100644
--- a/financegpt_backend/app/parsers/llm_csv_parser.py
+++ b/financegpt_backend/app/parsers/llm_csv_parser.py
@@ -32,6 +32,80 @@ def __init__(self):
         """Initialize LLM CSV parser."""
         super().__init__("LLM CSV Parser")
 
+    def _skip_preamble_rows(self, text_content: str) -> str:
+        """
+        Skip preamble/header rows that appear before actual CSV data.
+        
+        Many brokerage CSV exports include metadata rows before the actual CSV headers.
+        Examples:
+        - Schwab: "Positions for account ****9247 as of 01/29/2026 04:00 PM ET"
+        - Fidelity: "Brokerage" or account name rows
+        - E*TRADE: Account summary rows
+        
+        Detection strategy:
+        1. Find the first line that looks like a proper CSV header row
+        2. A header row has multiple comma-separated fields (>=3)
+        3. Subsequent data rows should have the same number of fields
+        4. Preamble rows typically have fewer fields or are single values
+        
+        Args:
+            text_content: Raw CSV text content
+            
+        Returns:
+            CSV text with preamble rows removed
+        """
+        lines = text_content.strip().split('\n')
+        if not lines:
+            return text_content
+        
+        def count_csv_fields(line: str) -> int:
+            """Count fields in a CSV line, handling quoted values with commas."""
+            try:
+                # Use csv module to properly parse the line
+                reader = csv.reader(io.StringIO(line))
+                fields = next(reader, [])
+                return len(fields)
+            except Exception:
+                # Fallback: simple comma count (less accurate but safe)
+                return line.count(',') + 1
+        
+        def is_likely_header_row(line: str, next_lines: list[str]) -> bool:
+            """Check if this line looks like a CSV header row."""
+            if not line.strip():
+                return False
+            
+            field_count = count_csv_fields(line)
+            
+            # Headers should have at least 3 fields
+            if field_count < 3:
+                return False
+            
+            # Check if the next few non-empty lines have the same field count
+            # This indicates we found the actual CSV structure
+            matching_lines = 0
+            for next_line in next_lines[:5]:  # Check up to 5 subsequent lines
+                if not next_line.strip():
+                    continue
+                if count_csv_fields(next_line) == field_count:
+                    matching_lines += 1
+            
+            # If at least 2 subsequent lines match, this is likely the header
+            return matching_lines >= 2
+        
+        # Find the first line that looks like a CSV header
+        for i, line in enumerate(lines):
+            if not line.strip():
+                continue
+            
+            remaining_lines = lines[i+1:]
+            if is_likely_header_row(line, remaining_lines):
+                if i > 0:
+                    logger.info(f"Skipped {i} preamble row(s) before CSV headers")
+                return '\n'.join(lines[i:])
+        
+        # No preamble detected, return original content
+        return text_content
+
     async def parse_file(
         self,
         file_content: bytes,
@@ -56,6 +130,11 @@ async def parse_file(
         try:
             # Decode CSV
             text_content = file_content.decode("utf-8-sig")
+            
+            # Handle CSV files with preamble/header rows before actual CSV data
+            # (e.g., Schwab files start with "Positions for account ****XXXX as of...")
+            text_content = self._skip_preamble_rows(text_content)
+            
             csv_reader = csv.DictReader(io.StringIO(text_content))
             
             # Get headers and first few rows for LLM analysis
@@ -316,14 +395,18 @@ def safe_decimal(value: str | None, allow_negative: bool = False) -> Decimal | N
         
         holdings = []
         
+        # Log schema for debugging
+        logger.debug(f"Applying holdings schema: {schema}")
+        
         for row in rows:
             try:
                 # Extract symbol
                 symbol_col = schema.get("symbol", {}).get("column")
                 if not symbol_col or symbol_col not in row:
+                    logger.debug(f"Symbol column '{symbol_col}' not found in row keys: {list(row.keys())}")
                     continue
                 symbol = str(row[symbol_col]).strip().upper()
-                if not symbol or symbol in ["", "N/A", "Total", "TOTAL"]:
+                if not symbol or symbol in ["", "N/A", "Total", "TOTAL", "ACCOUNT TOTAL", "CASH & CASH INVESTMENTS"]:
                     continue
                 
                 # Extract quantity using safe_decimal
diff --git a/financegpt_backend/app/parsers/tax_form_parser.py b/financegpt_backend/app/parsers/tax_form_parser.py
index 57f9fcd..e3a4bde 100644
--- a/financegpt_backend/app/parsers/tax_form_parser.py
+++ b/financegpt_backend/app/parsers/tax_form_parser.py
@@ -9,20 +9,76 @@
 Each tier returns confidence scores. If confidence < 0.85, escalate to next tier.
 """
 
+import decimal
+import json
 import logging
 import re
 from decimal import Decimal
 from pathlib import Path
 from typing import Any, Literal
 
+import litellm
 import pdfplumber
+from pydantic import BaseModel, Field
 from unstructured.partition.pdf import partition_pdf
 
-from app.utils.pii_masking import mask_tax_form_for_llm, validate_confidence_threshold
+from app.utils.pii_masking import mask_tax_form_for_llm, validate_confidence_threshold, mask_pii_in_text, recover_pii_from_mapping
 
 logger = logging.getLogger(__name__)
 
 
+# Pydantic models for structured LLM extraction
+class W2ExtractedData(BaseModel):
+    """Structured W2 data extracted by LLM."""
+    wages_tips_compensation: float | None = Field(None, description="Box 1: Wages, tips, other compensation")
+    federal_income_tax_withheld: float | None = Field(None, description="Box 2: Federal income tax withheld")
+    social_security_wages: float | None = Field(None, description="Box 3: Social security wages")
+    social_security_tax_withheld: float | None = Field(None, description="Box 4: Social security tax withheld")
+    medicare_wages: float | None = Field(None, description="Box 5: Medicare wages and tips")
+    medicare_tax_withheld: float | None = Field(None, description="Box 6: Medicare tax withheld")
+    # State tax fields
+    state_code: str | None = Field(None, description="Box 15: State code (e.g., CA, NY)")
+    state_wages: float | None = Field(None, description="Box 16: State wages, tips, etc.")
+    state_income_tax: float | None = Field(None, description="Box 17: State income tax withheld")
+    # Note: SSN/EIN extracted from heuristics, not LLM (privacy)
+    employer_name: str | None = Field(None, description="Employer's name")
+    retirement_plan: bool = Field(False, description="Box 13: Retirement plan checkbox marked")
+
+
+class Form1099ExtractedData(BaseModel):
+    """Structured 1099 data extracted by LLM."""
+    interest_income: float | None = Field(None, description="Interest income (1099-INT Box 1)")
+    dividend_income: float | None = Field(None, description="Total ordinary dividends (1099-DIV Box 1a)")
+    qualified_dividends: float | None = Field(None, description="Qualified dividends (1099-DIV Box 1b)")
+    capital_gain_distributions: float | None = Field(None, description="Capital gain distributions (1099-DIV Box 2a)")
+    proceeds: float | None = Field(None, description="Proceeds from sales (1099-B Box 1d)")
+    cost_basis: float | None = Field(None, description="Cost or other basis (1099-B Box 1e)")
+    rents: float | None = Field(None, description="Rents (1099-MISC Box 1)")
+    other_income: float | None = Field(None, description="Other income (1099-MISC Box 3)")
+    federal_tax_withheld: float | None = Field(None, description="Federal income tax withheld")
+    payer_name: str | None = Field(None, description="Payer's name")
+    # Note: Payer TIN extracted from heuristics, not LLM (privacy)
+
+
+class Form1095CExtractedData(BaseModel):
+    """Structured 1095-C data extracted by LLM (Employer-Provided Health Insurance Offer and Coverage)."""
+    # Part I - Employee Information
+    employer_name: str | None = Field(None, description="Part I Line 1: Employer name")
+    employer_contact_phone: str | None = Field(None, description="Part I Line 9: Employer contact phone")
+    
+    # Part II - Employee Offer and Coverage (Line 14-16 codes for each month)
+    offer_of_coverage_code: str | None = Field(None, description="Line 14: Offer of Coverage code (e.g., 1A, 1B, 1C, 1E, 1H)")
+    employee_share_lowest_cost: float | None = Field(None, description="Line 15: Employee share of lowest cost monthly premium")
+    safe_harbor_code: str | None = Field(None, description="Line 16: Safe Harbor code (e.g., 2C, 2D, 2F)")
+    
+    # Coverage months (Part II Line 14 - which months had coverage)
+    covered_all_12_months: bool = Field(False, description="Whether coverage was offered for all 12 months")
+    covered_months: list[str] | None = Field(None, description="List of months with coverage if not all 12")
+    
+    # Part III - Covered Individuals (if applicable)
+    covered_individuals_count: int | None = Field(None, description="Number of covered individuals listed in Part III")
+
+
 class TaxFormParser:
     """Hybrid tax form parser with tiered extraction."""
     
@@ -41,11 +97,71 @@ def __init__(self):
         """Initialize parser."""
         self.extraction_history: list[dict[str, Any]] = []
     
+    async def parse_from_text(
+        self,
+        text: str,
+        form_type: Literal["W2", "1099-MISC", "1099-INT", "1099-DIV", "1099-B", "1095-C"],
+        tax_year: int,
+        llm_model: str | None = None,
+    ) -> dict[str, Any]:
+        """Parse tax form from pre-extracted text using LLM.
+        
+        This method is used when text has already been extracted from the PDF
+        (e.g., by Unstructured or Docling) and we just need to parse the fields.
+        
+        Always uses LLM extraction for accuracy.
+        
+        Args:
+            text: Pre-extracted text content
+            form_type: Type of tax form
+            tax_year: Tax year for the form
+            llm_model: Optional LLM model string (e.g., "gemini/gemini-2.0-flash")
+                      If not provided, uses default model
+            
+        Returns:
+            Dictionary containing parsed data and confidence scores.
+        """
+        logger.info(f"Parsing {form_type} from pre-extracted text ({len(text)} chars) using LLM")
+        
+        # Use LLM extraction for accuracy
+        extracted_data = await self._extract_with_llm(text, form_type, tax_year, llm_model=llm_model)
+        extraction_method = "llm_assisted"
+        
+        # If LLM extraction failed, fall back to heuristic parsing
+        if not extracted_data:
+            logger.warning(f"LLM extraction failed for {form_type}, falling back to heuristic parsing")
+            if form_type == "W2":
+                extracted_data = self._parse_w2_text(text)
+            elif form_type == "1099-MISC":
+                extracted_data = self._parse_1099_misc_text(text)
+            elif form_type == "1099-INT":
+                extracted_data = self._parse_1099_int_text(text)
+            elif form_type == "1099-DIV":
+                extracted_data = self._parse_1099_div_text(text)
+            elif form_type == "1099-B":
+                extracted_data = self._parse_1099_b_text(text)
+            elif form_type == "1095-C":
+                extracted_data = self._parse_1095_c_text(text)
+            else:
+                raise ValueError(f"Unsupported form type: {form_type}")
+            extraction_method = "heuristic"
+        
+        confidence_scores = self._calculate_confidence_scores(extracted_data)
+        
+        return {
+            "extracted_data": extracted_data,
+            "confidence_scores": confidence_scores,
+            "extraction_method": extraction_method,
+            "needs_review": not self._meets_confidence_threshold(confidence_scores),
+            "raw_extraction_data": {"text_length": len(text)},
+        }
+    
     async def parse_tax_form(
         self,
         file_path: str | Path,
-        form_type: Literal["W2", "1099-MISC", "1099-INT", "1099-DIV", "1099-B"],
+        form_type: Literal["W2", "1099-MISC", "1099-INT", "1099-DIV", "1099-B", "1095-C"],
         tax_year: int,
+        llm_model: str | None = None,
     ) -> dict[str, Any]:
         """Parse tax form using tiered extraction strategy.
         
@@ -53,6 +169,7 @@ async def parse_tax_form(
             file_path: Path to PDF file
             form_type: Type of tax form
             tax_year: Tax year for the form
+            llm_model: Optional LLM model string for fallback extraction
             
         Returns:
             Dictionary containing:
@@ -249,13 +366,6 @@ async def _extract_llm_assisted(
         Best for: Verification of low-confidence fields, unusual layouts.
         IMPORTANT: All PII is masked before sending to LLM.
         """
-        # TODO: Implement LLM-assisted extraction using instructor
-        # This would:
-        # 1. Convert PDF to image or extract text
-        # 2. Mask any PII found in previous extraction
-        # 3. Send to LLM with structured output schema
-        # 4. Return verified/extracted data
-        
         logger.warning("LLM-assisted extraction not yet implemented")
         
         # For now, return previous result marked as needs_review
@@ -272,39 +382,217 @@ async def _extract_llm_assisted(
             "raw_extraction_data": {"status": "not_implemented"},
         }
     
+    async def _extract_with_llm(
+        self,
+        text: str,
+        form_type: str,
+        tax_year: int,
+        llm_model: str | None = None,
+    ) -> dict[str, Any]:
+        """Use LLM to extract structured data from tax form text.
+        
+        This is called when heuristic parsing fails to find dollar amounts.
+        PII (SSN, EIN) is masked before sending to the LLM.
+        
+        Args:
+            text: The document text to parse
+            form_type: Type of tax form
+            tax_year: Tax year
+            llm_model: Optional LLM model string. If not provided, uses default.
+        """
+        # Use provided model or fall back to default
+        model = llm_model or "gemini/gemini-2.0-flash"
+        logger.info(f"Using LLM extraction for {form_type} (tax year {tax_year}) with model: {model}")
+        
+        # Mask PII before sending to LLM
+        masked_text, pii_mapping = mask_pii_in_text(text)
+        if pii_mapping:
+            logger.info(f"Masked {len(pii_mapping)} PII items before LLM call")
+        
+        # Select the appropriate prompt based on form type
+        if form_type == "W2":
+            system_prompt = """You are a tax document parser. Extract W-2 form data from the provided text.
+            
+Extract these specific fields:
+- wages_tips_compensation: Box 1 - Wages, tips, other compensation (the main income amount)
+- federal_income_tax_withheld: Box 2 - Federal income tax withheld
+- social_security_wages: Box 3 - Social security wages (often same as Box 1, capped at ~$176,100 for 2025)
+- social_security_tax_withheld: Box 4 - Social security tax withheld (about 6.2% of Box 3)
+- medicare_wages: Box 5 - Medicare wages and tips (often same as Box 1)
+- medicare_tax_withheld: Box 6 - Medicare tax withheld (about 1.45% of Box 5)
+- state_code: Box 15 - Two-letter state code (e.g., CA, NY, TX)
+- state_wages: Box 16 - State wages, tips, etc.
+- state_income_tax: Box 17 - State income tax withheld
+- employer_name: Name of the employer
+- retirement_plan: True if Box 13 retirement plan is checked
+
+Note: SSN and EIN have been masked for privacy (shown as [SSN:***-**-XXXX] or [EIN:XXXXXXXX]).
+Do NOT try to extract SSN or EIN values.
+
+Important:
+- Look for dollar amounts formatted as $XX,XXX.XX or XX,XXX.XX
+- The wages amount is typically the largest number (often $50,000 - $500,000 range)
+- Federal tax withheld is typically 10-35% of wages
+- Return null for any field you cannot find
+- Do NOT guess values - only extract what you can clearly identify"""
+
+            response_format = W2ExtractedData
+        elif form_type == "1095-C":
+            system_prompt = """You are a tax document parser. Extract Form 1095-C data from the provided text.
+            
+Form 1095-C is the Employer-Provided Health Insurance Offer and Coverage form.
+
+Extract these specific fields:
+- employer_name: Part I Line 1 - Name of employer
+- employer_contact_phone: Part I Line 9 - Contact telephone number
+- offer_of_coverage_code: Line 14 - The offer of coverage code (common codes: 1A, 1B, 1C, 1E, 1H)
+- employee_share_lowest_cost: Line 15 - Employee's share of lowest cost monthly premium for self-only coverage
+- safe_harbor_code: Line 16 - Safe Harbor code (common codes: 2C, 2D, 2F)
+- covered_all_12_months: True if "All 12 Months" is checked or coverage shown for all months
+- covered_months: List of specific months if not all 12 (e.g., ["Jan", "Feb", "Mar"])
+- covered_individuals_count: Number of people listed in Part III Covered Individuals section
+
+Note: SSN and EIN have been masked for privacy. Do NOT try to extract SSN or EIN values.
+
+Important:
+- Look for the coverage codes in Line 14, 15, 16 columns
+- Return null for any field you cannot find
+- Do NOT guess values - only extract what you can clearly identify"""
+
+            response_format = Form1095CExtractedData
+        else:
+            system_prompt = """You are a tax document parser. Extract 1099 form data from the provided text.
+            
+Extract any applicable fields:
+- interest_income: 1099-INT Box 1
+- dividend_income: 1099-DIV Box 1a
+- qualified_dividends: 1099-DIV Box 1b  
+- capital_gain_distributions: 1099-DIV Box 2a
+- proceeds: 1099-B Box 1d (sale proceeds)
+- cost_basis: 1099-B Box 1e
+- rents: 1099-MISC Box 1
+- other_income: 1099-MISC Box 3
+- federal_tax_withheld: Federal tax withheld amount
+- payer_name: Name of the payer/institution
+
+Note: TINs have been masked for privacy. Do NOT try to extract TIN/SSN/EIN values.
+
+Return null for fields not present in this specific form."""
+
+            response_format = Form1099ExtractedData
+        
+        # Use masked text in the prompt (first 8000 chars)
+        text_for_llm = masked_text[:8000]
+        
+        user_prompt = f"""Extract tax form data from this {form_type} for tax year {tax_year}:
+
+{text_for_llm}
+
+Return the extracted values as JSON."""
+
+        try:
+            # Use litellm for LLM call with structured output
+            response = await litellm.acompletion(
+                model=model,  # Use user's configured model or default
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                response_format={"type": "json_object"},
+                temperature=0,  # Deterministic
+            )
+            
+            # Parse the response
+            content = response.choices[0].message.content
+            extracted_json = json.loads(content)
+            
+            logger.info(f"LLM extracted data: {extracted_json}")
+            
+            # Convert to our internal format
+            extracted_data = {}
+            for key, value in extracted_json.items():
+                if value is not None:
+                    if isinstance(value, (int, float)) and key not in ["retirement_plan", "covered_all_12_months"]:
+                        extracted_data[key] = Decimal(str(value))
+                    else:
+                        extracted_data[key] = value
+            
+            # Map LLM field names to database field names
+            # 1099-DIV fields: LLM uses dividend_income, DB uses total_ordinary_dividends
+            field_mappings = {
+                "dividend_income": "total_ordinary_dividends",
+                "capital_gain_distributions": "total_capital_gain_distributions",
+            }
+            
+            for llm_name, db_name in field_mappings.items():
+                if llm_name in extracted_data and db_name not in extracted_data:
+                    extracted_data[db_name] = extracted_data.pop(llm_name)
+            
+            return extracted_data
+            
+        except Exception as e:
+            logger.error(f"LLM extraction failed: {e}")
+            return {}
+    
     def _parse_w2_text(self, text: str) -> dict[str, Any]:
-        """Parse W2 form from extracted text."""
+        """Parse W2 form from extracted text.
+        
+        Uses a two-phase approach:
+        1. First, try to find all dollar amounts in the text
+        2. Use context around those amounts to map them to W2 boxes
+        """
         data: dict[str, Any] = {}
         
-        # Box 1: Wages, tips, other compensation
-        wages_match = re.search(r"(?:Wages|Box 1).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if wages_match:
-            data["wages_tips_compensation"] = self._parse_money(wages_match.group(1))
+        # Find all dollar amounts in the text (realistic income/tax values)
+        # Must have comma-separated thousands OR decimal point for cents, OR be over 100
+        # This filters out box numbers like "21", "34", etc.
+        dollar_amounts = self._extract_dollar_amounts(text)
         
-        # Box 2: Federal income tax withheld
-        fed_tax_match = re.search(r"(?:Federal.*?tax.*?withheld|Box 2).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if fed_tax_match:
-            data["federal_income_tax_withheld"] = self._parse_money(fed_tax_match.group(1))
+        logger.debug(f"Found {len(dollar_amounts)} dollar amounts: {dollar_amounts[:10]}")
         
-        # Box 3: Social security wages
-        ss_wages_match = re.search(r"(?:Social security wages|Box 3).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if ss_wages_match:
-            data["social_security_wages"] = self._parse_money(ss_wages_match.group(1))
+        # For each W2 field, find the most likely value by looking at context
+        # Box 1: Wages, tips, other compensation (usually the largest amount)
+        if dollar_amounts:
+            # Sort by value descending - wages is typically the largest
+            sorted_amounts = sorted(dollar_amounts, key=lambda x: x[0], reverse=True)
+            
+            # Wages is usually the largest amount
+            data["wages_tips_compensation"] = sorted_amounts[0][0]
+            
+            # Federal tax withheld is usually second or third largest
+            if len(sorted_amounts) > 1:
+                # Look for one that's roughly 10-30% of wages
+                wages = float(sorted_amounts[0][0])
+                for amount, context in sorted_amounts[1:]:
+                    amount_float = float(amount)
+                    if wages > 0 and 0.05 <= amount_float / wages <= 0.40:
+                        data["federal_income_tax_withheld"] = amount
+                        break
+                else:
+                    # Fallback to second largest
+                    if len(sorted_amounts) > 1:
+                        data["federal_income_tax_withheld"] = sorted_amounts[1][0]
+        
+        # Try to find SS wages (often equal to wages, capped at SS limit ~$168,600 for 2024)
+        ss_limit = Decimal("168600")
+        if "wages_tips_compensation" in data:
+            wages = data["wages_tips_compensation"]
+            if wages <= ss_limit:
+                data["social_security_wages"] = wages
+            else:
+                data["social_security_wages"] = ss_limit
         
-        # Box 4: Social security tax withheld
-        ss_tax_match = re.search(r"(?:Social security tax|Box 4).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if ss_tax_match:
-            data["social_security_tax_withheld"] = self._parse_money(ss_tax_match.group(1))
+        # SS tax is 6.2% of SS wages
+        if "social_security_wages" in data:
+            data["social_security_tax_withheld"] = (data["social_security_wages"] * Decimal("0.062")).quantize(Decimal("0.01"))
         
-        # Box 5: Medicare wages
-        medicare_wages_match = re.search(r"(?:Medicare wages|Box 5).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if medicare_wages_match:
-            data["medicare_wages"] = self._parse_money(medicare_wages_match.group(1))
+        # Medicare wages usually equals regular wages
+        if "wages_tips_compensation" in data:
+            data["medicare_wages"] = data["wages_tips_compensation"]
         
-        # Box 6: Medicare tax withheld
-        medicare_tax_match = re.search(r"(?:Medicare tax|Box 6).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
-        if medicare_tax_match:
-            data["medicare_tax_withheld"] = self._parse_money(medicare_tax_match.group(1))
+        # Medicare tax is 1.45% of Medicare wages
+        if "medicare_wages" in data:
+            data["medicare_tax_withheld"] = (data["medicare_wages"] * Decimal("0.0145")).quantize(Decimal("0.01"))
         
         # Extract SSN (will be hashed before storage)
         ssn_match = re.search(self.PATTERNS["ssn"], text)
@@ -321,6 +609,56 @@ def _parse_w2_text(self, text: str) -> dict[str, Any]:
         
         return data
     
+    def _extract_dollar_amounts(self, text: str) -> list[tuple[Decimal, str]]:
+        """Extract all realistic dollar amounts from text with surrounding context.
+        
+        Returns list of (amount, context) tuples sorted by likelihood of being real values.
+        Filters out small numbers that are likely box numbers, dates, etc.
+        """
+        amounts = []
+        
+        # Pattern for dollar amounts:
+        # - Optional $
+        # - Either: comma-separated thousands (1,000+)
+        # - Or: decimal values (100.00+)
+        # - Or: large numbers without commas (1000+)
+        patterns = [
+            # $1,234.56 or $1,234
+            r'\$\s*(\d{1,3}(?:,\d{3})+(?:\.\d{2})?)',
+            # 1,234.56 or 1,234 (with commas, no $)
+            r'(?<![.\d])(\d{1,3}(?:,\d{3})+(?:\.\d{2})?)(?![.\d])',
+            # $1234.56 (4+ digits with decimal, $)
+            r'\$\s*(\d{4,}(?:\.\d{2}))',
+            # 1234.56 (4+ digits with decimal, no $)
+            r'(?<![,.\d])(\d{4,}\.\d{2})(?![.\d])',
+        ]
+        
+        for pattern in patterns:
+            for match in re.finditer(pattern, text):
+                try:
+                    amount_str = match.group(1)
+                    amount = self._parse_money(amount_str)
+                    
+                    # Only include amounts >= $100 (filters out box numbers, dates, etc.)
+                    if amount >= 100:
+                        # Get context (50 chars before and after)
+                        start = max(0, match.start() - 50)
+                        end = min(len(text), match.end() + 50)
+                        context = text[start:end]
+                        amounts.append((amount, context))
+                except (ValueError, decimal.InvalidOperation):
+                    continue
+        
+        # Remove duplicates by value
+        seen = set()
+        unique = []
+        for amount, context in amounts:
+            if amount not in seen:
+                seen.add(amount)
+                unique.append((amount, context))
+        
+        return unique
+    
     def _parse_1099_misc_text(self, text: str) -> dict[str, Any]:
         """Parse 1099-MISC form from extracted text."""
         data: dict[str, Any] = {}
@@ -409,6 +747,35 @@ def _parse_1099_b_text(self, text: str) -> dict[str, Any]:
         
         return data
     
+    def _parse_1095_c_text(self, text: str) -> dict[str, Any]:
+        """Parse 1095-C form from extracted text (Employer-Provided Health Insurance)."""
+        data: dict[str, Any] = {}
+        
+        # Employer name
+        employer_match = re.search(r"(?:Employer|Company).*?name[:\s]+([A-Za-z0-9\s,\.]+?)(?:\n|$)", text, re.IGNORECASE)
+        if employer_match:
+            data["employer_name"] = employer_match.group(1).strip()
+        
+        # Line 14: Offer of Coverage code (1A, 1B, 1C, 1E, 1H, etc.)
+        offer_code_match = re.search(r"(?:Line\s*14|offer.*?coverage).*?\b(1[A-HJ-Z])\b", text, re.IGNORECASE)
+        if offer_code_match:
+            data["offer_of_coverage_code"] = offer_code_match.group(1).upper()
+        
+        # Line 15: Employee share of lowest cost monthly premium
+        premium_match = re.search(r"(?:Line\s*15|lowest.*?cost|employee.*?share).*?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)", text, re.IGNORECASE)
+        if premium_match:
+            data["employee_share_lowest_cost"] = self._parse_money(premium_match.group(1))
+        
+        # Line 16: Safe Harbor code (2C, 2D, 2F, etc.)
+        safe_harbor_match = re.search(r"(?:Line\s*16|safe.*?harbor).*?\b(2[A-HJ-Z])\b", text, re.IGNORECASE)
+        if safe_harbor_match:
+            data["safe_harbor_code"] = safe_harbor_match.group(1).upper()
+        
+        # Check for "All 12 Months" coverage
+        data["covered_all_12_months"] = bool(re.search(r"all\s*12\s*months", text, re.IGNORECASE))
+        
+        return data
+    
     def _parse_money(self, money_str: str) -> Decimal:
         """Parse money string to Decimal."""
         # Remove $, spaces, commas
diff --git a/financegpt_backend/app/routes/documents_routes.py b/financegpt_backend/app/routes/documents_routes.py
index 9bae45b..121b4bc 100644
--- a/financegpt_backend/app/routes/documents_routes.py
+++ b/financegpt_backend/app/routes/documents_routes.py
@@ -828,3 +828,143 @@ async def delete_document(
         raise HTTPException(
             status_code=500, detail=f"Failed to delete document: {e!s}"
         ) from e
+
+
+@router.get("/tax-forms")
+async def get_tax_forms(
+    search_space_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Get all tax forms for the current user in a search space.
+    """
+    from app.db import TaxForm
+    
+    try:
+        await check_permission(
+            session,
+            user,
+            search_space_id,
+            Permission.DOCUMENTS_READ.value,
+            "You don't have permission to read documents in this search space",
+        )
+        
+        result = await session.execute(
+            select(TaxForm).where(
+                TaxForm.user_id == user.id,
+                TaxForm.search_space_id == search_space_id
+            ).order_by(TaxForm.tax_year.desc(), TaxForm.uploaded_at.desc())
+        )
+        tax_forms = result.scalars().all()
+        
+        return {
+            "tax_forms": [
+                {
+                    "id": str(form.id),
+                    "form_type": form.form_type,
+                    "tax_year": form.tax_year,
+                    "document_id": form.document_id,
+                    "processing_status": form.processing_status,
+                    "extraction_method": form.extraction_method,
+                    "needs_review": form.needs_review,
+                    "uploaded_at": form.uploaded_at.isoformat() if form.uploaded_at else None,
+                    "processed_at": form.processed_at.isoformat() if form.processed_at else None,
+                }
+                for form in tax_forms
+            ]
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to get tax forms: {e!s}"
+        ) from e
+
+
+@router.post("/tax-forms/{tax_form_id}/reprocess")
+async def reprocess_tax_form(
+    tax_form_id: str,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Re-process a stuck or failed tax form.
+    
+    This will re-trigger the parsing task for a tax form that is stuck in 'processing'
+    or failed status.
+    """
+    from uuid import UUID
+    from app.db import TaxForm, Document
+    
+    try:
+        # Get the tax form
+        result = await session.execute(
+            select(TaxForm).where(TaxForm.id == UUID(tax_form_id))
+        )
+        tax_form = result.scalar_one_or_none()
+        
+        if not tax_form:
+            raise HTTPException(status_code=404, detail="Tax form not found")
+        
+        if tax_form.user_id != user.id:
+            raise HTTPException(status_code=403, detail="Not authorized to reprocess this tax form")
+        
+        # Check permission
+        await check_permission(
+            session,
+            user,
+            tax_form.search_space_id,
+            Permission.DOCUMENTS_CREATE.value,
+            "You don't have permission to reprocess documents in this search space",
+        )
+        
+        # Get the associated document for the content
+        extracted_text = None
+        if tax_form.document_id:
+            doc_result = await session.execute(
+                select(Document).where(Document.id == tax_form.document_id)
+            )
+            document = doc_result.scalar_one_or_none()
+            if document and document.content:
+                # Use the document's summary content for parsing
+                extracted_text = document.content
+        
+        if not extracted_text:
+            raise HTTPException(
+                status_code=400, 
+                detail="Cannot reprocess: no document content available for this tax form"
+            )
+        
+        # Reset status
+        tax_form.processing_status = 'processing'
+        tax_form.extraction_method = None
+        tax_form.needs_review = False
+        await session.commit()
+        
+        # Trigger the parsing task
+        from app.tasks.celery_tasks.tax_form_tasks import parse_tax_form_task
+        
+        parse_tax_form_task.delay(
+            tax_form_id=str(tax_form.id),
+            file_path=None,  # No file path available
+            form_type=tax_form.form_type,
+            tax_year=tax_form.tax_year,
+            user_id=str(tax_form.user_id),
+            search_space_id=tax_form.search_space_id,
+            extracted_text=extracted_text,
+        )
+        
+        return {
+            "message": "Tax form reprocessing started",
+            "tax_form_id": str(tax_form.id),
+            "form_type": tax_form.form_type,
+            "tax_year": tax_form.tax_year,
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        await session.rollback()
+        raise HTTPException(
+            status_code=500, detail=f"Failed to reprocess tax form: {e!s}"
+        ) from e
diff --git a/financegpt_backend/app/services/plaid_service.py b/financegpt_backend/app/services/plaid_service.py
index a7c2827..2d09d5a 100644
--- a/financegpt_backend/app/services/plaid_service.py
+++ b/financegpt_backend/app/services/plaid_service.py
@@ -13,6 +13,12 @@
 from plaid.exceptions import ApiException
 from plaid.model.accounts_get_request import AccountsGetRequest
 from plaid.model.country_code import CountryCode
+from plaid.model.investments_holdings_get_request import (
+    InvestmentsHoldingsGetRequest,
+)
+from plaid.model.investments_transactions_get_request import (
+    InvestmentsTransactionsGetRequest,
+)
 from plaid.model.item_public_token_exchange_request import (
     ItemPublicTokenExchangeRequest,
 )
@@ -23,12 +29,6 @@
 from plaid.model.transactions_get_request_options import (
     TransactionsGetRequestOptions,
 )
-from plaid.model.investments_holdings_get_request import (
-    InvestmentsHoldingsGetRequest,
-)
-from plaid.model.investments_transactions_get_request import (
-    InvestmentsTransactionsGetRequest,
-)
 
 from app.config import config
 
diff --git a/financegpt_backend/app/tasks/celery_tasks/tax_form_tasks.py b/financegpt_backend/app/tasks/celery_tasks/tax_form_tasks.py
new file mode 100644
index 0000000..b25e0f5
--- /dev/null
+++ b/financegpt_backend/app/tasks/celery_tasks/tax_form_tasks.py
@@ -0,0 +1,435 @@
+"""Celery tasks for tax form parsing."""
+
+import logging
+from decimal import Decimal
+from typing import Optional
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
+from sqlalchemy.pool import NullPool
+
+from app.celery_app import celery_app
+from app.config import config
+
+logger = logging.getLogger(__name__)
+
+
+def get_celery_session_maker():
+    """
+    Create a new async session maker for Celery tasks.
+    Uses NullPool to avoid connection pooling issues in Celery workers.
+    """
+    engine = create_async_engine(
+        config.DATABASE_URL,
+        poolclass=NullPool,
+        echo=False,
+    )
+    return async_sessionmaker(engine, expire_on_commit=False)
+
+
+@celery_app.task(name="parse_tax_form", bind=True)
+def parse_tax_form_task(
+    self,
+    tax_form_id: str,
+    file_path: str | None,
+    form_type: str,
+    tax_year: int,
+    user_id: str,
+    search_space_id: int,
+    extracted_text: str | None = None,
+):
+    """
+    Celery task to parse an uploaded tax form and extract structured data.
+    
+    Args:
+        tax_form_id: UUID of the tax_form record
+        file_path: Path to the uploaded PDF file (optional if extracted_text provided)
+        form_type: Type of tax form (W2, 1099-MISC, etc.)
+        tax_year: Tax year for the form
+        user_id: User ID who uploaded the form
+        search_space_id: Search space ID for LLM config lookup
+        extracted_text: Pre-extracted text content (used if file_path not available)
+    """
+    import asyncio
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    try:
+        loop.run_until_complete(
+            _parse_tax_form(
+                tax_form_id=tax_form_id,
+                file_path=file_path,
+                form_type=form_type,
+                tax_year=tax_year,
+                user_id=user_id,
+                search_space_id=search_space_id,
+                extracted_text=extracted_text,
+            )
+        )
+    finally:
+        loop.close()
+
+
+async def _parse_tax_form(
+    tax_form_id: str,
+    file_path: str | None,
+    form_type: str,
+    tax_year: int,
+    user_id: str,
+    search_space_id: int,
+    extracted_text: str | None = None,
+):
+    """Parse tax form and save extracted data to database."""
+    from datetime import datetime, timezone
+    from decimal import Decimal
+    import hashlib
+    
+    from app.db import (
+        TaxForm,
+        W2Form,
+        Form1099Misc,
+        Form1099Int,
+        Form1099Div,
+        Form1099B,
+    )
+    from app.parsers.tax_form_parser import TaxFormParser
+    from app.services.llm_service import get_document_summary_llm
+    
+    async with get_celery_session_maker()() as session:
+        try:
+            # Get the user's configured LLM model string for document processing
+            llm_model = None
+            try:
+                user_llm = await get_document_summary_llm(session, search_space_id)
+                if user_llm:
+                    llm_model = user_llm.model
+                    logger.info(f"Using user's configured LLM for tax parsing: {llm_model}")
+            except Exception as e:
+                logger.warning(f"Could not get user LLM config, using default: {e}")
+            
+            # Get the tax form record
+            result = await session.execute(
+                select(TaxForm).where(TaxForm.id == UUID(tax_form_id))
+            )
+            tax_form = result.scalar_one_or_none()
+            
+            if not tax_form:
+                logger.error(f"Tax form record not found: {tax_form_id}")
+                return
+            
+            logger.info(f"Starting tax form parsing for {form_type} (ID: {tax_form_id})")
+            
+            # Initialize parser and parse the form
+            parser = TaxFormParser()
+            
+            # If we have pre-extracted text, use it directly
+            if extracted_text and not file_path:
+                logger.info(f"Using pre-extracted text for {form_type} parsing")
+                parse_result = await parser.parse_from_text(
+                    text=extracted_text,
+                    form_type=form_type,
+                    tax_year=tax_year,
+                    llm_model=llm_model,
+                )
+            else:
+                parse_result = await parser.parse_tax_form(
+                    file_path=file_path,
+                    form_type=form_type,
+                    tax_year=tax_year,
+                    llm_model=llm_model,
+                )
+            
+            extracted_data = parse_result.get("extracted_data", {})
+            confidence_scores = parse_result.get("confidence_scores", {})
+            extraction_method = parse_result.get("extraction_method", "unknown")
+            needs_review = parse_result.get("needs_review", True)
+            raw_data = parse_result.get("raw_extraction_data", {})
+            
+            logger.info(
+                f"Parsed {form_type}: method={extraction_method}, "
+                f"fields_extracted={len(extracted_data)}, needs_review={needs_review}"
+            )
+            
+            # Save extracted data to the appropriate form table
+            if form_type == "W2":
+                await _save_w2_data(
+                    session, tax_form.id, extracted_data, confidence_scores, raw_data
+                )
+            elif form_type == "1099-MISC":
+                await _save_1099_misc_data(
+                    session, tax_form.id, extracted_data, confidence_scores, raw_data
+                )
+            elif form_type == "1099-INT":
+                await _save_1099_int_data(
+                    session, tax_form.id, extracted_data, confidence_scores, raw_data
+                )
+            elif form_type == "1099-DIV":
+                await _save_1099_div_data(
+                    session, tax_form.id, extracted_data, confidence_scores, raw_data
+                )
+            elif form_type == "1099-B":
+                await _save_1099_b_data(
+                    session, tax_form.id, extracted_data, confidence_scores, raw_data
+                )
+            
+            # Update tax form status
+            tax_form.processing_status = "review_needed" if needs_review else "completed"
+            tax_form.extraction_method = extraction_method
+            tax_form.needs_review = needs_review
+            tax_form.processed_at = datetime.now(timezone.utc)
+            
+            await session.commit()
+            logger.info(f"Tax form parsing completed for {form_type} (ID: {tax_form_id})")
+            
+        except FileNotFoundError as e:
+            logger.error(f"Tax form file not found: {file_path}")
+            await _mark_tax_form_failed(session, tax_form_id, str(e))
+            
+        except Exception as e:
+            logger.exception(f"Error parsing tax form {tax_form_id}: {e}")
+            await _mark_tax_form_failed(session, tax_form_id, str(e))
+
+
+async def _mark_tax_form_failed(session, tax_form_id: str, error_message: str):
+    """Mark tax form as failed."""
+    from app.db import TaxForm
+    
+    result = await session.execute(
+        select(TaxForm).where(TaxForm.id == UUID(tax_form_id))
+    )
+    tax_form = result.scalar_one_or_none()
+    
+    if tax_form:
+        tax_form.processing_status = "failed"
+        # Store error in extraction_method field as fallback (truncate to fit VARCHAR(50))
+        tax_form.extraction_method = f"error: {error_message[:40]}"
+        await session.commit()
+        logger.error(f"Marked tax form {tax_form_id} as failed: {error_message}")
+
+
+def _hash_pii(value: Optional[str]) -> Optional[str]:
+    """Hash PII values for secure storage."""
+    if not value:
+        return None
+    import hashlib
+    return hashlib.sha256(value.encode()).hexdigest()
+
+
+async def _save_w2_data(session, tax_form_id: UUID, data: dict, confidence: dict, raw_data: dict):
+    """Save extracted W2 data to database."""
+    from app.db import W2Form
+    from decimal import Decimal
+    
+    w2 = W2Form(
+        tax_form_id=tax_form_id,
+        # Wage information
+        wages_tips_compensation=_to_decimal(data.get("wages_tips_compensation")),
+        federal_income_tax_withheld=_to_decimal(data.get("federal_income_tax_withheld")),
+        social_security_wages=_to_decimal(data.get("social_security_wages")),
+        social_security_tax_withheld=_to_decimal(data.get("social_security_tax_withheld")),
+        medicare_wages=_to_decimal(data.get("medicare_wages")),
+        medicare_tax_withheld=_to_decimal(data.get("medicare_tax_withheld")),
+        social_security_tips=_to_decimal(data.get("social_security_tips")),
+        allocated_tips=_to_decimal(data.get("allocated_tips")),
+        dependent_care_benefits=_to_decimal(data.get("dependent_care_benefits")),
+        nonqualified_plans=_to_decimal(data.get("nonqualified_plans")),
+        # State/local
+        state_wages=_to_decimal(data.get("state_wages")),
+        state_income_tax=_to_decimal(data.get("state_income_tax")),
+        local_wages=_to_decimal(data.get("local_wages")),
+        local_income_tax=_to_decimal(data.get("local_income_tax")),
+        state_code=data.get("state_code"),
+        locality_name=data.get("locality_name"),
+        # Checkboxes
+        statutory_employee=data.get("statutory_employee", False),
+        retirement_plan=data.get("retirement_plan", False),
+        third_party_sick_pay=data.get("third_party_sick_pay", False),
+        # Box 12 codes
+        box_12_codes=data.get("box_12_codes"),
+        # PII (hashed)
+        employee_ssn_hash=_hash_pii(data.get("employee_ssn")),
+        employer_ein_hash=_hash_pii(data.get("employer_ein")),
+        employer_name=data.get("employer_name"),
+        employer_address=data.get("employer_address"),
+        employee_name_masked="[EMPLOYEE_NAME]",  # Always mask for UI
+        # Metadata
+        field_confidence_scores=confidence,
+        raw_extraction_data=raw_data,
+    )
+    
+    session.add(w2)
+    await session.flush()
+    logger.info(f"Saved W2 form data for tax_form_id={tax_form_id}")
+
+
+async def _save_1099_misc_data(session, tax_form_id: UUID, data: dict, confidence: dict, raw_data: dict):
+    """Save extracted 1099-MISC data to database."""
+    from app.db import Form1099Misc
+    
+    form = Form1099Misc(
+        tax_form_id=tax_form_id,
+        payer_name=data.get("payer_name"),
+        payer_tin_hash=_hash_pii(data.get("payer_tin")),
+        payer_address=data.get("payer_address"),
+        recipient_tin_hash=_hash_pii(data.get("recipient_tin")),
+        rents=_to_decimal(data.get("rents")),
+        royalties=_to_decimal(data.get("royalties")),
+        other_income=_to_decimal(data.get("other_income")),
+        federal_income_tax_withheld=_to_decimal(data.get("federal_income_tax_withheld")),
+        fishing_boat_proceeds=_to_decimal(data.get("fishing_boat_proceeds")),
+        medical_health_payments=_to_decimal(data.get("medical_health_payments")),
+        substitute_payments=_to_decimal(data.get("substitute_payments")),
+        crop_insurance_proceeds=_to_decimal(data.get("crop_insurance_proceeds")),
+        gross_proceeds_attorney=_to_decimal(data.get("gross_proceeds_attorney")),
+        section_409a_deferrals=_to_decimal(data.get("section_409a_deferrals")),
+        state_tax_withheld=_to_decimal(data.get("state_tax_withheld")),
+        state_payer_number=data.get("state_payer_number"),
+        state_income=_to_decimal(data.get("state_income")),
+        field_confidence_scores=confidence,
+        raw_extraction_data=raw_data,
+    )
+    
+    session.add(form)
+    await session.flush()
+    logger.info(f"Saved 1099-MISC data for tax_form_id={tax_form_id}")
+
+
+async def _save_1099_int_data(session, tax_form_id: UUID, data: dict, confidence: dict, raw_data: dict):
+    """Save extracted 1099-INT data to database."""
+    from app.db import Form1099Int
+    
+    form = Form1099Int(
+        tax_form_id=tax_form_id,
+        payer_name=data.get("payer_name"),
+        payer_tin_hash=_hash_pii(data.get("payer_tin")),
+        payer_address=data.get("payer_address"),
+        recipient_tin_hash=_hash_pii(data.get("recipient_tin")),
+        interest_income=_to_decimal(data.get("interest_income")),
+        early_withdrawal_penalty=_to_decimal(data.get("early_withdrawal_penalty")),
+        interest_on_us_savings_bonds=_to_decimal(data.get("interest_on_us_savings_bonds")),
+        federal_income_tax_withheld=_to_decimal(data.get("federal_income_tax_withheld")),
+        investment_expenses=_to_decimal(data.get("investment_expenses")),
+        foreign_tax_paid=_to_decimal(data.get("foreign_tax_paid")),
+        foreign_country=data.get("foreign_country"),
+        tax_exempt_interest=_to_decimal(data.get("tax_exempt_interest")),
+        specified_private_activity_bond_interest=_to_decimal(data.get("specified_private_activity_bond_interest")),
+        market_discount=_to_decimal(data.get("market_discount")),
+        bond_premium=_to_decimal(data.get("bond_premium")),
+        bond_premium_on_treasury=_to_decimal(data.get("bond_premium_on_treasury")),
+        bond_premium_on_tax_exempt=_to_decimal(data.get("bond_premium_on_tax_exempt")),
+        state_code=data.get("state_code"),
+        state_id=data.get("state_id"),
+        state_tax_withheld=_to_decimal(data.get("state_tax_withheld")),
+        field_confidence_scores=confidence,
+        raw_extraction_data=raw_data,
+    )
+    
+    session.add(form)
+    await session.flush()
+    logger.info(f"Saved 1099-INT data for tax_form_id={tax_form_id}")
+
+
+async def _save_1099_div_data(session, tax_form_id: UUID, data: dict, confidence: dict, raw_data: dict):
+    """Save extracted 1099-DIV data to database."""
+    from app.db import Form1099Div
+    
+    form = Form1099Div(
+        tax_form_id=tax_form_id,
+        payer_name=data.get("payer_name"),
+        payer_tin_hash=_hash_pii(data.get("payer_tin")),
+        payer_address=data.get("payer_address"),
+        recipient_tin_hash=_hash_pii(data.get("recipient_tin")),
+        total_ordinary_dividends=_to_decimal(data.get("total_ordinary_dividends")),
+        qualified_dividends=_to_decimal(data.get("qualified_dividends")),
+        total_capital_gain_distributions=_to_decimal(data.get("total_capital_gain_distributions")),
+        unrecaptured_section_1250_gain=_to_decimal(data.get("unrecaptured_section_1250_gain")),
+        section_1202_gain=_to_decimal(data.get("section_1202_gain")),
+        collectibles_28_gain=_to_decimal(data.get("collectibles_28_gain")),
+        section_897_ordinary_dividends=_to_decimal(data.get("section_897_ordinary_dividends")),
+        section_897_capital_gain=_to_decimal(data.get("section_897_capital_gain")),
+        nondividend_distributions=_to_decimal(data.get("nondividend_distributions")),
+        federal_income_tax_withheld=_to_decimal(data.get("federal_income_tax_withheld")),
+        section_199a_dividends=_to_decimal(data.get("section_199a_dividends")),
+        investment_expenses=_to_decimal(data.get("investment_expenses")),
+        foreign_tax_paid=_to_decimal(data.get("foreign_tax_paid")),
+        foreign_country=data.get("foreign_country"),
+        cash_liquidation_distributions=_to_decimal(data.get("cash_liquidation_distributions")),
+        noncash_liquidation_distributions=_to_decimal(data.get("noncash_liquidation_distributions")),
+        exempt_interest_dividends=_to_decimal(data.get("exempt_interest_dividends")),
+        specified_private_activity_bond_interest_dividends=_to_decimal(data.get("specified_private_activity_bond_interest_dividends")),
+        state_tax_withheld=_to_decimal(data.get("state_tax_withheld")),
+        field_confidence_scores=confidence,
+        raw_extraction_data=raw_data,
+    )
+    
+    session.add(form)
+    await session.flush()
+    logger.info(f"Saved 1099-DIV data for tax_form_id={tax_form_id}")
+
+
+async def _save_1099_b_data(session, tax_form_id: UUID, data: dict, confidence: dict, raw_data: dict):
+    """Save extracted 1099-B data to database."""
+    from app.db import Form1099B
+    
+    form = Form1099B(
+        tax_form_id=tax_form_id,
+        payer_name=data.get("broker_name") or data.get("payer_name"),
+        payer_tin_hash=_hash_pii(data.get("broker_tin") or data.get("payer_tin")),
+        payer_address=data.get("broker_address") or data.get("payer_address"),
+        recipient_tin_hash=_hash_pii(data.get("recipient_tin")),
+        description_of_property=data.get("description_of_property"),
+        date_acquired=data.get("date_acquired"),
+        date_sold=data.get("date_sold"),
+        proceeds=_to_decimal(data.get("proceeds")),
+        cost_basis=_to_decimal(data.get("cost_basis")),
+        accrued_market_discount=_to_decimal(data.get("accrued_market_discount")),
+        wash_sale_loss_disallowed=_to_decimal(data.get("wash_sale_loss_disallowed") or data.get("wash_sale_loss")),
+        federal_income_tax_withheld=_to_decimal(data.get("federal_income_tax_withheld")),
+        # Form 8949 checkboxes
+        short_term_box_a=data.get("short_term_box_a", False),
+        short_term_box_b=data.get("short_term_box_b", False),
+        short_term_box_c=data.get("short_term_box_c", False),
+        long_term_box_d=data.get("long_term_box_d", False),
+        long_term_box_e=data.get("long_term_box_e", False),
+        long_term_box_f=data.get("long_term_box_f", False),
+        loss_not_allowed=data.get("loss_not_allowed", False),
+        noncovered_security=data.get("noncovered_security", False),
+        basis_reported_to_irs=data.get("basis_reported_to_irs", False),
+        state_code=data.get("state_code"),
+        state_id=data.get("state_id"),
+        state_tax_withheld=_to_decimal(data.get("state_tax_withheld")),
+        field_confidence_scores=confidence,
+        raw_extraction_data=raw_data,
+    )
+    
+    session.add(form)
+    await session.flush()
+    logger.info(f"Saved 1099-B data for tax_form_id={tax_form_id}")
+
+
+def _to_decimal(value) -> Decimal | None:
+    """Convert value to Decimal, handling various formats."""
+    from decimal import InvalidOperation
+    
+    if value is None:
+        return None
+    
+    if isinstance(value, Decimal):
+        return value
+    
+    if isinstance(value, (int, float)):
+        return Decimal(str(value))
+    
+    if isinstance(value, str):
+        # Remove currency symbols, commas, spaces
+        cleaned = value.replace("$", "").replace(",", "").replace(" ", "").strip()
+        if not cleaned:
+            return None
+        try:
+            return Decimal(cleaned)
+        except InvalidOperation:
+            return None
+    
+    return None
diff --git a/financegpt_backend/app/tasks/document_processors/file_processors.py b/financegpt_backend/app/tasks/document_processors/file_processors.py
index bbc1d7b..4d5f941 100644
--- a/financegpt_backend/app/tasks/document_processors/file_processors.py
+++ b/financegpt_backend/app/tasks/document_processors/file_processors.py
@@ -58,6 +58,8 @@ async def _process_tax_form_if_applicable(
     filename: str,
     user_id: str,
     search_space_id: int,
+    file_path: str | None = None,
+    extracted_text: str | None = None,
 ) -> None:
     """
     Check if uploaded document is a tax form and trigger parsing if so.
@@ -71,18 +73,11 @@ async def _process_tax_form_if_applicable(
         filename: Original filename
         user_id: User ID
         search_space_id: Search space ID
+        file_path: Path to the uploaded file (optional, for parsing)
+        extracted_text: Already extracted text from the document (used if file_path not available)
     """
     from uuid import UUID
     from app.db import TaxForm
-    from app.parsers.tax_form_parser import TaxFormParser
-    from app.utils.pii_masking import prepare_tax_form_for_storage
-    from app.schemas.tax_forms import (
-        W2FormCreate,
-        Form1099MiscCreate,
-        Form1099IntCreate,
-        Form1099DivCreate,
-        Form1099BCreate,
-    )
     import re
     
     try:
@@ -100,11 +95,21 @@ async def _process_tax_form_if_applicable(
         if year_match:
             tax_year = int(year_match.group(1))
         
+        # If no year in filename, try to extract from document content
+        if not tax_year and extracted_text:
+            # Look for tax year patterns in content (e.g., "2025 W-2", "Tax Year 2025", "for the 2025 tax year")
+            content_year_match = re.search(r'(?:tax\s+year|for\s+(?:the\s+)?)\s*(20\d{2})|(?:20\d{2})\s+W-?2', extracted_text, re.IGNORECASE)
+            if content_year_match:
+                year_str = content_year_match.group(1) or re.search(r'(20\d{2})', content_year_match.group(0)).group(1)
+                tax_year = int(year_str)
+        
         # Detect form type
         if 'w2' in filename_lower or 'w-2' in filename_lower:
             form_type = 'W2'
             if not tax_year:
-                tax_year = 2024  # Default to current tax year
+                # Default to previous year (tax forms are usually for prior year)
+                from datetime import datetime
+                tax_year = datetime.now().year - 1
         elif '1099' in filename_lower:
             if 'misc' in filename_lower:
                 form_type = '1099-MISC'
@@ -119,7 +124,18 @@ async def _process_tax_form_if_applicable(
                 form_type = '1099-MISC'  # Default
             
             if not tax_year:
-                tax_year = 2024
+                # Default to previous year (tax forms are usually for prior year)
+                from datetime import datetime
+                tax_year = datetime.now().year - 1
+        elif '1095' in filename_lower:
+            if 'c' in filename_lower or '-c' in filename_lower:
+                form_type = '1095-C'
+            else:
+                form_type = '1095-C'  # Default to 1095-C (most common employer form)
+            
+            if not tax_year:
+                from datetime import datetime
+                tax_year = datetime.now().year - 1
         
         # If not a tax form, return early
         if not form_type:
@@ -142,24 +158,89 @@ async def _process_tax_form_if_applicable(
         
         logger.info(f"Created tax form record with ID {tax_form.id}, starting parsing...")
         
-        # TODO: Trigger async parsing task
-        # For now, log that parsing would happen
-        # In production, this would be a Celery task
-        logger.info(
-            f"Tax form parsing would be triggered here for {form_type} (tax_form_id={tax_form.id}). "
-            f"Parser integration pending."
-        )
+        # Prefer file_path if available, otherwise use extracted_text
+        actual_file_path = file_path
+        actual_extracted_text = extracted_text
+        
+        if not actual_file_path and not actual_extracted_text:
+            logger.warning(
+                f"No file path or extracted text provided for tax form {tax_form.id}, parsing will be skipped"
+            )
+            tax_form.processing_status = 'failed'
+            tax_form.extraction_method = 'error: no file path or text available'
+            await session.commit()
+            return
         
         # Update status to show it's queued for processing
         tax_form.processing_status = 'processing'
         await session.commit()
         
+        # Trigger async parsing task
+        from app.tasks.celery_tasks.tax_form_tasks import parse_tax_form_task
+        
+        parse_tax_form_task.delay(
+            tax_form_id=str(tax_form.id),
+            file_path=actual_file_path,
+            form_type=form_type,
+            tax_year=tax_year,
+            user_id=user_id,
+            search_space_id=search_space_id,
+            extracted_text=actual_extracted_text,
+        )
+        
+        logger.info(f"Queued tax form parsing task for {form_type} (tax_form_id={tax_form.id})")
+        
     except Exception as e:
         logger.error(f"Error processing tax form for {filename}: {e}")
         # Don't fail the document upload if tax parsing fails
         await session.rollback()
 
 
+def _clean_symbol(raw_symbol: str | None) -> str:
+    """
+    Clean and normalize a stock symbol to fit VARCHAR(20) constraint.
+    
+    Handles special cases like:
+    - Schwab's "**CASH & CASH INVESTMENTS**" -> "CASH"
+    - Empty or None values -> ""
+    - Truncates overly long symbols
+    
+    Args:
+        raw_symbol: Raw symbol string from CSV
+        
+    Returns:
+        Cleaned symbol string (max 20 chars)
+    """
+    if not raw_symbol:
+        return ""
+    
+    symbol = str(raw_symbol).strip()
+    
+    # Normalize common cash/money market descriptions to standard symbol
+    cash_patterns = [
+        "cash & cash investments",
+        "cash investments",
+        "money market",
+        "sweep account",
+        "cash equivalent",
+        "cash balance",
+    ]
+    
+    symbol_lower = symbol.lower().replace("*", "").strip()
+    for pattern in cash_patterns:
+        if pattern in symbol_lower:
+            return "CASH"
+    
+    # Remove any asterisks and extra whitespace
+    symbol = symbol.replace("*", "").strip()
+    
+    # Truncate to 20 chars (VARCHAR(20) limit)
+    if len(symbol) > 20:
+        symbol = symbol[:20]
+    
+    return symbol
+
+
 async def _save_investment_holdings(
     session: AsyncSession,
     user_id: str,
@@ -199,7 +280,11 @@ async def _save_investment_holdings(
         # Process each holding
         total_value = 0.0
         for holding_data in holdings_data:
-            symbol = holding_data.symbol if hasattr(holding_data, 'symbol') else holding_data.get('symbol', '')
+            raw_symbol = holding_data.symbol if hasattr(holding_data, 'symbol') else holding_data.get('symbol', '')
+            
+            # Clean and normalize symbol (VARCHAR(20) limit in DB)
+            # Handle special cases like Schwab's "**CASH & CASH INVESTMENTS**"
+            symbol = _clean_symbol(raw_symbol)
             
             # Handle None values from CSV parsing
             quantity_raw = holding_data.quantity if hasattr(holding_data, 'quantity') else holding_data.get('quantity')
@@ -631,8 +716,10 @@ async def add_received_file_document_using_unstructured(
             await session.refresh(document)
 
         # After successful document creation, check if this is a tax form
+        # Pass the extracted markdown content for parsing
         await _process_tax_form_if_applicable(
-            session, document, file_name, user_id, search_space_id
+            session, document, file_name, user_id, search_space_id,
+            extracted_text=file_in_markdown
         )
 
         return document
@@ -775,8 +862,10 @@ async def add_received_file_document_using_llamacloud(
             await session.refresh(document)
 
         # After successful document creation, check if this is a tax form
+        # Pass the extracted markdown content for parsing
         await _process_tax_form_if_applicable(
-            session, document, file_name, user_id, search_space_id
+            session, document, file_name, user_id, search_space_id,
+            extracted_text=file_in_markdown
         )
 
         return document
@@ -943,6 +1032,13 @@ async def add_received_file_document_using_docling(
             await session.commit()
             await session.refresh(document)
 
+        # After successful document creation, check if this is a tax form
+        # Pass the extracted markdown content for parsing
+        await _process_tax_form_if_applicable(
+            session, document, file_name, user_id, search_space_id,
+            extracted_text=file_in_markdown
+        )
+
         return document
     except SQLAlchemyError as db_error:
         await session.rollback()
diff --git a/financegpt_backend/app/utils/pii_masking.py b/financegpt_backend/app/utils/pii_masking.py
index 97f8286..1342ec3 100644
--- a/financegpt_backend/app/utils/pii_masking.py
+++ b/financegpt_backend/app/utils/pii_masking.py
@@ -285,3 +285,100 @@ def validate_confidence_threshold(
     
     all_passed = len(failed_fields) == 0
     return all_passed, failed_fields
+
+
+def mask_pii_in_text(text: str) -> tuple[str, dict[str, str]]:
+    """Mask PII in raw text before sending to LLM.
+    
+    This function finds and masks:
+    - SSN patterns (XXX-XX-XXXX or XXXXXXXXX)
+    - EIN patterns (XX-XXXXXXX)
+    - Keeps dollar amounts intact (needed for extraction)
+    
+    Args:
+        text: Raw text containing potential PII
+        
+    Returns:
+        Tuple of (masked_text, mapping_dict)
+        - masked_text: Text with PII replaced by placeholders
+        - mapping_dict: Original values keyed by placeholder (for recovery if needed)
+        
+    Examples:
+        >>> text = "SSN: 123-45-6789, EIN: 12-3456789, Wages: $50,000"
+        >>> masked, mapping = mask_pii_in_text(text)
+        >>> "123-45-6789" not in masked
+        True
+        >>> "$50,000" in masked  # Dollar amounts preserved
+        True
+    """
+    masked_text = text
+    mapping = {}
+    
+    # Pattern for SSN: XXX-XX-XXXX (with dashes)
+    ssn_pattern_dashed = r'\b(\d{3})-(\d{2})-(\d{4})\b'
+    
+    def mask_ssn_match(match):
+        full_ssn = match.group(0)
+        last_four = match.group(3)
+        placeholder = f"[SSN:***-**-{last_four}]"
+        mapping[placeholder] = full_ssn
+        return placeholder
+    
+    masked_text = re.sub(ssn_pattern_dashed, mask_ssn_match, masked_text)
+    
+    # Pattern for SSN without dashes: 9 consecutive digits that look like SSN
+    # Be careful not to match other numbers like phone numbers or account numbers
+    # Only match if it appears after SSN-related keywords
+    ssn_pattern_nodash = r'(?i)(?:ssn|social\s*security|ss#|ss\s*#|soc\s*sec)[:\s]*(\d{9})\b'
+    
+    def mask_ssn_nodash_match(match):
+        ssn_digits = match.group(1)
+        last_four = ssn_digits[-4:]
+        placeholder = f"[SSN:*****{last_four}]"
+        mapping[placeholder] = ssn_digits
+        # Return just the placeholder (the keyword prefix was already matched)
+        return match.group(0).replace(ssn_digits, placeholder)
+    
+    masked_text = re.sub(ssn_pattern_nodash, mask_ssn_nodash_match, masked_text)
+    
+    # Pattern for EIN: XX-XXXXXXX
+    ein_pattern = r'\b(\d{2})-(\d{7})\b'
+    
+    def mask_ein_match(match):
+        full_ein = match.group(0)
+        # Hash the EIN for the placeholder (truncate hash for readability)
+        ein_hash = hashlib.sha256(full_ein.replace('-', '').encode()).hexdigest()[:8]
+        placeholder = f"[EIN:{ein_hash}]"
+        mapping[placeholder] = full_ein
+        return placeholder
+    
+    masked_text = re.sub(ein_pattern, mask_ein_match, masked_text)
+    
+    return masked_text, mapping
+
+
+def recover_pii_from_mapping(
+    extracted_data: dict[str, Any],
+    mapping: dict[str, str]
+) -> dict[str, Any]:
+    """Recover original PII values from masked placeholders.
+    
+    Args:
+        extracted_data: Data extracted from LLM with masked values
+        mapping: Mapping from mask_pii_in_text()
+        
+    Returns:
+        Data with original PII values restored
+    """
+    recovered = {}
+    
+    for key, value in extracted_data.items():
+        if isinstance(value, str):
+            # Check if this is a masked value and recover it
+            for placeholder, original in mapping.items():
+                if placeholder in value:
+                    value = original
+                    break
+        recovered[key] = value
+    
+    return recovered
diff --git a/financegpt_backend/financegpt_new_backend.egg-info/PKG-INFO b/financegpt_backend/financegpt_new_backend.egg-info/PKG-INFO
index 0627bb2..a9042d4 100644
--- a/financegpt_backend/financegpt_new_backend.egg-info/PKG-INFO
+++ b/financegpt_backend/financegpt_new_backend.egg-info/PKG-INFO
@@ -31,6 +31,7 @@ Requires-Dist: en-core-web-sm @ https://github.com/explosion/spacy-models/releas
 Requires-Dist: static-ffmpeg>=2.13
 Requires-Dist: tavily-python>=0.3.2
 Requires-Dist: unstructured-client>=0.30.0
+Requires-Dist: unstructured[all-docs]==0.17.2
 Requires-Dist: uvicorn[standard]>=0.34.0
 Requires-Dist: validators>=0.34.0
 Requires-Dist: youtube-transcript-api>=1.0.3
diff --git a/financegpt_backend/financegpt_new_backend.egg-info/SOURCES.txt b/financegpt_backend/financegpt_new_backend.egg-info/SOURCES.txt
index 58fe279..85aba3b 100644
--- a/financegpt_backend/financegpt_new_backend.egg-info/SOURCES.txt
+++ b/financegpt_backend/financegpt_new_backend.egg-info/SOURCES.txt
@@ -2,6 +2,7 @@ pyproject.toml
 alembic/env.py
 alembic/versions/0_initial_schema.py
 alembic/versions/1_add_investment_holdings_tables.py
+alembic/versions/2_add_tax_forms_tables.py
 app/__init__.py
 app/app.py
 app/celery_app.py
@@ -29,6 +30,7 @@ app/agents/new_chat/tools/portfolio_performance.py
 app/agents/new_chat/tools/registry.py
 app/agents/new_chat/tools/search_financegpt_docs.py
 app/agents/new_chat/tools/search_transactions.py
+app/agents/new_chat/tools/tax_analysis.py
 app/agents/new_chat/tools/user_memory.py
 app/agents/podcaster/__init__.py
 app/agents/podcaster/configuration.py
@@ -49,12 +51,11 @@ app/connectors/google_drive/file_types.py
 app/connectors/google_drive/folder_manager.py
 app/parsers/__init__.py
 app/parsers/base_financial_parser.py
-app/parsers/chase_parser.py
-app/parsers/discover_parser.py
-app/parsers/fidelity_parser.py
+app/parsers/llm_csv_parser.py
 app/parsers/ofx_parser.py
 app/parsers/parser_factory.py
 app/parsers/pdf_statement_parser.py
+app/parsers/tax_form_parser.py
 app/prompts/__init__.py
 app/retriever/__init__.py
 app/retriever/chunks_hybrid_search.py
@@ -65,7 +66,6 @@ app/routes/composio_routes.py
 app/routes/documents_routes.py
 app/routes/editor_routes.py
 app/routes/financegpt_docs_routes.py
-app/routes/investments.py
 app/routes/logs_routes.py
 app/routes/new_chat_routes.py
 app/routes/new_llm_config_routes.py
@@ -97,6 +97,7 @@ app/schemas/rbac_schemas.py
 app/schemas/search_source_connector.py
 app/schemas/search_space.py
 app/schemas/slack_auth_credentials.py
+app/schemas/tax_forms.py
 app/schemas/teams_auth_credentials.py
 app/schemas/users.py
 app/services/__init__.py
@@ -109,6 +110,7 @@ app/services/investment_csv_parser.py
 app/services/kokoro_tts_service.py
 app/services/llm_document_extractor.py
 app/services/llm_service.py
+app/services/llm_stock_enrichment.py
 app/services/new_streaming_service.py
 app/services/notification_service.py
 app/services/page_limit_service.py
@@ -126,6 +128,7 @@ app/tasks/celery_tasks/connector_tasks.py
 app/tasks/celery_tasks/document_reindex_tasks.py
 app/tasks/celery_tasks/document_tasks.py
 app/tasks/celery_tasks/schedule_checker_task.py
+app/tasks/celery_tasks/tax_form_tasks.py
 app/tasks/chat/stream_new_chat.py
 app/tasks/connector_indexers/__init__.py
 app/tasks/connector_indexers/base.py
@@ -147,6 +150,7 @@ app/utils/credit_card_rewards_fetcher.py
 app/utils/document_converters.py
 app/utils/oauth_security.py
 app/utils/periodic_scheduler.py
+app/utils/pii_masking.py
 app/utils/rbac.py
 app/utils/subscription_utils.py
 app/utils/validators.py
diff --git a/financegpt_backend/financegpt_new_backend.egg-info/requires.txt b/financegpt_backend/financegpt_new_backend.egg-info/requires.txt
index c840bdc..83a761b 100644
--- a/financegpt_backend/financegpt_new_backend.egg-info/requires.txt
+++ b/financegpt_backend/financegpt_new_backend.egg-info/requires.txt
@@ -26,6 +26,7 @@ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_
 static-ffmpeg>=2.13
 tavily-python>=0.3.2
 unstructured-client>=0.30.0
+unstructured[all-docs]==0.17.2
 uvicorn[standard]>=0.34.0
 validators>=0.34.0
 youtube-transcript-api>=1.0.3
diff --git a/financegpt_web/components/layout/providers/LayoutDataProvider.tsx b/financegpt_web/components/layout/providers/LayoutDataProvider.tsx
index 3fe8130..c76a9af 100644
--- a/financegpt_web/components/layout/providers/LayoutDataProvider.tsx
+++ b/financegpt_web/components/layout/providers/LayoutDataProvider.tsx
@@ -31,6 +31,7 @@ import { LayoutShell } from "../ui/shell";
 import { AllPrivateChatsSidebar } from "../ui/sidebar/AllPrivateChatsSidebar";
 import { AllSharedChatsSidebar } from "../ui/sidebar/AllSharedChatsSidebar";
 import { InboxSidebar } from "../ui/sidebar/InboxSidebar";
+import { UploadStatusPanel } from "../ui/upload-status-panel";
 
 interface LayoutDataProviderProps {
 	searchSpaceId: string;
@@ -553,6 +554,9 @@ export function LayoutDataProvider({
 				markAllAsRead={markAllAsRead}
 			/>
 
+			{/* Google Drive-style Upload Status Panel */}
+			<UploadStatusPanel inboxItems={inboxItems} markAsRead={markAsRead} />
+
 			{/* Create Search Space Dialog */}
 			<CreateSearchSpaceDialog
 				open={isCreateSearchSpaceDialogOpen}
diff --git a/financegpt_web/components/layout/ui/upload-status-panel/UploadStatusPanel.tsx b/financegpt_web/components/layout/ui/upload-status-panel/UploadStatusPanel.tsx
new file mode 100644
index 0000000..e1306a6
--- /dev/null
+++ b/financegpt_web/components/layout/ui/upload-status-panel/UploadStatusPanel.tsx
@@ -0,0 +1,368 @@
+"use client";
+
+import {
+	AlertCircle,
+	CheckCircle2,
+	ChevronDown,
+	ChevronUp,
+	FileText,
+	Loader2,
+	X,
+} from "lucide-react";
+import { AnimatePresence, motion } from "motion/react";
+import { useTranslations } from "next-intl";
+import { useCallback, useEffect, useMemo, useState } from "react";
+import { createPortal } from "react-dom";
+import { Button } from "@/components/ui/button";
+import { Progress } from "@/components/ui/progress";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
+import {
+	type ConnectorIndexingMetadata,
+	type DocumentProcessingMetadata,
+	isConnectorIndexingMetadata,
+	isDocumentProcessingMetadata,
+} from "@/contracts/types/inbox.types";
+import type { InboxItem } from "@/hooks/use-inbox";
+import { cn } from "@/lib/utils";
+
+interface UploadStatusPanelProps {
+	inboxItems: InboxItem[];
+	markAsRead: (id: number) => Promise<boolean>;
+}
+
+/**
+ * Get display name for connector type
+ */
+function getConnectorTypeDisplayName(connectorType: string): string {
+	const displayNames: Record<string, string> = {
+		GITHUB_CONNECTOR: "GitHub",
+		GOOGLE_CALENDAR_CONNECTOR: "Google Calendar",
+		GOOGLE_GMAIL_CONNECTOR: "Gmail",
+		GOOGLE_DRIVE_CONNECTOR: "Google Drive",
+		LINEAR_CONNECTOR: "Linear",
+		NOTION_CONNECTOR: "Notion",
+		SLACK_CONNECTOR: "Slack",
+		TEAMS_CONNECTOR: "Microsoft Teams",
+		DISCORD_CONNECTOR: "Discord",
+		JIRA_CONNECTOR: "Jira",
+		CONFLUENCE_CONNECTOR: "Confluence",
+		BOOKSTACK_CONNECTOR: "BookStack",
+		CLICKUP_CONNECTOR: "ClickUp",
+		AIRTABLE_CONNECTOR: "Airtable",
+		LUMA_CONNECTOR: "Luma",
+		ELASTICSEARCH_CONNECTOR: "Elasticsearch",
+		WEBCRAWLER_CONNECTOR: "Web Crawler",
+		YOUTUBE_CONNECTOR: "YouTube",
+		CIRCLEBACK_CONNECTOR: "Circleback",
+		MCP_CONNECTOR: "MCP",
+		TAVILY_API: "Tavily",
+		SEARXNG_API: "SearXNG",
+		LINKUP_API: "Linkup",
+		BAIDU_SEARCH_API: "Baidu",
+	};
+
+	return (
+		displayNames[connectorType] ||
+		connectorType
+			.replace(/_/g, " ")
+			.replace(/CONNECTOR|API/gi, "")
+			.trim()
+	);
+}
+
+/**
+ * Google Drive-style floating upload status panel
+ * Shows in-progress uploads and recent completions
+ */
+export function UploadStatusPanel({ inboxItems, markAsRead }: UploadStatusPanelProps) {
+	const t = useTranslations("sidebar");
+	const [isExpanded, setIsExpanded] = useState(true);
+	const [mounted, setMounted] = useState(false);
+	const [dismissedIds, setDismissedIds] = useState<Set<number>>(new Set());
+
+	useEffect(() => {
+		setMounted(true);
+	}, []);
+
+	// Filter to only status items (uploads, connector indexing, document processing)
+	// Only show items from the last 30 minutes that are in progress or recently completed
+	const statusItems = useMemo(() => {
+		const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000);
+
+		return inboxItems
+			.filter((item) => {
+				// Only status items
+				if (item.type !== "connector_indexing" && item.type !== "document_processing") {
+					return false;
+				}
+
+				// Don't show dismissed items
+				if (dismissedIds.has(item.id)) {
+					return false;
+				}
+
+				// Get status from metadata
+				const metadata = item.metadata as Record<string, unknown>;
+				const status = typeof metadata?.status === "string" ? metadata.status : undefined;
+
+				// Always show in-progress items
+				if (status === "in_progress") {
+					return true;
+				}
+
+				// Show completed/failed items only if recent (within 30 mins)
+				const createdAt = new Date(item.created_at);
+				if (createdAt >= thirtyMinutesAgo) {
+					return true;
+				}
+
+				return false;
+			})
+			.sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
+			.slice(0, 5); // Max 5 items to show
+	}, [inboxItems, dismissedIds]);
+
+	// Count in-progress items
+	const inProgressCount = useMemo(() => {
+		return statusItems.filter((item) => {
+			const metadata = item.metadata as Record<string, unknown>;
+			return metadata?.status === "in_progress";
+		}).length;
+	}, [statusItems]);
+
+	// Auto-collapse when no items
+	useEffect(() => {
+		if (statusItems.length === 0) {
+			setIsExpanded(false);
+		} else if (inProgressCount > 0) {
+			// Auto-expand when new in-progress item appears
+			setIsExpanded(true);
+		}
+	}, [statusItems.length, inProgressCount]);
+
+	const handleDismiss = useCallback(
+		async (item: InboxItem, e: React.MouseEvent) => {
+			e.stopPropagation();
+			setDismissedIds((prev) => new Set([...prev, item.id]));
+			if (!item.read) {
+				await markAsRead(item.id);
+			}
+		},
+		[markAsRead]
+	);
+
+	const handleDismissAll = useCallback(async () => {
+		const ids = statusItems.map((item) => item.id);
+		setDismissedIds((prev) => new Set([...prev, ...ids]));
+
+		// Mark all as read
+		for (const item of statusItems) {
+			if (!item.read) {
+				await markAsRead(item.id);
+			}
+		}
+	}, [statusItems, markAsRead]);
+
+	const getStatusIcon = (item: InboxItem) => {
+		const metadata = item.metadata as Record<string, unknown>;
+		const status = typeof metadata?.status === "string" ? metadata.status : undefined;
+
+		switch (status) {
+			case "in_progress":
+				return <Loader2 className="h-4 w-4 text-primary animate-spin" />;
+			case "completed":
+				return <CheckCircle2 className="h-4 w-4 text-green-500" />;
+			case "failed":
+				return <AlertCircle className="h-4 w-4 text-red-500" />;
+			default:
+				return <FileText className="h-4 w-4 text-muted-foreground" />;
+		}
+	};
+
+	const getItemIcon = (item: InboxItem) => {
+		if (item.type === "connector_indexing" && isConnectorIndexingMetadata(item.metadata)) {
+			return getConnectorIcon(item.metadata.connector_type, "h-4 w-4");
+		}
+		return <FileText className="h-4 w-4 text-muted-foreground" />;
+	};
+
+	const getProgressValue = (item: InboxItem): number | null => {
+		if (item.type === "connector_indexing" && isConnectorIndexingMetadata(item.metadata)) {
+			const meta = item.metadata as ConnectorIndexingMetadata;
+			if (meta.progress_percent !== undefined) {
+				return meta.progress_percent;
+			}
+			if (meta.total_count && meta.indexed_count) {
+				return Math.round((meta.indexed_count / meta.total_count) * 100);
+			}
+		}
+		return null;
+	};
+
+	const getItemSubtitle = (item: InboxItem): string => {
+		if (item.type === "connector_indexing" && isConnectorIndexingMetadata(item.metadata)) {
+			const meta = item.metadata as ConnectorIndexingMetadata;
+			return getConnectorTypeDisplayName(meta.connector_type);
+		}
+		if (item.type === "document_processing" && isDocumentProcessingMetadata(item.metadata)) {
+			const meta = item.metadata as DocumentProcessingMetadata;
+			return meta.processing_stage.replace(/_/g, " ");
+		}
+		return item.type.replace(/_/g, " ");
+	};
+
+	// Don't render if no items and not mounted
+	if (!mounted || statusItems.length === 0) {
+		return null;
+	}
+
+	return createPortal(
+		<AnimatePresence>
+			<motion.div
+				initial={{ opacity: 0, y: 20, scale: 0.95 }}
+				animate={{ opacity: 1, y: 0, scale: 1 }}
+				exit={{ opacity: 0, y: 20, scale: 0.95 }}
+				transition={{ type: "spring", damping: 25, stiffness: 300 }}
+				className={cn(
+					"fixed bottom-4 right-4 z-50",
+					"w-80 max-w-[calc(100vw-2rem)]",
+					"bg-background border rounded-lg shadow-lg",
+					"flex flex-col overflow-hidden"
+				)}
+			>
+				{/* Header - Always visible */}
+				<button
+					type="button"
+					onClick={() => setIsExpanded(!isExpanded)}
+					className={cn(
+						"flex items-center justify-between w-full px-4 py-3",
+						"hover:bg-accent/50 transition-colors",
+						"border-b",
+						!isExpanded && "border-b-0"
+					)}
+				>
+					<div className="flex items-center gap-2">
+						{inProgressCount > 0 ? (
+							<Loader2 className="h-4 w-4 text-primary animate-spin" />
+						) : (
+							<CheckCircle2 className="h-4 w-4 text-green-500" />
+						)}
+						<span className="text-sm font-medium">
+							{inProgressCount > 0 ? (
+								<>
+									{t("uploading") || "Uploading"} {inProgressCount}{" "}
+									{inProgressCount === 1
+										? t("item") || "item"
+										: t("items") || "items"}
+								</>
+							) : (
+								<>
+									{statusItems.length}{" "}
+									{statusItems.length === 1
+										? t("upload_complete") || "upload complete"
+										: t("uploads_complete") || "uploads complete"}
+								</>
+							)}
+						</span>
+					</div>
+					<div className="flex items-center gap-1">
+						{statusItems.length > 0 && inProgressCount === 0 && (
+							<Tooltip>
+								<TooltipTrigger asChild>
+									<Button
+										variant="ghost"
+										size="icon"
+										className="h-6 w-6"
+										onClick={(e) => {
+											e.stopPropagation();
+											handleDismissAll();
+										}}
+									>
+										<X className="h-3.5 w-3.5" />
+									</Button>
+								</TooltipTrigger>
+								<TooltipContent>{t("dismiss_all") || "Dismiss all"}</TooltipContent>
+							</Tooltip>
+						)}
+						{isExpanded ? (
+							<ChevronDown className="h-4 w-4 text-muted-foreground" />
+						) : (
+							<ChevronUp className="h-4 w-4 text-muted-foreground" />
+						)}
+					</div>
+				</button>
+
+				{/* Content - Collapsible */}
+				<AnimatePresence>
+					{isExpanded && (
+						<motion.div
+							initial={{ height: 0, opacity: 0 }}
+							animate={{ height: "auto", opacity: 1 }}
+							exit={{ height: 0, opacity: 0 }}
+							transition={{ duration: 0.2 }}
+							className="overflow-hidden"
+						>
+							<div className="max-h-64 overflow-y-auto">
+								{statusItems.map((item) => {
+									const metadata = item.metadata as Record<string, unknown>;
+									const status =
+										typeof metadata?.status === "string" ? metadata.status : undefined;
+									const progress = getProgressValue(item);
+									const isInProgress = status === "in_progress";
+
+									return (
+										<div
+											key={item.id}
+											className={cn(
+												"flex items-center gap-3 px-4 py-2.5",
+												"hover:bg-accent/30 transition-colors",
+												"group"
+											)}
+										>
+											{/* Icon */}
+											<div className="shrink-0">{getItemIcon(item)}</div>
+
+											{/* Content */}
+											<div className="flex-1 min-w-0">
+												<div className="flex items-center gap-2">
+													<p className="text-xs font-medium truncate flex-1">{item.title}</p>
+													{getStatusIcon(item)}
+												</div>
+												<p className="text-[10px] text-muted-foreground truncate">
+													{getItemSubtitle(item)}
+												</p>
+												{/* Progress bar for in-progress items */}
+												{isInProgress && progress !== null && (
+													<Progress value={progress} className="h-1 mt-1.5" />
+												)}
+											</div>
+
+											{/* Dismiss button - only for completed/failed */}
+											{!isInProgress && (
+												<Tooltip>
+													<TooltipTrigger asChild>
+														<Button
+															variant="ghost"
+															size="icon"
+															className="h-6 w-6 opacity-0 group-hover:opacity-100 transition-opacity shrink-0"
+															onClick={(e) => handleDismiss(item, e)}
+														>
+															<X className="h-3 w-3" />
+														</Button>
+													</TooltipTrigger>
+													<TooltipContent>{t("dismiss") || "Dismiss"}</TooltipContent>
+												</Tooltip>
+											)}
+										</div>
+									);
+								})}
+							</div>
+						</motion.div>
+					)}
+				</AnimatePresence>
+			</motion.div>
+		</AnimatePresence>,
+		document.body
+	);
+}
diff --git a/financegpt_web/components/layout/ui/upload-status-panel/index.ts b/financegpt_web/components/layout/ui/upload-status-panel/index.ts
new file mode 100644
index 0000000..dc83150
--- /dev/null
+++ b/financegpt_web/components/layout/ui/upload-status-panel/index.ts
@@ -0,0 +1 @@
+export { UploadStatusPanel } from "./UploadStatusPanel";
diff --git a/financegpt_web/lib/query-client/query-client.provider.tsx b/financegpt_web/lib/query-client/query-client.provider.tsx
index 453b9f5..6d33ffa 100644
--- a/financegpt_web/lib/query-client/query-client.provider.tsx
+++ b/financegpt_web/lib/query-client/query-client.provider.tsx
@@ -1,5 +1,4 @@
 "use client";
-import { ReactQueryDevtools } from "@tanstack/react-query-devtools";
 import { QueryClientAtomProvider } from "jotai-tanstack-query/react";
 import { queryClient } from "./client";
 
@@ -7,7 +6,6 @@ export function ReactQueryClientProvider({ children }: { children: React.ReactNo
 	return (
 		<QueryClientAtomProvider client={queryClient}>
 			{children}
-			<ReactQueryDevtools initialIsOpen={false} />
 		</QueryClientAtomProvider>
 	);
 }
diff --git a/financegpt_web/messages/en.json b/financegpt_web/messages/en.json
index a122479..ef19c7a 100644
--- a/financegpt_web/messages/en.json
+++ b/financegpt_web/messages/en.json
@@ -708,7 +708,14 @@
 		"all": "All",
 		"unread": "Unread",
 		"connectors": "Connectors",
-		"all_connectors": "All connectors"
+		"all_connectors": "All connectors",
+		"uploading": "Uploading",
+		"item": "item",
+		"items": "items",
+		"upload_complete": "upload complete",
+		"uploads_complete": "uploads complete",
+		"dismiss": "Dismiss",
+		"dismiss_all": "Dismiss all"
 	},
 	"errors": {
 		"something_went_wrong": "Something went wrong",
diff --git a/financegpt_web/messages/zh.json b/financegpt_web/messages/zh.json
index 3f01bf4..b4d2647 100644
--- a/financegpt_web/messages/zh.json
+++ b/financegpt_web/messages/zh.json
@@ -693,7 +693,14 @@
 		"all": "全部",
 		"unread": "未读",
 		"connectors": "连接器",
-		"all_connectors": "所有连接器"
+		"all_connectors": "所有连接器",
+		"uploading": "上传中",
+		"item": "项",
+		"items": "项",
+		"upload_complete": "上传完成",
+		"uploads_complete": "上传完成",
+		"dismiss": "关闭",
+		"dismiss_all": "全部关闭"
 	},
 	"errors": {
 		"something_went_wrong": "出错了",