From da50111237c95fd75cb1cbe1633891ac39d0a9c6 Mon Sep 17 00:00:00 2001 From: benzntech Date: Thu, 19 Mar 2026 09:52:52 +0530 Subject: [PATCH 1/2] feat: add google_oauth provider type and Gmail OAuth connect proposal --- packages/types/src/ingestion.types.ts | 9 ++ .../gmail-oauth-connect/investigation.md | 38 +++++++ .../changes/gmail-oauth-connect/proposal.md | 106 ++++++++++++++++++ .../changes/gmail-oauth-connect/research.md | 100 +++++++++++++++++ .../specs/google-oauth/spec.md | 46 ++++++++ .../changes/gmail-oauth-connect/tasks.md | 83 ++++++++++++++ 6 files changed, 382 insertions(+) create mode 100644 rapidspec/changes/gmail-oauth-connect/investigation.md create mode 100644 rapidspec/changes/gmail-oauth-connect/proposal.md create mode 100644 rapidspec/changes/gmail-oauth-connect/research.md create mode 100644 rapidspec/changes/gmail-oauth-connect/specs/google-oauth/spec.md create mode 100644 rapidspec/changes/gmail-oauth-connect/tasks.md diff --git a/packages/types/src/ingestion.types.ts b/packages/types/src/ingestion.types.ts index 40ca2c63..02b22340 100644 --- a/packages/types/src/ingestion.types.ts +++ b/packages/types/src/ingestion.types.ts @@ -20,6 +20,7 @@ export type SyncState = { export type IngestionProvider = | 'google_workspace' + | 'google_oauth' | 'microsoft_365' | 'generic_imap' | 'pst_import' @@ -63,6 +64,13 @@ export interface GoogleWorkspaceCredentials extends BaseIngestionCredentials { impersonatedAdminEmail: string; } +export interface GoogleOAuthCredentials extends BaseIngestionCredentials { + type: 'google_oauth'; + email: string; + accessToken: string; + refreshToken: string; +} + export interface Microsoft365Credentials extends BaseIngestionCredentials { type: 'microsoft_365'; clientId: string; @@ -95,6 +103,7 @@ export interface MboxImportCredentials extends BaseIngestionCredentials { export type IngestionCredentials = | GenericImapCredentials | GoogleWorkspaceCredentials + | GoogleOAuthCredentials | Microsoft365Credentials | 
PSTImportCredentials | EMLImportCredentials diff --git a/rapidspec/changes/gmail-oauth-connect/investigation.md b/rapidspec/changes/gmail-oauth-connect/investigation.md new file mode 100644 index 00000000..23c37e29 --- /dev/null +++ b/rapidspec/changes/gmail-oauth-connect/investigation.md @@ -0,0 +1,38 @@ +# Investigation: gmail-oauth-connect + +## Current State Analysis + +### Credential Flow (verified) +1. User opens "Create Ingestion Source" dialog (`+page.svelte:28-31`) +2. Selects `google_workspace` provider +3. `IngestionSourceForm.svelte` shows two fields: + - Textarea for `serviceAccountKeyJson` (raw JSON blob from Google Cloud Console) + - Input for `impersonatedAdminEmail` (a super-admin's email) +4. On submit → `POST /api/ingestion-sources` → `IngestionService.create()` validates via `testConnection()` + +### GoogleWorkspaceConnector (app/domain-wide, NOT individual) +- `GoogleWorkspaceConnector.ts:19` — class targets the entire Google Workspace domain +- `getAuthClient()` at `:48` — creates JWT client that impersonates ANY user in the domain +- `listAllUsers()` at `:~80` — calls Google Admin Directory API to enumerate all domain users +- This is intentionally designed for organization-wide archiving, not individual accounts + +### Why a new provider type (not modifying existing) +- `google_workspace` is a different use case (entire org via service account) +- `google_oauth` is individual account via personal consent +- Keeping them separate preserves existing customers' configurations +- `EmailProviderFactory` uses a discriminated union — adding a new case is clean + +### IngestionCredentials storage +- Credentials stored as encrypted JSONB in PostgreSQL via `CryptoService` +- `accessToken` + `refreshToken` will be encrypted like other credential fields +- No schema migration needed — existing JSONB column accommodates new shape + +### SyncState compatibility +- `SyncState.google` is keyed by `userEmail` — `GoogleOAuthConnector` uses same shape +- 
`historyId` delta sync pattern works identically for OAuth individual accounts +- No `SyncState` changes needed + +## Existing Patterns to Follow +- `ImapConnector.ts` — similar single-user connector pattern, good reference +- `IngestionService.testConnection()` — called on create, handles `auth_success` status +- `CryptoService.encrypt/decrypt` — wrap tokens before storing diff --git a/rapidspec/changes/gmail-oauth-connect/proposal.md b/rapidspec/changes/gmail-oauth-connect/proposal.md new file mode 100644 index 00000000..8e8d4b0c --- /dev/null +++ b/rapidspec/changes/gmail-oauth-connect/proposal.md @@ -0,0 +1,106 @@ +# Change: Gmail OAuth — "Connect with Google" for Individual Org Accounts + +## Why +Users currently must paste a raw Google Service Account JSON blob and an admin email to connect Gmail — a highly technical process that fails to match the standard "click to connect" experience expected by modern apps. This adds a `google_oauth` provider that lets an individual connect their Google Workspace Gmail account via a single OAuth button. 
+ +## Code Verification +- [x] `GoogleWorkspaceCredentials` at `packages/types/src/ingestion.types.ts:53-64` — requires `serviceAccountKeyJson` + `impersonatedAdminEmail` +- [x] `GoogleWorkspaceConnector.ts:19-56` — uses service account JWT with domain-wide delegation +- [x] `IngestionSourceForm.svelte:187-186` — manual textarea for JSON key + admin email fields +- [x] `IngestionProvider` union at `ingestion.types.ts:21-27` — add `'google_oauth'` here +- [x] `EmailProviderFactory.ts` — provider switch/case needs new `google_oauth` case + +## What Changes + +### Before (Verified Actual Code) +```typescript +// packages/types/src/ingestion.types.ts:53-64 +export interface GoogleWorkspaceCredentials extends BaseIngestionCredentials { + type: 'google_workspace'; + serviceAccountKeyJson: string; // paste entire JSON blob + impersonatedAdminEmail: string; // super-admin email +} + +// packages/types/src/ingestion.types.ts:21-27 +export type IngestionProvider = + | 'google_workspace' + | 'microsoft_365' + | 'generic_imap' + | 'pst_import' + | 'eml_import' + | 'mbox_import'; +``` + +### After (Proposed) +```typescript +// New credential type for individual OAuth +export interface GoogleOAuthCredentials extends BaseIngestionCredentials { + type: 'google_oauth'; + email: string; // auto-populated from Google + accessToken: string; // encrypted at rest + refreshToken: string; // encrypted at rest +} + +// Extended provider union +export type IngestionProvider = + | 'google_workspace' + | 'google_oauth' // NEW + | 'microsoft_365' + | 'generic_imap' + | 'pst_import' + | 'eml_import' + | 'mbox_import'; +``` + +## Architecture + +### OAuth Flow +``` +User clicks "Connect with Google" + → GET /api/oauth/google/authorize?name= + → Backend builds Google OAuth2 consent URL (gmail.readonly + userinfo.email) + → Redirect to accounts.google.com/o/oauth2/auth + → User logs in with org Gmail account + → Google redirects to GET /api/oauth/google/callback?code=...&state=... 
+ → Backend exchanges code for { access_token, refresh_token } + → Backend fetches user email via googleapis userinfo + → Backend creates IngestionSource with type 'google_oauth', status 'auth_success' + → Redirect to /dashboard/ingestions +``` + +### New Files +| File | Purpose | +|------|---------| +| `packages/backend/src/services/ingestion-connectors/GoogleOAuthConnector.ts` | OAuth2-based Gmail connector (single user) | +| `packages/backend/src/api/controllers/oauth.controller.ts` | Handles /authorize and /callback | +| `packages/backend/src/api/routes/oauth.routes.ts` | Route definitions | + +### Modified Files +| File | Change | +|------|--------| +| `packages/types/src/ingestion.types.ts` | Add `GoogleOAuthCredentials`, extend `IngestionProvider` | +| `packages/backend/src/services/EmailProviderFactory.ts` | Add `google_oauth` case | +| `packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte` | Add `google_oauth` provider option with OAuth button | +| `.env.example` | Add `GOOGLE_OAUTH_CLIENT_ID`, `GOOGLE_OAUTH_CLIENT_SECRET`, `GOOGLE_OAUTH_REDIRECT_URI` | + +## Environment Variables +```env +GOOGLE_OAUTH_CLIENT_ID=your-client-id.apps.googleusercontent.com +GOOGLE_OAUTH_CLIENT_SECRET=your-client-secret +GOOGLE_OAUTH_REDIRECT_URI=http://localhost:3000/api/oauth/google/callback +``` + +## Scopes Required +- `https://www.googleapis.com/auth/gmail.readonly` — read emails +- `https://www.googleapis.com/auth/userinfo.email` — identify the account + +## Security Notes +- `access_token` and `refresh_token` encrypted at rest via existing `CryptoService` +- State parameter (signed JWT or UUID) used in OAuth flow to prevent CSRF +- Token refresh handled automatically in `GoogleOAuthConnector` before each sync + +## Impact +- No breaking changes — existing `google_workspace` connector untouched +- New provider type is purely additive +- Affected specs: `specs/google-oauth/spec.md` +- Affected files: `ingestion.types.ts`, `EmailProviderFactory.ts`, 
`IngestionSourceForm.svelte`, `.env.example` diff --git a/rapidspec/changes/gmail-oauth-connect/research.md b/rapidspec/changes/gmail-oauth-connect/research.md new file mode 100644 index 00000000..7a96bb11 --- /dev/null +++ b/rapidspec/changes/gmail-oauth-connect/research.md @@ -0,0 +1,100 @@ +# Research: gmail-oauth-connect + +## Google OAuth2 for Gmail (Delegated, Individual) + +### Required Scopes +- `https://www.googleapis.com/auth/gmail.readonly` — read all messages and settings +- `https://www.googleapis.com/auth/userinfo.email` — get connected account email address +- `openid` — optional; not needed for the userinfo endpoint, which accepts `userinfo.email` on its own (request it only if an OpenID Connect ID token is wanted) + +### OAuth2 Flow (Authorization Code) +1. Backend builds URL: `https://accounts.google.com/o/oauth2/v2/auth?client_id=...&redirect_uri=...&scope=...&response_type=code&access_type=offline&prompt=consent` +2. `access_type=offline` — required to receive a refresh token +3. `prompt=consent` — forces consent screen even if previously authorized (ensures refresh token is always returned) +4. After consent → callback receives `code` param +5. Exchange: `POST https://oauth2.googleapis.com/token` with `code`, `client_id`, `client_secret`, `redirect_uri`, `grant_type=authorization_code` +6. 
Response: `{ access_token, refresh_token, expires_in, scope }` + +### googleapis SDK (already in project) +```typescript +import { google } from 'googleapis'; + +const oauth2Client = new google.auth.OAuth2( + process.env.GOOGLE_OAUTH_CLIENT_ID, + process.env.GOOGLE_OAUTH_CLIENT_SECRET, + process.env.GOOGLE_OAUTH_REDIRECT_URI +); + +// Build auth URL +const url = oauth2Client.generateAuthUrl({ + access_type: 'offline', + scope: ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/userinfo.email'], + prompt: 'consent', + state: stateToken, +}); + +// Exchange code for tokens +const { tokens } = await oauth2Client.getToken(code); +oauth2Client.setCredentials(tokens); + +// Get user email +const oauth2 = google.oauth2({ version: 'v2', auth: oauth2Client }); +const { data } = await oauth2.userinfo.get(); +// data.email = 'user@company.com' +``` + +### Gmail API — History-based Delta Sync +```typescript +// Initial sync: list all messages +const gmail = google.gmail({ version: 'v1', auth: oauth2Client }); +const profile = await gmail.users.getProfile({ userId: 'me' }); +const historyId = profile.data.historyId; // save this for delta sync + +// Subsequent syncs: use history API +const history = await gmail.users.history.list({ + userId: 'me', + startHistoryId: savedHistoryId, + historyTypes: ['messageAdded'], +}); +``` + +### Token Refresh Pattern +```typescript +// googleapis handles refresh automatically when setCredentials is called with refresh_token +oauth2Client.setCredentials({ refresh_token: storedRefreshToken }); +// On expiry, googleapis auto-refreshes. Listen for new tokens: +oauth2Client.on('tokens', (tokens) => { + if (tokens.access_token) { + // save updated access_token to DB + } +}); +``` + +## Google Cloud Console Setup (for docs/README) +1. Go to console.cloud.google.com → APIs & Services → Credentials +2. Create OAuth 2.0 Client ID (Web application type) +3. 
Add authorized redirect URI: `https://your-domain/api/oauth/google/callback` +4. Enable Gmail API under APIs & Services → Library +5. Copy Client ID and Client Secret to `.env` + +## Security: State Parameter (CSRF Protection) +```typescript +// Generate state before redirect +const state = crypto.randomBytes(32).toString('hex'); +// Store in session or short-lived cache (Redis/Valkey already available) +await redis.set(`oauth_state:${state}`, userId, 'EX', 600); // 10 min TTL + +// Verify in callback +const stored = await redis.get(`oauth_state:${state}`); +if (!stored) throw new Error('Invalid state parameter'); +``` + +## Google Brand Guidelines for Button +- Official button text: "Sign in with Google" or "Continue with Google" +- Must use Google logo SVG (available at developers.google.com/identity/branding-guidelines) +- Button background: white, border: #dadce0, text color: #3c4043 + +## Reference Implementations +- Midday.ai uses `@googleapis/gmail` with OAuth2 for individual mailbox connections +- Nextcloud Mail uses same `access_type=offline` + `prompt=consent` pattern +- Google's own sample: https://developers.google.com/gmail/api/quickstart/nodejs diff --git a/rapidspec/changes/gmail-oauth-connect/specs/google-oauth/spec.md b/rapidspec/changes/gmail-oauth-connect/specs/google-oauth/spec.md new file mode 100644 index 00000000..a7550b39 --- /dev/null +++ b/rapidspec/changes/gmail-oauth-connect/specs/google-oauth/spec.md @@ -0,0 +1,46 @@ +# Spec: Google OAuth Individual Account Connection + +## ADDED Requirements + +### Requirement: Google OAuth Ingestion Provider +The system SHALL support a `google_oauth` ingestion provider that allows an individual Google Workspace user to connect their Gmail account via OAuth 2.0 Authorization Code flow. 
+ +#### Scenario: Successful OAuth connection +- **GIVEN** a user is logged into OpenArchiver +- **WHEN** they select "Gmail (Connect with Google)", enter a source name, and click the connect button +- **THEN** they are redirected to Google's consent screen +- **AND** after granting consent, a new ingestion source is created with status `auth_success` +- **AND** they are redirected back to `/dashboard/ingestions` + +#### Scenario: OAuth flow cancelled +- **GIVEN** a user is on the Google consent screen +- **WHEN** they cancel or deny consent +- **THEN** they are redirected to `/dashboard/ingestions?error=oauth_cancelled` +- **AND** an error alert is shown +- **AND** no ingestion source is created + +#### Scenario: Token refresh on sync +- **GIVEN** a `google_oauth` ingestion source exists with an expired access token +- **WHEN** a sync job runs +- **THEN** the system SHALL automatically refresh the access token using the stored refresh token +- **AND** the updated access token SHALL be encrypted and persisted + +### Requirement: Google OAuth Credentials Storage +The system SHALL store `google_oauth` credentials as `{ type, email, accessToken, refreshToken }` encrypted at rest using the existing `CryptoService`. + +### Requirement: CSRF Protection +The system SHALL use a random state parameter (stored in Redis with 10-minute TTL) to prevent CSRF attacks during the OAuth flow. + +### Requirement: Gmail Delta Sync +The system SHALL use the Gmail History API (`users.history.list`) for incremental syncs, storing `historyId` in `syncState.google[email].historyId`. + +## MODIFIED Requirements + +### Requirement: IngestionProvider Type +The `IngestionProvider` union type SHALL include `'google_oauth'` alongside existing provider types. Existing `google_workspace` provider behavior is unchanged. + +### Requirement: EmailProviderFactory +The `EmailProviderFactory` SHALL instantiate `GoogleOAuthConnector` when provider type is `'google_oauth'`. 
+ +## REMOVED Requirements +None — all existing requirements unchanged. diff --git a/rapidspec/changes/gmail-oauth-connect/tasks.md b/rapidspec/changes/gmail-oauth-connect/tasks.md new file mode 100644 index 00000000..2a8a9fbd --- /dev/null +++ b/rapidspec/changes/gmail-oauth-connect/tasks.md @@ -0,0 +1,83 @@ +# Tasks: Gmail OAuth Connect + +## 1. Types & Schema + +### 1.1 Add GoogleOAuthCredentials type (15 min) — Checkpoint ⏸ +- [x] Add `GoogleOAuthCredentials` interface to `packages/types/src/ingestion.types.ts` +- [x] Add `'google_oauth'` to `IngestionProvider` union +- [x] Add `GoogleOAuthCredentials` to `IngestionCredentials` discriminated union +**Checkpoint:** Types compile — run `pnpm build` from root + +--- + +## 2. Backend — OAuth Routes + +### 2.1 OAuth Controller (45 min) — Checkpoint ⏸ +- [ ] Create `packages/backend/src/api/controllers/oauth.controller.ts` +- [ ] `GET /authorize` handler: build Google OAuth2 URL with `gmail.readonly` + `userinfo.email` scopes, embed `state` param (source name + anti-CSRF token) +- [ ] `GET /callback` handler: exchange code for tokens, fetch user email, create ingestion source via `IngestionService.create()` +- [ ] Redirect to `/dashboard/ingestions` on success, `/dashboard/ingestions?error=...` on failure +**Checkpoint:** Test authorize URL redirects to Google in browser + +### 2.2 OAuth Routes (10 min) +- [ ] Create `packages/backend/src/api/routes/oauth.routes.ts` +- [ ] Register routes in main Express router +- [ ] Add auth middleware (must be logged in to initiate OAuth) + +### 2.3 Environment Config (10 min) +- [ ] Add `GOOGLE_OAUTH_CLIENT_ID`, `GOOGLE_OAUTH_CLIENT_SECRET`, `GOOGLE_OAUTH_REDIRECT_URI` to `.env.example` +- [ ] Read and validate vars in backend config/env loader + +--- + +## 3. 
Backend — Connector + +### 3.1 GoogleOAuthConnector (60 min) — Checkpoint ⏸ +- [ ] Create `packages/backend/src/services/ingestion-connectors/GoogleOAuthConnector.ts` +- [ ] Constructor accepts `GoogleOAuthCredentials` — initializes `google.auth.OAuth2` client with stored tokens +- [ ] Implement `testConnection()` — verify token validity via `gmail.users.getProfile` +- [ ] Implement `listAllUsers()` — yields single `MailboxUser` (the connected account) +- [ ] Implement `fetchEmails()` — use Gmail API (`gmail.users.messages.list` + `gmail.users.messages.get`) with history ID delta sync +- [ ] Implement `getUpdatedSyncState()` — returns updated `historyId` +- [ ] Handle token refresh: catch 401, use `oauth2Client.refreshAccessToken()`, update stored credentials via `IngestionService` +**Checkpoint:** Unit test `testConnection()` with real credentials + +### 3.2 Register in EmailProviderFactory (10 min) +- [ ] Add `case 'google_oauth': return new GoogleOAuthConnector(credentials)` to `EmailProviderFactory.ts` + +--- + +## 4. Frontend + +### 4.1 Add google_oauth provider option (30 min) — Checkpoint ⏸ +- [ ] Add `{ value: 'google_oauth', label: 'Gmail (Connect with Google)' }` to provider list in `IngestionSourceForm.svelte` +- [ ] Add `{:else if formData.provider === 'google_oauth'}` block — show "Connect with Google" button instead of manual fields +- [ ] Button click: `window.location.href = /api/oauth/google/authorize?name=${encodeURIComponent(formData.name)}` +- [ ] Add Google branding to button (Google logo SVG + "Sign in with Google" styling per Google brand guidelines) +**Checkpoint:** Button visible in dialog, clicking redirects to Google + +### 4.2 Handle OAuth return (15 min) +- [ ] On `/dashboard/ingestions` page load, check for `?error=` query param and show error alert +- [ ] Refresh ingestion source list after OAuth return (page already loads fresh data via `+page.server.ts`) + +--- + +## 5. 
Testing + +### 5.1 Manual E2E test +- [ ] Set up Google OAuth credentials in `.env` +- [ ] Click "Create New" → select "Gmail (Connect with Google)" → enter name → click button +- [ ] Complete Google consent → verify source appears in list with `auth_success` status +- [ ] Trigger manual sync → verify emails are imported + +### 5.2 Error cases +- [ ] Test with invalid/revoked token → confirm error state shown +- [ ] Test cancelling OAuth consent → confirm redirect to error page +- [ ] Test missing env vars → confirm clear error message in logs + +--- + +## 6. Review (run /rapidspec:review after implementation) +- [ ] Security audit: state param CSRF protection, token encryption, no token exposure in logs +- [ ] Code review: connector follows `IEmailConnector` interface contract +- [ ] Fix any critical issues before merging From 60e67e9822bcbb980d810938969e9e995cca02de Mon Sep 17 00:00:00 2001 From: benzntech Date: Thu, 19 Mar 2026 10:01:03 +0530 Subject: [PATCH 2/2] feat: implement Gmail OAuth connect - individual account via Google OAuth2 --- .env.example | 8 + .../src/api/controllers/oauth.controller.ts | 159 +++++++++++ .../backend/src/api/routes/oauth.routes.ts | 26 ++ packages/backend/src/api/server.ts | 3 + .../src/services/EmailProviderFactory.ts | 4 + .../GoogleOAuthConnector.ts | 262 ++++++++++++++++++ .../custom/IngestionSourceForm.svelte | 71 ++++- .../routes/dashboard/ingestions/+page.svelte | 32 +++ .../changes/gmail-oauth-connect/tasks.md | 44 +-- 9 files changed, 578 insertions(+), 31 deletions(-) create mode 100644 packages/backend/src/api/controllers/oauth.controller.ts create mode 100644 packages/backend/src/api/routes/oauth.routes.ts create mode 100644 packages/backend/src/services/ingestion-connectors/GoogleOAuthConnector.ts diff --git a/.env.example b/.env.example index 8c3997ea..02994af3 100644 --- a/.env.example +++ b/.env.example @@ -98,3 +98,11 @@ ENCRYPTION_KEY= # Apache Tika Integration # ONLY active if TIKA_URL is set 
TIKA_URL=http://tika:9998 + +# --- Google OAuth (Gmail individual account connect) --- +# Create an OAuth 2.0 Client ID at console.cloud.google.com → APIs & Services → Credentials +# Set the authorized redirect URI to: https://your-domain/v1/oauth/google/callback (must match GOOGLE_OAUTH_REDIRECT_URI below) +# Enable the Gmail API under APIs & Services → Library +GOOGLE_OAUTH_CLIENT_ID= +GOOGLE_OAUTH_CLIENT_SECRET= +GOOGLE_OAUTH_REDIRECT_URI=http://localhost:4000/v1/oauth/google/callback diff --git a/packages/backend/src/api/controllers/oauth.controller.ts b/packages/backend/src/api/controllers/oauth.controller.ts new file mode 100644 index 00000000..4fc8d312 --- /dev/null +++ b/packages/backend/src/api/controllers/oauth.controller.ts @@ -0,0 +1,159 @@ +import type { Request, Response } from 'express'; +import { google } from 'googleapis'; +import { createHmac, randomBytes } from 'crypto'; +import { IngestionService } from '../../services/IngestionService'; +import { UserService } from '../../services/UserService'; +import { logger } from '../../config/logger'; + +const SCOPES = [ + 'https://www.googleapis.com/auth/gmail.readonly', + 'https://www.googleapis.com/auth/userinfo.email', +]; + +function getOAuth2Client() { + return new google.auth.OAuth2( + process.env.GOOGLE_OAUTH_CLIENT_ID, + process.env.GOOGLE_OAUTH_CLIENT_SECRET, + process.env.GOOGLE_OAUTH_REDIRECT_URI + ); +} + +function signState(payload: object): string { + const data = Buffer.from(JSON.stringify(payload)).toString('base64'); + const sig = createHmac('sha256', process.env.JWT_SECRET!) + .update(data) + .digest('hex'); + return `${data}.${sig}`; +} + +function verifyState(state: string): { userId: string; name: string } | null { + try { + const [data, sig] = state.split('.'); + const expected = createHmac('sha256', process.env.JWT_SECRET!) 
+ .update(data) + .digest('hex'); + if (sig !== expected) return null; + return JSON.parse(Buffer.from(data, 'base64').toString('utf8')); + } catch { + return null; + } +} + +export class OAuthController { + /** + * GET /v1/oauth/google/authorize?name= + * Protected — user must be logged in. + * Responds with JSON `{ url }` holding the Google OAuth consent URL; the caller performs the redirect. + */ + public googleAuthorize = async (req: Request, res: Response): Promise => { + const { name } = req.query; + const userId = req.user?.sub; + + if (!userId) { + res.status(401).json({ message: 'Unauthorized' }); + return; + } + + if (!name || typeof name !== 'string') { + res.status(400).json({ message: 'Missing required query parameter: name' }); + return; + } + + if ( + !process.env.GOOGLE_OAUTH_CLIENT_ID || + !process.env.GOOGLE_OAUTH_CLIENT_SECRET || + !process.env.GOOGLE_OAUTH_REDIRECT_URI + ) { + res.status(500).json({ message: 'Google OAuth is not configured on this server.' }); + return; + } + + const state = signState({ userId, name, nonce: randomBytes(8).toString('hex') }); + const oauth2Client = getOAuth2Client(); + const url = oauth2Client.generateAuthUrl({ + access_type: 'offline', + prompt: 'consent', + scope: SCOPES, + state, + }); + + res.status(200).json({ url }); + }; + + /** + * GET /v1/oauth/google/callback?code=...&state=... + * Public — Google redirects here after consent. + * Creates an ingestion source and redirects to dashboard. 
+ */ + public googleCallback = async (req: Request, res: Response): Promise => { + const frontendUrl = process.env.APP_URL || 'http://localhost:3000'; + const { code, state, error } = req.query; + + if (error) { + logger.warn({ error }, 'Google OAuth consent was denied or cancelled.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_cancelled`); + return; + } + + if (!code || typeof code !== 'string' || !state || typeof state !== 'string') { + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_invalid_response`); + return; + } + + const payload = verifyState(state); + if (!payload) { + logger.warn('Google OAuth callback received invalid state parameter.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_invalid_state`); + return; + } + + const { userId, name } = payload; + + try { + const oauth2Client = getOAuth2Client(); + const { tokens } = await oauth2Client.getToken(code); + + if (!tokens.access_token || !tokens.refresh_token) { + throw new Error('Google did not return required tokens.'); + } + + oauth2Client.setCredentials(tokens); + + // Get the user's email address from Google + const oauth2 = google.oauth2({ version: 'v2', auth: oauth2Client }); + const { data: userInfo } = await oauth2.userinfo.get(); + + if (!userInfo.email) { + throw new Error('Could not retrieve email from Google account.'); + } + + // Fetch the OpenArchiver user to pass as actor + const userService = new UserService(); + const actor = await userService.findById(userId); + if (!actor) { + throw new Error('Could not find user account.'); + } + + await IngestionService.create( + { + name, + provider: 'google_oauth', + providerConfig: { + type: 'google_oauth', + email: userInfo.email, + accessToken: tokens.access_token, + refreshToken: tokens.refresh_token, + }, + }, + userId, + actor, + req.ip || 'unknown' + ); + + res.redirect(`${frontendUrl}/dashboard/ingestions?connected=google`); + } catch (err) { + logger.error({ err }, 'Google OAuth 
callback failed.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_failed`); + } + }; +} diff --git a/packages/backend/src/api/routes/oauth.routes.ts b/packages/backend/src/api/routes/oauth.routes.ts new file mode 100644 index 00000000..27f6bd3c --- /dev/null +++ b/packages/backend/src/api/routes/oauth.routes.ts @@ -0,0 +1,26 @@ +import { Router } from 'express'; +import { OAuthController } from '../controllers/oauth.controller'; +import { requireAuth } from '../middleware/requireAuth'; +import type { AuthService } from '../../services/AuthService'; + +export const createOAuthRouter = (authService: AuthService): Router => { + const router = Router(); + const controller = new OAuthController(); + + /** + * @route GET /v1/oauth/google/authorize?name= + * @description Initiates the Google OAuth flow for Gmail individual account connection. + * @access Protected (JWT required) + */ + router.get('/google/authorize', requireAuth(authService), controller.googleAuthorize); + + /** + * @route GET /v1/oauth/google/callback + * @description Handles the Google OAuth callback, exchanges code for tokens, + * creates an ingestion source, and redirects to the dashboard. 
+ * @access Public (called by Google) + */ + router.get('/google/callback', controller.googleCallback); + + return router; +}; diff --git a/packages/backend/src/api/server.ts b/packages/backend/src/api/server.ts index f3f74a5d..9571e953 100644 --- a/packages/backend/src/api/server.ts +++ b/packages/backend/src/api/server.ts @@ -20,6 +20,7 @@ import { createSettingsRouter } from './routes/settings.routes'; import { apiKeyRoutes } from './routes/api-key.routes'; import { integrityRoutes } from './routes/integrity.routes'; import { createJobsRouter } from './routes/jobs.routes'; +import { createOAuthRouter } from './routes/oauth.routes'; import { AuthService } from '../services/AuthService'; import { AuditService } from '../services/AuditService'; import { UserService } from '../services/UserService'; @@ -123,6 +124,7 @@ export async function createServer(modules: ArchiverModule[] = []): Promise { @@ -154,6 +156,7 @@ export async function createServer(modules: ArchiverModule[] = []): Promise; + private newHistoryId: string | undefined; + private labelCache: Map = new Map(); + + constructor(credentials: GoogleOAuthCredentials) { + this.credentials = credentials; + + this.oauth2Client = new google.auth.OAuth2( + process.env.GOOGLE_OAUTH_CLIENT_ID, + process.env.GOOGLE_OAUTH_CLIENT_SECRET, + process.env.GOOGLE_OAUTH_REDIRECT_URI + ); + + this.oauth2Client.setCredentials({ + access_token: this.credentials.accessToken, + refresh_token: this.credentials.refreshToken, + }); + } + + /** + * Tests the connection by fetching the Gmail profile for the connected account. 
+	 */
+	public async testConnection(): Promise {
+		try {
+			const gmail = google.gmail({ version: 'v1', auth: this.oauth2Client });
+			await gmail.users.getProfile({ userId: 'me' });
+			logger.info({ email: this.credentials.email }, 'Google OAuth connection test successful.');
+			return true;
+		} catch (error) {
+			// Any failure is logged with the account email and rethrown unchanged.
+			logger.error({ err: error, email: this.credentials.email }, 'Failed to verify Google OAuth connection');
+			throw error;
+		}
+	}
+
+	/**
+	 * Yields the single connected user.
+	 *
+	 * The configured credential email is used for the id, the primary email and the
+	 * display name alike.
+	 */
+	public async *listAllUsers(): AsyncGenerator {
+		yield {
+			id: this.credentials.email,
+			primaryEmail: this.credentials.email,
+			displayName: this.credentials.email,
+		};
+	}
+
+	/**
+	 * Fetches emails for the connected account using history-based delta sync.
+	 *
+	 * With no stored history id for `userEmail`, every message in the mailbox is
+	 * listed instead. Otherwise only `messageAdded` history records after the stored
+	 * id are walked. If Gmail rejects the stored id with a 404, the method falls back
+	 * to listing every message (the handling the Gmail synchronization guide asks for).
+	 *
+	 * NOTE(review): `checkDuplicate` is annotated with a bare `Promise` in this patch
+	 * text; the type argument looks lost in transit - confirm against the commit.
+	 *
+	 * @param userEmail - Key for the stored history id; also stamped on every yielded email.
+	 * @param syncState - Previously persisted sync state, if any.
+	 * @param checkDuplicate - Optional pre-check; messages for which it resolves truthy are skipped.
+	 */
+	public async *fetchEmails(
+		userEmail: string,
+		syncState?: SyncState | null,
+		checkDuplicate?: (messageId: string) => Promise
+	): AsyncGenerator {
+		const gmail = google.gmail({ version: 'v1', auth: this.oauth2Client });
+		const startHistoryId = syncState?.google?.[userEmail]?.historyId;
+
+		if (!startHistoryId) {
+			yield* this.fetchAllMessages(gmail, userEmail, checkDuplicate);
+			return;
+		}
+
+		this.newHistoryId = startHistoryId;
+		let pageToken: string | undefined = undefined;
+
+		do {
+			let historyResponse: Common.GaxiosResponseWithHTTP2;
+			try {
+				historyResponse = await gmail.users.history.list({
+					userId: 'me',
+					// Every page is requested with the original cursor: `newHistoryId` is
+					// advanced below and must not leak into follow-up page requests.
+					startHistoryId,
+					pageToken,
+					historyTypes: ['messageAdded'],
+				});
+			} catch (error: any) {
+				if (error.code !== 404) throw error;
+				// The stored cursor is no longer usable; rebuild from a full listing.
+				logger.warn({ userEmail, startHistoryId }, 'Stored history id rejected by Gmail, falling back to full sync.');
+				yield* this.fetchAllMessages(gmail, userEmail, checkDuplicate);
+				return;
+			}
+
+			// A page without records must not end the sync: later pages are still
+			// requested and the history id of this response is still recorded.
+			for (const historyRecord of historyResponse.data.history ?? []) {
+				for (const messageAdded of historyRecord.messagesAdded ?? []) {
+					const messageId = messageAdded.message?.id;
+					if (!messageId) continue;
+
+					try {
+						if (checkDuplicate && (await checkDuplicate(messageId))) {
+							logger.debug({ messageId, userEmail }, 'Skipping duplicate email (pre-check)');
+							continue;
+						}
+
+						const emailObject = await this.fetchAndParseMessage(gmail, userEmail, messageId);
+						if (emailObject) yield emailObject;
+					} catch (error: any) {
+						if (error.code === 404) {
+							logger.warn({ messageId, userEmail }, 'Message not found, skipping.');
+						} else {
+							throw error;
+						}
+					}
+				}
+			}
+
+			pageToken = historyResponse.data.nextPageToken ?? undefined;
+			if (historyResponse.data.historyId) {
+				this.newHistoryId = historyResponse.data.historyId;
+			}
+		} while (pageToken);
+	}
+
+	/**
+	 * Lists every message of the connected account (initial import).
+	 *
+	 * The history id reported by the profile is recorded before listing starts, then
+	 * `users.messages.list` is paged until no next page token is returned or a page
+	 * comes back empty. Messages that answer 404 while being fetched are skipped.
+	 *
+	 * @param gmail - Gmail client to issue the requests with.
+	 * @param userEmail - Stamped on every yielded email and used in log context.
+	 * @param checkDuplicate - Optional pre-check; messages for which it resolves truthy are skipped.
+	 */
+	private async *fetchAllMessages(
+		gmail: gmail_v1.Gmail,
+		userEmail: string,
+		checkDuplicate?: (messageId: string) => Promise
+	): AsyncGenerator {
+		const profileResponse = await gmail.users.getProfile({ userId: 'me' });
+		if (profileResponse.data.historyId) {
+			this.newHistoryId = profileResponse.data.historyId;
+		}
+
+		let pageToken: string | undefined = undefined;
+		do {
+			const listResponse: Common.GaxiosResponseWithHTTP2 =
+				await gmail.users.messages.list({ userId: 'me', pageToken });
+
+			const messages = listResponse.data.messages;
+			if (!messages || messages.length === 0) return;
+
+			for (const message of messages) {
+				if (message.id) {
+					try {
+						if (checkDuplicate && (await checkDuplicate(message.id))) {
+							logger.debug({ messageId: message.id, userEmail }, 'Skipping duplicate email (pre-check)');
+							continue;
+						}
+						const emailObject = await this.fetchAndParseMessage(gmail, userEmail, message.id);
+						if (emailObject) yield emailObject;
+					} catch (error: any) {
+						if (error.code === 404) {
+							logger.warn({ messageId: message.id, userEmail }, 'Message not found during initial import, skipping.');
+						} else {
+							throw error;
+						}
+					}
+				}
+			}
+
+			pageToken = listResponse.data.nextPageToken ?? undefined;
+		} while (pageToken);
+	}
+
+	/**
+	 * Loads one message and converts it into the email object the fetch generators yield.
+	 *
+	 * Two requests are made: a METADATA request restricted to `labelIds` (resolved to
+	 * path/tags through `getLabelDetails`), then a RAW request whose base64url payload
+	 * is parsed with `simpleParser`.
+	 *
+	 * NOTE(review): the return annotation reads bare `Promise` in this patch text; the
+	 * type argument looks lost in transit - confirm against the commit.
+	 *
+	 * @param gmail - Gmail client to issue the requests with.
+	 * @param userEmail - Copied onto the returned object.
+	 * @param messageId - Gmail id of the message to load.
+	 * @returns The parsed email, or `null` when the RAW response carries no `raw` field.
+	 */
+	private async fetchAndParseMessage(
+		gmail: gmail_v1.Gmail,
+		userEmail: string,
+		messageId: string
+	): Promise {
+		const metadataResponse = await gmail.users.messages.get({
+			userId: 'me',
+			id: messageId,
+			format: 'METADATA',
+			fields: 'labelIds',
+		});
+		const labels = await this.getLabelDetails(gmail, metadataResponse.data.labelIds || []);
+
+		const msgResponse = await gmail.users.messages.get({
+			userId: 'me',
+			id: messageId,
+			format: 'RAW',
+		});
+
+		if (!msgResponse.data.raw) return null;
+
+		const rawEmail = Buffer.from(msgResponse.data.raw, 'base64url');
+		const parsedEmail: ParsedMail = await simpleParser(rawEmail);
+
+		// Flattens one or many address objects into a plain list; a missing address becomes ''.
+		const mapAddresses = (addresses: AddressObject | AddressObject[] | undefined): EmailAddress[] => {
+			if (!addresses) return [];
+			const arr = Array.isArray(addresses) ? addresses : [addresses];
+			return arr.flatMap((a) => a.value.map((v) => ({ name: v.name, address: v.address || '' })));
+		};
+
+		const attachments = parsedEmail.attachments.map((a: Attachment) => ({
+			filename: a.filename || 'untitled',
+			contentType: a.contentType,
+			size: a.size,
+			content: a.content as Buffer,
+		}));
+
+		return {
+			id: msgResponse.data.id!,
+			threadId: getThreadId(parsedEmail.headers),
+			userEmail,
+			eml: rawEmail,
+			from: mapAddresses(parsedEmail.from),
+			to: mapAddresses(parsedEmail.to),
+			cc: mapAddresses(parsedEmail.cc),
+			bcc: mapAddresses(parsedEmail.bcc),
+			subject: parsedEmail.subject || '',
+			body: parsedEmail.text || '',
+			html: parsedEmail.html || '',
+			headers: parsedEmail.headers,
+			attachments,
+			// Falls back to the time of parsing when the message has no parsed date.
+			receivedAt: parsedEmail.date || new Date(),
+			path: labels.path,
+			tags: labels.tags,
+		};
+	}
+
+	/**
+	 * Returns the sync state to persist for `userEmail`.
+	 *
+	 * @param userEmail - Key under which the history id is stored.
+	 * @returns `{ google: { [userEmail]: { historyId } } }` with the most recently
+	 * recorded history id, or an empty object when none has been recorded.
+	 */
+	public getUpdatedSyncState(userEmail: string): SyncState {
+		if (!this.newHistoryId) return {};
+		return {
+			google: {
+				[userEmail]: { historyId: this.newHistoryId },
+			},
+		};
+	}
+
+	/**
+	 * Resolves label ids to a folder-like path and a tag list.
+	 *
+	 * Each label is looked up through `users.labels.get` once and then served from
+	 * `labelCache`. Every label that has a name contributes a tag; names of labels
+	 * whose type is `'user'` are additionally joined with `/` to form the path.
+	 *
+	 * @param gmail - Gmail client to issue the lookups with.
+	 * @param labelIds - Label ids attached to a message.
+	 * @returns The joined path (empty string when no user label is present) and all tag names.
+	 */
+	private async getLabelDetails(
+		gmail: gmail_v1.Gmail,
+		labelIds: string[]
+	): Promise<{ path: string; tags: string[] }> {
+		const tags: string[] = [];
+		let path = '';
+
+		for (const labelId of labelIds) {
+			let label = this.labelCache.get(labelId);
+			if (!label) {
+				const res = await gmail.users.labels.get({ userId: 'me', id: labelId });
+				label = res.data;
+				this.labelCache.set(labelId, label);
+			}
+			if (label.name) {
+				tags.push(label.name);
+				if (label.type === 'user') {
+					path = path ? `${path}/${label.name}` : label.name;
+				}
+			}
+		}
+
+		return { path, tags };
+	}
+}
diff --git a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte
index 2f423a7f..86c436e9 100644
--- a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte
+++ b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte
@@ -26,6 +26,10 @@
 			value: 'generic_imap',
 			label: $t('app.components.ingestion_source_form.provider_generic_imap'),
 		},
+		{
+			value: 'google_oauth',
+			label: 'Gmail (Connect with Google)',
+		},
 		{
 			value: 'google_workspace',
 			label: $t('app.components.ingestion_source_form.provider_google_workspace'),
@@ -92,6 +96,33 @@
 		}
 	});
 
+	const handleGoogleConnect = async () => {
+		if (!formData.name.trim()) {
+			setAlert({
+				type: 'error',
+				title: 'Name required',
+				message: 'Please enter a name for this connection before connecting.',
+				duration: 4000,
+				show: true,
+			});
+			return;
+		}
+		try {
+			const res = await api(`/oauth/google/authorize?name=${encodeURIComponent(formData.name)}`);
+			if (!res.ok) throw new Error('Failed to initiate Google OAuth.');
+			const { url } = await res.json();
+			window.location.href = url;
+		} catch (e) {
+			setAlert({
+				type: 'error',
+				title: 'Connection failed',
+				message: e instanceof Error ? e.message : 'Could not start Google OAuth flow.',
+				duration: 5000,
+				show: true,
+			});
+		}
+	};
+
 	const handleSubmit = async (event: Event) => {
 		event.preventDefault();
 		isSubmitting = true;
@@ -184,6 +215,26 @@
 					class="col-span-3"
 				/>
+		{:else if formData.provider === 'google_oauth'}
+
+

+ Click the button below to securely connect your Gmail account via Google OAuth. + You will be redirected to Google to authorize access. +

+ +
{:else if formData.provider === 'microsoft_365'}