diff --git a/.env.example b/.env.example index 8c3997ea..02994af3 100644 --- a/.env.example +++ b/.env.example @@ -98,3 +98,11 @@ ENCRYPTION_KEY= # Apache Tika Integration # ONLY active if TIKA_URL is set TIKA_URL=http://tika:9998 + +# --- Google OAuth (Gmail individual account connect) --- +# Create an OAuth 2.0 Client ID at console.cloud.google.com → APIs & Services → Credentials +# Set the authorized redirect URI to: /v1/oauth/google/callback +# Enable the Gmail API under APIs & Services → Library +GOOGLE_OAUTH_CLIENT_ID= +GOOGLE_OAUTH_CLIENT_SECRET= +GOOGLE_OAUTH_REDIRECT_URI=http://localhost:4000/v1/oauth/google/callback diff --git a/packages/backend/src/api/controllers/oauth.controller.ts b/packages/backend/src/api/controllers/oauth.controller.ts new file mode 100644 index 00000000..4fc8d312 --- /dev/null +++ b/packages/backend/src/api/controllers/oauth.controller.ts @@ -0,0 +1,159 @@ +import type { Request, Response } from 'express'; +import { google } from 'googleapis'; +import { createHmac, randomBytes } from 'crypto'; +import { IngestionService } from '../../services/IngestionService'; +import { UserService } from '../../services/UserService'; +import { logger } from '../../config/logger'; + +const SCOPES = [ + 'https://www.googleapis.com/auth/gmail.readonly', + 'https://www.googleapis.com/auth/userinfo.email', +]; + +function getOAuth2Client() { + return new google.auth.OAuth2( + process.env.GOOGLE_OAUTH_CLIENT_ID, + process.env.GOOGLE_OAUTH_CLIENT_SECRET, + process.env.GOOGLE_OAUTH_REDIRECT_URI + ); +} + +function signState(payload: object): string { + const data = Buffer.from(JSON.stringify(payload)).toString('base64'); + const sig = createHmac('sha256', process.env.JWT_SECRET!) + .update(data) + .digest('hex'); + return `${data}.${sig}`; +} + +function verifyState(state: string): { userId: string; name: string } | null { + try { + const [data, sig] = state.split('.'); + const expected = createHmac('sha256', process.env.JWT_SECRET!) + .update(data) + .digest('hex'); + if (sig !== expected) return null; + return JSON.parse(Buffer.from(data, 'base64').toString('utf8')); + } catch { + return null; + } +} + +export class OAuthController { + /** + * GET /v1/oauth/google/authorize?name= + * Protected — user must be logged in. + * Redirects to Google OAuth consent screen. + */ + public googleAuthorize = async (req: Request, res: Response): Promise => { + const { name } = req.query; + const userId = req.user?.sub; + + if (!userId) { + res.status(401).json({ message: 'Unauthorized' }); + return; + } + + if (!name || typeof name !== 'string') { + res.status(400).json({ message: 'Missing required query parameter: name' }); + return; + } + + if ( + !process.env.GOOGLE_OAUTH_CLIENT_ID || + !process.env.GOOGLE_OAUTH_CLIENT_SECRET || + !process.env.GOOGLE_OAUTH_REDIRECT_URI + ) { + res.status(500).json({ message: 'Google OAuth is not configured on this server.' }); + return; + } + + const state = signState({ userId, name, nonce: randomBytes(8).toString('hex') }); + const oauth2Client = getOAuth2Client(); + const url = oauth2Client.generateAuthUrl({ + access_type: 'offline', + prompt: 'consent', + scope: SCOPES, + state, + }); + + res.status(200).json({ url }); + }; + + /** + * GET /v1/oauth/google/callback?code=...&state=... + * Public — Google redirects here after consent. + * Creates an ingestion source and redirects to dashboard. + */ + public googleCallback = async (req: Request, res: Response): Promise => { + const frontendUrl = process.env.APP_URL || 'http://localhost:3000'; + const { code, state, error } = req.query; + + if (error) { + logger.warn({ error }, 'Google OAuth consent was denied or cancelled.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_cancelled`); + return; + } + + if (!code || typeof code !== 'string' || !state || typeof state !== 'string') { + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_invalid_response`); + return; + } + + const payload = verifyState(state); + if (!payload) { + logger.warn('Google OAuth callback received invalid state parameter.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_invalid_state`); + return; + } + + const { userId, name } = payload; + + try { + const oauth2Client = getOAuth2Client(); + const { tokens } = await oauth2Client.getToken(code); + + if (!tokens.access_token || !tokens.refresh_token) { + throw new Error('Google did not return required tokens.'); + } + + oauth2Client.setCredentials(tokens); + + // Get the user's email address from Google + const oauth2 = google.oauth2({ version: 'v2', auth: oauth2Client }); + const { data: userInfo } = await oauth2.userinfo.get(); + + if (!userInfo.email) { + throw new Error('Could not retrieve email from Google account.'); + } + + // Fetch the OpenArchiver user to pass as actor + const userService = new UserService(); + const actor = await userService.findById(userId); + if (!actor) { + throw new Error('Could not find user account.'); + } + + await IngestionService.create( + { + name, + provider: 'google_oauth', + providerConfig: { + type: 'google_oauth', + email: userInfo.email, + accessToken: tokens.access_token, + refreshToken: tokens.refresh_token, + }, + }, + userId, + actor, + req.ip || 'unknown' + ); + + res.redirect(`${frontendUrl}/dashboard/ingestions?connected=google`); + } catch (err) { + logger.error({ err }, 'Google OAuth callback failed.'); + res.redirect(`${frontendUrl}/dashboard/ingestions?error=oauth_failed`); + } + }; +} diff --git a/packages/backend/src/api/routes/oauth.routes.ts b/packages/backend/src/api/routes/oauth.routes.ts new file mode 100644 index 00000000..27f6bd3c --- /dev/null +++ b/packages/backend/src/api/routes/oauth.routes.ts @@ -0,0 +1,26 @@ +import { Router } from 'express'; +import { OAuthController } from '../controllers/oauth.controller'; +import { requireAuth } from '../middleware/requireAuth'; +import type { AuthService } from '../../services/AuthService'; + +export const createOAuthRouter = (authService: AuthService): Router => { + const router = Router(); + const controller = new OAuthController(); + + /** + * @route GET /v1/oauth/google/authorize?name= + * @description Initiates the Google OAuth flow for Gmail individual account connection. + * @access Protected (JWT required) + */ + router.get('/google/authorize', requireAuth(authService), controller.googleAuthorize); + + /** + * @route GET /v1/oauth/google/callback + * @description Handles the Google OAuth callback, exchanges code for tokens, + * creates an ingestion source, and redirects to the dashboard. + * @access Public (called by Google) + */ + router.get('/google/callback', controller.googleCallback); + + return router; +}; diff --git a/packages/backend/src/api/server.ts b/packages/backend/src/api/server.ts index f3f74a5d..9571e953 100644 --- a/packages/backend/src/api/server.ts +++ b/packages/backend/src/api/server.ts @@ -20,6 +20,7 @@ import { createSettingsRouter } from './routes/settings.routes'; import { apiKeyRoutes } from './routes/api-key.routes'; import { integrityRoutes } from './routes/integrity.routes'; import { createJobsRouter } from './routes/jobs.routes'; +import { createOAuthRouter } from './routes/oauth.routes'; import { AuthService } from '../services/AuthService'; import { AuditService } from '../services/AuditService'; import { UserService } from '../services/UserService'; @@ -123,6 +124,7 @@ export async function createServer(modules: ArchiverModule[] = []): Promise { @@ -154,6 +156,7 @@ export async function createServer(modules: ArchiverModule[] = []): Promise; + private newHistoryId: string | undefined; + private labelCache: Map = new Map(); + + constructor(credentials: GoogleOAuthCredentials) { + this.credentials = credentials; + + this.oauth2Client = new google.auth.OAuth2( + process.env.GOOGLE_OAUTH_CLIENT_ID, + process.env.GOOGLE_OAUTH_CLIENT_SECRET, + process.env.GOOGLE_OAUTH_REDIRECT_URI + ); + + this.oauth2Client.setCredentials({ + access_token: this.credentials.accessToken, + refresh_token: this.credentials.refreshToken, + }); + } + + /** + * Tests the connection by fetching the Gmail profile for the connected account. + */ + public async testConnection(): Promise { + try { + const gmail = google.gmail({ version: 'v1', auth: this.oauth2Client }); + await gmail.users.getProfile({ userId: 'me' }); + logger.info({ email: this.credentials.email }, 'Google OAuth connection test successful.'); + return true; + } catch (error) { + logger.error({ err: error, email: this.credentials.email }, 'Failed to verify Google OAuth connection'); + throw error; + } + } + + /** + * Yields the single connected user. + */ + public async *listAllUsers(): AsyncGenerator { + yield { + id: this.credentials.email, + primaryEmail: this.credentials.email, + displayName: this.credentials.email, + }; + } + + /** + * Fetches emails for the connected account using history-based delta sync. + */ + public async *fetchEmails( + userEmail: string, + syncState?: SyncState | null, + checkDuplicate?: (messageId: string) => Promise + ): AsyncGenerator { + const gmail = google.gmail({ version: 'v1', auth: this.oauth2Client }); + const startHistoryId = syncState?.google?.[userEmail]?.historyId; + + if (!startHistoryId) { + yield* this.fetchAllMessages(gmail, userEmail, checkDuplicate); + return; + } + + this.newHistoryId = startHistoryId; + let pageToken: string | undefined = undefined; + + do { + const historyResponse: Common.GaxiosResponseWithHTTP2 = + await gmail.users.history.list({ + userId: 'me', + startHistoryId: this.newHistoryId, + pageToken, + historyTypes: ['messageAdded'], + }); + + const histories = historyResponse.data.history; + if (!histories || histories.length === 0) return; + + for (const historyRecord of histories) { + if (historyRecord.messagesAdded) { + for (const messageAdded of historyRecord.messagesAdded) { + if (messageAdded.message?.id) { + try { + const messageId = messageAdded.message.id; + + if (checkDuplicate && (await checkDuplicate(messageId))) { + logger.debug({ messageId, userEmail }, 'Skipping duplicate email (pre-check)'); + continue; + } + + const emailObject = await this.fetchAndParseMessage(gmail, userEmail, messageId); + if (emailObject) yield emailObject; + } catch (error: any) { + if (error.code === 404) { + logger.warn({ messageId: messageAdded.message.id, userEmail }, 'Message not found, skipping.'); + } else { + throw error; + } + } + } + } + } + } + + pageToken = historyResponse.data.nextPageToken ?? undefined; + if (historyResponse.data.historyId) { + this.newHistoryId = historyResponse.data.historyId; + } + } while (pageToken); + } + + private async *fetchAllMessages( + gmail: gmail_v1.Gmail, + userEmail: string, + checkDuplicate?: (messageId: string) => Promise + ): AsyncGenerator { + const profileResponse = await gmail.users.getProfile({ userId: 'me' }); + if (profileResponse.data.historyId) { + this.newHistoryId = profileResponse.data.historyId; + } + + let pageToken: string | undefined = undefined; + do { + const listResponse: Common.GaxiosResponseWithHTTP2 = + await gmail.users.messages.list({ userId: 'me', pageToken }); + + const messages = listResponse.data.messages; + if (!messages || messages.length === 0) return; + + for (const message of messages) { + if (message.id) { + try { + if (checkDuplicate && (await checkDuplicate(message.id))) { + logger.debug({ messageId: message.id, userEmail }, 'Skipping duplicate email (pre-check)'); + continue; + } + const emailObject = await this.fetchAndParseMessage(gmail, userEmail, message.id); + if (emailObject) yield emailObject; + } catch (error: any) { + if (error.code === 404) { + logger.warn({ messageId: message.id, userEmail }, 'Message not found during initial import, skipping.'); + } else { + throw error; + } + } + } + } + + pageToken = listResponse.data.nextPageToken ?? undefined; + } while (pageToken); + } + + private async fetchAndParseMessage( + gmail: gmail_v1.Gmail, + userEmail: string, + messageId: string + ): Promise { + const metadataResponse = await gmail.users.messages.get({ + userId: 'me', + id: messageId, + format: 'METADATA', + fields: 'labelIds', + }); + const labels = await this.getLabelDetails(gmail, metadataResponse.data.labelIds || []); + + const msgResponse = await gmail.users.messages.get({ + userId: 'me', + id: messageId, + format: 'RAW', + }); + + if (!msgResponse.data.raw) return null; + + const rawEmail = Buffer.from(msgResponse.data.raw, 'base64url'); + const parsedEmail: ParsedMail = await simpleParser(rawEmail); + + const mapAddresses = (addresses: AddressObject | AddressObject[] | undefined): EmailAddress[] => { + if (!addresses) return []; + const arr = Array.isArray(addresses) ? addresses : [addresses]; + return arr.flatMap((a) => a.value.map((v) => ({ name: v.name, address: v.address || '' }))); + }; + + const attachments = parsedEmail.attachments.map((a: Attachment) => ({ + filename: a.filename || 'untitled', + contentType: a.contentType, + size: a.size, + content: a.content as Buffer, + })); + + return { + id: msgResponse.data.id!, + threadId: getThreadId(parsedEmail.headers), + userEmail, + eml: rawEmail, + from: mapAddresses(parsedEmail.from), + to: mapAddresses(parsedEmail.to), + cc: mapAddresses(parsedEmail.cc), + bcc: mapAddresses(parsedEmail.bcc), + subject: parsedEmail.subject || '', + body: parsedEmail.text || '', + html: parsedEmail.html || '', + headers: parsedEmail.headers, + attachments, + receivedAt: parsedEmail.date || new Date(), + path: labels.path, + tags: labels.tags, + }; + } + + public getUpdatedSyncState(userEmail: string): SyncState { + if (!this.newHistoryId) return {}; + return { + google: { + [userEmail]: { historyId: this.newHistoryId }, + }, + }; + } + + private async getLabelDetails( + gmail: gmail_v1.Gmail, + labelIds: string[] + ): Promise<{ path: string; tags: string[] }> { + const tags: string[] = []; + let path = ''; + + for (const labelId of labelIds) { + let label = this.labelCache.get(labelId); + if (!label) { + const res = await gmail.users.labels.get({ userId: 'me', id: labelId }); + label = res.data; + this.labelCache.set(labelId, label); + } + if (label.name) { + tags.push(label.name); + if (label.type === 'user') { + path = path ? `${path}/${label.name}` : label.name; + } + } + } + + return { path, tags }; + } +} diff --git a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte index 2f423a7f..86c436e9 100644 --- a/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte +++ b/packages/frontend/src/lib/components/custom/IngestionSourceForm.svelte @@ -26,6 +26,10 @@ value: 'generic_imap', label: $t('app.components.ingestion_source_form.provider_generic_imap'), }, + { + value: 'google_oauth', + label: 'Gmail (Connect with Google)', + }, { value: 'google_workspace', label: $t('app.components.ingestion_source_form.provider_google_workspace'), @@ -92,6 +96,33 @@ } }); + const handleGoogleConnect = async () => { + if (!formData.name.trim()) { + setAlert({ + type: 'error', + title: 'Name required', + message: 'Please enter a name for this connection before connecting.', + duration: 4000, + show: true, + }); + return; + } + try { + const res = await api(`/oauth/google/authorize?name=${encodeURIComponent(formData.name)}`); + if (!res.ok) throw new Error('Failed to initiate Google OAuth.'); + const { url } = await res.json(); + window.location.href = url; + } catch (e) { + setAlert({ + type: 'error', + title: 'Connection failed', + message: e instanceof Error ? e.message : 'Could not start Google OAuth flow.', + duration: 5000, + show: true, + }); + } + }; + const handleSubmit = async (event: Event) => { event.preventDefault(); isSubmitting = true; @@ -184,6 +215,26 @@ class="col-span-3" /> + {:else if formData.provider === 'google_oauth'} +
+

+ Click the button below to securely connect your Gmail account via Google OAuth. + You will be redirected to Google to authorize access. +

+ +
{:else if formData.provider === 'microsoft_365'}