diff --git a/app/analyze/[videoId]/page.tsx b/app/analyze/[videoId]/page.tsx index 272670b..1a3b51a 100644 --- a/app/analyze/[videoId]/page.tsx +++ b/app/analyze/[videoId]/page.tsx @@ -2050,6 +2050,43 @@ export default function AnalyzePage() { currentSourceLanguage={videoInfo?.language} onRequestExport={handleRequestExport} exportButtonState={exportButtonState} + onTranscriptUpdate={(newTranscript) => { + setTranscript(newTranscript); + + // Update topics to reflect enhanced text + // Since enhanced transcript changes text length, existing char offsets are invalid. + // We reset offsets to cover the full segments identified by indices. + const updateTopics = (currentTopics: Topic[]) => { + return currentTopics.map(topic => ({ + ...topic, + segments: topic.segments.map(seg => { + if (typeof seg.startSegmentIdx === 'number' && typeof seg.endSegmentIdx === 'number') { + const relevantSegments = newTranscript.slice(seg.startSegmentIdx, seg.endSegmentIdx + 1); + if (relevantSegments.length > 0) { + const newText = relevantSegments.map(s => s.text).join(' '); + const lastSegText = relevantSegments[relevantSegments.length - 1].text; + return { + ...seg, + text: newText, + startCharOffset: 0, + endCharOffset: lastSegText.length + }; + } + } + return seg; + }) + })); + }; + + const updatedTopics = updateTopics(baseTopics); + setTopics(updatedTopics); + setBaseTopics(updatedTopics); + + // Clear selected topic to avoid rendering issues with stale offsets + if (selectedTopic) { + setSelectedTopic(null); + } + }} /> diff --git a/app/api/transcript/enhance/route.ts b/app/api/transcript/enhance/route.ts new file mode 100644 index 0000000..7172541 --- /dev/null +++ b/app/api/transcript/enhance/route.ts @@ -0,0 +1,285 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { z } from 'zod'; +import { createClient } from '@/lib/supabase/server'; +import { withSecurity, SECURITY_PRESETS } from '@/lib/security-middleware'; +import { generateAIResponse } from '@/lib/ai-client'; +import { safeJsonParse } from '@/lib/json-utils'; +import { + consumeVideoCreditAtomic, + canGenerateVideo, +} from '@/lib/subscription-manager'; +import { youtubeIdSchema, transcriptSchema, videoInfoSchema } from '@/lib/validation'; + +// Define the request schema for validation +const enhanceTranscriptSchema = z.object({ + videoId: youtubeIdSchema, + videoInfo: videoInfoSchema, + transcript: transcriptSchema, +}); + +// Define the response schema from AI to ensure strict JSON output +const aiResponseSchema = z.object({ + enhancedSegments: z.array(z.string()), +}); + +// Chunking configuration for large transcripts +const CHUNK_SIZE = 100; // segments per chunk +const SMALL_TRANSCRIPT_THRESHOLD = 150; // Process in single request if <= this + +/** + * Split an array into chunks of specified size + */ +function chunkArray(array: T[], size: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < array.length; i += size) { + chunks.push(array.slice(i, i + size)); + } + return chunks; +} + +/** + * Build the system prompt for enhancing a chunk of transcript segments + */ +function buildEnhancePrompt( + rawTexts: string[], + videoInfo: { title: string; author?: string | null; description?: string | null }, + chunkIndex: number, + totalChunks: number +): string { + const chunkInfo = totalChunks > 1 + ? `- Processing chunk ${chunkIndex + 1} of ${totalChunks}\n` + : ''; + + return ` +You are an expert transcript editor. Your task is to enhance the accuracy and readability of a video transcript while maintaining a strict 1:1 mapping with the input segments. + +Context: +- Video Title: "${videoInfo.title}" +- Channel/Author: "${videoInfo.author || 'Unknown'}" +- Description: "${videoInfo.description?.slice(0, 500) || 'N/A'}" +${chunkInfo} +Instructions: +1. Read the input array of strings. Each string corresponds to a specific time segment. +2. Clean up filler words (um, uh, like, etc.), fix grammar, punctuation, and capitalization. +3. Fix specific terms based on context (e.g., technical terms, proper nouns). +4. Identify speakers if clear from context, but prioritize flow and readability. +5. **CRITICAL:** You MUST return an array of strings called "enhancedSegments". +6. **CRITICAL:** The "enhancedSegments" array MUST have exactly ${rawTexts.length} elements. Index 0 of output must correspond to Index 0 of input. +7. Do not merge or split segments across indices. If a sentence spans multiple segments, ensure the split points remain roughly the same or flow naturally across the boundary. + +Input Segments (${rawTexts.length} total): +${JSON.stringify(rawTexts)} +`; +} + +/** + * Process a single chunk of transcript segments with AI enhancement + */ +async function processEnhanceChunk( + rawTexts: string[], + videoInfo: { title: string; author?: string | null; description?: string | null }, + chunkIndex: number, + totalChunks: number +): Promise { + const prompt = buildEnhancePrompt(rawTexts, videoInfo, chunkIndex, totalChunks); + + const aiResponse = await generateAIResponse(prompt, { + model: 'grok-4-1-fast-non-reasoning', + zodSchema: aiResponseSchema, + schemaName: 'EnhancedTranscript', + temperature: 0.2, + maxOutputTokens: 131072, // Reduced for smaller chunks + }); + + // Log response for debugging + console.log(`[Enhance] Chunk ${chunkIndex + 1}/${totalChunks} response length: ${aiResponse.length}`); + + const parsed = safeJsonParse(aiResponse); + const validation = aiResponseSchema.safeParse(parsed); + + if (!validation.success) { + console.error(`[Enhance] Chunk ${chunkIndex + 1} schema validation failed:`, validation.error.flatten()); + throw new Error(`Chunk ${chunkIndex + 1} schema validation failed`); + } + + if (validation.data.enhancedSegments.length !== rawTexts.length) { + console.error( + `[Enhance] Chunk ${chunkIndex + 1} length mismatch: expected ${rawTexts.length}, got ${validation.data.enhancedSegments.length}` + ); + throw new Error(`Chunk ${chunkIndex + 1} length mismatch`); + } + + return validation.data.enhancedSegments; +} + +/** + * Process a chunk with retry logic and fallback to original text + */ +async function processChunkWithRetry( + rawTexts: string[], + videoInfo: { title: string; author?: string | null; description?: string | null }, + chunkIndex: number, + totalChunks: number, + maxRetries = 2 +): Promise { + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + return await processEnhanceChunk(rawTexts, videoInfo, chunkIndex, totalChunks); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + console.warn(`[Enhance] Chunk ${chunkIndex + 1} attempt ${attempt}/${maxRetries} failed:`, errorMessage); + + if (attempt < maxRetries) { + // Exponential backoff + await new Promise((resolve) => setTimeout(resolve, 500 * attempt)); + } + } + } + + // All retries failed - return original text as fallback + console.error(`[Enhance] Chunk ${chunkIndex + 1} failed after ${maxRetries} attempts, using original text`); + return rawTexts; +} + +async function handler(req: NextRequest) { + try { + const body = await req.json(); + + // 1. Validate request body + const result = enhanceTranscriptSchema.safeParse(body); + if (!result.success) { + return NextResponse.json( + { error: 'Invalid request data', details: result.error.format() }, + { status: 400 } + ); + } + + const { videoId, videoInfo, transcript } = result.data; + const supabase = await createClient(); + + // 2. Get user info + const { data: { user }, error: userError } = await supabase.auth.getUser(); + if (userError || !user) { + return NextResponse.json( + { error: 'Unauthorized' }, + { status: 401 } + ); + } + + // 3. Check credits (using canGenerateVideo to mimic standard checks, + // but we force a credit consumption regardless of cache since this is a new "action") + // Wait, if we use canGenerateVideo with youtubeId, it might say "CACHED" and free. + // But "Enhance" is a new paid action. + // So we should check if they have remaining credits directly, ignoring the cache status of the video itself. + // Or we can pass skipCacheCheck: true + const decision = await canGenerateVideo(user.id, videoId, { + client: supabase, + skipCacheCheck: true + }); + + if (!decision.allowed) { + return NextResponse.json( + { error: decision.reason || 'Insufficient credits' }, + { status: 403 } + ); + } + + // 4. Process transcript with AI (chunked for large transcripts) + const rawTexts = transcript.map(s => s.text); + let enhancedTexts: string[] = []; + + console.log(`[Enhance] Total segments: ${rawTexts.length}`); + + if (rawTexts.length <= SMALL_TRANSCRIPT_THRESHOLD) { + // Small transcript: process in single request + console.log(`[Enhance] Small transcript, processing in single request`); + enhancedTexts = await processChunkWithRetry(rawTexts, videoInfo, 0, 1); + } else { + // Large transcript: process chunks in PARALLEL for speed + const chunks = chunkArray(rawTexts, CHUNK_SIZE); + console.log(`[Enhance] Large transcript, processing ${chunks.length} chunks in parallel`); + + const chunkPromises = chunks.map((chunk, i) => { + console.log(`[Enhance] Starting chunk ${i + 1}/${chunks.length} (${chunk.length} segments)`); + return processChunkWithRetry(chunk, videoInfo, i, chunks.length); + }); + + const results = await Promise.all(chunkPromises); + enhancedTexts = results.flat(); + } + + // Final validation + if (enhancedTexts.length !== transcript.length) { + console.error(`[Enhance] Final segment count mismatch: Input ${transcript.length}, Output ${enhancedTexts.length}`); + return NextResponse.json( + { error: 'AI generated transcript length mismatch' }, + { status: 502 } + ); + } + + console.log(`[Enhance] Successfully enhanced all ${enhancedTexts.length} segments`); + + // 7. Reconstruct Transcript + const enhancedTranscript = transcript.map((segment, idx) => ({ + ...segment, + text: enhancedTexts[idx] + })); + + // 8. Consume Credit Atomic + // We need to fetch stats again for the snapshot required by consumeVideoCreditAtomic + // (Or rely on the ones from decision if they are fresh enough, but safer to re-fetch or use decision.stats if available) + if (!decision.subscription || !decision.stats) { + return NextResponse.json( + { error: 'Failed to retrieve subscription info' }, + { status: 500 } + ); + } + + const consumption = await consumeVideoCreditAtomic({ + userId: user.id, + youtubeId: videoId, + subscription: decision.subscription, + statsSnapshot: decision.stats, + counted: true, + identifier: `enhance:${videoId}:${Date.now()}`, + client: supabase + }); + + if (!consumption.success) { + return NextResponse.json( + { error: consumption.reason || 'Failed to consume credit' }, + { status: 500 } + ); + } + + // 9. Update Database + const { error: updateError } = await supabase + .from('video_analyses') + .update({ transcript: enhancedTranscript }) + .eq('youtube_id', videoId); + + if (updateError) { + console.error('Failed to update transcript in DB:', updateError); + // Note: Credit was already consumed. In a production system, we might want to rollback or flag this. + // For now, we log it. + return NextResponse.json( + { error: 'Failed to save enhanced transcript' }, + { status: 500 } + ); + } + + return NextResponse.json({ + success: true, + transcript: enhancedTranscript + }); + + } catch (error) { + console.error('Enhance transcript error:', error); + return NextResponse.json( + { error: 'Internal Server Error' }, + { status: 500 } + ); + } +} + +export const POST = withSecurity(handler, SECURITY_PRESETS.STRICT); diff --git a/components/right-column-tabs.tsx b/components/right-column-tabs.tsx index cc5fd54..80274f4 100644 --- a/components/right-column-tabs.tsx +++ b/components/right-column-tabs.tsx @@ -56,6 +56,7 @@ interface RightColumnTabsProps { badgeLabel?: string; isLoading?: boolean; }; + onTranscriptUpdate?: (newTranscript: TranscriptSegment[]) => void; } export interface RightColumnTabsHandle { @@ -93,6 +94,7 @@ export const RightColumnTabs = forwardRef { const [activeTab, setActiveTab] = useState<"transcript" | "chat" | "notes">("transcript"); @@ -208,6 +210,8 @@ export const RightColumnTabs = forwardRef
diff --git a/components/transcript-viewer.tsx b/components/transcript-viewer.tsx index 59ba877..3b250a9 100644 --- a/components/transcript-viewer.tsx +++ b/components/transcript-viewer.tsx @@ -1,17 +1,19 @@ "use client"; import { useEffect, useRef, useState, useCallback } from "react"; -import { TranscriptSegment, Topic, Citation, TranslationRequestHandler } from "@/lib/types"; +import { TranscriptSegment, Topic, Citation, TranslationRequestHandler, VideoInfo } from "@/lib/types"; import { getTopicHSLColor, formatDuration } from "@/lib/utils"; import { cn } from "@/lib/utils"; import { ScrollArea } from "@/components/ui/scroll-area"; -import { Eye, EyeOff, ChevronDown, Download, Loader2, Search, ChevronUp, X } from "lucide-react"; +import { Eye, EyeOff, ChevronDown, Download, Loader2, Search, ChevronUp, X, Sparkles } from "lucide-react"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Tooltip, TooltipContent, TooltipTrigger, TooltipProvider } from "@/components/ui/tooltip"; import { Badge } from "@/components/ui/badge"; import { SelectionActions, triggerExplainSelection, SelectionActionPayload } from "@/components/selection-actions"; import { NoteMetadata } from "@/lib/types"; +import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { csrfFetch } from "@/lib/csrf-client"; interface TranscriptViewerProps { transcript: TranscriptSegment[]; @@ -31,6 +33,8 @@ interface TranscriptViewerProps { badgeLabel?: string; isLoading?: boolean; }; + videoInfo?: VideoInfo | null; + onTranscriptUpdate?: (newTranscript: TranscriptSegment[]) => void; } export function TranscriptViewer({ @@ -46,6 +50,8 @@ export function TranscriptViewer({ onRequestTranslation, onRequestExport, exportButtonState, + videoInfo, + onTranscriptUpdate }: TranscriptViewerProps) { const highlightedRefs = useRef<(HTMLDivElement | null)[]>([]); const scrollAreaRef = useRef(null); @@ -59,6 +65,10 @@ export function TranscriptViewer({ const [loadingTranslations, setLoadingTranslations] = useState>(new Set()); const [translationErrors, setTranslationErrors] = useState>(new Set()); + // Enhance Transcript State + const [isEnhanceModalOpen, setIsEnhanceModalOpen] = useState(false); + const [isEnhancing, setIsEnhancing] = useState(false); + // Search state const [isSearchOpen, setIsSearchOpen] = useState(false); const [searchQuery, setSearchQuery] = useState(""); @@ -119,34 +129,6 @@ export function TranscriptViewer({ // Clear refs when topic changes useEffect(() => { highlightedRefs.current = []; - - // Debug: Verify segment indices match content - if (selectedTopic && selectedTopic.segments.length > 0 && transcript.length > 0) { - - const firstSeg = selectedTopic.segments[0]; - if (firstSeg.startSegmentIdx !== undefined && firstSeg.endSegmentIdx !== undefined) { - - // Check what's actually at those indices - if (transcript[firstSeg.startSegmentIdx]) { - - // Try to find where the quote actually is - const quoteStart = firstSeg.text.substring(0, 30).toLowerCase().replace(/[^a-z0-9 ]/g, ''); - let foundAt = -1; - - for (let i = Math.max(0, firstSeg.startSegmentIdx - 5); i <= Math.min(firstSeg.startSegmentIdx + 5, transcript.length - 1); i++) { - const segText = transcript[i]?.text || ''; - const segTextNorm = segText.toLowerCase().replace(/[^a-z0-9 ]/g, ''); - if (segTextNorm.includes(quoteStart)) { - foundAt = i; - break; - } - } - - if (foundAt !== -1 && foundAt !== firstSeg.startSegmentIdx) { - } - } - } - } }, [selectedTopic, transcript]); // Scroll to citation highlight when it changes @@ -271,10 +253,7 @@ export function TranscriptViewer({ } }, [isSearchOpen]); - // Jump to first result when search results change (if user typed something new) - // But careful not to jump unexpectedly if just typing more characters of same word? - // For now, let's just stick to the first result being selected but maybe not auto-scrolled unless requested. - // Actually, standard behavior is usually jump to first match. + // Jump to first result when search results change useEffect(() => { if (searchResults.length > 0 && currentResultIndex === 0) { const result = searchResults[0]; @@ -414,8 +393,6 @@ export function TranscriptViewer({ // Use segment indices with character offsets for precise matching if (highlightSeg.startSegmentIdx !== undefined && highlightSeg.endSegmentIdx !== undefined) { - // Skip this debug logging - removed for cleaner output - // Skip segments that are before the start or after the end if (segmentIndex < highlightSeg.startSegmentIdx || segmentIndex > highlightSeg.endSegmentIdx) { continue; @@ -554,15 +531,43 @@ export function TranscriptViewer({ return; // Do nothing if text is selected } - // Check if the user is dragging (moved mouse significantly between down and up) - // Actually, selection check handles this mostly, but if they drag and don't select anything (empty selection)? - // The requirement is "dragging to select text". If they drag but select nothing, maybe they still meant to drag? - // But usually click implies mousedown and mouseup at same location. - // Seek to the start of the segment onTimestampClick(segment.start); }; + const handleEnhance = async () => { + if (!videoId) return; + + setIsEnhancing(true); + try { + const response = await csrfFetch.post('/api/transcript/enhance', { + videoId, + videoInfo, + transcript + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || 'Failed to enhance transcript'); + } + + const data = await response.json(); + if (data.transcript && onTranscriptUpdate) { + onTranscriptUpdate(data.transcript); + } + setIsEnhanceModalOpen(false); + } catch (error) { + console.error('Enhance failed:', error); + // Ideally show toast error here, but we don't have toast imported in this component yet (it's in providers) + // Since we are in a client component, we could use Sonner's toast if we import it, + // but sticking to console for now as per minimal changes unless requested. + // Actually, let's keep it simple. + alert(error instanceof Error ? error.message : "Failed to enhance transcript"); + } finally { + setIsEnhancing(false); + } + }; + return (
@@ -678,6 +683,25 @@ export function TranscriptViewer({ + {/* Enhance Button */} + {videoId && onTranscriptUpdate && ( + + + + + +

Enhance Transcript with AI

+
+
+ )} + + + + +
); diff --git a/dev_server.log b/dev_server.log new file mode 100644 index 0000000..b7f8069 --- /dev/null +++ b/dev_server.log @@ -0,0 +1,58 @@ + +> nextjs@0.1.0 dev /app +> next dev --turbopack --port 3000 + + ▲ Next.js 15.5.7 (Turbopack) + - Local: http://localhost:3000 + - Network: http://192.168.0.2:3000 + + ✓ Starting... +Attention: Next.js now collects completely anonymous telemetry regarding usage. +This information is used to shape Next.js' roadmap and prioritize features. +You can learn more, including how to opt-out if you'd not like to participate in this anonymous program, by visiting the following URL: +https://nextjs.org/telemetry + + ○ Compiling middleware ... + ✓ Compiled middleware in 1223ms + ✓ Ready in 4.7s +Auth token refresh failed: Auth session missing! + ○ Compiling /analyze/[videoId] ... + ✓ Compiled /analyze/[videoId] in 13.5s + GET /analyze/dQw4w9WgXcQ 200 in 16268ms +Auth token refresh failed: Auth session missing! +Auth token refresh failed: Auth session missing! + ○ Compiling /api/check-limit ... + ✓ Compiled /api/check-limit in 1912ms +[Transcript Format] Detected old format transcript for YouTube ID: dQw4w9WgXcQ. Applying runtime sentence merging (cache not updated)... +[Transcript Format] Original segments: 61 +[Transcript Format] Merged segments: 8 +[Transcript Format] Segment reduction: 86.9% + POST /api/check-video-cache 200 in 3245ms +Auth token refresh failed: Auth session missing! +Auth token refresh failed: Auth session missing! + GET /api/check-limit 200 in 3314ms +Auth token refresh failed: Auth session missing! + ○ Compiling /api/video-analysis ... + ✓ Compiled /api/video-analysis in 1574ms + GET /api/image-limit 200 in 1736ms +Auth token refresh failed: Auth session missing! + GET /api/image-limit 200 in 537ms +Theme generation failed: Error: Grok API error (Client specified an invalid argument): Bad Request + at Object.generate (lib/ai-providers/grok-adapter.ts:184:17) + at async generateAIResponse (lib/ai-client.ts:63:18) + at async generateThemesFromTranscript (lib/ai-processing.ts:1444:22) + at async handler (app/api/video-analysis/route.ts:341:18) + at async securedHandler (lib/security-middleware.ts:113:24) + 182 | 'Unknown error'; + 183 | const code = parsed?.error?.code || parsed?.code; +> 184 | throw new Error( + | ^ + 185 | `Grok API error${code ? ` (${code})` : ''}: ${message}` + 186 | ); + 187 | } +[Transcript Format] Detected old format transcript for YouTube ID: dQw4w9WgXcQ. Applying runtime sentence merging (cache not updated)... +[Transcript Format] Original segments: 61 +[Transcript Format] Merged segments: 8 +[Transcript Format] Segment reduction: 86.9% + POST /api/video-analysis 200 in 3095ms + POST /api/transcript 200 in 3096ms diff --git a/lib/ai-providers/grok-adapter.ts b/lib/ai-providers/grok-adapter.ts index ccf9f53..6b28995 100644 --- a/lib/ai-providers/grok-adapter.ts +++ b/lib/ai-providers/grok-adapter.ts @@ -119,6 +119,7 @@ function buildPayload(params: ProviderGenerateParams) { json_schema: { name: ensureSchemaName(params.schemaName), schema: jsonSchema, + strict: true, }, }; } catch (error) { diff --git a/lib/json-utils.ts b/lib/json-utils.ts index 0104d94..932575e 100644 --- a/lib/json-utils.ts +++ b/lib/json-utils.ts @@ -67,24 +67,30 @@ export function extractJsonPayload(raw: string): string { return trimmed; } + // If the string already starts with { or [, it's likely clean JSON - return as-is + // This prevents incorrectly extracting inner arrays from objects like {"key":[...]} + if (trimmed.startsWith('{') || trimmed.startsWith('[')) { + return trimmed; + } + // Try to extract content from markdown code fences like ```json ... ``` const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i); if (fencedMatch) { return fencedMatch[1].trim(); } + // Try to extract JSON object first (more common in structured AI responses) + const objectMatch = trimmed.match(/\{[\s\S]*\}/); + if (objectMatch) { + return objectMatch[0]; + } + // Try to extract JSON array const arrayMatch = trimmed.match(/\[[\s\S]*\]/); if (arrayMatch) { return arrayMatch[0]; } - // Try to extract JSON object - const objectMatch = trimmed.match(/\{[\s\S]*\}/); - if (objectMatch) { - return objectMatch[0]; - } - return trimmed; } diff --git a/lib/validation.ts b/lib/validation.ts index 8767854..108b8d2 100644 --- a/lib/validation.ts +++ b/lib/validation.ts @@ -38,6 +38,7 @@ export const sanitizedTextSchema = z.string() export const videoInfoSchema = z.object({ title: z.string().min(1).max(200).transform(val => val.trim()), author: z.string().max(100).transform(val => val.trim()).optional(), + description: z.string().max(5000).optional(), duration: z.number().int().min(0).max(86400), // Max 24 hours thumbnail: z.string().url().optional(), // Use nullish() to accept both null and undefined (backward compat for old cached videos)