diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index f32738f..0a469a9 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -1,4 +1,5 @@ -import { readdir, stat } from 'fs/promises' +import { existsSync } from 'fs' +import { readdir, readFile, stat } from 'fs/promises' import { basename, dirname, join } from 'path' import { homedir } from 'os' @@ -24,63 +25,223 @@ const modelDisplayNames: Record = { const toolNameMap: Record = { bash: 'Bash', + run_in_terminal: 'Bash', read_file: 'Read', write_file: 'Edit', edit_file: 'Edit', + replace_string_in_file: 'Edit', create_file: 'Write', delete_file: 'Delete', search_files: 'Grep', + file_search: 'Grep', find_files: 'Glob', list_directory: 'LS', + list_dir: 'LS', web_search: 'WebSearch', fetch_webpage: 'WebFetch', github_repo: 'GitHub', + memory: 'Memory', + kill_terminal: 'Bash', } -// Pre-sorted by key length descending so longer/more-specific keys match first +const CHARS_PER_TOKEN = 4 + const modelDisplayEntries = Object.entries(modelDisplayNames).sort((a, b) => b[0].length - a[0].length) -// Fields marked optional document the on-disk schema; they are not read by the parser -type ToolRequest = { +// --- Legacy format (session-state/events.jsonl with outputTokens) --- + +type LegacyToolRequest = { name?: string toolCallId?: string type?: string } -type ModelChangeData = { - newModel: string - previousModel?: string +type LegacyCopilotEvent = + | { type: 'session.model_change'; timestamp?: string; data: { newModel: string } } + | { type: 'user.message'; timestamp?: string; data: { content: string; interactionId?: string } } + | { type: 'assistant.message'; timestamp?: string; data: { messageId: string; outputTokens: number; interactionId?: string; toolRequests?: LegacyToolRequest[] } } + +function parseLegacyEvents(content: string, sessionId: string, seenKeys: Set): ParsedProviderCall[] { + const results: ParsedProviderCall[] = [] + const lines = content.split('\n').filter(l => l.trim()) + let currentModel = '' + let pendingUserMessage = '' + + for (const line of lines) { + let event: LegacyCopilotEvent + try { + event = JSON.parse(line) + } catch { + continue + } + + if (event.type === 'session.model_change') { + currentModel = event.data.newModel ?? currentModel + continue + } + + if (event.type === 'user.message') { + pendingUserMessage = event.data.content ?? '' + continue + } + + if (event.type === 'assistant.message') { + const { messageId, outputTokens, toolRequests = [] } = event.data + if (outputTokens === 0) continue + if (!currentModel) continue + + const dedupKey = `copilot:${sessionId}:${messageId}` + if (seenKeys.has(dedupKey)) continue + seenKeys.add(dedupKey) + + const tools = toolRequests + .map(t => t.name ?? '') + .filter(Boolean) + .map(n => toolNameMap[n] ?? n) + + const costUSD = calculateCost(currentModel, 0, outputTokens, 0, 0, 0) + + results.push({ + provider: 'copilot', + model: currentModel, + inputTokens: 0, + outputTokens, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + costUSD, + tools, + bashCommands: [], + timestamp: event.timestamp ?? '', + speed: 'standard', + deduplicationKey: dedupKey, + userMessage: pendingUserMessage, + sessionId, + }) + + pendingUserMessage = '' + } + } + + return results } -type UserMessageData = { - content: string - interactionId?: string +// --- VS Code transcript format (workspaceStorage transcripts) --- + +type TranscriptToolRequest = { + toolCallId?: string + name?: string + arguments?: string + type?: string } -type AssistantMessageData = { - messageId: string - outputTokens: number - interactionId?: string - toolRequests?: ToolRequest[] +type TranscriptEvent = + | { type: 'session.start'; timestamp?: string; data: { sessionId: string; producer?: string } } + | { type: 'user.message'; timestamp?: string; data: { content: string; attachments?: unknown[] } } + | { type: 'assistant.message'; timestamp?: string; data: { messageId: string; content?: string; reasoningText?: string; toolRequests?: TranscriptToolRequest[]; outputTokens?: number } } + | { type: string; timestamp?: string; data: Record } + +function inferModelFromToolCallIds(events: TranscriptEvent[]): string { + for (const e of events) { + if (e.type !== 'assistant.message') continue + const msg = e as { data: { toolRequests?: TranscriptToolRequest[] } } + for (const t of msg.data.toolRequests ?? []) { + if (t.toolCallId?.startsWith('toolu_bdrk_')) return 'claude-sonnet-4-5' + if (t.toolCallId?.startsWith('call_')) return 'gpt-4.1' + } + } + return 'gpt-4.1' } -type CopilotEvent = - | { type: 'session.model_change'; timestamp?: string; data: ModelChangeData } - | { type: 'user.message'; timestamp?: string; data: UserMessageData } - | { type: 'assistant.message'; timestamp?: string; data: AssistantMessageData } +function parseTranscriptEvents(content: string, sessionId: string, seenKeys: Set): ParsedProviderCall[] { + const results: ParsedProviderCall[] = [] + const lines = content.split('\n').filter(l => l.trim()) + const events: TranscriptEvent[] = [] -function getCopilotSessionStateDir(override?: string): string { - return override ?? join(homedir(), '.copilot', 'session-state') + for (const line of lines) { + try { + events.push(JSON.parse(line)) + } catch { + continue + } + } + + const model = inferModelFromToolCallIds(events) + let pendingUserMessage = '' + + for (const event of events) { + if (event.type === 'user.message') { + const data = event.data as { content?: string } + pendingUserMessage = (data.content ?? '').slice(0, 500) + continue + } + + if (event.type === 'assistant.message') { + const data = event.data as { messageId: string; content?: string; reasoningText?: string; toolRequests?: TranscriptToolRequest[]; outputTokens?: number } + const contentText = data.content ?? '' + const reasoningText = data.reasoningText ?? '' + + if (contentText.length === 0 && reasoningText.length === 0 && (data.toolRequests ?? []).length === 0) continue + + const dedupKey = `copilot:${sessionId}:${data.messageId}` + if (seenKeys.has(dedupKey)) continue + seenKeys.add(dedupKey) + + let outputTokens = data.outputTokens ?? 0 + let reasoningTokens = 0 + if (outputTokens === 0) { + outputTokens = Math.ceil(contentText.length / CHARS_PER_TOKEN) + reasoningTokens = Math.ceil(reasoningText.length / CHARS_PER_TOKEN) + } + + const inputTokens = Math.ceil(pendingUserMessage.length / CHARS_PER_TOKEN) + + const tools = (data.toolRequests ?? []) + .map(t => t.name ?? '') + .filter(Boolean) + .map(n => toolNameMap[n] ?? n) + + const costUSD = calculateCost(model, inputTokens, outputTokens + reasoningTokens, 0, 0, 0) + + results.push({ + provider: 'copilot', + model, + inputTokens, + outputTokens, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens, + webSearchRequests: 0, + costUSD, + tools, + bashCommands: [], + timestamp: event.timestamp ?? '', + speed: 'standard', + deduplicationKey: dedupKey, + userMessage: pendingUserMessage, + sessionId, + }) + + pendingUserMessage = '' + } + } + + return results } -function parseCwd(yaml: string): string | null { - const match = yaml.match(/^cwd:\s*(.+)$/m) - if (!match?.[1]) return null - const raw = match[1] - .replace(/\s*#.*$/, '') // strip trailing comment - .replace(/^['"]|['"]$/g, '') // strip surrounding quotes - .trim() - return raw || null +// --- Parser --- + +function isTranscriptFormat(content: string): boolean { + const firstLine = content.split('\n')[0] ?? '' + try { + const event = JSON.parse(firstLine) + return event.type === 'session.start' && event.data?.producer === 'copilot-agent' + } catch { + return false + } } function createParser(source: SessionSource, seenKeys: Set): SessionParser { @@ -88,76 +249,60 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars async *parse(): AsyncGenerator { const content = await readSessionFile(source.path) if (content === null) return - const sessionId = basename(dirname(source.path)) - const lines = content.split('\n').filter(l => l.trim()) - let currentModel = '' - let pendingUserMessage = '' - - for (const line of lines) { - let event: CopilotEvent - try { - event = JSON.parse(line) as CopilotEvent - } catch { - continue - } - - if (event.type === 'session.model_change') { - currentModel = event.data.newModel ?? currentModel - continue - } - - if (event.type === 'user.message') { - pendingUserMessage = event.data.content ?? '' - continue - } - - if (event.type === 'assistant.message') { - const { messageId, outputTokens, toolRequests = [] } = event.data - if (outputTokens === 0) continue - // Skip if no model has been identified yet - avoids silent misattribution - if (!currentModel) continue - - const dedupKey = `copilot:${sessionId}:${messageId}` - if (seenKeys.has(dedupKey)) continue - seenKeys.add(dedupKey) - - const tools = toolRequests - .map(t => t.name ?? '') - .filter(Boolean) - .map(n => toolNameMap[n] ?? n) - - // Copilot only logs outputTokens; inputTokens are not available in session logs. - // Cost will be lower than actual API cost. - const costUSD = calculateCost(currentModel, 0, outputTokens, 0, 0, 0) - - yield { - provider: 'copilot', - model: currentModel, - inputTokens: 0, - outputTokens, - cacheCreationInputTokens: 0, - cacheReadInputTokens: 0, - cachedInputTokens: 0, - reasoningTokens: 0, - webSearchRequests: 0, - costUSD, - tools, - bashCommands: [], - timestamp: event.timestamp ?? '', - speed: 'standard', - deduplicationKey: dedupKey, - userMessage: pendingUserMessage, - sessionId, - } - - pendingUserMessage = '' - } + const sessionId = basename(source.path, '.jsonl').length === 36 + ? basename(source.path, '.jsonl') + : basename(dirname(source.path)) + + const calls = isTranscriptFormat(content) + ? parseTranscriptEvents(content, sessionId, seenKeys) + : parseLegacyEvents(content, sessionId, seenKeys) + + for (const call of calls) { + yield call } }, } } -async function discoverSessionsInDir(sessionStateDir: string): Promise { +// --- Discovery --- + +function getCopilotSessionStateDir(override?: string): string { + return override ?? join(homedir(), '.copilot', 'session-state') +} + +function getVSCodeWorkspaceStorageDir(): string { + if (process.platform === 'darwin') { + return join(homedir(), 'Library', 'Application Support', 'Code', 'User', 'workspaceStorage') + } + if (process.platform === 'win32') { + return join(homedir(), 'AppData', 'Roaming', 'Code', 'User', 'workspaceStorage') + } + return join(homedir(), '.config', 'Code', 'User', 'workspaceStorage') +} + +function parseCwd(yaml: string): string | null { + const match = yaml.match(/^cwd:\s*(.+)$/m) + if (!match?.[1]) return null + const raw = match[1] + .replace(/\s*#.*$/, '') + .replace(/^['"]|['"]$/g, '') + .trim() + return raw || null +} + +async function readWorkspaceProject(workspaceDir: string): Promise { + try { + const raw = await readFile(join(workspaceDir, 'workspace.json'), 'utf-8') + const data = JSON.parse(raw) as { folder?: string } + if (data.folder) { + const url = data.folder.replace(/^file:\/\//, '') + return basename(decodeURIComponent(url)) + } + } catch {} + return basename(workspaceDir) +} + +async function discoverLegacySessions(sessionStateDir: string): Promise { const sources: SessionSource[] = [] let sessionDirs: string[] @@ -185,8 +330,44 @@ async function discoverSessionsInDir(sessionStateDir: string): Promise { + const sources: SessionSource[] = [] + + let workspaceDirs: string[] + try { + workspaceDirs = await readdir(workspaceStorageDir) + } catch { + return sources + } + + for (const wsDir of workspaceDirs) { + const transcriptsDir = join(workspaceStorageDir, wsDir, 'GitHub.copilot-chat', 'transcripts') + if (!existsSync(transcriptsDir)) continue + + const project = await readWorkspaceProject(join(workspaceStorageDir, wsDir)) + + let files: string[] + try { + files = await readdir(transcriptsDir) + } catch { + continue + } + + for (const file of files) { + if (!file.endsWith('.jsonl')) continue + const filePath = join(transcriptsDir, file) + const s = await stat(filePath).catch(() => null) + if (!s?.isFile()) continue + sources.push({ path: filePath, project, provider: 'copilot' }) + } + } + + return sources +} + +export function createCopilotProvider(sessionStateDir?: string, workspaceStorageDirOverride?: string): Provider { + const legacyDir = getCopilotSessionStateDir(sessionStateDir) + const vscodeDir = workspaceStorageDirOverride ?? getVSCodeWorkspaceStorageDir() return { name: 'copilot', @@ -204,7 +385,11 @@ export function createCopilotProvider(sessionStateDir?: string): Provider { }, async discoverSessions(): Promise { - return discoverSessionsInDir(dir) + const [legacy, vscode] = await Promise.all([ + discoverLegacySessions(legacyDir), + discoverVSCodeTranscripts(vscodeDir), + ]) + return [...legacy, ...vscode] }, createSessionParser(source: SessionSource, seenKeys: Set): SessionParser { diff --git a/tests/providers/copilot.test.ts b/tests/providers/copilot.test.ts index eb1b6c5..273ef42 100644 --- a/tests/providers/copilot.test.ts +++ b/tests/providers/copilot.test.ts @@ -174,7 +174,7 @@ describe('copilot provider - discoverSessions', () => { await createSessionDir('sess-disc-001', [modelChange('gpt-4.1')]) await createSessionDir('sess-disc-002', [modelChange('gpt-4.1')]) - const provider = createCopilotProvider(tmpDir) + const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode') const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(2) @@ -185,7 +185,7 @@ describe('copilot provider - discoverSessions', () => { it('reads project name from workspace.yaml cwd', async () => { await createSessionDir('sess-disc-003', [modelChange('gpt-4.1')], '/home/user/myapp') - const provider = createCopilotProvider(tmpDir) + const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode') const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(1) @@ -198,7 +198,7 @@ describe('copilot provider - discoverSessions', () => { await writeFile(join(sessionDir, 'workspace.yaml'), 'cwd: "/home/user/myapp" # project root\n') await writeFile(join(sessionDir, 'events.jsonl'), '\n') - const provider = createCopilotProvider(tmpDir) + const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode') const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(1) @@ -206,7 +206,7 @@ describe('copilot provider - discoverSessions', () => { }) it('returns empty when directory does not exist', async () => { - const provider = createCopilotProvider('/nonexistent/path') + const provider = createCopilotProvider('/nonexistent/path', '/nonexistent/vscode') const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(0) }) @@ -215,10 +215,25 @@ describe('copilot provider - discoverSessions', () => { const emptyDir = join(tmpDir, 'empty-session') await mkdir(emptyDir, { recursive: true }) - const provider = createCopilotProvider(tmpDir) + const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode') const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(0) }) + + it('discovers VS Code workspace transcripts', async () => { + const wsDir = join(tmpDir, 'vscode-ws') + const transcriptsDir = join(wsDir, 'abc123', 'GitHub.copilot-chat', 'transcripts') + await mkdir(transcriptsDir, { recursive: true }) + await writeFile(join(wsDir, 'abc123', 'workspace.json'), JSON.stringify({ folder: 'file:///home/user/myapp' })) + await writeFile(join(transcriptsDir, 'session-1.jsonl'), JSON.stringify({ type: 'session.start', data: { sessionId: 's1', producer: 'copilot-agent' } }) + '\n') + + const provider = createCopilotProvider('/nonexistent/legacy', wsDir) + const sessions = await provider.discoverSessions() + + expect(sessions).toHaveLength(1) + expect(sessions[0]!.project).toBe('myapp') + expect(sessions[0]!.path).toContain('session-1.jsonl') + }) }) describe('copilot provider - metadata', () => {