
Commit a62ae3c

Merge pull request #114 from ajcwebdev/new-llms

Add DeepSeek and Grok Models

2 parents da448a2 + d57735e

9 files changed: +282 additions, −17 deletions

src/commander.ts (+4 −1)

```diff
@@ -59,7 +59,9 @@ program
   .option('--claude [model]', 'Use Claude for processing with optional model specification')
   .option('--gemini [model]', 'Use Gemini for processing with optional model specification')
   .option('--cohere [model]', 'Use Cohere for processing with optional model specification')
-  .option('--mistral [model]', 'Use Mistral for processing')
+  .option('--mistral [model]', 'Use Mistral for processing with optional model specification')
+  .option('--deepseek [model]', 'Use DeepSeek for processing with optional model specification')
+  .option('--grok [model]', 'Use Grok for processing with optional model specification')
   .option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification')
   .option('--together [model]', 'Use Together AI for processing with optional model specification')
   .option('--groq [model]', 'Use Groq for processing with optional model specification')
@@ -76,6 +78,7 @@ program
   .option('--geminiApiKey <key>', 'Specify Gemini API key (overrides .env variable)')
   .option('--cohereApiKey <key>', 'Specify Cohere API key (overrides .env variable)')
   .option('--mistralApiKey <key>', 'Specify Mistral API key (overrides .env variable)')
+  .option('--deepseekApiKey <key>', 'Specify DeepSeek API key (overrides .env variable)')
   .option('--grokApiKey <key>', 'Specify GROK API key (overrides .env variable)')
   .option('--togetherApiKey <key>', 'Specify Together API key (overrides .env variable)')
   .option('--fireworksApiKey <key>', 'Specify Fireworks API key (overrides .env variable)')
```
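With these flags in place, the new services are invoked like the existing ones. The test suites later in this commit exercise exactly the bare-flag forms; passing a model key from the corresponding MODELS map is an assumption based on how callDeepSeek resolves its model argument:

```sh
# Bare flag: provider's default model
npm run as -- --file "content/audio.mp3" --deepseek
npm run as -- --file "content/audio.mp3" --grok

# Optional model specification (key from DEEPSEEK_MODELS; unknown keys fall back to DEEPSEEK_CHAT)
npm run as -- --file "content/audio.mp3" --deepseek DEEPSEEK_REASONER
```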

src/llms/deepseek.ts (+69, new file)

```ts
// src/llms/deepseek.ts

/**
 * @file Provides integration with the DeepSeek API, compatible with the OpenAI Node.js SDK.
 * @packageDocumentation
 */

import { env } from 'node:process'
import { OpenAI } from 'openai'
import { err, logLLMCost } from '../utils/logging'
import { DEEPSEEK_MODELS } from '../utils/globals/llms'
import type { DeepSeekModelType } from '../utils/types/llms'

/**
 * Main function to call the DeepSeek API via an OpenAI-compatible SDK.
 *
 * @param prompt - The prompt or instructions to process.
 * @param transcript - The transcript text to be appended to the prompt.
 * @param model - (optional) The DeepSeek model to use (e.g., 'DEEPSEEK_CHAT' or 'DEEPSEEK_REASONER').
 *                Defaults to 'DEEPSEEK_CHAT'.
 * @returns A Promise that resolves with the generated text from DeepSeek.
 * @throws Will throw an error if the DEEPSEEK_API_KEY environment variable is not set, or if no valid response is returned.
 */
export async function callDeepSeek(
  prompt: string,
  transcript: string,
  model: string = 'DEEPSEEK_CHAT'
): Promise<string> {
  if (!env['DEEPSEEK_API_KEY']) {
    throw new Error('DEEPSEEK_API_KEY environment variable is not set. Please set it to your DeepSeek API key.')
  }

  const openai = new OpenAI({
    baseURL: 'https://api.deepseek.com',
    apiKey: env['DEEPSEEK_API_KEY']
  })

  try {
    const actualModel = (DEEPSEEK_MODELS[model as DeepSeekModelType] || DEEPSEEK_MODELS.DEEPSEEK_CHAT).modelId
    const combinedPrompt = `${prompt}\n${transcript}`

    const response = await openai.chat.completions.create({
      model: actualModel,
      messages: [{ role: 'user', content: combinedPrompt }]
    })

    const firstChoice = response.choices[0]
    if (!firstChoice || !firstChoice.message?.content) {
      throw new Error('No valid response received from the DeepSeek API')
    }

    const content = firstChoice.message.content

    logLLMCost({
      modelName: actualModel,
      stopReason: firstChoice.finish_reason ?? 'unknown',
      tokenUsage: {
        input: response.usage?.prompt_tokens,
        output: response.usage?.completion_tokens,
        total: response.usage?.total_tokens
      }
    })

    return content
  } catch (error) {
    err(`Error in callDeepSeek: ${(error as Error).message}`)
    throw error
  }
}
```
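A minimal call sketch (hypothetical prompt and transcript values; assumes DEEPSEEK_API_KEY is exported in the environment):

```ts
import { callDeepSeek } from './src/llms/deepseek'

// Hypothetical inputs; in the app these come from the prompt builder and the transcription step.
const prompt = 'Write show notes with three key takeaways.'
const transcript = 'Welcome back to the show. Today we cover...'

// 'DEEPSEEK_REASONER' is a DEEPSEEK_MODELS key; unknown values fall back to DEEPSEEK_CHAT.
const notes = await callDeepSeek(prompt, transcript, 'DEEPSEEK_REASONER')
console.log(notes)
```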

src/llms/grok.ts (+86, new file)

```ts
// src/llms/grok.ts

/**
 * @file Provides integration with the Grok LLM via xAI's REST API endpoint.
 * @packageDocumentation
 */

import { env } from 'node:process'
import { OpenAI } from 'openai'
import { err, logLLMCost } from '../utils/logging'
import { GROK_MODELS } from '../utils/globals/llms'
import type { GroqChatCompletionResponse, GrokModelType } from '../utils/types/llms'

/**
 * Calls the Grok API to generate a response to a prompt and transcript.
 * Uses the xAI-compatible OpenAI interface with a custom baseURL.
 *
 * @async
 * @function callGrok
 * @param {string} prompt - The prompt or instructions for Grok
 * @param {string} transcript - The transcript or additional context to process
 * @param {GrokModelType | string | { modelId: string } | boolean} [model='GROK_2_LATEST'] - The Grok model to use (defaults to GROK_2_LATEST).
 *   Note: a boolean may appear if the CLI is used like `--grok` with no model specified. We handle that by defaulting to 'grok-2-latest'.
 * @throws Will throw an error if GROK_API_KEY is not set or if the API call fails
 * @returns {Promise<string>} The generated text from Grok
 */
export async function callGrok(
  prompt: string,
  transcript: string,
  model: GrokModelType | string | { modelId: string } | boolean = 'GROK_2_LATEST'
): Promise<string> {
  if (!env['GROK_API_KEY']) {
    throw new Error('GROK_API_KEY environment variable is not set. Please set it to your xAI Grok API key.')
  }

  // Safely parse the model parameter, since it can be a string, object, or boolean.
  // A string is first treated as a GROK_MODELS key (e.g. 'GROK_2_LATEST') and mapped
  // to its concrete model ID; unknown strings pass through as raw model IDs.
  const modelId = typeof model === 'boolean'
    ? 'grok-2-latest'
    : typeof model === 'object'
      ? model?.modelId ?? 'grok-2-latest'
      : typeof model === 'string'
        ? GROK_MODELS[model as GrokModelType]?.modelId ?? model
        : 'grok-2-latest'

  const openai = new OpenAI({
    apiKey: env['GROK_API_KEY'],
    baseURL: 'https://api.x.ai/v1',
  })

  try {
    const combinedPrompt = `${prompt}\n${transcript}`

    const response = await openai.chat.completions.create({
      model: modelId,
      messages: [
        {
          role: 'user',
          content: combinedPrompt
        }
      ],
    }) as GroqChatCompletionResponse

    const firstChoice = response.choices[0]
    if (!firstChoice || !firstChoice.message?.content) {
      throw new Error('No valid response received from Grok API')
    }

    const content = firstChoice.message.content

    if (response.usage) {
      logLLMCost({
        modelName: modelId,
        stopReason: firstChoice.finish_reason ?? 'unknown',
        tokenUsage: {
          input: response.usage.prompt_tokens,
          output: response.usage.completion_tokens,
          total: response.usage.total_tokens
        }
      })
    }

    return content
  } catch (error) {
    err(`Error in callGrok: ${(error as Error).message}`)
    throw error
  }
}
```
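Since Commander hands over `true` when `--grok` is passed without a value, the normalization above has to accept several shapes. A sketch of the equivalent call sites (hypothetical inputs):

```ts
import { callGrok } from './src/llms/grok'

const transcript = 'Welcome back to the show...'

// All of these resolve to the 'grok-2-latest' model ID:
await callGrok('Summarize this.', transcript)                                // default key
await callGrok('Summarize this.', transcript, true)                         // bare --grok flag
await callGrok('Summarize this.', transcript, { modelId: 'grok-2-latest' }) // pre-resolved config object
await callGrok('Summarize this.', transcript, 'grok-2-latest')              // raw model ID passes through
```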

src/server/tests/fetch-all.ts (+18)

```diff
@@ -289,6 +289,24 @@ const requests = [
     endpoint: '/process',
     outputFiles: ['FILE_29.md'],
   },
+  {
+    data: {
+      type: 'video',
+      url: 'https://www.youtube.com/watch?v=MORMZXEaONk',
+      llm: 'deepseek',
+    },
+    endpoint: '/process',
+    outputFiles: ['FILE_30.md'],
+  },
+  {
+    data: {
+      type: 'video',
+      url: 'https://www.youtube.com/watch?v=MORMZXEaONk',
+      llm: 'grok',
+    },
+    endpoint: '/process',
+    outputFiles: ['FILE_31.md'],
+  },
   {
     data: {
       type: 'video',
```
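Each entry in `requests` becomes a POST to the server. Roughly what the new DeepSeek request looks like on the wire (the localhost:3000 address is an assumption; only the '/process' endpoint and body shape come from this diff):

```ts
// Hypothetical server address; adjust to wherever the test server listens.
const res = await fetch('http://localhost:3000/process', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    type: 'video',
    url: 'https://www.youtube.com/watch?v=MORMZXEaONk',
    llm: 'deepseek',
  }),
})
console.log(res.status, await res.json())
```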

src/utils/globals/llms.ts (+48 −4)

```diff
@@ -1,23 +1,27 @@
-// src/utils/llm-globals.ts
+// src/utils/globals/llms.ts
 
 import { callOllama } from '../../llms/ollama'
 import { callChatGPT } from '../../llms/chatgpt'
 import { callClaude } from '../../llms/claude'
 import { callGemini } from '../../llms/gemini'
 import { callCohere } from '../../llms/cohere'
 import { callMistral } from '../../llms/mistral'
+import { callDeepSeek } from '../../llms/deepseek'
+import { callGrok } from '../../llms/grok'
 import { callFireworks } from '../../llms/fireworks'
 import { callTogether } from '../../llms/together'
 import { callGroq } from '../../llms/groq'
 
 import type {
   ModelConfig,
+  OllamaModelType,
   ChatGPTModelType,
   ClaudeModelType,
   CohereModelType,
   GeminiModelType,
   MistralModelType,
-  OllamaModelType,
+  DeepSeekModelType,
+  GrokModelType,
   TogetherModelType,
   FireworksModelType,
   GroqModelType,
@@ -43,6 +47,8 @@ export const LLM_SERVICES: Record<string, LLMServiceConfig> = {
   GEMINI: { name: 'Google Gemini', value: 'gemini' },
   COHERE: { name: 'Cohere', value: 'cohere' },
   MISTRAL: { name: 'Mistral', value: 'mistral' },
+  DEEPSEEK: { name: 'DeepSeek', value: 'deepseek' },
+  GROK: { name: 'Grok', value: 'grok' },
   FIREWORKS: { name: 'Fireworks AI', value: 'fireworks' },
   TOGETHER: { name: 'Together AI', value: 'together' },
   GROQ: { name: 'Groq', value: 'groq' },
@@ -105,6 +111,8 @@ export const LLM_FUNCTIONS = {
   gemini: callGemini,
   cohere: callCohere,
   mistral: callMistral,
+  deepseek: callDeepSeek,
+  grok: callGrok,
   fireworks: callFireworks,
   together: callTogether,
   groq: callGroq,
@@ -458,17 +466,53 @@ export const GROQ_MODELS: ModelConfig<GroqModelType> = {
   },
 }
 
+/**
+ * Configuration for Grok models, mapping model types to their display names and identifiers.
+ * Pricing is hypothetical or as provided by xAI docs.
+ * @type {ModelConfig<GrokModelType>}
+ */
+export const GROK_MODELS: ModelConfig<GrokModelType> = {
+  GROK_2_LATEST: {
+    name: 'Grok 2 Latest',
+    modelId: 'grok-2-latest',
+    inputCostPer1M: 2.00,
+    outputCostPer1M: 10.00
+  },
+}
+
+/**
+ * Configuration for DeepSeek models, mapping model types to their display names and identifiers.
+ * Pricing is based on publicly listed rates for DeepSeek.
+ * @type {ModelConfig<DeepSeekModelType>}
+ */
+export const DEEPSEEK_MODELS: ModelConfig<DeepSeekModelType> = {
+  DEEPSEEK_CHAT: {
+    name: 'DeepSeek Chat',
+    modelId: 'deepseek-chat',
+    inputCostPer1M: 0.07,
+    outputCostPer1M: 1.10
+  },
+  DEEPSEEK_REASONER: {
+    name: 'DeepSeek Reasoner',
+    modelId: 'deepseek-reasoner',
+    inputCostPer1M: 0.14,
+    outputCostPer1M: 2.19
+  },
+}
+
 /**
  * All available model configurations combined
  */
 export const ALL_MODELS: { [key: string]: ModelConfigValue } = {
+  ...OLLAMA_MODELS,
   ...GPT_MODELS,
   ...CLAUDE_MODELS,
   ...GEMINI_MODELS,
   ...COHERE_MODELS,
   ...MISTRAL_MODELS,
-  ...OLLAMA_MODELS,
+  ...DEEPSEEK_MODELS,
+  ...GROK_MODELS,
   ...FIREWORKS_MODELS,
   ...TOGETHER_MODELS,
-  ...GROQ_MODELS
+  ...GROQ_MODELS,
 }
```
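The new entries slot into the existing lookup tables, so service dispatch and cost accounting need no special cases. A sketch of both (the arithmetic simply mirrors the per-1M-token fields; logLLMCost's internals are not part of this diff):

```ts
import { LLM_FUNCTIONS, DEEPSEEK_MODELS } from './src/utils/globals/llms'

// Dispatch by service value, exactly as for the existing providers.
const output = await LLM_FUNCTIONS['deepseek']('Summarize this.', 'transcript text...')

// Cost estimate from the per-1M-token rates, e.g. DeepSeek Chat:
const { inputCostPer1M, outputCostPer1M } = DEEPSEEK_MODELS.DEEPSEEK_CHAT
const cost = (12_000 / 1_000_000) * inputCostPer1M   // $0.00084 for 12k input tokens
           + (800 / 1_000_000) * outputCostPer1M     // $0.00088 for 800 output tokens
console.log(`~$${cost.toFixed(5)}`)                  // ~$0.00172
```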

src/utils/types/llms.ts (+12 −1)

```diff
@@ -18,7 +18,7 @@ export type ModelConfigValue = {
 /**
  * Options for Language Models (LLMs) that can be used in the application.
  */
-export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'ollama' | 'gemini' | 'fireworks' | 'together' | 'groq'
+export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'ollama' | 'gemini' | 'deepseek' | 'fireworks' | 'together' | 'groq' | 'grok'
 
 export type LLMServiceConfig = {
   name: string
@@ -84,11 +84,22 @@ export type TogetherModelType = 'LLAMA_3_2_3B' | 'LLAMA_3_1_405B' | 'LLAMA_3_1_7
  */
 export type GroqModelType = 'LLAMA_3_2_1B_PREVIEW' | 'LLAMA_3_2_3B_PREVIEW' | 'LLAMA_3_3_70B_VERSATILE' | 'LLAMA_3_1_8B_INSTANT' | 'MIXTRAL_8X7B_INSTRUCT'
 
+/**
+ * Available Grok models.
+ */
+export type GrokModelType = 'GROK_2_LATEST'
+
 /**
  * Local model with Ollama.
  */
 export type OllamaModelType = 'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_0B' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B'
 
+/**
+ * @typedef DeepSeekModelType
+ * Represents the possible DeepSeek model IDs used within the application.
+ */
+export type DeepSeekModelType = 'DEEPSEEK_CHAT' | 'DEEPSEEK_REASONER'
+
 // API Response Types
 /**
  * Response structure from Fireworks AI API.
```
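Widening the LLMServices union means exhaustive switches over the service value now require arms for the two new providers. A small sketch of the compile-time effect (the helper is hypothetical, not part of this commit):

```ts
import type { LLMServices } from './src/utils/types/llms'

// Hypothetical helper: maps each service to the env var its API key lives in.
function apiKeyEnvVar(service: LLMServices): string | undefined {
  switch (service) {
    case 'deepseek': return 'DEEPSEEK_API_KEY'
    case 'grok':     return 'GROK_API_KEY'
    case 'ollama':   return undefined // local model, no key needed
    default:         return `${service.toUpperCase()}_API_KEY` // assumed naming convention
  }
}
```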

test/all.test.ts (+12)

```diff
@@ -216,6 +216,18 @@ const commands = [
     expectedFile: 'audio-mistral-shownotes.md',
     newName: '42-mistral-shownotes.md'
   },
+  {
+    // process file using DeepSeek for LLM operations
+    cmd: 'npm run as -- --file "content/audio.mp3" --deepseek',
+    expectedFile: 'audio-deepseek-shownotes.md',
+    newName: '41-deepseek-shownotes.md'
+  },
+  {
+    // process file using Grok for LLM operations
+    cmd: 'npm run as -- --file "content/audio.mp3" --grok',
+    expectedFile: 'audio-grok-shownotes.md',
+    newName: '41-grok-shownotes.md'
+  },
   {
     // process file using Fireworks for LLM operations
     cmd: 'npm run as -- --file "content/audio.mp3" --fireworks',
```

test/docker.test.ts (+13 −3)

```diff
@@ -71,20 +71,30 @@ const commands = [
     expectedFile: 'audio-mistral-shownotes.md',
     newName: '14-docker-three-prompts-mistral-shownotes.md'
   },
+  {
+    cmd: 'npm run docker-cli -- --file "content/audio.mp3" --prompt titles summary shortChapters --whisper base --deepseek',
+    expectedFile: 'audio-deepseek-shownotes.md',
+    newName: '15-docker-three-prompts-deepseek-shownotes.md'
+  },
+  {
+    cmd: 'npm run docker-cli -- --file "content/audio.mp3" --prompt titles summary shortChapters --whisper base --grok',
+    expectedFile: 'audio-grok-shownotes.md',
+    newName: '15-docker-three-prompts-grok-shownotes.md'
+  },
   {
     cmd: 'npm run docker-cli -- --file "content/audio.mp3" --prompt titles summary shortChapters --whisper base --fireworks',
     expectedFile: 'audio-fireworks-shownotes.md',
-    newName: '15-docker-three-prompts-fireworks-shownotes.md'
+    newName: '16-docker-three-prompts-fireworks-shownotes.md'
   },
   {
     cmd: 'npm run docker-cli -- --file "content/audio.mp3" --prompt titles summary shortChapters --whisper base --together',
     expectedFile: 'audio-together-shownotes.md',
-    newName: '16-docker-three-prompts-together-shownotes.md'
+    newName: '17-docker-three-prompts-together-shownotes.md'
   },
   {
     cmd: 'npm run docker-cli -- --file "content/audio.mp3" --prompt titles summary shortChapters --whisper base --groq',
     expectedFile: 'audio-groq-shownotes.md',
-    newName: '17-docker-three-prompts-groq-shownotes.md'
+    newName: '18-docker-three-prompts-groq-shownotes.md'
   },
 ]
```
90100
