diff --git a/CLAUDE.md b/CLAUDE.md
index ac35958..dc49954 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -19,6 +19,21 @@ npm run tauri:dev    # Start full Tauri app in dev mode
 npm run build        # Build frontend only
 npm run tauri:build  # Build complete application
 npm run check        # Run svelte-check type checking
+npm run lint         # Run ESLint
+npm run format:check # Check Prettier formatting
+npm run format       # Fix Prettier formatting
+```
+
+### Pre-CI Checks
+
+Run these before pushing to catch CI failures locally:
+
+```bash
+npm run lint          # ESLint
+npm run format:check  # Prettier
+npm run check         # Svelte/TypeScript
+cargo fmt --manifest-path src-tauri/Cargo.toml -- --check        # Rust formatting
+cargo clippy --manifest-path src-tauri/Cargo.toml -- -D warnings # Clippy lints
 ```
 
 ### Prerequisites
diff --git a/README.md b/README.md
index e611984..66e7416 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,20 @@ npm install          # Install dependencies
 npm run tauri:dev    # Start app in dev mode
 npm run tauri:build  # Build application
 npm run check        # Type-check Svelte/TypeScript
+npm run lint         # ESLint
+npm run format:check # Check Prettier formatting
+```
+
+### Pre-CI Checks
+
+Run these before pushing to catch CI failures locally:
+
+```bash
+npm run lint          # ESLint
+npm run format:check  # Prettier
+npm run check         # Svelte/TypeScript
+cargo fmt --manifest-path src-tauri/Cargo.toml -- --check        # Rust formatting
+cargo clippy --manifest-path src-tauri/Cargo.toml -- -D warnings # Clippy lints
 ```
 
 ## Configuration
diff --git a/docs/PRD-remove-openai-provider.md b/docs/PRD-remove-openai-provider.md
new file mode 100644
index 0000000..98bbf9f
--- /dev/null
+++ b/docs/PRD-remove-openai-provider.md
@@ -0,0 +1,111 @@
+# PRD: Remove OpenAI Cloud Provider
+
+## 1. Introduction/Overview
+
+The application currently supports two cloud transcription providers: Gemini and OpenAI. This feature removes the OpenAI provider entirely to simplify the codebase. Gemini and local Whisper provide sufficient transcription coverage, making the OpenAI integration unnecessary maintenance overhead.
+
+## 2. Goals
+
+- Remove all OpenAI-specific code from the Rust backend (client, API calls, model filtering)
+- Remove OpenAI as a selectable option in the frontend settings UI
+- Silently clean up any existing OpenAI provider entries from user configuration on load
+- Preserve the existing provider abstraction (trait, pool, enum pattern) for future extensibility
+
+## 3. User Stories
+
+- As a developer, I want fewer provider implementations to maintain so that the codebase stays lean and focused.
+- As a user with an existing OpenAI configuration, I want my settings to be silently cleaned up on upgrade so that I don't see broken or unusable provider entries.
+- As a user configuring a new cloud provider, I want the UI to show only providers that are actually supported so that I'm not confused by non-functional options.
+
+## 4. Functional Requirements
+
+### 4.1 Backend — Remove OpenAI Provider Implementation
+
+1. Delete `src-tauri/src/ai/openai.rs` entirely.
+2. Remove the `pub mod openai;` declaration from the `ai` module file.
+3. Remove the `OpenAi` variant from the `ProviderType` enum in `src-tauri/src/config/schema.rs`.
+4. Remove the OpenAI match arm from the provider pool factory logic in `src-tauri/src/ai/pool.rs` (the `rebuild()` method that creates provider instances based on `provider_type`).
+5. Remove the OpenAI match arm from the `list_provider_models` command in `src-tauri/src/lib.rs`.
+6. Remove any OpenAI-specific imports throughout the backend.
+
+### 4.2 Backend — Settings Migration
+
+7. When loading settings from disk (`config/manager.rs`), filter out any `ProviderConfig` entries where `provider_type` is `"openai"`. This ensures existing user configurations are silently cleaned up.
+8. After filtering, persist the cleaned settings back to disk so the cleanup is permanent.
+
+### 4.3 Frontend — Update Provider UI
+
+9. In `src/components/ProviderConfig.svelte`, remove `"openai"` from the provider type dropdown options. The dropdown should only show `"Gemini"` but remain as a dropdown element (preserving UI structure for future providers).
+10. Remove the OpenAI default model (`"gpt-4o-mini-audio-preview"`) from any model defaults or fallback logic in the frontend.
+11. In `src/lib/types.ts`, update the `ProviderConfig.providerType` type from `'gemini' | 'openai'` to `'gemini'`.
+
+### 4.4 Cleanup
+
+12. Remove any OpenAI-related comments, documentation references, or dead code paths across the codebase.
+13. Verify that Cargo builds without warnings related to unused imports or dead code after removal.
+
+## 5. Non-Goals (Out of Scope)
+
+- Not included: Adding a replacement cloud provider (e.g., Anthropic, Deepgram).
+- Not included: Simplifying or removing the provider abstraction layer (trait, pool, round-robin). The abstraction stays as-is for future extensibility.
+- Not included: Changes to the local Whisper transcription engine.
+- Not included: User-facing notifications or migration dialogs about the removal. Cleanup is silent.
+
+## 6. Design Considerations
+
+- The provider type dropdown in the settings UI should remain visible (with only "Gemini" as an option) to preserve the UI layout and make it easy to add new providers in the future.
+- No other UI changes are needed since the provider configuration form (API key, model selection, enable/disable) is shared across provider types.
+
+## 7. Technical Considerations
+
+- **Provider pool rebuild**: After filtering out OpenAI entries from settings, the provider pool must be rebuilt. Ensure the pool handles the case where all providers have been removed (e.g., the user only had OpenAI configured); a guard sketch follows this list.
+- **Serde deserialization**: When loading settings JSON that contains `"openai"` as a `provider_type`, deserialization must not fail. Possible approaches:
+  - Filter at the JSON/serde level using a custom deserializer or `#[serde(other)]` on the enum.
+  - Deserialize leniently (e.g., keep `ProviderType` temporarily accepting `"openai"` during load, then filter in the manager).
+  - The simplest approach: deserialize the raw JSON, filter entries with `provider_type == "openai"`, then parse the remaining entries into the typed config. Choose whichever approach is cleanest; a sketch of this raw-JSON variant follows this list.
+- **Cargo dependencies**: Check whether removing OpenAI leaves any crate dependencies unused (e.g., HTTP client features or serialization helpers that Gemini does not use).
+- **Affected files summary**:
+  - `src-tauri/src/ai/openai.rs` — delete
+  - `src-tauri/src/ai/mod.rs` — remove module declaration
+  - `src-tauri/src/ai/pool.rs` — remove OpenAI factory arm
+  - `src-tauri/src/config/schema.rs` — remove `OpenAi` enum variant
+  - `src-tauri/src/config/manager.rs` — add migration filter
+  - `src-tauri/src/lib.rs` — remove OpenAI match arm in `list_provider_models`
+  - `src/lib/types.ts` — update union type
+  - `src/components/ProviderConfig.svelte` — remove OpenAI from dropdown and defaults
+
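+Two minimal sketches for the considerations above (illustrative only, not the final implementation). First, the raw-JSON settings filter, mirroring the `config/manager.rs` change in this PR (error handling elided):
+
+```rust
+// Sketch: drop "openai" entries from the raw settings JSON, then
+// deserialize what remains into the typed config.
+let mut raw: serde_json::Value = serde_json::from_str(&contents)?;
+if let Some(providers) = raw.get_mut("providers").and_then(|v| v.as_array_mut()) {
+    providers.retain(|p| p.get("providerType").and_then(|v| v.as_str()) != Some("openai"));
+}
+let settings: AppSettings = serde_json::from_value(raw)?;
+```
+
+Second, a fail-fast guard for the all-providers-removed case (the `providers` field matches `pool.rs`; where exactly this check lives is an assumption):
+
+```rust
+// Sketch: bail out early when the rebuilt pool is empty.
+if self.providers.is_empty() {
+    return Err(AppError::Transcription(
+        "No transcription providers configured".to_string(),
+    ));
+}
+```
+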
+## 8. Success Metrics
+
+- `npm run tauri:build` completes without errors or warnings related to OpenAI.
+- `npm run check` passes with no type errors.
+- Existing user settings files containing OpenAI providers are silently cleaned on first load after upgrade.
+- The provider pool functions correctly with only Gemini providers configured.
+- The settings UI shows only "Gemini" in the provider type dropdown.
+
+## 9. Open Questions
+
+- [x] Are there any OpenAI-specific Cargo dependencies that can be removed to reduce binary size? -> No, all dependencies are still needed.
+- [x] Should the settings migration (filtering OpenAI entries) log a debug/info message for troubleshooting, or be completely silent? -> Completely silent.
diff --git a/docs/PRD-whisper-local-transcription.md b/docs/PRD-whisper-local-transcription.md
index f954bc5..41ba33a 100644
--- a/docs/PRD-whisper-local-transcription.md
+++ b/docs/PRD-whisper-local-transcription.md
@@ -2,7 +2,7 @@
 
 ## 1. Introduction/Overview
 
-Pisum Transcript currently relies exclusively on cloud AI providers (Gemini, OpenAI) for speech-to-text transcription. This feature adds offline, local transcription using whisper.cpp via the `whisper-rs` Rust crate. Users will be able to transcribe speech entirely on-device without an internet connection or API costs.
+Pisum Transcript currently relies exclusively on a cloud AI provider (Gemini) for speech-to-text transcription. This feature adds offline, local transcription using whisper.cpp via the `whisper-rs` Rust crate. Users will be able to transcribe speech entirely on-device without an internet connection or API costs.
 
 Whisper runs as a separate "local engine" mode distinct from the cloud provider pool. Users choose either local Whisper transcription or cloud-based transcription — they are not mixed in the round-robin provider pool.
 
@@ -32,7 +32,7 @@
 
 1. The system must provide a setting to choose between "Local (Whisper)" and "Cloud" transcription modes.
 2. When "Local (Whisper)" mode is selected, the system must bypass the cloud provider pool entirely and use the local Whisper engine for all transcription.
-3. When "Cloud" mode is selected, the system must use the existing cloud provider pool (Gemini/OpenAI) as it does today.
+3. When "Cloud" mode is selected, the system must use the existing cloud provider pool (Gemini) as it does today.
 4. The system must persist the selected transcription mode in settings.
 5. The system must prevent transcription attempts in Local mode if no model is downloaded, and display a clear message directing the user to download a model.
 
diff --git a/src-tauri/src/ai/mod.rs b/src-tauri/src/ai/mod.rs
index d4c2d64..5320067 100644
--- a/src-tauri/src/ai/mod.rs
+++ b/src-tauri/src/ai/mod.rs
@@ -1,5 +1,4 @@
 pub mod gemini;
-pub mod openai;
 pub mod pool;
 pub mod provider;
 pub mod whisper;
diff --git a/src-tauri/src/ai/openai.rs b/src-tauri/src/ai/openai.rs
deleted file mode 100644
index 2abd0bd..0000000
--- a/src-tauri/src/ai/openai.rs
+++ /dev/null
@@ -1,384 +0,0 @@
-//! OpenAI Chat Completions API client implementation
-
-use base64::Engine;
-use reqwest::Client;
-use serde::{Deserialize, Serialize};
-use std::time::Duration;
-
-use super::gemini::ModelInfo;
-use super::provider::{TranscriptionProvider, TranscriptionResult};
-use crate::error::AppError;
-
-const OPENAI_API_BASE: &str = "https://api.openai.com/v1";
-const DEFAULT_MODEL: &str = "gpt-4o-mini-audio-preview";
-const MAX_RETRIES: u32 = 3;
-const RETRY_DELAY_MS: u64 = 1000;
-
-/// Known audio-capable model prefixes for Chat Completions with input_audio support
-const AUDIO_MODEL_PREFIXES: &[&str] = &["gpt-4o-audio-preview", "gpt-4o-mini-audio-preview"];
-
-/// Fallback models when API listing fails
-const FALLBACK_MODELS: &[(&str, &str)] = &[
-    ("gpt-4o-audio-preview", "GPT-4o Audio Preview"),
-    ("gpt-4o-mini-audio-preview", "GPT-4o Mini Audio Preview"),
-];
-
-pub struct OpenAiProvider {
-    client: Client,
-    api_key: String,
-    model: String,
-}
-
-impl OpenAiProvider {
-    pub fn new(api_key: String, model: Option<String>) -> Self {
-        Self {
-            client: Client::new(),
-            api_key,
-            model: model.unwrap_or_else(|| DEFAULT_MODEL.to_string()),
-        }
-    }
-
-    /// Map MIME type to OpenAI input_audio format
-    fn audio_format(mime_type: &str) -> &'static str {
-        match mime_type {
-            "audio/wav" => "wav",
-            // Opus/OGG not directly supported in input_audio; use mp3 per PRD
-            _ => "mp3",
-        }
-    }
-
-    fn is_retryable_error(status: reqwest::StatusCode, body: &str) -> bool {
-        let lower = body.to_lowercase();
-        status.as_u16() == 429
-            || status.as_u16() == 503
-            || lower.contains("rate limit")
-            || lower.contains("overloaded")
-    }
-
-    fn extract_text(response: ChatCompletionResponse) -> Result<String, AppError> {
-        let text = response
-            .choices
-            .into_iter()
-            .next()
-            .and_then(|c| c.message.content)
-            .ok_or_else(|| AppError::Transcription("No response generated by AI".to_string()))?;
-
-        if text.trim().is_empty() {
-            return Err(AppError::Transcription(
-                "AI returned an empty response".to_string(),
-            ));
-        }
-
-        Ok(text)
-    }
-
-    fn extract_error(body: &str) -> String {
-        if let Ok(err_resp) = serde_json::from_str::<OpenAiErrorResponse>(body) {
-            err_resp.error.message
-        } else {
-            body[..body.len().min(200)].to_string()
-        }
-    }
-
-    /// Execute a request with retry logic
-    async fn execute_with_retry(
-        &self,
-        request: &ChatCompletionRequest,
-    ) -> Result<String, AppError> {
-        let mut last_error = None;
-        let url = format!("{}/chat/completions", OPENAI_API_BASE);
-
-        for attempt in 0..MAX_RETRIES {
-            let response = match self
-                .client
-                .post(&url)
-                .header("Authorization", format!("Bearer {}", self.api_key))
-                .json(request)
-                .send()
-                .await
-            {
-                Ok(resp) => resp,
-                Err(e) => {
-                    last_error = Some(AppError::Transcription(format!("Request failed: {}", e)));
-                    if attempt < MAX_RETRIES - 1 {
-                        tokio::time::sleep(Duration::from_millis(
-                            RETRY_DELAY_MS * (attempt as u64 + 1),
-                        ))
-                        .await;
-                        continue;
-                    }
-                    break;
-                }
-            };
-
-            let status = response.status();
-            let body = response.text().await.unwrap_or_else(|_| "{}".to_string());
-
-            if Self::is_retryable_error(status, &body) {
-                last_error = Some(AppError::Transcription(format!(
-                    "API error {}: {}",
-                    status,
-                    Self::extract_error(&body)
-                )));
-
-                if attempt < MAX_RETRIES - 1 {
-                    tokio::time::sleep(Duration::from_millis(
-                        RETRY_DELAY_MS * (attempt as u64 + 1),
-                    ))
-                    .await;
-                    continue;
-                }
-                break;
-            }
-
-            if !status.is_success() {
-                return Err(AppError::Transcription(format!(
-                    "API error {}: {}",
-                    status,
-                    Self::extract_error(&body)
-                )));
-            }
-
-            let chat_response: ChatCompletionResponse = serde_json::from_str(&body)
-                .map_err(|e| AppError::Transcription(format!("Failed to parse response: {}", e)))?;
-
-            return Self::extract_text(chat_response);
-        }
-
-        Err(last_error
-            .unwrap_or_else(|| AppError::Transcription("Request failed after retries".to_string())))
-    }
-
-    /// List available audio-capable OpenAI models
-    pub async fn list_models(api_key: &str) -> Result<Vec<ModelInfo>, AppError> {
-        let url = format!("{}/models", OPENAI_API_BASE);
-        let client = Client::new();
-        let response = client
-            .get(&url)
-            .header("Authorization", format!("Bearer {}", api_key))
-            .send()
-            .await;
-
-        let response = match response {
-            Ok(resp) if resp.status().is_success() => resp,
-            _ => {
-                // Fallback to hardcoded list
-                return Ok(FALLBACK_MODELS
-                    .iter()
-                    .map(|(id, name)| ModelInfo {
-                        id: id.to_string(),
-                        display_name: name.to_string(),
-                    })
-                    .collect());
-            }
-        };
-
-        let body: ModelsListResponse = match response.json().await {
-            Ok(b) => b,
-            Err(_) => {
-                return Ok(FALLBACK_MODELS
-                    .iter()
-                    .map(|(id, name)| ModelInfo {
-                        id: id.to_string(),
-                        display_name: name.to_string(),
-                    })
-                    .collect());
-            }
-        };
-
-        let mut models: Vec<ModelInfo> = body
-            .data
-            .into_iter()
-            .filter(|m| {
-                AUDIO_MODEL_PREFIXES
-                    .iter()
-                    .any(|prefix| m.id.starts_with(prefix))
-            })
-            .map(|m| ModelInfo {
-                display_name: humanize_model_id(&m.id),
-                id: m.id,
-            })
-            .collect();
-
-        if models.is_empty() {
-            // Fallback if no audio models found
-            models = FALLBACK_MODELS
-                .iter()
-                .map(|(id, name)| ModelInfo {
-                    id: id.to_string(),
-                    display_name: name.to_string(),
-                })
-                .collect();
-        }
-
-        models.sort_by(|a, b| a.id.cmp(&b.id));
-        Ok(models)
-    }
-}
-
-/// Humanize an OpenAI model ID into a display name
-fn humanize_model_id(id: &str) -> String {
-    id.split('-')
-        .map(|part| {
-            if part.chars().all(|c| c.is_ascii_digit()) {
-                // Date parts stay as-is
-                part.to_string()
-            } else {
-                let mut chars = part.chars();
-                match chars.next() {
-                    Some(c) => c.to_uppercase().to_string() + chars.as_str(),
-                    None => String::new(),
-                }
-            }
-        })
-        .collect::<Vec<_>>()
-        .join(" ")
-}
-
-// --- Request / Response types ---
-
-#[derive(Serialize)]
-struct ChatCompletionRequest {
-    model: String,
-    temperature: f32,
-    max_tokens: u32,
-    messages: Vec<Message>,
-}
-
-#[derive(Serialize)]
-struct Message {
-    role: String,
-    content: MessageContent,
-}
-
-#[derive(Serialize)]
-#[serde(untagged)]
-enum MessageContent {
-    Text(String),
-    Parts(Vec<ContentPart>),
-}
-
-#[derive(Serialize)]
-#[serde(tag = "type")]
-enum ContentPart {
-    #[serde(rename = "input_audio")]
-    InputAudio { input_audio: InputAudio },
-}
-
-#[derive(Serialize)]
-struct InputAudio {
-    data: String,
-    format: String,
-}
-
-#[derive(Deserialize)]
-struct ChatCompletionResponse {
-    choices: Vec<Choice>,
-}
-
-#[derive(Deserialize)]
-struct Choice {
-    message: ResponseMessage,
-}
-
-#[derive(Deserialize)]
-struct ResponseMessage {
-    content: Option<String>,
-}
-
-#[derive(Deserialize)]
-struct OpenAiErrorResponse {
-    error: OpenAiError,
-}
-
-#[derive(Deserialize)]
-struct OpenAiError {
-    message: String,
-}
-
-// --- Model listing types ---
-
-#[derive(Deserialize)]
-struct ModelsListResponse {
-    data: Vec<ModelEntry>,
-}
-
-#[derive(Deserialize)]
-struct ModelEntry {
-    id: String,
-}
-
-// --- Trait implementation ---
-
-impl TranscriptionProvider for OpenAiProvider {
-    fn transcribe(
-        &self,
-        audio_data: &[u8],
-        mime_type: &str,
-        system_prompt: &str,
-    ) -> std::pin::Pin<
-        Box<dyn std::future::Future<Output = Result<TranscriptionResult, AppError>> + Send + '_>,
-    > {
-        let audio_base64 = base64::engine::general_purpose::STANDARD.encode(audio_data);
-        let format = Self::audio_format(mime_type).to_string();
-        let system_prompt = system_prompt.to_string();
-
-        Box::pin(async move {
-            let request = ChatCompletionRequest {
-                model: self.model.clone(),
-                temperature: 0.1,
-                max_tokens: 8192,
-                messages: vec![
-                    Message {
-                        role: "system".to_string(),
-                        content: MessageContent::Text(system_prompt),
-                    },
-                    Message {
-                        role: "user".to_string(),
-                        content: MessageContent::Parts(vec![ContentPart::InputAudio {
-                            input_audio: InputAudio {
-                                data: audio_base64,
-                                format,
-                            },
-                        }]),
-                    },
-                ],
-            };
-
-            let text = self.execute_with_retry(&request).await?;
-            Ok(TranscriptionResult { text })
-        })
-    }
-
-    fn test_connection(
-        &self,
-    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<bool, AppError>> + Send + '_>>
-    {
-        Box::pin(async move {
-            let url = format!("{}/models", OPENAI_API_BASE);
-            let response = self
-                .client
-                .get(&url)
-                .header("Authorization", format!("Bearer {}", self.api_key))
-                .send()
-                .await
-                .map_err(|e| AppError::Transcription(format!("Connection test failed: {}", e)))?;
-
-            if response.status().is_success() {
-                Ok(true)
-            } else {
-                let status = response.status();
-                let body = response.text().await.unwrap_or_default();
-                Err(AppError::Transcription(format!(
-                    "Authentication failed ({}): {}",
-                    status,
-                    Self::extract_error(&body)
-                )))
-            }
-        })
-    }
-
-    fn provider_name(&self) -> &str {
-        "OpenAI"
-    }
-}
diff --git a/src-tauri/src/ai/pool.rs b/src-tauri/src/ai/pool.rs
index 82d3364..02118f5 100644
--- a/src-tauri/src/ai/pool.rs
+++ b/src-tauri/src/ai/pool.rs
@@ -5,7 +5,6 @@ use std::sync::atomic::{AtomicUsize, Ordering};
 use tracing::{debug, error, info, warn};
 
 use super::gemini::GeminiProvider;
-use super::openai::OpenAiProvider;
 use super::provider::{TranscriptionProvider, TranscriptionResult};
 use crate::error::AppError;
 
@@ -43,10 +42,6 @@ impl ProviderPool {
                 let provider = GeminiProvider::new(entry.api_key.clone(), entry.model.clone());
                 self.providers.push(Box::new(provider));
             }
-            "openai" | "OpenAi" => {
-                let provider = OpenAiProvider::new(entry.api_key.clone(), entry.model.clone());
-                self.providers.push(Box::new(provider));
-            }
             _ => {}
         }
     }
@@ -105,10 +100,6 @@ impl ProviderPool {
                 let provider = GeminiProvider::new(entry.api_key.clone(), entry.model.clone());
                 provider.test_connection().await
             }
-            "openai" | "OpenAi" => {
-                let provider = OpenAiProvider::new(entry.api_key.clone(), entry.model.clone());
-                provider.test_connection().await
-            }
             other => Err(AppError::Transcription(format!(
                 "Unknown provider type: {}",
                 other
diff --git a/src-tauri/src/config/manager.rs b/src-tauri/src/config/manager.rs
index be60702..39cd30c 100644
--- a/src-tauri/src/config/manager.rs
+++ b/src-tauri/src/config/manager.rs
@@ -39,9 +39,31 @@ pub fn load_settings() -> Result<AppSettings, AppError> {
     let contents = std::fs::read_to_string(&path)
         .map_err(|e| AppError::Config(format!("Failed to read settings: {}", e)))?;
 
-    let mut settings: AppSettings = serde_json::from_str(&contents)
+    let mut raw: serde_json::Value = serde_json::from_str(&contents)
         .map_err(|e| AppError::Config(format!("Failed to parse settings: {}", e)))?;
 
+    // Silently filter out removed provider types (e.g., OpenAI)
+    let needs_migration =
+        if let Some(providers) = raw.get_mut("providers").and_then(|v| v.as_array_mut()) {
+            let original_len = providers.len();
+            providers.retain(|p| {
+                p.get("providerType")
+                    .and_then(|v| v.as_str())
+                    .map(|t| t != "openai")
+                    .unwrap_or(true)
+            });
+            providers.len() != original_len
+        } else {
+            false
+        };
+
+    let mut settings: AppSettings = serde_json::from_value(raw)
+        .map_err(|e| AppError::Config(format!("Failed to parse settings: {}", e)))?;
+
+    if needs_migration {
+        save_settings(&settings)?;
+    }
+
     // Merge built-in presets: add any that are missing
     let builtins = get_builtin_presets();
     for builtin in &builtins {
diff --git a/src-tauri/src/config/schema.rs b/src-tauri/src/config/schema.rs
index 10f8f5c..f46826e 100644
--- a/src-tauri/src/config/schema.rs
+++ b/src-tauri/src/config/schema.rs
@@ -89,8 +89,6 @@ pub enum RecordingMode {
 #[serde(rename_all = "lowercase")]
 pub enum ProviderType {
     Gemini,
-    #[serde(rename = "openai")]
-    OpenAi,
 }
 
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 3ed9a0b..897eb58 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -11,7 +11,6 @@ mod whisper;
 use std::sync::RwLock;
 
 use ai::gemini::{GeminiProvider, ModelInfo};
-use ai::openai::OpenAiProvider;
 use ai::pool::{ProviderEntry, ProviderPool};
 use config::schema::{AppSettings, Preset, ProviderConfig, TranscriptionMode};
 use hotkey::conflict::HotkeyBinding;
@@ -117,7 +116,6 @@ async fn set_autostart(enabled: bool, app: AppHandle) -> Result<(), String> {
 async fn test_provider_connection(provider: ProviderConfig) -> Result<bool, String> {
     let provider_type_str = match provider.provider_type {
         config::schema::ProviderType::Gemini => "gemini",
-        config::schema::ProviderType::OpenAi => "openai",
     };
     let entry = ProviderEntry {
         api_key: provider.api_key,
@@ -139,9 +137,6 @@ async fn list_provider_models(
     provider_type: String,
     api_key: String,
 ) -> Result<Vec<ModelInfo>, String> {
     match provider_type.as_str() {
         "gemini" => GeminiProvider::list_models(&api_key)
             .await
             .map_err(|e| e.to_string()),
-        "openai" => OpenAiProvider::list_models(&api_key)
-            .await
-            .map_err(|e| e.to_string()),
         _ => Err(format!("Unknown provider type: {}", provider_type)),
     }
 }
@@ -453,7 +448,6 @@
         .map(|p| {
             let provider_type_str = match p.provider_type {
                 config::schema::ProviderType::Gemini => "gemini",
-                config::schema::ProviderType::OpenAi => "openai",
             }
             .to_string();
             ProviderEntry {
@@ -571,7 +565,6 @@ pub fn run() {
             .map(|p| {
                 let provider_type_str = match p.provider_type {
                     config::schema::ProviderType::Gemini => "gemini",
-                    config::schema::ProviderType::OpenAi => "openai",
                 }
                 .to_string();
                 ProviderEntry {
diff --git a/src/components/ProviderConfig.svelte b/src/components/ProviderConfig.svelte
index c327edc..94e9a32 100644
--- a/src/components/ProviderConfig.svelte
+++ b/src/components/ProviderConfig.svelte
@@ -114,7 +114,7 @@
 {#if !provider.enabled}
   Disabled
-[markup lost in extraction: provider-type dropdown options including "openai"]
+[markup lost in extraction: provider-type dropdown options, "gemini" only]
@@ -193,11 +192,7 @@
   disabled={!provider.apiKey}
   class="flex-1 rounded-lg border border-gray-200 bg-white px-3 py-1.5 text-sm focus:border-blue-400 focus:outline-none focus:ring-2 focus:ring-blue-300 disabled:bg-gray-50 disabled:text-gray-400"
 >
-[markup lost in extraction: OpenAI model option entries]
+[markup lost in extraction: Gemini-only model option entries]
 {#if loadingModels[provider.id]}
 {/if}
diff --git a/src/lib/types.ts b/src/lib/types.ts
index e649267..2441f87 100644
--- a/src/lib/types.ts
+++ b/src/lib/types.ts
@@ -22,7 +22,7 @@ export interface Preset {
 
 export interface ProviderConfig {
   id: string;
-  providerType: 'gemini' | 'openai';
+  providerType: 'gemini';
   apiKey: string;
   model: string | null;
   enabled: boolean;