diff --git a/docs/IMPL-whisper-local-transcription.md b/docs/IMPL-whisper-local-transcription.md deleted file mode 100644 index b5d66df..0000000 --- a/docs/IMPL-whisper-local-transcription.md +++ /dev/null @@ -1,1141 +0,0 @@ -# Implementation Plan: Local Whisper Transcription Support - -> Generated from: `docs/PRD-whisper-local-transcription.md` -> Date: 2026-03-22 - -## 1. Overview - -This plan adds offline speech-to-text transcription via whisper.cpp (`whisper-rs` crate) as an alternative to the existing cloud provider pool (Gemini/OpenAI). Users select a transcription mode — Local (Whisper) or Cloud — and the system routes audio through the appropriate pipeline. - -The Whisper engine is **not** part of the round-robin `ProviderPool`. It operates as a separate code path gated by `TranscriptionMode` in settings. The audio pipeline branches early: cloud mode encodes to Opus/WAV, while local mode resamples raw PCM to 16 kHz mono and passes it directly to whisper-rs. - -Key integration points: -- `config/schema.rs` — new `TranscriptionMode` enum and `WhisperConfig` struct -- `hotkey/manager.rs` — `process_and_transcribe()` branches on mode -- New `ai/whisper.rs` module — WhisperContext lifecycle, inference -- New `whisper/` module — model download/management -- New `WhisperConfig.svelte` — settings UI for local mode - -## 2. 
Architecture & Design - -### Data Flow - -``` - ┌─────────────────────────────────┐ - │ AudioRecorderHandle::stop() │ - │ → (Vec, sample_rate, ch) │ - └───────────────┬─────────────────┘ - │ - ┌─────────────▼──────────────┐ - │ TranscriptionMode check │ - └─────┬───────────────┬──────┘ - │ │ - Cloud mode │ │ Local mode - │ │ - ┌─────────────▼─────┐ ┌─────▼──────────────┐ - │ encode_to_opus() │ │ resample_for_ │ - │ or encode_to_wav()│ │ whisper() │ - │ → (bytes, mime) │ │ → Vec 16kHz │ - └────────┬──────────┘ └─────┬──────────────┘ - │ │ - ┌────────▼──────────┐ ┌─────▼──────────────┐ - │ ProviderPool │ │ WHISPER_ENGINE │ - │ .transcribe() │ │ .transcribe() │ - └────────┬──────────┘ └─────┬──────────────┘ - │ │ - └───────┬────────────┘ - │ - ┌──────▼──────┐ - │ paste text │ - └─────────────┘ -``` - -### Module Boundaries - -| Module | Responsibility | -|--------|---------------| -| `ai/whisper.rs` | WhisperContext lifecycle, inference wrapper, language config | -| `whisper/mod.rs` | Model registry (tiers, filenames, URLs, sizes) | -| `whisper/download.rs` | HTTP download with progress events, cancellation, cleanup | -| `whisper/models.rs` | Model discovery on disk, integrity checks, deletion | -| `audio/encoder.rs` | New `resample_for_whisper()` function (reuses rubato) | -| `config/schema.rs` | `TranscriptionMode`, `WhisperConfig`, `WhisperLanguage` | - -### Global State - -```rust -// Existing -pub static PROVIDER_POOL: Lazy> = ...; -pub static SETTINGS: Lazy> = ...; - -// New -pub static WHISPER_ENGINE: Lazy>> = ...; -``` - -`WhisperEngine` wraps a `WhisperContext` and is `Option` because it's only loaded when Local mode is active and a transcription is first requested. Set to `None` when switching to Cloud mode (Req #41). - -## 3. Phases & Milestones - -### Phase 1: Configuration & Schema -**Goal:** Extend settings to support transcription mode selection and Whisper configuration. 
-**Deliverable:** Settings file can store and load `transcription_mode`, `whisper_config`. Existing users' settings deserialize without error (serde defaults). - -### Phase 2: Audio Pipeline for Whisper -**Goal:** Add a resampling path that produces 16 kHz mono f32 samples from raw recorder output. -**Deliverable:** `resample_for_whisper()` function, tested independently. - -### Phase 3: Whisper Engine Integration -**Goal:** Load a GGML model, run inference, return transcribed text. -**Deliverable:** `WhisperEngine` struct with `transcribe()` method. Verifiable with a hardcoded model path and test audio. - -### Phase 4: Transcription Pipeline Branching -**Goal:** Route audio through cloud or local path based on `TranscriptionMode`. -**Deliverable:** End-to-end transcription works in both modes via hotkey. - -### Phase 5: Model Management -**Goal:** Download, verify, list, and delete GGML models. IPC commands for frontend. -**Deliverable:** All model management IPC commands functional, progress events emitted. - -### Phase 6: Settings UI -**Goal:** Mode toggle, Whisper configuration panel, model management UI. -**Deliverable:** Complete settings UI for both modes. - -## 4. Files Overview - -### Files to Create - -| File Path | Purpose | -|-----------|---------| -| `src-tauri/src/ai/whisper.rs` | WhisperEngine: context lifecycle, inference, language config | -| `src-tauri/src/whisper/mod.rs` | Module root, model tier registry, re-exports | -| `src-tauri/src/whisper/download.rs` | HTTP model download with progress events and cancellation | -| `src-tauri/src/whisper/models.rs` | On-disk model discovery, integrity verification, deletion | -| `src/components/WhisperConfig.svelte` | Whisper settings panel (model management, language, status) | -| `src/components/ModeToggle.svelte` | Transcription mode toggle (Local vs. 
Cloud) | - -### Files to Modify - -| File Path | What Changes | -|-----------|-------------| -| `src-tauri/Cargo.toml` | Add `whisper-rs` with platform-conditional features | -| `src-tauri/src/config/schema.rs` | Add `TranscriptionMode`, `WhisperConfig`, `WhisperLanguage` | -| `src-tauri/src/config/manager.rs` | No structural changes (serde defaults handle new fields) | -| `src-tauri/src/audio/encoder.rs` | Add `resample_for_whisper()` public function | -| `src-tauri/src/ai/mod.rs` | Add `pub mod whisper;` | -| `src-tauri/src/lib.rs` | Add `WHISPER_ENGINE` global, new IPC commands, update `apply_settings()` | -| `src-tauri/src/hotkey/manager.rs` | Branch `process_and_transcribe()` on transcription mode | -| `src-tauri/src/error.rs` | Add `ModelDownload(String)` variant | -| `src-tauri/src/main.rs` | Register new IPC commands | -| `src-tauri/src/tray.rs` | No changes expected (existing notification system suffices) | -| `src/lib/types.ts` | Add `TranscriptionMode`, `WhisperConfig`, `ModelInfo` types | -| `src/lib/commands.ts` | Add model management command wrappers | -| `src/App.svelte` (or settings layout) | Add mode toggle, conditional panel rendering | -| `src/components/ProviderConfig.svelte` | Wrap in conditional (only shown in Cloud mode) | - -## 5. 
Task Breakdown - -### Phase 1: Configuration & Schema - -#### Task 1.1: Add Whisper-related types to config schema - -- **Files to modify:** - - `src-tauri/src/config/schema.rs` — add new types -- **Implementation details:** - - ```rust - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - #[serde(rename_all = "camelCase")] - pub enum TranscriptionMode { - Cloud, - Local, - } - - impl Default for TranscriptionMode { - fn default() -> Self { - TranscriptionMode::Cloud // Req #40 - } - } - - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - #[serde(rename_all = "camelCase")] - pub enum WhisperLanguage { - Auto, - German, - English, - } - - impl Default for WhisperLanguage { - fn default() -> Self { - WhisperLanguage::Auto - } - } - - #[derive(Debug, Clone, Serialize, Deserialize)] - #[serde(rename_all = "camelCase")] - pub struct WhisperConfig { - #[serde(default = "default_whisper_model")] - pub selected_model: String, // e.g. "large-v3-turbo" - #[serde(default)] - pub language: WhisperLanguage, - } - - fn default_whisper_model() -> String { - "large-v3-turbo".to_string() - } - ``` - - Add to `AppSettings`: - ```rust - #[serde(default)] - pub transcription_mode: TranscriptionMode, - #[serde(default)] - pub whisper_config: WhisperConfig, - ``` - -- **Dependencies:** None -- **Acceptance criteria:** Existing settings files deserialize without error. New fields appear with defaults when serialized. `TranscriptionMode` defaults to `Cloud`. 
- -#### Task 1.2: Add frontend types for Whisper configuration - -- **Files to modify:** - - `src/lib/types.ts` — add TypeScript types -- **Implementation details:** - - ```typescript - export type TranscriptionMode = 'cloud' | 'local'; - export type WhisperLanguage = 'auto' | 'german' | 'english'; - - export interface WhisperConfig { - selectedModel: string; - language: WhisperLanguage; - } - - export interface WhisperModelInfo { - id: string; - name: string; - fileName: string; - sizeBytes: number; - description: string; - downloaded: boolean; - fileSizeOnDisk: number | null; - } - - export interface WhisperStatus { - state: 'ready' | 'loading' | 'noModel' | 'notActive'; - loadedModel: string | null; - } - ``` - - Add to `AppSettings`: - ```typescript - transcriptionMode: TranscriptionMode; - whisperConfig: WhisperConfig; - ``` - -- **Dependencies:** Task 1.1 (types must match Rust schema) -- **Acceptance criteria:** Types compile. `AppSettings` interface matches the Rust `AppSettings` serde output. - -### Phase 2: Audio Pipeline for Whisper - -#### Task 2.1: Add `resample_for_whisper()` to encoder module - -- **Files to modify:** - - `src-tauri/src/audio/encoder.rs` — add public function -- **Implementation details:** - - Reuse the existing rubato `SincFixedIn` resampler pattern from `encode_to_opus()` (lines ~75-135). The new function: - 1. Mix down to mono if multi-channel (average all channels) - 2. Resample from source rate to 16000 Hz using rubato sinc interpolation - 3. Return `Vec` samples - - ```rust - /// Resamples raw audio to 16 kHz mono f32 for Whisper inference. 
- pub fn resample_for_whisper( - samples: &[f32], - sample_rate: u32, - channels: u16, - ) -> Result<Vec<f32>, AppError> { - // Step 1: Mix to mono - let mono = if channels > 1 { - samples - .chunks(channels as usize) - .map(|frame| frame.iter().sum::<f32>() / channels as f32) - .collect::<Vec<f32>>() - } else { - samples.to_vec() - }; - - // Step 2: Resample to 16 kHz - if sample_rate == 16000 { - return Ok(mono); - } - - let params = SincInterpolationParameters { - sinc_len: 256, - f_cutoff: 0.95, - interpolation: SincInterpolationType::Linear, - oversampling_factor: 256, - window: WindowFunction::BlackmanHarris2, - }; - let mut resampler = SincFixedIn::<f32>::new( - 16000.0 / sample_rate as f64, - 2.0, - params, - mono.len(), - 1, // mono - ).map_err(|e| AppError::Audio(format!("Resampler init failed: {e}")))?; - - let resampled = resampler - .process(&[&mono], None) - .map_err(|e| AppError::Audio(format!("Resampling failed: {e}")))?; - - Ok(resampled.into_iter().next().unwrap_or_default()) - } - ``` - -- **Dependencies:** None -- **Acceptance criteria:** Given 48 kHz stereo f32 samples, returns 16 kHz mono f32 samples. Given 16 kHz mono input, returns input unchanged. - -### Phase 3: Whisper Engine Integration - -#### Task 3.1: Add `whisper-rs` dependency with platform features - -- **Files to modify:** - - `src-tauri/Cargo.toml` — add conditional dependencies -- **Implementation details:** - - ```toml - [target.'cfg(target_os = "macos")'.dependencies] - whisper-rs = { version = "0.16", features = ["metal"] } - - [target.'cfg(target_os = "windows")'.dependencies] - whisper-rs = { version = "0.16", features = ["vulkan"] } - ``` - -- **Dependencies:** None -- **Acceptance criteria:** `cargo check` succeeds on the build platform. 
- -#### Task 3.2: Create `WhisperEngine` struct and inference logic - -- **Files to create:** - - `src-tauri/src/ai/whisper.rs` — engine implementation -- **Files to modify:** - - `src-tauri/src/ai/mod.rs` — add `pub mod whisper;` -- **Implementation details:** - - ```rust - use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy}; - use crate::error::AppError; - - pub struct WhisperEngine { - ctx: WhisperContext, - loaded_model_id: String, - } - - impl WhisperEngine { - pub fn load(model_path: &std::path::Path, model_id: &str) -> Result { - let params = WhisperContextParameters::default(); - let ctx = WhisperContext::new_with_params( - model_path.to_str().ok_or_else(|| AppError::Transcription("Invalid model path".into()))?, - params, - ).map_err(|e| AppError::Transcription(format!("Failed to load Whisper model: {e}")))?; - - Ok(Self { ctx, loaded_model_id: model_id.to_string() }) - } - - pub fn transcribe( - &self, - samples: &[f32], - language: &str, // "auto", "de", "en" - ) -> Result { - let mut state = self.ctx.create_state() - .map_err(|e| AppError::Transcription(format!("Failed to create Whisper state: {e}")))?; - - let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - - // Req #15, #16: translate task for English output - params.set_translate(true); - params.set_no_timestamps(true); - params.set_print_special(false); - params.set_print_progress(false); - params.set_print_realtime(false); - - // Req #17: language hint - if language != "auto" { - params.set_language(Some(language)); - } - - state.full(params, samples) - .map_err(|e| AppError::Transcription(format!("Whisper inference failed: {e}")))?; - - // Req #18: extract and concatenate all segments - let num_segments = state.full_n_segments() - .map_err(|e| AppError::Transcription(format!("Failed to get segments: {e}")))?; - - let mut text = String::new(); - for i in 0..num_segments { - if let Ok(segment) = state.full_get_segment_text(i) { - 
text.push_str(segment.trim()); - if i < num_segments - 1 { - text.push(' '); - } - } - } - - Ok(text.trim().to_string()) - } - - pub fn loaded_model_id(&self) -> &str { - &self.loaded_model_id - } - } - ``` - - Note: `WhisperEngine` does **not** implement `TranscriptionProvider`. It has its own `transcribe(samples, language)` signature that accepts raw f32 PCM, not encoded bytes. This matches the PRD's design of keeping Whisper separate from the cloud provider pool. - -- **Dependencies:** Task 3.1 (whisper-rs available), Task 2.1 (resampled audio format understood) -- **Acceptance criteria:** Given a valid GGML model path and 16 kHz mono f32 samples, returns English transcription text. Error messages are descriptive. - -#### Task 3.3: Add `WHISPER_ENGINE` global state and lifecycle management - -- **Files to modify:** - - `src-tauri/src/lib.rs` — add global, load/unload helpers -- **Implementation details:** - - ```rust - pub static WHISPER_ENGINE: Lazy>> = - Lazy::new(|| RwLock::new(None)); - ``` - - Add helper functions: - ```rust - /// Loads the Whisper model if not already loaded or if the selected model - /// has changed (lazy, Req #14). Called from process_and_transcribe(). 
- pub fn ensure_whisper_loaded(app_handle: &AppHandle) -> Result<(), AppError> { - let settings = SETTINGS.read().map_err(|e| AppError::Config(e.to_string()))?; - let desired_model_id = settings.whisper_config.selected_model.clone(); - drop(settings); - - let engine = WHISPER_ENGINE.read().map_err(|e| AppError::Transcription(e.to_string()))?; - if let Some(ref e) = *engine { - if e.loaded_model_id() == desired_model_id { - return Ok(()); // already loaded with correct model - } - // Model changed — need to reload - drop(engine); - unload_whisper(); - } else { - drop(engine); // release read lock - } - - // Reuse the ID captured above: a reference borrowed from a second `settings` guard could not outlive that guard, and a fresh read could disagree with the model compared above. - let model_id = desired_model_id.as_str(); - let models_dir = app_handle.path().app_data_dir() - .map_err(|e| AppError::Config(e.to_string()))? - .join("models"); - let model_info = whisper::models::get_model_tier(model_id) - .ok_or_else(|| AppError::Transcription(format!("Unknown model: {model_id}")))?; - let model_path = models_dir.join(&model_info.file_name); - - if !model_path.exists() { - return Err(AppError::Transcription( - "No model downloaded. Please download a model in Settings.".into() - )); // Req #5 - } - - let engine = ai::whisper::WhisperEngine::load(&model_path, model_id)?; - let mut guard = WHISPER_ENGINE.write().map_err(|e| AppError::Transcription(e.to_string()))?; - *guard = Some(engine); - Ok(()) - } - - /// Unloads the Whisper model to free memory (Req #41). 
- pub fn unload_whisper() { - if let Ok(mut guard) = WHISPER_ENGINE.write() { - *guard = None; - } - } - ``` - - Update `apply_settings()` to unload Whisper when switching to Cloud mode: - ```rust - // In apply_settings(), after rebuilding the provider pool: - if settings.transcription_mode == TranscriptionMode::Cloud { - unload_whisper(); - } - ``` - -- **Dependencies:** Task 3.2 -- **Acceptance criteria:** `ensure_whisper_loaded()` loads model on first call, returns `Ok` on subsequent calls. `unload_whisper()` sets engine to `None`. Switching to Cloud mode unloads. - -### Phase 4: Transcription Pipeline Branching - -#### Task 4.1: Branch `process_and_transcribe()` on transcription mode - -- **Files to modify:** - - `src-tauri/src/hotkey/manager.rs` — modify `process_and_transcribe()` -- **Implementation details:** - - The function currently (lines 331-384) always encodes and calls the provider pool. Add a mode check early: - - ```rust - fn process_and_transcribe( - recorder: AudioRecorderHandle, - app_handle: AppHandle, // NEW: needed for model path resolution - ) -> Result<String, AppError> { - let (samples, sample_rate, channels) = recorder.stop()?; - - let mode = { - let settings = crate::SETTINGS.read() - .map_err(|e| AppError::Config(e.to_string()))?; - settings.transcription_mode.clone() - }; - - match mode { - TranscriptionMode::Local => { - // Req #7, #8, #9: resample to 16 kHz mono - let resampled = crate::audio::encoder::resample_for_whisper( - &samples, sample_rate, channels - )?; - - // Req #14: lazy load model - crate::ensure_whisper_loaded(&app_handle)?; - - // Req #17: get language setting - let language = { - let settings = crate::SETTINGS.read() - .map_err(|e| AppError::Config(e.to_string()))?; - match settings.whisper_config.language { - WhisperLanguage::Auto => "auto", - WhisperLanguage::German => "de", - WhisperLanguage::English => "en", - }.to_string() - }; - - // Req #10, #19: transcribe with Whisper (no system prompt) - let engine = 
crate::WHISPER_ENGINE.read() - .map_err(|e| AppError::Transcription(e.to_string()))?; - let engine = engine.as_ref() - .ok_or_else(|| AppError::Transcription("Whisper engine not loaded".into()))?; - engine.transcribe(&resampled, &language) - } - TranscriptionMode::Cloud => { - // Existing cloud path (unchanged) - let preferred_format = { /* read from settings */ }; - let (audio_data, mime_type) = /* encode as before */; - let system_prompt = crate::active_system_prompt(); - let pool = crate::PROVIDER_POOL.read() - .map_err(|e| AppError::Transcription(e.to_string()))?; - let rt = tokio::runtime::Runtime::new() - .map_err(|e| AppError::Transcription(e.to_string()))?; - rt.block_on(pool.transcribe(&audio_data, mime_type, &system_prompt)) - .map(|r| r.text) - } - } - } - ``` - - The `AppHandle` must be threaded through from `stop_and_transcribe()`. Since the hotkey manager already has access to global state, pass `app_handle` from the Tauri setup or store it in a global. - -- **Dependencies:** Task 2.1, Task 3.3 -- **Acceptance criteria:** Recording → release hotkey transcribes via Whisper in Local mode, via cloud in Cloud mode. Switching mode in settings changes behavior on next transcription. - -#### Task 4.2: Pass `AppHandle` to hotkey manager - -- **Files to modify:** - - `src-tauri/src/hotkey/manager.rs` — store `AppHandle` for model path resolution - - `src-tauri/src/lib.rs` — pass `AppHandle` during hotkey manager setup -- **Implementation details:** - - Add a global or pass `AppHandle` into `start_event_loop()`. The simplest approach is a global: - ```rust - // In lib.rs - pub static APP_HANDLE: OnceLock = OnceLock::new(); - ``` - Set it during Tauri `setup()`. Use it in `process_and_transcribe()` to resolve `app_data_dir()`. - -- **Dependencies:** None -- **Acceptance criteria:** `APP_HANDLE.get()` returns `Some` after app startup. Model path resolution works via `app_handle.path().app_data_dir()`. 
- -### Phase 5: Model Management - -#### Task 5.1: Create model registry module - -- **Files to create:** - - `src-tauri/src/whisper/mod.rs` — module root and re-exports - - `src-tauri/src/whisper/models.rs` — model tier definitions, on-disk discovery -- **Files to modify:** - - `src-tauri/src/main.rs` — add `mod whisper;` -- **Implementation details:** - - ```rust - // whisper/models.rs - - pub struct ModelTier { - pub id: &'static str, - pub name: &'static str, - pub file_name: &'static str, - pub size_bytes: u64, - pub description: &'static str, - pub url: &'static str, - } - - pub const MODEL_TIERS: &[ModelTier] = &[ - ModelTier { - id: "large-v3-turbo", - name: "Large (v3 Turbo)", - file_name: "ggml-large-v3-turbo-q5_0.bin", - size_bytes: 574_000_000, // ~574 MB - description: "Best accuracy, recommended for most users", - url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin", - }, - ModelTier { - id: "small", - name: "Small", - file_name: "ggml-small-q5_1.bin", - size_bytes: 200_000_000, // ~200 MB - description: "Lighter alternative, good accuracy", - url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin", - }, - ModelTier { - id: "base", - name: "Base", - file_name: "ggml-base-q5_1.bin", - size_bytes: 60_000_000, // ~60 MB - description: "Minimal, for constrained hardware", - url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin", - }, - ]; - - pub fn get_model_tier(id: &str) -> Option<&'static ModelTier> { - MODEL_TIERS.iter().find(|m| m.id == id) - } - - /// Returns model info with download status for each tier. 
- pub fn list_models(models_dir: &Path) -> Vec { - MODEL_TIERS.iter().map(|tier| { - let path = models_dir.join(tier.file_name); - let downloaded = path.exists(); - let file_size_on_disk = if downloaded { - std::fs::metadata(&path).ok().map(|m| m.len()) - } else { - None - }; - ModelStatus { - id: tier.id.to_string(), - name: tier.name.to_string(), - file_name: tier.file_name.to_string(), - size_bytes: tier.size_bytes, - description: tier.description.to_string(), - downloaded, - file_size_on_disk, - } - }).collect() - } - - /// Verifies model file integrity by checking file size (Req #26). - pub fn verify_model(models_dir: &Path, model_id: &str) -> Result { - let tier = get_model_tier(model_id) - .ok_or_else(|| AppError::Transcription(format!("Unknown model: {model_id}")))?; - let path = models_dir.join(tier.file_name); - if !path.exists() { - return Ok(false); - } - let actual_size = std::fs::metadata(&path) - .map_err(|e| AppError::Io(e))?.len(); - // Allow 5% tolerance for size differences across versions - let expected = tier.size_bytes; - Ok(actual_size > expected * 95 / 100 && actual_size < expected * 105 / 100) - } - - /// Deletes a downloaded model file (Req #28). - pub fn delete_model(models_dir: &Path, model_id: &str) -> Result<(), AppError> { - let tier = get_model_tier(model_id) - .ok_or_else(|| AppError::Transcription(format!("Unknown model: {model_id}")))?; - let path = models_dir.join(tier.file_name); - if path.exists() { - std::fs::remove_file(&path)?; - } - Ok(()) - } - ``` - -- **Dependencies:** None -- **Acceptance criteria:** `list_models()` returns three tiers with accurate download status. `verify_model()` checks file size. `delete_model()` removes file. 
- -#### Task 5.2: Create model download module with progress and cancellation - -- **Files to create:** - - `src-tauri/src/whisper/download.rs` — download logic -- **Files to modify:** - - `src-tauri/src/error.rs` — add `ModelDownload(String)` variant -- **Implementation details:** - - ```rust - // whisper/download.rs - - use std::sync::atomic::{AtomicBool, Ordering}; - use std::sync::Arc; - use tauri::{AppHandle, Emitter}; - - /// Global cancellation flag (Req #31: only one download at a time) - static DOWNLOAD_IN_PROGRESS: AtomicBool = AtomicBool::new(false); - static DOWNLOAD_CANCELLED: AtomicBool = AtomicBool::new(false); - - #[derive(Clone, Serialize)] - #[serde(rename_all = "camelCase")] - pub struct DownloadProgress { - pub model_id: String, - pub bytes_downloaded: u64, - pub total_bytes: u64, - pub percentage: f64, - } - - pub async fn download_model( - app: &AppHandle, - model_id: &str, - models_dir: &Path, - ) -> Result<(), AppError> { - // Req #31: prevent concurrent downloads - if DOWNLOAD_IN_PROGRESS.swap(true, Ordering::SeqCst) { - return Err(AppError::ModelDownload( - "A download is already in progress. 
Cancel it first.".into() - )); - } - DOWNLOAD_CANCELLED.store(false, Ordering::SeqCst); - - let result = do_download(app, model_id, models_dir).await; - - DOWNLOAD_IN_PROGRESS.store(false, Ordering::SeqCst); - - if let Err(ref e) = result { - // Req #30: clean up partial files on failure - if let Some(tier) = super::models::get_model_tier(model_id) { - let partial = models_dir.join(tier.file_name); - let _ = std::fs::remove_file(&partial); - } - } - - result - } - - async fn do_download( - app: &AppHandle, - model_id: &str, - models_dir: &Path, - ) -> Result<(), AppError> { - let tier = super::models::get_model_tier(model_id) - .ok_or_else(|| AppError::ModelDownload(format!("Unknown model: {model_id}")))?; - - std::fs::create_dir_all(models_dir)?; - let dest = models_dir.join(tier.file_name); - - let client = reqwest::Client::new(); - let response = client.get(tier.url).send().await - .map_err(|e| AppError::ModelDownload(format!("Download failed: {e}")))?; - - if !response.status().is_success() { - return Err(AppError::ModelDownload( - format!("Download failed with status: {}", response.status()) - )); - } - - let total = response.content_length().unwrap_or(tier.size_bytes); - let mut file = std::fs::File::create(&dest)?; - let mut downloaded: u64 = 0; - let mut stream = response.bytes_stream(); - - use futures_util::StreamExt; - while let Some(chunk) = stream.next().await { - if DOWNLOAD_CANCELLED.load(Ordering::SeqCst) { - drop(file); - let _ = std::fs::remove_file(&dest); - return Err(AppError::ModelDownload("Download cancelled".into())); - } - - let chunk = chunk - .map_err(|e| AppError::ModelDownload(format!("Network error: {e}")))?; - std::io::Write::write_all(&mut file, &chunk)?; - downloaded += chunk.len() as u64; - - // Emit progress every ~100KB to avoid flooding - if downloaded % (100 * 1024) < chunk.len() as u64 { - let _ = app.emit("whisper-download-progress", DownloadProgress { - model_id: model_id.to_string(), - bytes_downloaded: downloaded, - 
total_bytes: total, - percentage: (downloaded as f64 / total as f64) * 100.0, - }); - } - } - - // Req #26: verify file size - if !super::models::verify_model(models_dir, model_id)? { - let _ = std::fs::remove_file(&dest); - return Err(AppError::ModelDownload( - "Downloaded model failed integrity check. Please retry.".into() - )); - } - - Ok(()) - } - - pub fn cancel_download() { - DOWNLOAD_CANCELLED.store(true, Ordering::SeqCst); - } - ``` - - Add `reqwest` `stream` feature (which re-exports `futures-util` stream types — check if a separate `futures-util` dep is needed during implementation): - ```toml - reqwest = { version = "0.12", features = ["json", "rustls-tls", "stream"] } - ``` - -- **Dependencies:** Task 5.1 (model registry) -- **Acceptance criteria:** Downloads model from HuggingFace with progress events. Cancellation stops download and removes partial file. Concurrent download attempts are rejected. Failed downloads clean up partial files. - -#### Task 5.3: Add IPC commands for model management - -- **Files to modify:** - - `src-tauri/src/lib.rs` — add Tauri commands - - `src-tauri/src/main.rs` — register commands -- **Implementation details:** - - ```rust - #[tauri::command] - async fn get_available_models(app: AppHandle) -> Result, String> { - let models_dir = app.path().app_data_dir() - .map_err(|e| e.to_string())? - .join("models"); - Ok(whisper::models::list_models(&models_dir)) - } - - #[tauri::command] - async fn download_model(app: AppHandle, model_id: String) -> Result<(), String> { - let models_dir = app.path().app_data_dir() - .map_err(|e| e.to_string())? 
- .join("models"); - whisper::download::download_model(&app, &model_id, &models_dir) - .await - .map_err(|e| e.to_string()) - } - - #[tauri::command] - fn cancel_download() -> Result<(), String> { - whisper::download::cancel_download(); - Ok(()) - } - - #[tauri::command] - async fn delete_model(app: AppHandle, model_id: String) -> Result<(), String> { - let models_dir = app.path().app_data_dir() - .map_err(|e| e.to_string())? - .join("models"); - - // If this model is currently loaded, unload it first - { - let engine = WHISPER_ENGINE.read().map_err(|e| e.to_string())?; - if let Some(ref e) = *engine { - if e.loaded_model_id() == model_id { - drop(engine); - unload_whisper(); - } - } - } - - whisper::models::delete_model(&models_dir, &model_id) - .map_err(|e| e.to_string()) - } - - #[tauri::command] - fn get_whisper_status() -> Result { - let engine = WHISPER_ENGINE.read().map_err(|e| e.to_string())?; - let settings = SETTINGS.read().map_err(|e| e.to_string())?; - - if settings.transcription_mode != TranscriptionMode::Local { - return Ok(WhisperStatusResponse { state: "notActive".into(), loaded_model: None }); - } - - match engine.as_ref() { - Some(e) => Ok(WhisperStatusResponse { - state: "ready".into(), - loaded_model: Some(e.loaded_model_id().to_string()), - }), - None => Ok(WhisperStatusResponse { - state: "noModel".into(), - loaded_model: None, - }), - } - } - ``` - - Register in `main.rs` invoke handler: - ```rust - .invoke_handler(tauri::generate_handler![ - // existing commands... - get_available_models, - download_model, - cancel_download, - delete_model, - get_whisper_status, - ]) - ``` - -- **Dependencies:** Task 5.1, Task 5.2, Task 3.3 -- **Acceptance criteria:** All five IPC commands callable from frontend. `download_model` emits `whisper-download-progress` events. `get_whisper_status` reflects current engine state. 
- -#### Task 5.4: Add frontend command wrappers - -- **Files to modify:** - - `src/lib/commands.ts` — add TypeScript wrappers -- **Implementation details:** - - ```typescript - export async function getAvailableModels(): Promise<WhisperModelInfo[]> { - return invoke('get_available_models'); - } - - export async function downloadModel(modelId: string): Promise<void> { - return invoke('download_model', { modelId }); - } - - export async function cancelDownload(): Promise<void> { - return invoke('cancel_download'); - } - - export async function deleteModel(modelId: string): Promise<void> { - return invoke('delete_model', { modelId }); - } - - export async function getWhisperStatus(): Promise<WhisperStatus> { - return invoke('get_whisper_status'); - } - ``` - -- **Dependencies:** Task 5.3 (IPC commands exist), Task 1.2 (types defined) -- **Acceptance criteria:** All wrappers compile and invoke the correct Tauri commands. - -### Phase 6: Settings UI - -#### Task 6.1: Create mode toggle component - -- **Files to create:** - - `src/components/ModeToggle.svelte` -- **Implementation details:** - - A segmented control or toggle switch with two options: "Local (Whisper)" and "Cloud (API)". Binds to `settings.transcriptionMode`. Emits change event to parent for conditional panel rendering. - - Uses the same styling patterns as existing settings components (Tailwind utility classes). - -- **Dependencies:** Task 1.2 (types) -- **Acceptance criteria:** Toggle switches between `'local'` and `'cloud'`. Visually indicates active mode. - -#### Task 6.2: Create Whisper configuration panel - -- **Files to create:** - - `src/components/WhisperConfig.svelte` -- **Implementation details:** - - Panel contains: - - **Model list** (Req #29, #34, #35): Table/list showing all three tiers. Each row shows: name, description, size, download status. Downloaded models show file size and "Delete" button. Not-downloaded models show "Download" button. - - **Download progress** (Req #24): Progress bar with percentage and MB count. 
Shows during active download. "Cancel" button (Req #25). - - **Language selector** (Req #17): Dropdown with Auto/German/English. Bound to `settings.whisperConfig.language`. - - **Status indicator** (Req #36): Badge showing "Model loaded and ready" (green), "No model downloaded" (yellow), "Loading model..." (blue). - - Listen to `whisper-download-progress` Tauri event for live progress updates: - ```typescript - import { listen } from '@tauri-apps/api/event'; - - let downloadProgress: DownloadProgress | null = null; - - onMount(() => { - const unlisten = listen('whisper-download-progress', (event) => { - downloadProgress = event.payload; - }); - return () => { unlisten.then(fn => fn()); }; - }); - ``` - - After download completes, refresh model list via `getAvailableModels()` and whisper status via `getWhisperStatus()`. - -- **Dependencies:** Task 5.4 (command wrappers), Task 6.1 (mode toggle) -- **Acceptance criteria:** All model tiers displayed with correct download status. Download starts on button click with progress. Cancel stops download. Delete removes model. Language selector persists choice. Status badge reflects engine state. - -#### Task 6.3: Integrate mode toggle and conditional panels into settings - -- **Files to modify:** - - `src/App.svelte` (or settings layout component) — add mode toggle, conditional rendering - - `src/components/ProviderConfig.svelte` — no changes needed (wrapped conditionally by parent) -- **Implementation details:** - - At the top of settings, render `ModeToggle`. Below it: - ```svelte - {#if settings.transcriptionMode === 'local'} - - {:else} - - {/if} - ``` - - When mode changes (Req #41), call `save_settings()` to trigger backend `apply_settings()` which unloads Whisper if switching to Cloud. - - If switching to Local mode with no model downloaded (Req #22): WhisperConfig panel should prominently show the "no model" state with a prompt and "Download Now" button. 
- -- **Dependencies:** Task 6.1, Task 6.2 -- **Acceptance criteria:** Mode toggle is visible at top of settings. Switching modes shows the correct panel. Cloud panel preserves existing provider config. Settings persist on mode change. - -## 6. Data Model Changes - -No database changes. All persistence is via the existing JSON settings file (`~/.pisum-transcript.json`). New fields are added to `AppSettings` with `#[serde(default)]` for backward compatibility. - -## 7. API Changes - -No HTTP API changes. New Tauri IPC commands are defined in Task 5.3: - -| Command | Parameters | Returns | Purpose | -|---------|------------|---------|---------| -| `get_available_models` | none | `Vec` | List model tiers with download status | -| `download_model` | `model_id: String` | `()` | Start model download (emits progress events) | -| `cancel_download` | none | `()` | Cancel active download | -| `delete_model` | `model_id: String` | `()` | Remove downloaded model file | -| `get_whisper_status` | none | `WhisperStatus` | Engine state (ready/loading/noModel/notActive) | - -Event: `whisper-download-progress` — emitted during download with `DownloadProgress` payload. - -## 8. Dependencies & Risks - -### External Dependencies - -| Dependency | Version | Purpose | Risk | -|------------|---------|---------|------| -| `whisper-rs` | 0.16 | Whisper.cpp Rust bindings | C++ compilation adds build time. Metal/Vulkan SDK required. | -| `futures-util` | 0.3 | Async stream processing for downloads | Widely used, low risk | -| HuggingFace CDN | — | Model file hosting | Network dependency for downloads only (not inference) | - -### Technical Risks - -| Risk | Impact | Mitigation | -|------|--------|------------| -| whisper-rs C++ build failures in CI | Build blocked | Test CI builds early in Phase 3. Vulkan SDK install step needed for Windows CI. 
| -| Large model files slow down dev iteration | Dev friction | Use `base` model (~60 MB) during development | -| WhisperContext is not `Send` | Threading issues | Hold context in `RwLock`, access only from the transcription thread. Verify whisper-rs thread safety. | -| GPU acceleration fails silently | Poor performance | Req #39 handles this — whisper.cpp falls back to CPU. Log a warning. | -| Reqwest streaming requires `stream` feature | Compile error | Already called out in Task 5.2 | - -### Assumptions - -- whisper-rs v0.16 API is stable and matches the code snippets above. Verify against actual crate docs before implementing. -- HuggingFace CDN allows direct file downloads without authentication for public repos. -- The app's Tauri bundle identifier in `tauri.conf.json` is `net.pisum.transcript` (confirmed from exploration). - -## 9. Testing Strategy - -### Unit Tests - -| Area | Test Cases | -|------|-----------| -| `resample_for_whisper()` | Stereo→mono mixdown. 48kHz→16kHz resampling. 16kHz passthrough. Empty input. | -| `ModelTier` registry | `get_model_tier()` returns correct tier. Unknown ID returns `None`. | -| `verify_model()` | Valid file passes. Missing file fails. Wrong-size file fails. | -| `WhisperConfig` serde | Default deserialization. Round-trip serialization. | - -### Integration Tests - -| Scenario | How to Test | -|----------|-------------| -| Cloud mode transcription | Existing test path — unchanged. | -| Local mode transcription | Record short audio clip, process through full pipeline with `base` model. Verify non-empty English text returned. | -| Model download + verify | Download `base` model (~60 MB), verify file exists and passes integrity check. | -| Mode switching | Switch Local→Cloud, verify `WHISPER_ENGINE` is `None`. Switch back, transcribe, verify model reloads. 
| - -### Manual Testing - -| Scenario | Platforms | -|----------|-----------| -| Metal GPU acceleration | macOS Apple Silicon | -| Vulkan GPU acceleration | Windows with iGPU | -| CPU fallback | Any (disable GPU features to verify) | -| German→English translation | Both platforms | -| Download progress UI | Both platforms | -| Download cancellation | Both platforms | -| Settings persistence across app restart | Both platforms | - -### Edge Cases - -- Transcribe in Local mode with no model downloaded → clear error message (Req #5) -- Delete the currently-loaded model → engine unloads, status updates -- Cancel download mid-stream → partial file cleaned up (Req #30) -- Very short recording (<0.5s) → Whisper should still return result (may be empty) -- Model file corrupted after download → detected on load, user prompted to re-download (Req #38) - -## 10. Requirement Traceability - -### Functional Requirements - -| PRD Ref | Requirement Summary | Task(s) | Notes | -|---------|-------------------|---------|-------| -| 4.1 #1 | Transcription mode setting | 1.1, 6.1 | | -| 4.1 #2 | Local mode bypasses cloud pool | 4.1 | | -| 4.1 #3 | Cloud mode uses existing pool | 4.1 | Unchanged path | -| 4.1 #4 | Persist transcription mode | 1.1 | Via `AppSettings` serde | -| 4.1 #5 | Prevent local transcription without model | 3.3, 6.2 | Error in `ensure_whisper_loaded()` + UI prompt | -| 4.2 #6 | Raw PCM for Whisper, encoded for cloud | 4.1 | Pipeline branching | -| 4.2 #7 | Skip encoding, resample to 16kHz mono | 2.1, 4.1 | | -| 4.2 #8 | Use existing rubato resampler | 2.1 | Same sinc params | -| 4.2 #9 | Mix down multi-channel to mono | 2.1 | Channel averaging | -| 4.2 #10 | Pass resampled samples to WhisperState::full() | 3.2 | | -| 4.3 #11 | Use whisper-rs v0.16 | 3.1 | | -| 4.3 #12 | Metal feature on macOS | 3.1 | Platform-conditional Cargo.toml | -| 4.3 #13 | Vulkan feature on Windows | 3.1 | Platform-conditional Cargo.toml | -| 4.3 #14 | Lazy load model, keep in memory | 3.3 | 
`ensure_whisper_loaded()` | -| 4.3 #15 | Configure translate task | 3.2 | `params.set_translate(true)` | -| 4.3 #16 | Translate task for English output | 3.2 | | -| 4.3 #17 | Configurable language hint | 1.1, 3.2, 6.2 | `WhisperLanguage` enum, language dropdown | -| 4.3 #18 | Concatenate text segments | 3.2 | Loop over segments | -| 4.3 #19 | No system prompt for Whisper | 4.1 | Local path doesn't call `active_system_prompt()` | -| 4.4 #20 | Three model tiers | 5.1 | `MODEL_TIERS` const | -| 4.4 #21 | Store in app data dir via Tauri 2 API | 5.1, 5.3 | `app.path().app_data_dir().join("models")` | -| 4.4 #22 | Prompt for download, don't auto-start | 6.2 | UI prompt with "Download Now" button | -| 4.4 #23 | Download from HuggingFace | 5.1, 5.2 | URLs in `MODEL_TIERS` | -| 4.4 #24 | Download progress indicator | 5.2, 6.2 | Tauri events + progress bar | -| 4.4 #25 | Cancel download | 5.2, 5.3, 6.2 | `cancel_download()` command | -| 4.4 #26 | Verify model integrity (file size) | 5.1, 5.2 | `verify_model()` after download | -| 4.4 #27 | Switch between downloaded models | 1.1, 6.2 | Model dropdown bound to `whisper_config.selected_model` | -| 4.4 #28 | Delete downloaded models | 5.1, 5.3, 6.2 | `delete_model()` command + UI button | -| 4.4 #29 | Display downloaded models with sizes | 5.1, 5.3, 6.2 | `list_models()` → model list UI | -| 4.4 #30 | Handle download failures, cleanup partials | 5.2 | Error handling + `remove_file` on failure | -| 4.4 #31 | One download at a time | 5.2 | `DOWNLOAD_IN_PROGRESS` atomic flag | -| 4.5 #32 | Mode toggle in settings UI | 6.1, 6.3 | | -| 4.5 #33 | Whisper config panel in Local mode | 6.2, 6.3 | | -| 4.5 #34 | Cloud config panel in Cloud mode | 6.3 | Conditional rendering | -| 4.5 #35 | Model dropdown shows download status | 6.2 | Per-model status badges | -| 4.5 #36 | Visual indicator for model ready state | 6.2 | Status badge component | -| 4.6 #37 | Error notification on inference failure | 4.1 | Existing `categorize_error()` handles 
`Transcription` | -| 4.6 #38 | Notify on missing/corrupt model, prompt re-download | 3.3, 6.2 | `ensure_whisper_loaded()` error + UI state | -| 4.6 #39 | GPU fallback to CPU, notify user | 3.2 | whisper.cpp handles fallback internally; log warning at info level since detection is not exposed by whisper-rs | -| 4.7 #40 | Default to Cloud mode | 1.1 | `TranscriptionMode::default() = Cloud` | -| 4.7 #41 | Unload Whisper on mode switch to Cloud | 3.3 | `apply_settings()` calls `unload_whisper()` | - -### User Stories - -| PRD Story | Summary | Implementing Tasks | Fully Covered? | -|-----------|---------|-------------------|----------------| -| US-1 | Privacy-conscious local transcription | 3.2, 4.1 | Yes | -| US-2 | Offline transcription | 3.2, 4.1 | Yes | -| US-3 | No API costs | 1.1, 4.1 | Yes | -| US-4 | GPU acceleration (macOS) | 3.1 | Yes (Metal feature) | -| US-5 | Vulkan acceleration (Windows) | 3.1 | Yes (Vulkan feature) | -| US-6 | Guided model download | 5.2, 5.3, 6.2 | Yes | -| US-7 | Delete unused models | 5.1, 5.3, 6.2 | Yes | -| US-8 | Switch to smaller model | 1.1, 6.2 | Yes | - -### Success Metrics - -| Metric | How the Plan Addresses It | -|--------|--------------------------| -| <3s latency on M4 (Metal) | Task 3.1 enables Metal. Task 3.2 uses greedy decoding. Manual test. | -| <5s latency on ThinkPad E14 (Vulkan) | Task 3.1 enables Vulkan. Manual test. | -| Semantic equivalence with F16 | Manual spot-check of 5 clips with `large-v3-turbo` quantized model. | -| Download with progress on both platforms | Task 5.2 (progress events), Task 6.2 (UI). Manual test. | -| Zero runtime crashes | All whisper-rs calls wrapped in `Result`. Error categorization in hotkey manager. | -| Startup time +2s max | Task 3.3: lazy loading (no model load on startup). Manual test. 
| diff --git a/docs/PRD-file-logging.md b/docs/PRD-file-logging.md new file mode 100644 index 0000000..f24293e --- /dev/null +++ b/docs/PRD-file-logging.md @@ -0,0 +1,138 @@ +# PRD: File-Based Logging with Retention + +## 1. Introduction/Overview + +Pisum Transcript currently has no logging infrastructure. Errors are shown to users as OS notifications, but there is no persistent record of application behavior, warnings, or errors. This makes debugging, support, and development significantly harder. + +This feature adds structured file-based logging using the `tracing` crate, with configurable log levels, file rotation by size, automatic cleanup of old logs, and a new Logging tab in the Settings UI for user control. + +## 2. Goals + +- Provide persistent, structured log output to files for debugging and support purposes +- Implement automatic log rotation (by file size) and retention (by age) to prevent unbounded disk usage +- Allow users to configure logging behavior (log level, retention settings) through the Settings UI +- Replace ad-hoc error handling with consistent `tracing` instrumentation across all backend modules +- Make it easy for users to locate and share log files when reporting issues + +## 3. User Stories + +- As a **user experiencing issues**, I want application logs written to a file so that I can share them when reporting a bug. +- As a **developer debugging a problem**, I want to set the log level to DEBUG or TRACE so that I can see detailed internal behavior without rebuilding the app. +- As a **user with limited disk space**, I want logs to automatically rotate and old logs to be deleted so that they don't consume excessive storage. +- As a **user**, I want a button in Settings to open the log folder so that I can quickly find log files. +- As a **developer**, I want structured log output with timestamps, levels, and module context so that I can efficiently search and filter logs. + +## 4. 
Functional Requirements + +### 4.1 Logging Library Integration + +1. The system must use the `tracing` crate as the logging facade across all Rust backend modules. +2. The system must use `tracing-appender` (or equivalent) to write logs to files. +3. The system must use `tracing-subscriber` to configure log formatting and filtering. +4. The system must initialize the logging subsystem early in application startup (before other subsystems). + +### 4.2 Log Output + +5. The system must write log entries to files in a dedicated logs directory under the user's home folder (e.g., `~/.pisum-transcript/logs/`). +6. Each log entry must include: timestamp (ISO 8601, local time), log level, module/target path, and the log message. +7. The system must support the standard five log levels: ERROR, WARN, INFO, DEBUG, TRACE. +8. The default log level must be INFO. +9. The system must also output logs to stdout/stderr when running in development mode (`tauri dev`). + +### 4.3 Log Rotation and Retention + +10. The system must rotate log files when a file exceeds a configurable maximum size (default: 10 MB). +11. Rotated files must be named with a timestamp or sequential suffix to preserve ordering. +12. The system must delete log files older than a configurable number of days (default: 7 days). +13. The retention cleanup must run at application startup. + +### 4.4 Configuration + +14. The following logging settings must be added to the application config schema: + - `log_level`: one of `error`, `warn`, `info`, `debug`, `trace` (default: `info`) + - `log_max_file_size_mb`: maximum log file size in MB before rotation (default: 10) + - `log_retention_days`: number of days to keep log files (default: 7) +15. Changes to `log_level` should take effect immediately (without app restart) if feasible, or on next restart. +16. Changes to rotation/retention settings must take effect on next app restart. + +### 4.5 Settings UI - Logging Tab + +17. 
The Settings UI must include a new **Logging** tab accessible from the tab navigation. +18. The Logging tab must display a dropdown/select for **Log Level** (ERROR, WARN, INFO, DEBUG, TRACE). +19. The Logging tab must display a numeric input for **Max File Size** (in MB). +20. The Logging tab must display a numeric input for **Retention Period** (in days). +21. The Logging tab must include an **Open Log Folder** button that opens the logs directory in the system file explorer. +22. The Logging tab must show the current log file path as read-only informational text. + +### 4.6 Instrumentation + +23. All existing modules (`audio`, `ai`, `hotkey`, `output`, `config`, `tray`) must use `tracing` macros (`info!`, `warn!`, `error!`, `debug!`, `trace!`) instead of silent error propagation where appropriate. +24. Key application lifecycle events must be logged at INFO level: app startup, config loaded, hotkey registered, recording started/stopped, transcription started/completed, provider selected. +25. All errors that currently produce OS notifications must also be logged at ERROR level with full context. +26. Performance-sensitive paths (audio recording callbacks) must use TRACE level to avoid impacting performance at default log levels. + +## 5. Non-Goals (Out of Scope) + +- **Remote/cloud log shipping**: Logs are local only. No telemetry or remote collection. +- **Frontend (Svelte) logging**: This PRD covers Rust backend logging only. Frontend console logging is not in scope. +- **Log viewer UI**: The Settings UI provides a button to open the log folder, but does not include an in-app log viewer or log search. +- **Log encryption or redaction**: Logs are stored as plain text. Sensitive data redaction is not in scope. +- **Custom log format configuration**: The log format is fixed (not user-configurable). + +## 6. 
Design Considerations + +### Logging Tab Layout + +The Logging tab in Settings should follow the same layout patterns as existing tabs (General, Audio, Provider, etc.): + +- Section header: "Logging" +- Log Level: dropdown select with options ERROR / WARN / INFO / DEBUG / TRACE +- Max File Size: numeric input with "MB" suffix label +- Retention Period: numeric input with "days" suffix label +- Log File Location: read-only text showing the path +- Open Log Folder: button aligned with existing button styles + +## 7. Technical Considerations + +### Dependencies + +- `tracing` — logging facade (macros: `info!`, `warn!`, `error!`, `debug!`, `trace!`) +- `tracing-subscriber` — subscriber configuration, formatting, filtering +- `tracing-appender` — file appender with rotation support + +### Log File Location + +Store logs in a `logs/` subdirectory under the user's home folder: +- Windows: `%USERPROFILE%/.pisum-transcript/logs/` +- macOS: `~/.pisum-transcript/logs/` +- Linux: `~/.pisum-transcript/logs/` + +This is consistent with the existing config file location (`~/.pisum-transcript.json`). + +### Integration Points + +- **Config module** (`src-tauri/src/config/`): Add new logging fields to the config schema and manager. +- **App initialization** (`src-tauri/src/lib.rs`): Initialize tracing subscriber in `run()` before other setup. +- **Hotkey manager** (`src-tauri/src/hotkey/manager.rs`): Add tracing instrumentation alongside existing notification logic. +- **Tray module** (`src-tauri/src/tray.rs`): Expose an IPC command to open the log folder. +- **Frontend** (`src/`): Add Logging tab component and wire up IPC for settings and open-folder action. + +### Constraints + +- The `tracing` global subscriber can only be set once. If dynamic log level changes are needed, use a `reload` layer from `tracing-subscriber`. +- File I/O in the logging layer must be non-blocking or handled on a dedicated thread to avoid impacting audio recording performance. + +## 8. 
Success Metrics + +- All backend modules produce structured log output to files at appropriate levels. +- Log files are automatically rotated when exceeding the configured size limit. +- Log files older than the retention period are automatically cleaned up on startup. +- Users can change log level and retention settings from the Settings UI Logging tab. +- Users can locate log files via the Open Log Folder button. +- No measurable performance regression in audio recording or transcription latency at the default INFO log level. + +## 9. Open Questions + +- [x] Should `tracing-appender`'s built-in rotation be used, or a custom rotation implementation for more control over size-based rotation and age-based cleanup? -> built-in +- [x] Should the log level change take effect immediately via a `reload` layer, or require an app restart? (Immediate is better UX but adds complexity.) -> immediate +- [x] Should log output include span context (e.g., request IDs for transcription calls), or just flat key-value fields? 
-> flat diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index be69cea..d40ceda 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -575,8 +575,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link 0.2.1", ] @@ -2242,6 +2244,25 @@ dependencies = [ "serde", ] +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2367,6 +2388,12 @@ dependencies = [ "selectors 0.24.0", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -2528,6 +2555,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "matches" version = "0.1.10" @@ -2728,6 +2764,15 @@ dependencies = [ "zbus", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-complex" 
version = "0.4.6" @@ -3114,6 +3159,17 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "open" +version = "5.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43bb73a7fa3799b198970490a51174027ba0d4ec504b03cd08caf513d40024bc" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "openssl" version = "0.10.76" @@ -3228,6 +3284,12 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -3462,7 +3524,9 @@ dependencies = [ "objc2-app-kit 0.3.2", "ogg", "once_cell", + "open", "reqwest 0.12.28", + "rolling-file", "rubato", "serde", "serde_json", @@ -3472,6 +3536,9 @@ dependencies = [ "tauri-plugin-notification", "thiserror 2.0.18", "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", "uuid", "whisper-rs", "windows 0.58.0", @@ -4059,6 +4126,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rolling-file" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8395b4f860856b740f20a296ea2cd4d823e81a2658cf05ef61be22916026a906" +dependencies = [ + "chrono", +] + [[package]] name = "rubato" version = "0.16.2" @@ -4495,6 +4571,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -5155,6 +5240,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tiff" version = "0.11.3" @@ -5433,6 +5527,18 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror 2.0.18", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -5451,6 +5557,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -5628,6 +5764,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index edb1e55..c682189 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -33,6 +33,11 @@ base64 = "0.22" 
arboard = "3" enigo = "0.3" uuid = { version = "1", features = ["v4"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter", "registry"] } +tracing-appender = "0.2" +rolling-file = "0.2" +open = "5" [target.'cfg(target_os = "macos")'.dependencies] objc2 = "0.6" objc2-app-kit = { version = "0.3.2", features = ["NSApplication", "NSRunningApplication"] } diff --git a/src-tauri/src/ai/pool.rs b/src-tauri/src/ai/pool.rs index 8cf65bd..088928e 100644 --- a/src-tauri/src/ai/pool.rs +++ b/src-tauri/src/ai/pool.rs @@ -2,6 +2,8 @@ use std::sync::atomic::{AtomicUsize, Ordering}; +use tracing::{debug, error, info, warn}; + use super::gemini::GeminiProvider; use super::openai::OpenAiProvider; use super::provider::{TranscriptionProvider, TranscriptionResult}; @@ -34,6 +36,7 @@ impl ProviderPool { self.providers.clear(); self.current_index.store(0, Ordering::Relaxed); + info!(count = entries.len(), "Rebuilding provider pool"); for entry in entries { match entry.provider_type.as_str() { "gemini" | "Gemini" => { @@ -74,14 +77,17 @@ impl ProviderPool { let idx = (start + i) % len; let provider = &self.providers[idx]; + debug!(provider = provider.provider_name(), attempt = i + 1, "Trying provider"); match provider.transcribe(audio_data, mime_type, system_prompt).await { Ok(result) => return Ok(result), Err(e) => { + warn!(provider = provider.provider_name(), error = %e, "Provider failed"); errors.push(format!("{}: {}", provider.provider_name(), e)); } } } + error!(errors = ?errors, "All providers failed"); Err(AppError::Transcription(format!( "All providers failed: {}", errors.join("; ") diff --git a/src-tauri/src/audio/recorder.rs b/src-tauri/src/audio/recorder.rs index 0f430a2..3d511d4 100644 --- a/src-tauri/src/audio/recorder.rs +++ b/src-tauri/src/audio/recorder.rs @@ -6,6 +6,8 @@ use std::sync::mpsc::{self, Sender}; use std::sync::{Arc, Mutex}; use std::thread::{self, JoinHandle}; +use tracing::{debug, error, info}; + use crate::error::AppError; 
/// Message sent to the recording thread @@ -32,12 +34,15 @@ impl AudioRecorderHandle { .default_input_device() .ok_or_else(|| AppError::Audio("No input device found".to_string()))?; + debug!(device = ?device.name().unwrap_or_default(), "Using input device"); + let config = device .default_input_config() .map_err(|e| AppError::Audio(format!("Failed to get input config: {}", e)))?; let sample_rate = config.sample_rate().0; let channels = config.channels(); + info!(sample_rate, channels, format = ?config.sample_format(), "Audio recording started"); let samples = Arc::new(Mutex::new(Vec::new())); let is_recording = Arc::new(AtomicBool::new(true)); @@ -56,7 +61,9 @@ impl AudioRecorderHandle { } }; - let err_fn = |_err| {}; + let err_fn = |err| { + error!(error = %err, "Audio stream error"); + }; let stream = match config_clone.sample_format() { cpal::SampleFormat::F32 => { @@ -146,6 +153,7 @@ impl AudioRecorderHandle { /// Stop recording and return (samples, sample_rate, channels). pub fn stop(mut self) -> Result<(Vec, u32, u16), AppError> { + info!("Stopping audio recording"); let _ = self.command_tx.send(RecorderCommand::Stop); if let Some(handle) = self.thread_handle.take() { @@ -158,6 +166,7 @@ impl AudioRecorderHandle { .map_err(|_| AppError::Audio("Failed to lock samples".to_string()))? 
.clone(); + info!(sample_count = samples.len(), "Audio recording stopped"); Ok((samples, self.sample_rate, self.channels)) } } diff --git a/src-tauri/src/config/manager.rs b/src-tauri/src/config/manager.rs index ab09dee..d294275 100644 --- a/src-tauri/src/config/manager.rs +++ b/src-tauri/src/config/manager.rs @@ -2,6 +2,8 @@ use std::path::PathBuf; +use tracing::{info, warn}; + use crate::error::AppError; use super::presets::get_builtin_presets; @@ -25,6 +27,7 @@ pub fn init() -> Result { return Ok(false); } + info!("First launch — creating default settings"); let defaults = AppSettings::default(); save_settings(&defaults)?; Ok(true) @@ -47,12 +50,15 @@ pub fn load_settings() -> Result { } } + info!("Settings loaded successfully"); + // Validate active_preset_id: fall back to first built-in if invalid if !settings.presets.iter().any(|p| p.id == settings.active_preset_id) { let fallback_id = builtins .first() .map(|p| p.id.clone()) .unwrap_or_else(|| "de-transcribe".to_string()); + warn!(invalid_id = %settings.active_preset_id, fallback = %fallback_id, "Invalid active preset, falling back"); settings.active_preset_id = fallback_id; // Persist the corrected setting save_settings(&settings)?; diff --git a/src-tauri/src/config/schema.rs b/src-tauri/src/config/schema.rs index 35ce1f4..dd5655a 100644 --- a/src-tauri/src/config/schema.rs +++ b/src-tauri/src/config/schema.rs @@ -37,6 +37,9 @@ pub struct AppSettings { #[serde(default)] pub whisper_config: WhisperConfig, + + #[serde(default)] + pub logging_config: LoggingConfig, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -142,6 +145,41 @@ fn default_whisper_model() -> String { "small".to_string() } +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct LoggingConfig { + #[serde(default = "default_log_level")] + pub log_level: String, + + #[serde(default = "default_log_max_file_size_mb")] + pub log_max_file_size_mb: u32, + + #[serde(default = "default_log_retention_days")] 
+ pub log_retention_days: u32, +} + +impl Default for LoggingConfig { + fn default() -> Self { + Self { + log_level: default_log_level(), + log_max_file_size_mb: default_log_max_file_size_mb(), + log_retention_days: default_log_retention_days(), + } + } +} + +fn default_log_level() -> String { + "info".to_string() +} + +fn default_log_max_file_size_mb() -> u32 { + 1 +} + +fn default_log_retention_days() -> u32 { + 7 +} + impl Default for AppSettings { fn default() -> Self { Self { @@ -156,6 +194,7 @@ impl Default for AppSettings { max_recording_duration_secs: default_max_recording_duration_secs(), transcription_mode: TranscriptionMode::default(), whisper_config: WhisperConfig::default(), + logging_config: LoggingConfig::default(), } } } diff --git a/src-tauri/src/hotkey/manager.rs b/src-tauri/src/hotkey/manager.rs index 0e49b81..b90084e 100644 --- a/src-tauri/src/hotkey/manager.rs +++ b/src-tauri/src/hotkey/manager.rs @@ -10,6 +10,8 @@ use global_hotkey::{GlobalHotKeyEvent, GlobalHotKeyManager, HotKeyState}; use once_cell::sync::Lazy; use tauri::AppHandle; +use tracing::{debug, error, info}; + use super::conflict::HotkeyBinding; use super::parse::{parse_code, parse_modifiers}; use crate::audio; @@ -80,6 +82,8 @@ pub fn init(app: &AppHandle) -> Result<(), AppError> { /// Register a hotkey binding. Must be called from the main thread. 
pub fn register(binding: &HotkeyBinding) -> Result<(), AppError> { + info!(key = %binding.key, modifiers = ?binding.modifiers, "Registering hotkey"); + // Unregister existing hotkey first unregister()?; @@ -99,6 +103,7 @@ pub fn register(binding: &HotkeyBinding) -> Result<(), AppError> { let mut registry = REGISTRY.lock().unwrap(); *registry = Some((hotkey_id, hotkey)); + info!("Hotkey registered successfully"); Ok(()) } @@ -204,6 +209,7 @@ fn handle_hotkey_press() { return; } + info!("Recording started"); match audio::recorder::AudioRecorderHandle::start() { Ok(recorder) => { { @@ -242,6 +248,8 @@ fn handle_hotkey_press() { }); } Err(e) => { + error!(error = %e, "Audio recorder initialization failed"); + #[cfg(target_os = "macos")] if e.to_string().contains("No input device") { tray::send_notification( @@ -258,13 +266,15 @@ fn handle_hotkey_press() { /// Stop recording, encode audio, and transcribe. Called from both modes and the max-duration timer. fn stop_and_transcribe() { + info!("Recording stopped, starting transcription pipeline"); + let recorder = { let mut active = ACTIVE_RECORDER.lock().unwrap(); active.take() }; let Some(recorder) = recorder else { - // No active recording (edge case: release without press) + debug!("No active recording to stop"); return; }; @@ -290,6 +300,7 @@ fn stop_and_transcribe() { match result { Ok(text) => { + info!("Transcription complete, pasting result"); if let Err(e) = crate::output::clipboard::set_clipboard_text(&text) { tray::send_notification( "Output Error", @@ -344,6 +355,8 @@ fn process_and_transcribe( .unwrap_or(crate::config::schema::TranscriptionMode::Cloud) }; + debug!(mode = ?mode, sample_count = samples.len(), sample_rate, channels, "Processing audio"); + match mode { crate::config::schema::TranscriptionMode::Local => { transcribe_local(&samples, sample_rate, channels) @@ -367,6 +380,7 @@ fn transcribe_local( return Err(AppError::Audio("No speech detected (audio is silent)".into())); } + debug!("Resampling audio 
for Whisper"); let resampled = audio::encoder::resample_for_whisper(samples, sample_rate, channels)?; // Get the app handle for model path resolution @@ -377,6 +391,7 @@ fn transcribe_local( .ok_or_else(|| AppError::Transcription("App handle not available".into()))? }; + debug!("Ensuring Whisper model is loaded"); crate::ensure_whisper_loaded(&app_handle)?; let (language, translate) = { @@ -398,6 +413,7 @@ fn transcribe_local( let engine = engine .as_ref() .ok_or_else(|| AppError::Transcription("Whisper engine not loaded".into()))?; + info!(language = %language, translate, "Running Whisper inference"); engine.transcribe(&resampled, &language, translate) } @@ -414,6 +430,7 @@ fn transcribe_cloud( .unwrap_or(crate::config::schema::AudioFormat::Opus) }; + debug!(format = ?preferred_format, "Encoding audio for cloud transcription"); let (audio_data, mime_type) = match preferred_format { crate::config::schema::AudioFormat::Opus => { match audio::encoder::encode_to_opus(samples, sample_rate, channels) { @@ -446,8 +463,10 @@ fn transcribe_cloud( let rt = tokio::runtime::Runtime::new() .map_err(|e| AppError::Transcription(format!("Failed to create runtime: {}", e)))?; + info!(mime_type, audio_size = audio_data.len(), "Sending audio to cloud provider"); let result = rt.block_on(pool.transcribe(&audio_data, mime_type, &system_prompt))?; + info!("Cloud transcription complete"); Ok(result.text) } @@ -467,6 +486,7 @@ fn parse_hotkey(binding: &HotkeyBinding) -> Result { /// Categorize an AppError into a user-friendly notification title and body fn categorize_error(error: &AppError) -> (&'static str, String) { + error!(error = ?error, "Pipeline error"); let body = error.to_string(); let title = match error { AppError::Audio(_) => "Recording Error", diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 227f80f..dc27d03 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -3,6 +3,7 @@ mod audio; mod config; mod error; mod hotkey; +mod logging; mod output; mod 
tray; mod whisper; @@ -406,6 +407,19 @@ fn get_whisper_status(app: AppHandle) -> Result { }) } +// ── Logging commands ────────────────────────────────────────────── + +#[tauri::command] +fn open_log_folder() -> Result<(), String> { + let dir = logging::log_dir(); + open::that(&dir).map_err(|e| format!("Failed to open log folder: {}", e)) +} + +#[tauri::command] +fn get_log_path() -> String { + logging::log_dir().to_string_lossy().to_string() +} + // ── Settings application ───────────────────────────────────────── /// Apply settings: rebuild provider pool, re-register hotkey @@ -415,6 +429,11 @@ async fn apply_settings(settings: &AppSettings, app: &AppHandle) { *cached = settings.clone(); } + // Update log level dynamically + if let Err(e) = logging::set_log_level(&settings.logging_config.log_level) { + tracing::warn!("Failed to update log level: {}", e); + } + // Rebuild provider pool from enabled providers let entries: Vec = settings .providers @@ -488,6 +507,8 @@ pub fn run() { cancel_whisper_download, delete_whisper_model, get_whisper_status, + open_log_folder, + get_log_path, ]) .setup(|app| { // Hide from macOS dock — this is a menu-bar-only (tray) app @@ -513,7 +534,14 @@ pub fn run() { let settings = config::manager::load_settings() .map_err(|e| Box::new(e) as Box)?; + // Initialize logging (after config load, before other subsystems) + if let Err(e) = logging::init(&settings.logging_config) { + eprintln!("Warning: Failed to initialize logging: {}", e); + } + tracing::info!("Application starting"); + // Register hotkey from config + tracing::info!("Registering hotkey from config"); let binding = HotkeyBinding { modifiers: settings.hotkey.modifiers.clone(), key: settings.hotkey.key.clone(), @@ -548,6 +576,8 @@ pub fn run() { *cached = settings.clone(); } + tracing::info!("Application setup complete"); + // First launch: enable auto-start, show welcome notification, open settings if is_first_launch { if settings.start_with_system { diff --git 
a/src-tauri/src/logging.rs b/src-tauri/src/logging.rs new file mode 100644 index 0000000..a5c79b7 --- /dev/null +++ b/src-tauri/src/logging.rs @@ -0,0 +1,140 @@ +//! Structured file logging with size-based rotation, age-based retention, and dynamic log level + +use std::path::{Path, PathBuf}; +use std::sync::{Mutex, OnceLock}; + +use rolling_file::{BasicRollingFileAppender, RollingConditionBasic}; +use tracing_subscriber::filter::EnvFilter; +use tracing_subscriber::reload; +use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, Registry}; + +use crate::config::schema::LoggingConfig; +use crate::error::AppError; + +/// Type-erased reload handle for dynamic log level changes. +/// We store a closure that captures the concrete handle type. +static LOG_LEVEL_UPDATER: OnceLock Result<(), String> + Send>>> = + OnceLock::new(); + +/// Non-blocking writer guard — must live for the app's lifetime +static _GUARD: OnceLock = OnceLock::new(); + +/// Initialize the logging subsystem. 
+/// +/// - Writes to `~/.pisum-transcript/logs/pisum-transcript.log` +/// - Rotates when file exceeds `config.log_max_file_size_mb` +/// - Deletes log files older than `config.log_retention_days` on startup +/// - Outputs to stdout in debug builds +pub fn init(config: &LoggingConfig) -> Result<(), AppError> { + let log_dir = log_dir(); + std::fs::create_dir_all(&log_dir) + .map_err(|e| AppError::Config(format!("Failed to create log directory: {}", e)))?; + + // Clean up old log files + cleanup_old_logs(&log_dir, config.log_retention_days); + + // Size-based rolling file appender + let max_bytes = config.log_max_file_size_mb as u64 * 1024 * 1024; + let condition = RollingConditionBasic::new().max_size(max_bytes); + let file_appender = BasicRollingFileAppender::new( + log_dir.join("pisum-transcript.log"), + condition, + 10, // keep up to 10 rotated files; age-based cleanup handles the rest + ) + .map_err(|e| AppError::Config(format!("Failed to create log appender: {}", e)))?; + + // Non-blocking writer + let (non_blocking, guard) = tracing_appender::non_blocking(file_appender); + let _ = _GUARD.set(guard); + + // Reloadable filter layer + let filter = build_filter(&config.log_level); + let (filter_layer, reload_handle) = reload::Layer::new(filter); + + // Store a type-erased updater closure + let _ = LOG_LEVEL_UPDATER.set(Mutex::new(Box::new(move |level: &str| { + let new_filter = build_filter(level); + reload_handle + .reload(new_filter) + .map_err(|e| format!("{}", e)) + }))); + + // File fmt layer + let file_layer = fmt::layer() + .with_writer(non_blocking) + .with_target(true) + .with_ansi(false); + + // Optional stdout layer for dev mode + let stdout_layer = if cfg!(debug_assertions) { + Some( + fmt::layer() + .with_target(true) + .with_ansi(true), + ) + } else { + None + }; + + Registry::default() + .with(filter_layer) + .with(file_layer) + .with(stdout_layer) + .init(); + + Ok(()) +} + +/// Change the log level at runtime without restarting the app. 
+pub fn set_log_level(level: &str) -> Result<(), AppError> { + let updater = LOG_LEVEL_UPDATER + .get() + .ok_or_else(|| AppError::Config("Logging not initialized".to_string()))?; + + let updater = updater.lock().unwrap(); + (updater)(level).map_err(|e| AppError::Config(format!("Failed to reload log level: {}", e))) +} + +/// Get the log directory path. +pub fn log_dir() -> PathBuf { + dirs::home_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join(".pisum-transcript") + .join("logs") +} + +/// Build an EnvFilter from a level string, defaulting to "info" on failure. +fn build_filter(level: &str) -> EnvFilter { + EnvFilter::try_new(level).unwrap_or_else(|_| EnvFilter::new("info")) +} + +/// Delete log files older than `retention_days` from the given directory. +fn cleanup_old_logs(dir: &Path, retention_days: u32) { + let Ok(entries) = std::fs::read_dir(dir) else { + return; + }; + + let cutoff = std::time::SystemTime::now() + - std::time::Duration::from_secs(retention_days as u64 * 24 * 60 * 60); + + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + + // Only clean up .log files (includes rotated files like pisum-transcript.log.1) + let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + if !name.contains(".log") { + continue; + } + + if let Ok(metadata) = path.metadata() { + if let Ok(modified) = metadata.modified() { + if modified < cutoff { + let _ = std::fs::remove_file(&path); + } + } + } + } +} diff --git a/src-tauri/src/output/clipboard.rs b/src-tauri/src/output/clipboard.rs index fede775..6a05142 100644 --- a/src-tauri/src/output/clipboard.rs +++ b/src-tauri/src/output/clipboard.rs @@ -1,11 +1,13 @@ //! 
Clipboard write via arboard use arboard::Clipboard; +use tracing::debug; use crate::error::AppError; /// Copy text to the system clipboard pub fn set_clipboard_text(text: &str) -> Result<(), AppError> { + debug!(len = text.len(), "Setting clipboard text"); let mut clipboard = Clipboard::new() .map_err(|e| AppError::Output(format!("Failed to access clipboard: {}", e)))?; clipboard diff --git a/src-tauri/src/output/paste.rs b/src-tauri/src/output/paste.rs index c33a18d..564c402 100644 --- a/src-tauri/src/output/paste.rs +++ b/src-tauri/src/output/paste.rs @@ -1,11 +1,13 @@ //! Paste simulation via enigo (Ctrl+V / Cmd+V) use enigo::{Direction, Enigo, Key, Keyboard, Settings}; +use tracing::debug; use crate::error::AppError; /// Simulate a paste keystroke (Ctrl+V on Windows/Linux, Cmd+V on macOS) pub fn simulate_paste() -> Result<(), AppError> { + debug!("Simulating paste keystroke"); let mut enigo = Enigo::new(&Settings::default()) .map_err(|e| AppError::Output(format!("Failed to create input simulator: {}", e)))?; diff --git a/src-tauri/src/tray.rs b/src-tauri/src/tray.rs index 62cdad6..0512f7d 100644 --- a/src-tauri/src/tray.rs +++ b/src-tauri/src/tray.rs @@ -7,6 +7,8 @@ use tauri::{ AppHandle, Manager, }; +use tracing::{debug, info}; + static APP_HANDLE: Lazy>> = Lazy::new(|| RwLock::new(None)); /// Set up the system tray icon and menu. 
@@ -37,6 +39,8 @@ pub fn setup_tray(app: &tauri::App) -> Result<(), Box> { #[cfg(target_os = "macos")] let tray_builder = tray_builder.icon_as_template(true); + info!("System tray initialized"); + let _tray = tray_builder .on_menu_event(move |app, event| match event.id().as_ref() { "settings" => { @@ -79,6 +83,7 @@ pub fn send_info_notification(title: &str, message: &str) { } fn send_notification_impl(title: &str, message: &str, force: bool) { + debug!(title, "Sending notification"); if !force { if let Ok(settings) = crate::SETTINGS.read() { if !settings.show_tray_notifications { diff --git a/src/components/LoggingConfig.svelte b/src/components/LoggingConfig.svelte new file mode 100644 index 0000000..d025aac --- /dev/null +++ b/src/components/LoggingConfig.svelte @@ -0,0 +1,123 @@ + + +
+

Logging

+ + + + + + + + {#if logPath} +
+

Log File Location

+

{logPath}

+
+ {/if} + + +
diff --git a/src/components/SettingsPage.svelte b/src/components/SettingsPage.svelte index 90a266c..8ae6e1a 100644 --- a/src/components/SettingsPage.svelte +++ b/src/components/SettingsPage.svelte @@ -7,6 +7,7 @@ import ProviderConfig from './ProviderConfig.svelte'; import PresetConfig from './PresetConfig.svelte'; import GeneralConfig from './GeneralConfig.svelte'; + import LoggingConfig from './LoggingConfig.svelte'; import ModeToggle from './ModeToggle.svelte'; import WhisperConfig from './WhisperConfig.svelte'; @@ -14,7 +15,7 @@ let { settings }: { settings: AppSettings } = $props(); - let activeTab = $state<'general' | 'hotkey' | 'audio' | 'transcription' | 'presets'>('transcription'); + let activeTab = $state<'general' | 'hotkey' | 'audio' | 'transcription' | 'presets' | 'logging'>('transcription'); let appVersion = $state(''); getVersion().then((v) => (appVersion = v)); @@ -28,6 +29,7 @@ { id: 'presets' as const, label: 'Presets' }, { id: 'hotkey' as const, label: 'Hotkey' }, { id: 'audio' as const, label: 'Audio' }, + { id: 'logging' as const, label: 'Logging' }, { id: 'general' as const, label: 'General' }, ]; @@ -72,6 +74,8 @@ {:else if activeTab === 'audio'} + {:else if activeTab === 'logging'} + {:else if activeTab === 'general'} {/if} diff --git a/src/lib/commands.ts b/src/lib/commands.ts index 0f5b6bc..691c347 100644 --- a/src/lib/commands.ts +++ b/src/lib/commands.ts @@ -89,3 +89,12 @@ export async function deleteWhisperModel(modelId: string): Promise { export async function getWhisperStatus(): Promise { return invoke('get_whisper_status'); } + +// Logging +export async function openLogFolder(): Promise { + return invoke('open_log_folder'); +} + +export async function getLogPath(): Promise { + return invoke('get_log_path'); +} diff --git a/src/lib/types.ts b/src/lib/types.ts index 19bf195..f496b6d 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -10,6 +10,7 @@ export interface AppSettings { maxRecordingDurationSecs: number; transcriptionMode: 
TranscriptionMode; whisperConfig: WhisperConfig; + loggingConfig: LoggingConfig; } export interface Preset { @@ -46,6 +47,12 @@ export interface WhisperConfig { translateToEnglish: boolean; } +export interface LoggingConfig { + logLevel: 'error' | 'warn' | 'info' | 'debug' | 'trace'; + logMaxFileSizeMb: number; + logRetentionDays: number; +} + export interface WhisperModelInfo { id: string; name: string;