diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5df8745..b8f4f34 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -85,7 +85,7 @@ jobs: sed -i "s/\"version\": \".*\"/\"version\": \"$NEW_VERSION\"/" src-tauri/tauri.conf.json # Update Chocolatey nuspec version - sed -i "s/<version>.*<\/version>/<version>$NEW_VERSION<\/version>/" packages/chocolatey/pisum-langue.nuspec + sed -i "s/<version>.*<\/version>/<version>$NEW_VERSION<\/version>/" packages/chocolatey/pisum-transcript.nuspec # Update Cargo.lock cd src-tauri && cargo generate-lockfile && cd .. @@ -93,7 +93,7 @@ jobs: - name: Commit and tag run: | VERSION="${{ steps.bump.outputs.version }}" - git add package.json package-lock.json src-tauri/Cargo.toml src-tauri/Cargo.lock src-tauri/tauri.conf.json packages/chocolatey/pisum-langue.nuspec + git add package.json package-lock.json src-tauri/Cargo.toml src-tauri/Cargo.lock src-tauri/tauri.conf.json packages/chocolatey/pisum-transcript.nuspec git commit -m "chore: bump version to $VERSION" git tag -a "v$VERSION" -m "Release v$VERSION" git push origin main @@ -125,9 +125,9 @@ jobs: uses: softprops/action-gh-release@v2 with: tag_name: v${{ steps.get_version.outputs.version }} - name: Pisum Langue v${{ steps.get_version.outputs.version }} + name: Pisum Transcript v${{ steps.get_version.outputs.version }} body: | - ## Pisum Langue v${{ steps.get_version.outputs.version }} + ## Pisum Transcript v${{ steps.get_version.outputs.version }} A cross-platform, Hotkey-driven, system-tray-style dictation tool. 
@@ -135,34 +135,34 @@ jobs: **macOS (Homebrew) - Recommended:** ```bash - brew tap mschnecke/pisum-langue - brew install --cask pisum-langue + brew tap mschnecke/pisum-transcript + brew install --cask pisum-transcript ``` **macOS (Direct Download):** - Download the `.pkg` installer: `Pisum.Langue_${{ steps.get_version.outputs.version }}_aarch64.pkg` (Apple Silicon) + Download the `.pkg` installer: `Pisum.Transcript_${{ steps.get_version.outputs.version }}_aarch64.pkg` (Apple Silicon) **Windows (Chocolatey):** ```powershell - choco install pisum-langue --source https://www.myget.org/F/mschnecke/api/v3/index.json + choco install pisum-transcript --source https://www.myget.org/F/mschnecke/api/v3/index.json ``` **Windows (Direct Download):** - Download `Pisum.Langue_${{ steps.get_version.outputs.version }}_x64_en-US.msi` from assets below. + Download `Pisum.Transcript_${{ steps.get_version.outputs.version }}_x64_en-US.msi` from assets below. ### Quick Start - **Default hotkey:** Configure your shortcuts in the settings - - **Settings stored in:** `~/.pisum-langue.json` + - **Settings stored in:** `~/.pisum-transcript.json` ### Post-Installation (macOS) Grant Accessibility permissions: 1. Open System Settings > Privacy & Security > Accessibility - 2. Enable Pisum Langue in the list + 2. Enable Pisum Transcript in the list ### Changelog - See [CHANGELOG.md](https://github.com/mschnecke/langue/blob/main/CHANGELOG.md) for details. + See [CHANGELOG.md](https://github.com/mschnecke/pisum-transcript/blob/main/CHANGELOG.md) for details. 
draft: true prerelease: false generate_release_notes: false @@ -235,7 +235,7 @@ jobs: uses: softprops/action-gh-release@v2 with: tag_name: v${{ needs.create-release.outputs.version }} - files: dist/Pisum.Langue_${{ needs.create-release.outputs.version }}_${{ matrix.arch }}.pkg + files: dist/Pisum.Transcript_${{ needs.create-release.outputs.version }}_${{ matrix.arch }}.pkg - name: Rename Windows MSI if: matrix.platform == 'windows-latest' @@ -250,14 +250,14 @@ jobs: exit 1 fi echo "Found MSI: $MSI_FILE" - mv "$MSI_FILE" "$MSI_DIR/Pisum.Langue_${VERSION}_x64_en-US.msi" + mv "$MSI_FILE" "$MSI_DIR/Pisum.Transcript_${VERSION}_x64_en-US.msi" - name: Upload Windows assets to release if: matrix.platform == 'windows-latest' uses: softprops/action-gh-release@v2 with: tag_name: v${{ needs.create-release.outputs.version }} - files: ./src-tauri/target/release/bundle/msi/Pisum.Langue_${{ needs.create-release.outputs.version }}_x64_en-US.msi + files: ./src-tauri/target/release/bundle/msi/Pisum.Transcript_${{ needs.create-release.outputs.version }}_x64_en-US.msi publish-release: needs: [create-release, build-tauri] @@ -282,7 +282,7 @@ jobs: uses: peter-evans/repository-dispatch@v2 with: token: ${{ secrets.HOMEBREW_TAP_TOKEN }} - repository: mschnecke/homebrew-pisum-langue + repository: mschnecke/homebrew-pisum-transcript event-type: update-cask client-payload: '{"version": "${{ needs.create-release.outputs.version }}"}' @@ -295,12 +295,12 @@ jobs: - name: Download Windows installer run: | $version = "${{ needs.create-release.outputs.version }}" - Invoke-WebRequest -Uri "https://github.com/mschnecke/langue/releases/download/v$version/Pisum.Langue_${version}_x64_en-US.msi" -OutFile "Pisum.Langue-Setup.msi" + Invoke-WebRequest -Uri "https://github.com/mschnecke/pisum-transcript/releases/download/v$version/Pisum.Transcript_${version}_x64_en-US.msi" -OutFile "Pisum.Transcript-Setup.msi" - name: Generate checksum id: checksum run: | - $hash = (Get-FileHash -Path 
"Pisum.Langue-Setup.msi" -Algorithm SHA256).Hash + $hash = (Get-FileHash -Path "Pisum.Transcript-Setup.msi" -Algorithm SHA256).Hash echo "sha256=$hash" >> $env:GITHUB_OUTPUT - name: Update Chocolatey package @@ -310,14 +310,14 @@ jobs: # Update chocolateyInstall.ps1 $installScript = Get-Content packages/chocolatey/tools/chocolateyInstall.ps1 -Raw - $installScript = $installScript -replace 'url64bit\s*=\s*''.*''', "url64bit = 'https://github.com/mschnecke/langue/releases/download/v$version/Pisum.Langue_${version}_x64_en-US.msi'" + $installScript = $installScript -replace 'url64bit\s*=\s*''.*''', "url64bit = 'https://github.com/mschnecke/pisum-transcript/releases/download/v$version/Pisum.Transcript_${version}_x64_en-US.msi'" $installScript = $installScript -replace 'checksum64\s*=\s*''.*''', "checksum64 = '$sha256'" Set-Content -Path packages/chocolatey/tools/chocolateyInstall.ps1 -Value $installScript # Update nuspec version - $nuspec = Get-Content packages/chocolatey/pisum-langue.nuspec -Raw + $nuspec = Get-Content packages/chocolatey/pisum-transcript.nuspec -Raw $nuspec = $nuspec -replace '<version>.*</version>', "<version>$version</version>" - Set-Content -Path packages/chocolatey/pisum-langue.nuspec -Value $nuspec + Set-Content -Path packages/chocolatey/pisum-transcript.nuspec -Value $nuspec - name: Pack Chocolatey package run: | @@ -334,6 +334,6 @@ jobs: MYGET_API_KEY: ${{ secrets.MYGET_API_KEY }} run: | $version = "${{ needs.create-release.outputs.version }}" - dotnet nuget push "packages/chocolatey/pisum-langue.$version.nupkg" ` + dotnet nuget push "packages/chocolatey/pisum-transcript.$version.nupkg" ` --source "https://www.myget.org/F/mschnecke/api/v3/index.json" ` --api-key $env:MYGET_API_KEY diff --git a/.vscode/launch.json b/.vscode/launch.json index b8b787e..78c2d33 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,6 +6,9 @@ "type": "node", "request": "launch", "runtimeExecutable": "npm", + "osx": { + "runtimeExecutable": "/usr/local/bin/npm" + }, 
"windows": { 
"runtimeExecutable": "C:\\Program Files\\nodejs\\npm.cmd" }, diff --git a/CLAUDE.md b/CLAUDE.md index e0495b4..8c12008 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -**Pisum Langue** is an AI-driven transcription utility. The user holds a global hotkey to record speech, releases it to stop, and the transcribed text is pasted at the cursor position. The project uses **Tauri 2 (Rust backend) + Svelte 5 (TypeScript frontend)**, Vite 6, and Tailwind CSS. +**Pisum Transcript** is an AI-driven transcription utility. The user holds a global hotkey to record speech, releases it to stop, and the transcribed text is pasted at the cursor position. The project uses **Tauri 2 (Rust backend) + Svelte 5 (TypeScript frontend)**, Vite 6, and Tailwind CSS. ## Repository State diff --git a/README.md b/README.md index 62192cd..f191403 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Pisum Langue +# Pisum Transcript Hotkey-driven, system-tray-style dictation tool. Hold a global hotkey to record speech, release to transcribe via AI, and the result is pasted at the cursor position. @@ -20,7 +20,7 @@ Hotkey-driven, system-tray-style dictation tool. Hold a global hotkey to record ## Prerequisites -- [Node.js](https://nodejs.org/) 18+ +- [Node.js](https://nodejs.org/) 24+ (see `.nvmrc`) - [Rust](https://rustup.rs/) 1.70+ - Platform-specific: - **Windows**: Visual Studio Build Tools diff --git a/docs/IMPL-build-process.md b/docs/IMPL-build-process.md deleted file mode 100644 index 61c3297..0000000 --- a/docs/IMPL-build-process.md +++ /dev/null @@ -1,369 +0,0 @@ -# Implementation Plan: Build Process with GitHub Actions - -> Generated from: `docs/PRD-build-process.md` -> Date: 2026-03-14 - -## 1. Overview - -This feature introduces GitHub Actions CI/CD pipelines for Pisum Langue, a Tauri 2 desktop application. Two workflows will be created: - -1. 
**CI Workflow** — Validates that the app builds on macOS and Windows for every PR and push to `main`. -2. **Release Workflow** — Builds platform installers and publishes them as GitHub Releases, triggered by `v*` tags or manual dispatch with version bumping. - -The project currently has zero CI/CD infrastructure. The reference project (`github-global-hotkey`) provides proven patterns for Tauri builds, macOS `.pkg` packaging, and the draft-then-publish release flow. - -## 2. Architecture & Design - -### Workflow Structure - -``` -.github/ -└── workflows/ - ├── ci.yml # PR/push build verification - └── release.yml # Tag/manual release pipeline -``` - -### CI Workflow Flow - -``` -push to main / PR → checkout → setup Node 24 + Rust stable (cached) - → install platform deps (Opus on macOS) - → npm ci → npm run build - → tauri build (matrix: macOS aarch64 + Windows) - → upload artifacts (7-day retention) -``` - -### Release Workflow Flow - -``` -workflow_dispatch or v* tag push - ↓ -[bump-version] (dispatch only) → update 3 files + Cargo.lock → commit → tag → push - ↓ -[create-release] → create draft GitHub Release - ↓ -[build-tauri] (matrix: macOS + Windows) → build → upload assets to draft release - ↓ -[publish-release] → un-draft the release -``` - -### macOS Post-Build Packaging - -The `.app` bundle produced by `tauri-apps/tauri-action` needs a post-build script to create a `.pkg` installer (consistent with the reference project). This script uses `pkgbuild` to wrap the `.app` into an installable `.pkg`. - -## 3. Phases & Milestones - -### Phase 1: CI Workflow -**Goal:** Every PR and push to `main` is automatically built on both platforms. -**Deliverable:** Green/red build status on PRs; build artifacts available for 7 days. - -### Phase 2: Release Workflow -**Goal:** Maintainers can produce and publish platform installers via tag push or manual dispatch. -**Deliverable:** GitHub Releases with macOS `.app`/`.pkg` and Windows MSI installers. - -## 4. 
Files Overview - -### Files to Create -| File Path | Purpose | -|-----------|---------| -| `.github/workflows/ci.yml` | CI build verification workflow | -| `.github/workflows/release.yml` | Release build and publish workflow | -| `scripts/create-macos-pkg.sh` | Post-build script to create `.pkg` from `.app` bundle | - -### Files to Modify -| File Path | What Changes | -|-----------|-------------| -| None | No existing files need modification for the workflow files themselves. Version bumps are handled dynamically by the release workflow at runtime. | - -## 5. Task Breakdown - -### Phase 1: CI Workflow - -#### Task 1.1: Create the CI workflow file - -- **Files to create:** - - `.github/workflows/ci.yml` — Full CI pipeline definition -- **Implementation details:** - - Trigger configuration: - ```yaml - on: - push: - branches: [main] - pull_request: - branches: [main] - ``` - - Matrix strategy for `macos-latest` and `windows-latest` - - Setup steps: - - `actions/checkout@v4` - - `actions/setup-node@v4` with `node-version-file: '.nvmrc'` and `cache: 'npm'` - - `dtolnay/rust-toolchain@stable` (with `targets: aarch64-apple-darwin` on macOS) - - `Swatinem/rust-cache@v2` with `workspaces: 'src-tauri -> target'` - - Platform-specific dependency step (macOS only): - ```yaml - - name: Install Opus (macOS) - if: matrix.os == 'macos-latest' - run: brew install opus - ``` - - Build steps: - ```yaml - - run: npm ci - - run: npm run build - - name: Build Tauri app - uses: tauri-apps/tauri-action@v0 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - args: >- - ${{ matrix.os == 'macos-latest' && '--target aarch64-apple-darwin --bundles app' || '--bundles msi' }} - ``` - - macOS post-build `.pkg` creation step (runs `scripts/create-macos-pkg.sh`) - - Upload artifacts with `actions/upload-artifact@v4` and `retention-days: 7` -- **Dependencies:** None -- **Acceptance criteria:** - - Pushing to `main` or opening a PR against `main` triggers builds on both platforms - - Build 
failures produce red status checks - - Artifacts are downloadable for 7 days - -#### Task 1.2: Create the macOS `.pkg` packaging script - -- **Files to create:** - - `scripts/create-macos-pkg.sh` — Shell script to create `.pkg` from `.app` -- **Implementation details:** - - Script must be executable (`chmod +x`) - - Uses `pkgbuild` (available on macOS runners by default) - - Key logic: - ```bash - #!/bin/bash - set -euo pipefail - - APP_NAME="PisumLangue" - APP_PATH="src-tauri/target/aarch64-apple-darwin/release/bundle/macos/${APP_NAME}.app" - PKG_OUTPUT="src-tauri/target/aarch64-apple-darwin/release/bundle/macos/${APP_NAME}.pkg" - IDENTIFIER="com.pisumlangue.app" - - pkgbuild \ - --component "$APP_PATH" \ - --install-location "/Applications" \ - --identifier "$IDENTIFIER" \ - "$PKG_OUTPUT" - - echo "Created: $PKG_OUTPUT" - ``` - - Follow the reference project's approach for paths and naming -- **Dependencies:** None (can be done in parallel with Task 1.1) -- **Acceptance criteria:** - - Script produces a valid `.pkg` file from the `.app` bundle - - CI workflow successfully runs this script on macOS - -### Phase 2: Release Workflow - -#### Task 2.1: Create the release workflow file - -- **Files to create:** - - `.github/workflows/release.yml` — Full release pipeline definition -- **Implementation details:** - - Permissions: - ```yaml - permissions: - contents: write - ``` - - Trigger configuration: - ```yaml - on: - push: - tags: ['v*'] - workflow_dispatch: - inputs: - version: - description: 'Version bump type (patch/minor/major) or exact version (e.g. 
0.2.0)' - required: true - default: 'patch' - ``` - - - **Job 1: `bump-version`** (only on `workflow_dispatch`): - - Runs on `ubuntu-latest` - - Condition: `if: github.event_name == 'workflow_dispatch'` - - Checkout with token for push access - - Parse version input (bump type or exact version) - - Read current version from `package.json` - - Compute new version using semver logic (shell-based, no external tools — consistent with reference project) - - Update version in three files: - - `package.json` — update `"version"` field via `jq` or `sed` - - `src-tauri/Cargo.toml` — update `version = "..."` in `[package]` - - `src-tauri/tauri.conf.json` — update `"version"` field via `jq` - - Install Rust toolchain and run `cargo generate-lockfile` in `src-tauri/` to update `Cargo.lock` - - Configure git user: - ```yaml - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - ``` - - Commit all changed files (including `Cargo.lock`), create tag, push both - - Output the new version and tag name for downstream jobs - - - **Job 2: `create-release`**: - - Needs: `bump-version` (but use `if: always()` logic to also run on tag push) - - Condition: succeeds or was skipped (tag push path) - - Determine version from tag ref or bump-version output - - Create draft release using `softprops/action-gh-release@v2`: - ```yaml - - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ needs.bump-version.outputs.tag || github.ref_name }} - name: ${{ needs.bump-version.outputs.version || github.ref_name }} - draft: true - generate_release_notes: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ``` - - - **Job 3: `build-tauri`** (matrix: macOS + Windows): - - Needs: `create-release` - - Same setup as CI workflow (Node 24, Rust stable, caching, Opus on macOS) - - Build with `tauri-apps/tauri-action@v0` - - macOS: `--target aarch64-apple-darwin --bundles app`, then run `scripts/create-macos-pkg.sh` - - Windows: 
`--bundles msi` - - Upload installers to the draft release using `softprops/action-gh-release@v2`: - ```yaml - - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ needs.create-release.outputs.tag }} - files: | - src-tauri/target/aarch64-apple-darwin/release/bundle/macos/*.pkg - src-tauri/target/aarch64-apple-darwin/release/bundle/macos/*.app - src-tauri/target/release/bundle/msi/*.msi - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ``` - - Use conditional glob patterns per platform (macOS paths on macOS, MSI path on Windows) - - - **Job 4: `publish-release`**: - - Needs: `build-tauri` - - Runs on `ubuntu-latest` - - Un-draft the release using `actions/github-script@v7`: - ```yaml - - uses: actions/github-script@v7 - with: - script: | - const { data: releases } = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - }); - const draft = releases.find(r => r.draft && r.tag_name === '${{ needs.create-release.outputs.tag }}'); - if (draft) { - await github.rest.repos.updateRelease({ - owner: context.repo.owner, - repo: context.repo.repo, - release_id: draft.id, - draft: false, - }); - } - ``` - -- **Dependencies:** Task 1.2 (uses the same `.pkg` script) -- **Acceptance criteria:** - - Pushing a `v*` tag triggers the full release pipeline (skipping bump-version) - - Manual dispatch with "patch" bumps version, commits, tags, and triggers release - - Manual dispatch with exact version (e.g., "0.3.0") sets that version exactly - - Draft release is created, assets uploaded, then published - - Both macOS and Windows installers appear as release assets - -## 6. Data Model Changes - -No data model changes required. This feature only adds CI/CD configuration files. - -## 7. API Changes - -No API changes required. This feature only adds CI/CD configuration files. - -## 8. 
Dependencies & Risks - -### External Dependencies (GitHub Actions) -| Action | Version | Purpose | -|--------|---------|---------| -| `actions/checkout` | v4 | Repository checkout | -| `actions/setup-node` | v4 | Node.js setup with caching | -| `dtolnay/rust-toolchain` | stable | Rust toolchain installation | -| `Swatinem/rust-cache` | v2 | Rust build caching | -| `tauri-apps/tauri-action` | v0 | Tauri application building | -| `actions/upload-artifact` | v4 | CI artifact upload | -| `softprops/action-gh-release` | v2 | GitHub Release creation/asset upload | -| `actions/github-script` | v7 | Release un-drafting | - -### Risks & Mitigations -| Risk | Mitigation | -|------|-----------| -| macOS runner architecture mismatch (x86 vs ARM) | Explicitly specify `--target aarch64-apple-darwin`; GitHub's `macos-latest` runs on ARM (M-series) | -| Opus installation failure on macOS | Homebrew is pre-installed on GitHub macOS runners; `brew install opus` is reliable | -| Version bump race condition on concurrent dispatches | Low risk — manual dispatch is infrequent; sequential `bump-version` job with git push will fail on conflict | -| `tauri-apps/tauri-action@v0` breaking changes | Pin to `v0` as per PRD; monitor for Tauri 2 compatibility | -| Release asset upload from matrix jobs to same release | `softprops/action-gh-release` supports concurrent uploads to the same release | - -### Assumptions -- GitHub-hosted runners have sufficient resources for Tauri builds -- `macos-latest` runners are ARM-based (Apple Silicon), making `aarch64-apple-darwin` the native target -- The `GITHUB_TOKEN` has sufficient permissions for release creation when `permissions: contents: write` is set - -## 9. 
Testing Strategy - -Since this feature is entirely CI/CD configuration (no application code changes), testing is manual/observational: - -- **CI Workflow verification:** - - Open a PR against `main` → verify both macOS and Windows jobs run and pass - - Push to `main` → verify CI triggers - - Introduce a deliberate build error in a PR → verify the pipeline fails - -- **Release Workflow verification:** - - Push a `v0.1.1` tag → verify full release pipeline runs (skip bump, build, publish) - - Use `workflow_dispatch` with `patch` → verify version bumps from `0.1.0` to `0.1.1`, commits, tags, builds, and publishes - - Use `workflow_dispatch` with exact version `0.2.0` → verify exact version is set - - Verify all release assets are downloadable - - Verify `.pkg` installer is valid on macOS - - Verify MSI installer is valid on Windows - -- **Edge cases:** - - Version bump with dirty working tree (should not happen on `main`) - - Multiple concurrent workflow runs (rely on sequential job ordering) - -## 10. 
Requirement Traceability - -### Functional Requirements - -| PRD Ref | Requirement Summary | Task(s) | Notes | -|---------|-------------------|---------|-------| -| 4 #1 | CI triggers on push to `main` and PRs targeting `main` | 1.1 | `on.push.branches` + `on.pull_request.branches` | -| 4 #2 | CI runs on `windows-latest` and `macos-latest` | 1.1 | Matrix strategy | -| 4 #3 | Install Node 24 and Rust stable with caching | 1.1 | `setup-node` + `rust-toolchain` + `rust-cache` | -| 4 #4 | Install platform-specific deps (Opus on macOS) | 1.1 | Conditional `brew install opus` step | -| 4 #5 | Install frontend deps via `npm ci` | 1.1 | Explicit `npm ci` step | -| 4 #6 | Build frontend (`npm run build`) | 1.1 | Explicit `npm run build` step | -| 4 #7 | Build Tauri app with platform-specific bundles + `.pkg` on macOS | 1.1, 1.2 | `tauri-action` + `create-macos-pkg.sh` | -| 4 #8 | Pipeline fails on build step failure | 1.1 | Default GitHub Actions behavior (fail-fast) | -| 4 #9 | Release triggers on `v*` tags | 2.1 | `on.push.tags` | -| 4 #10 | Release supports `workflow_dispatch` with version bump input | 2.1 | `workflow_dispatch.inputs.version` | -| 4 #11 | Version bump updates 3 files, commits, and creates tag | 2.1 | `bump-version` job | -| 4 #12 | Release builds on both platforms in parallel | 2.1 | Matrix in `build-tauri` job | -| 4 #13 | Release uses same setup as CI | 2.1 | Identical setup steps | -| 4 #14 | Release builds `.app`/`.pkg` on macOS, MSI on Windows | 2.1, 1.2 | Same build + pkg script | -| 4 #15 | Release creates draft GitHub Release | 2.1 | `create-release` job with `draft: true` | -| 4 #16 | Upload installers as release assets | 2.1 | `softprops/action-gh-release` in `build-tauri` | -| 4 #17 | Publish (un-draft) after all builds succeed | 2.1 | `publish-release` job | -| 4 #18 | Cache npm and Rust dependencies | 1.1, 2.1 | `setup-node` cache + `rust-cache` | -| 4 #19 | CI artifacts retained 7 days | 1.1 | `retention-days: 7` | -| 4 #20 | `GITHUB_TOKEN` 
passed to build and release steps | 1.1, 2.1 | `env.GITHUB_TOKEN` on relevant steps | - -### User Stories - -| PRD Ref | User Story Summary | Implementing Tasks | Fully Covered? | -|---------|-------------------|-------------------|----------------| -| US-1 | Auto-build PRs on both platforms | 1.1, 1.2 | Yes | -| US-2 | Manual release trigger with version bump | 2.1 | Yes | -| US-3 | Tag-based release trigger | 2.1 | Yes | -| US-4 | Download installers from GitHub Releases | 2.1 | Yes | - -### Success Metrics - -| Metric | How the Plan Addresses It | -|--------|--------------------------| -| Green/red build status on push/PR | CI workflow (Task 1.1) runs on every push and PR to `main` | -| Complete platform installers via tag or dispatch | Release workflow (Task 2.1) supports both triggers | -| Downloadable installers from GitHub Releases | Release workflow uploads assets and publishes the release | diff --git a/docs/IMPL-package-manager-distribution.md b/docs/IMPL-package-manager-distribution.md deleted file mode 100644 index f8037fd..0000000 --- a/docs/IMPL-package-manager-distribution.md +++ /dev/null @@ -1,380 +0,0 @@ -# Implementation Plan: Package Manager Distribution (Homebrew & Chocolatey) - -> Generated from: `docs/PRD-package-manager-distribution.md` -> Date: 2026-03-14 - -## 1. Overview - -This plan adds Homebrew (macOS) and Chocolatey (Windows) package manager support for Pisum Langue. The implementation creates the necessary package definitions, a Homebrew template cask, and wires everything into the existing `release.yml` workflow so that every release automatically publishes updated packages. - -The reference implementation at `/Users/mschnecke/workspace/github-global-hotkey` provides proven patterns for all components. The current `release.yml` already has `update-homebrew` and `update-chocolatey` jobs stubbed out — this plan fills in the missing package files and fixes issues in the workflow. - -## 2. 
Architecture & Design - -### Package Distribution Flow - -``` -release.yml (workflow_dispatch) - │ - ├─ bump-version ──► Updates package.json, Cargo.toml, tauri.conf.json, pisum-langue.nuspec - │ - ├─ create-release ──► Creates draft GitHub Release - │ - ├─ build-tauri ──► Builds .pkg (macOS) and .msi (Windows), uploads to release - │ - ├─ publish-release ──► Marks release as non-draft - │ - ├─ update-homebrew ──► repository-dispatch → mschnecke/homebrew-pisum-langue - │ └─ Tap workflow: downloads .pkg, computes SHA256, updates cask, commits - │ - └─ update-chocolatey ──► Downloads .msi, computes SHA256, updates install script + nuspec, - packs .nupkg, pushes to MyGet -``` - -### Repository Structure - -``` -Main repo (mschnecke/langue): - packages/ - ├── homebrew/ - │ └── pisum-langue.rb ← Template cask (placeholder version/SHA) - └── chocolatey/ - ├── pisum-langue.nuspec ← Package metadata - └── tools/ - ├── chocolateyinstall.ps1 ← Install script - └── chocolateyuninstall.ps1 ← Uninstall script - -Tap repo (mschnecke/homebrew-pisum-langue): - Casks/ - └── pisum-langue.rb ← Live cask (auto-updated by workflow) - .github/workflows/ - └── update-cask.yml ← Receives dispatch, updates cask -``` - -## 3. Phases & Milestones - -### Phase 1: Chocolatey Package Files -**Goal:** Create the Chocolatey package definition files in the main repo. -**Deliverable:** `packages/chocolatey/` directory with nuspec, install, and uninstall scripts ready for CI consumption. - -### Phase 2: Homebrew Cask Template -**Goal:** Create the template Homebrew cask in the main repo. -**Deliverable:** `packages/homebrew/pisum-langue.rb` with placeholder values. - -### Phase 3: Release Workflow Fixes -**Goal:** Fix the existing `release.yml` to correctly reference files and update the nuspec during version bumps. -**Deliverable:** Working `bump-version` and `update-homebrew` jobs with correct paths and repository name. 
- -### Phase 4: Homebrew Tap Repository -**Goal:** Set up the `mschnecke/homebrew-pisum-langue` repository with cask and update workflow. -**Deliverable:** Tap repo with `Casks/pisum-langue.rb` and `update-cask.yml` workflow. - -### Phase 5: Release Body Update -**Goal:** Fix the release body installation instructions to match the PRD's prescribed commands. -**Deliverable:** Updated release body template in `release.yml` with correct Homebrew and Chocolatey commands. - -## 4. Files Overview - -### Files to Create -| File Path | Purpose | -|-----------|---------| -| `packages/chocolatey/pisum-langue.nuspec` | Chocolatey package metadata | -| `packages/chocolatey/tools/chocolateyinstall.ps1` | Downloads and installs MSI from GitHub Releases | -| `packages/chocolatey/tools/chocolateyuninstall.ps1` | Uninstalls MSI via registry lookup | -| `packages/homebrew/pisum-langue.rb` | Template Homebrew cask with placeholder version/SHA256 | - -### Files in Tap Repository (mschnecke/homebrew-pisum-langue) -| File Path | Purpose | -|-----------|---------| -| `Casks/pisum-langue.rb` | Live Homebrew cask formula | -| `.github/workflows/update-cask.yml` | Receives dispatch, updates cask with new version | - -### Files to Modify -| File Path | What Changes | -|-----------|-------------| -| `.github/workflows/release.yml` | Fix homebrew repo name, add nuspec to bump-version, update release body | - -## 5. Task Breakdown - -### Phase 1: Chocolatey Package Files - -#### Task 1.1: Create `packages/chocolatey/pisum-langue.nuspec` - -- **Files to create:** - - `packages/chocolatey/pisum-langue.nuspec` — Package metadata adapted from reference repo -- **Implementation details:** - - Adapt from `/Users/mschnecke/workspace/github-global-hotkey/packages/chocolatey/global-hotkey.nuspec` - - Key fields: - ```xml - pisum-langue - 0.1.7 - Pisum Langue - Pisum Langue Team - mschnecke - https://github.com/mschnecke/langue - AI-driven transcription utility. 
Hold a hotkey to record speech, release to transcribe and paste. - transcription dictation speech-to-text ai hotkey - https://github.com/mschnecke/langue/releases - https://github.com/mschnecke/langue/tree/main/packages/chocolatey - ``` -- **Dependencies:** None -- **Acceptance criteria:** Valid nuspec XML that `choco pack` can parse - -#### Task 1.2: Create `packages/chocolatey/tools/chocolateyinstall.ps1` - -- **Files to create:** - - `packages/chocolatey/tools/chocolateyinstall.ps1` — Install script -- **Implementation details:** - - Adapt from reference repo's `chocolateyinstall.ps1` - - Key values: - ```powershell - $packageName = 'pisum-langue' - $packageArgs = @{ - packageName = $packageName - fileType = 'msi' - url64bit = 'https://github.com/mschnecke/langue/releases/download/v0.1.7/Pisum.Langue_0.1.7_x64_en-US.msi' - softwareName = 'Pisum Langue*' - checksum64 = 'REPLACE_WITH_ACTUAL_CHECKSUM' - checksumType64 = 'sha256' - silentArgs = '/qn /norestart' - validExitCodes = @(0, 3010, 1641) - } - Install-ChocolateyPackage @packageArgs - ``` - - The `url64bit` and `checksum64` are placeholders — they get replaced by the `update-chocolatey` job in CI -- **Dependencies:** None -- **Acceptance criteria:** Script follows `Install-ChocolateyPackage` pattern with SHA256 checksum verification - -#### Task 1.3: Create `packages/chocolatey/tools/chocolateyuninstall.ps1` - -- **Files to create:** - - `packages/chocolatey/tools/chocolateyuninstall.ps1` — Uninstall script -- **Implementation details:** - - Adapt from reference repo's `chocolateyuninstall.ps1` - - Searches registry at: - - `HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\*` - - `HKLM:\Software\Wow6432Node\Microsoft\Windows\CurrentVersion\Uninstall\*` - - Matches `softwareName` = `'Pisum Langue*'` - - Uses `Uninstall-ChocolateyPackage` with the MSI product code from registry - - Silent args: `/qn /norestart` -- **Dependencies:** None -- **Acceptance criteria:** Script correctly looks up and removes 
the MSI via registry ProductCode - -### Phase 2: Homebrew Cask Template - -#### Task 2.1: Create `packages/homebrew/pisum-langue.rb` - -- **Files to create:** - - `packages/homebrew/pisum-langue.rb` — Template cask formula -- **Implementation details:** - - Adapt from reference repo's `packages/homebrew/global-hotkey.rb` - - aarch64 only (no Intel/universal — per PRD non-goals) - - Key structure: - ```ruby - cask "pisum-langue" do - version "0.1.7" - sha256 "REPLACE_WITH_ACTUAL_CHECKSUM" - - url "https://github.com/mschnecke/langue/releases/download/v#{version}/Pisum.Langue_#{version}_aarch64.pkg" - name "Pisum Langue" - desc "AI-driven transcription utility" - homepage "https://github.com/mschnecke/langue" - - depends_on macos: ">= :catalina" - - pkg "Pisum.Langue_#{version}_aarch64.pkg" - - uninstall pkgutil: "com.pisum.langue.app" - - zap trash: [ - "~/Library/Application Support/com.pisum.langue", - "~/Library/Caches/com.pisum.langue", - "~/Library/Preferences/com.pisum.langue.plist", - "~/Library/LaunchAgents/com.pisum.langue.plist", - ] - end - ``` - - Version and SHA256 are placeholders in this template — the tap repo holds the live version -- **Dependencies:** None -- **Acceptance criteria:** Valid Ruby cask syntax with `zap` stanza covering all four paths from PRD requirement 4 - -### Phase 3: Release Workflow Fixes - -#### Task 3.1: Fix Homebrew tap repository name in `release.yml` - -- **Files to modify:** - - `.github/workflows/release.yml` — Line 283 -- **Implementation details:** - - Change `repository: mschnecke/homebrew-lange` → `repository: mschnecke/homebrew-pisum-langue` - - This is a typo in the existing workflow -- **Dependencies:** None -- **Acceptance criteria:** Repository dispatch targets the correct tap repo - -#### Task 3.2: Add nuspec version update to `bump-version` job - -- **Files to modify:** - - `.github/workflows/release.yml` — `bump-version` job, after the tauri.conf.json update (around line 85) -- **Implementation details:** - - 
Add sed command to update the nuspec version: - ```bash - # Update Chocolatey nuspec version - sed -i "s/<version>.*<\/version>/<version>$NEW_VERSION<\/version>/" packages/chocolatey/pisum-langue.nuspec - ``` - - Add `packages/chocolatey/pisum-langue.nuspec` to the `git add` command on line 93 -- **Dependencies:** Task 1.1 (nuspec must exist) -- **Acceptance criteria:** Version bump updates nuspec alongside package.json, Cargo.toml, and tauri.conf.json - -#### Task 3.3: Update release body installation instructions - -- **Files to modify:** - - `.github/workflows/release.yml` — Release body template (lines 126–163) -- **Implementation details:** - - Update Homebrew section to match PRD requirement 6: - ```markdown - **macOS (Homebrew) - Recommended:** - ```bash - brew tap mschnecke/pisum-langue - brew install --cask pisum-langue - ``` - ``` - - Update Chocolatey section to match PRD requirement 11: - ```markdown - **Windows (Chocolatey):** - ```powershell - choco install pisum-langue --source https://www.myget.org/F/mschnecke/api/v3/index.json - ``` - ``` - - Current body uses `brew tap mschnecke/langue` and `brew install --cask langue` — these need to match the actual tap name - - Current body uses `choco source add` pattern — PRD specifies v3 endpoint with `--source` flag inline -- **Dependencies:** None -- **Acceptance criteria:** Release body shows correct install commands matching PRD requirements 6 and 11 - -### Phase 4: Homebrew Tap Repository - -#### Task 4.1: Create `Casks/pisum-langue.rb` in tap repository - -- **Files to create (in tap repo):** - - `Casks/pisum-langue.rb` — Live cask formula (copy of template from Task 2.1) -- **Implementation details:** - - Same content as `packages/homebrew/pisum-langue.rb` from the main repo - - This is the file Homebrew will read when users run `brew install --cask pisum-langue` -- **Dependencies:** None -- **Acceptance criteria:** `brew tap mschnecke/pisum-langue` succeeds and shows the cask - -#### Task 4.2: Create `update-cask.yml`
workflow in tap repository - -- **Files to create (in tap repo):** - - `.github/workflows/update-cask.yml` — Dispatch event handler -- **Implementation details:** - - Triggered by `repository_dispatch` with `event-type: update-cask` - - Receives version from `client-payload.version` - - Steps: - 1. Checkout the tap repo - 2. Download the `.pkg` from GitHub Releases: - ```bash - curl -L -o pisum-langue.pkg \ - "https://github.com/mschnecke/langue/releases/download/v${VERSION}/Pisum.Langue_${VERSION}_aarch64.pkg" - ``` - 3. Compute SHA256: - ```bash - SHA256=$(shasum -a 256 pisum-langue.pkg | awk '{print $1}') - ``` - 4. Update `Casks/pisum-langue.rb` with new version and SHA256 using sed - 5. Commit and push the change - - Reference: follow the same pattern from the `global-hotkey` tap repo workflow -- **Dependencies:** Task 4.1 -- **Acceptance criteria:** Dispatch event triggers workflow that successfully updates the cask file and commits - -### Phase 5: Release Body Update - -> **Note:** This phase is already covered by Task 3.3. It's listed as a separate phase for clarity since the PRD's open question #3 explicitly calls it out, but the actual work is a single edit within Task 3.3. - -## 6. Data Model Changes - -No data model changes are required. This feature is entirely CI/CD and packaging. - -## 7. API Changes - -No API changes are required. This feature does not affect the Tauri IPC layer or any runtime behavior. - -## 8. 
Dependencies & Risks - -### External Dependencies -| Dependency | Purpose | Risk | -|-----------|---------|------| -| MyGet.org | Chocolatey package hosting | Service availability; API key must be configured as `MYGET_API_KEY` secret | -| `peter-evans/repository-dispatch@v2` | Triggers tap repo workflow | GitHub Action must remain available | -| `HOMEBREW_TAP_TOKEN` secret | PAT with `actions:write` scope on tap repo | Must be created and kept valid | - -### Risks & Mitigations -| Risk | Impact | Mitigation | -|------|--------|-----------| -| MyGet push fails during release | Chocolatey package not updated | Workflow failure notification; can manually re-run job | -| Homebrew dispatch doesn't trigger | Cask not updated | Check PAT permissions; workflow logs on tap repo | -| SHA256 mismatch if assets re-uploaded | Install fails for users | Ensure `publish-release` completes before package jobs download artifacts | -| Tap repo naming conflict | `brew tap` fails | Verify `homebrew-pisum-langue` follows Homebrew naming convention | - -### Assumptions -- The `mschnecke/homebrew-pisum-langue` repository already exists -- MyGet account and API key are already configured (per PRD open question #2) -- `HOMEBREW_TAP_TOKEN` PAT will be created with appropriate scope - -## 9. 
Testing Strategy - -### Manual Verification (Post-First-Release) -- Trigger a release via `workflow_dispatch` and verify: - - `bump-version` updates `pisum-langue.nuspec` alongside other version files - - `update-homebrew` dispatches to the correct tap repo - - `update-chocolatey` successfully packs and pushes to MyGet - - Tap repo's `update-cask.yml` runs, updates cask, and commits - -### End-to-End Install Tests -- **macOS:** `brew tap mschnecke/pisum-langue && brew install --cask pisum-langue` installs successfully -- **Windows:** `choco install pisum-langue --source https://www.myget.org/F/mschnecke/api/v3/index.json` installs successfully -- **Upgrade:** After a second release, `brew upgrade pisum-langue` and `choco upgrade pisum-langue` pull the new version - -### Pre-Release Checks -- `choco pack` in `packages/chocolatey/` produces a valid `.nupkg` locally -- `pisum-langue.rb` cask syntax is valid (can lint with `brew audit --cask`) - -## 10. Requirement Traceability - -### Functional Requirements - -| PRD Ref | Requirement Summary | Task(s) | Notes | -|---------|-------------------|---------|-------| -| #1 | Separate `homebrew-pisum-langue` tap repo | — | Already created | -| #2 | Cask formula at `Casks/pisum-langue.rb` installing `.pkg` | 4.1 | | -| #3 | Cask targets aarch64 only, `depends_on macos: ">= :catalina"` | 2.1, 4.1 | | -| #4 | Cask includes `zap` stanza for app data cleanup | 2.1, 4.1 | All four paths included | -| #5 | Template cask at `packages/homebrew/pisum-langue.rb` | 2.1 | | -| #6 | Install via `brew tap` + `brew install --cask` | 4.1, 3.3 | Instructions in release body | -| #7 | Chocolatey nuspec at `packages/chocolatey/pisum-langue.nuspec` | 1.1 | | -| #8 | Install script with `Install-ChocolateyPackage` + SHA256 | 1.2 | | -| #9 | Uninstall script via registry lookup | 1.3 | | -| #10 | Publish to MyGet NuGet feed | Existing workflow | Already in `release.yml` lines 330-337 | -| #11 | Install via `choco install pisum-langue --source ...` 
| 1.1, 3.3 | Instructions in release body | -| #12 | `update-homebrew` job triggers dispatch to tap repo | 3.1 | Fix repo name typo | -| #13 | Tap workflow receives dispatch, updates cask | 4.2 | | -| #14 | `update-chocolatey` job downloads MSI, updates files, packs, pushes | Existing workflow | Already in `release.yml` lines 287-337 | -| #15 | `bump-version` updates nuspec version | 3.2 | | -| #16 | `HOMEBREW_TAP_TOKEN` secret configured | — (manual) | Needs `actions:write` on tap repo | -| #17 | `MYGET_API_KEY` secret configured | Existing | Already referenced in workflow | - -### User Stories - -| PRD Ref | User Story Summary | Implementing Tasks | Fully Covered? | -|---------|-------------------|-------------------|----------------| -| US-1 | macOS user installs via Homebrew | 2.1, 4.1, 4.2 | Yes | -| US-2 | Windows user installs via Chocolatey | 1.1, 1.2, 1.3 | Yes | -| US-3 | Maintainer: auto-publish on release | 3.1, 3.2, 4.2 | Yes | -| US-4 | User upgrades via package manager | 4.2 (Homebrew), existing workflow (Chocolatey) | Yes | - -### Success Metrics - -| Metric | How the Plan Addresses It | -|--------|--------------------------| -| `brew install --cask pisum-langue` works | Tap repo with cask (Phase 4) + template (Phase 2) | -| `choco install pisum-langue` works | Chocolatey package files (Phase 1) + existing workflow | -| Release auto-updates both packages | Workflow fixes (Phase 3) + tap workflow (Phase 4) | -| `brew upgrade` / `choco upgrade` works | Cask/nuspec version updates on each release | diff --git a/docs/IMPL-start-stop-recording.md b/docs/IMPL-start-stop-recording.md deleted file mode 100644 index 00ee87b..0000000 --- a/docs/IMPL-start-stop-recording.md +++ /dev/null @@ -1,290 +0,0 @@ -# Implementation Plan: Start and Stop Recording Mode - -> Generated from: `docs/PRD-start-stop-recording.md` -> Date: 2026-03-13 - -## 1. Overview - -This feature adds a **toggle recording mode** alongside the existing hold-to-record (push-to-talk) mode. 
In toggle mode, the user presses the hotkey once to start recording and presses again to stop and transcribe. The user selects their preferred mode in settings. The same hotkey is used for both modes — only the press/release behavior changes. - -The feature also makes the maximum recording duration configurable (resolved from the PRD's open question). - -The change touches three layers: -1. **Config schema** — new `RecordingMode` enum and `maxRecordingDuration` field -2. **Hotkey manager** — branching press/release logic based on the active mode -3. **Frontend settings UI** — recording mode selector and max duration input - -No new IPC commands are needed. The existing `save_settings` / `load_settings` flow carries the new fields automatically through `AppSettings`. - -## 2. Architecture & Design - -### State Machine - -**Hold-to-record (existing):** -``` -Idle ──[press]──► Recording ──[release]──► Transcribing ──► Idle -``` - -**Toggle mode (new):** -``` -Idle ──[press]──► Recording ──[press]──► Transcribing ──► Idle - (release events ignored) -``` - -Both modes share the same guards (no recording while transcribing, min duration check, max duration auto-stop) and the same transcription pipeline (`process_and_transcribe`). - -### Data Flow for Mode Selection - -``` -User selects mode in GeneralConfig.svelte - → onUpdate({ ...settings, recordingMode: 'toggle' }) - → persistSettings() → IPC save_settings - → apply_settings() → SETTINGS RwLock updated - → hotkey manager reads SETTINGS.recording_mode on next press/release event -``` - -No hotkey re-registration is needed when the mode changes — only the event handling logic branches. - -## 3. 
Phases & Milestones - -### Phase 1: Backend — Config & Hotkey Logic -**Goal:** Recording mode and configurable max duration work end-to-end via the Rust backend -**Deliverable:** Toggle mode functional when `recording_mode` is set to `toggle` in the JSON config file manually - -### Phase 2: Frontend — Settings UI -**Goal:** User can select recording mode and configure max duration from the settings window -**Deliverable:** Full feature usable from the UI with dynamic labels - -## 4. Files Overview - -### Files to Create - -_No new files required._ - -### Files to Modify - -| File Path | What Changes | -|-----------|-------------| -| `src-tauri/src/config/schema.rs` | Add `RecordingMode` enum, `recording_mode` and `max_recording_duration_secs` fields to `AppSettings` | -| `src-tauri/src/hotkey/manager.rs` | Branch press/release handlers based on recording mode; use configurable max duration | -| `src/lib/types.ts` | Add `recordingMode` and `maxRecordingDurationSecs` to `AppSettings` interface | -| `src/components/GeneralConfig.svelte` | Add recording mode selector and max duration input | -| `src/components/HotkeyConfig.svelte` | Dynamic description text based on recording mode | - -## 5. 
Task Breakdown - -### Phase 1: Backend — Config & Hotkey Logic - -#### Task 1.1: Add RecordingMode to config schema - -- **Files to modify:** - - `src-tauri/src/config/schema.rs` — Add enum and fields -- **Implementation details:** - - Add `RecordingMode` enum: - ```rust - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - #[serde(rename_all = "camelCase")] - pub enum RecordingMode { - HoldToRecord, - Toggle, - } - ``` - - Add fields to `AppSettings`: - ```rust - #[serde(default = "default_recording_mode")] - pub recording_mode: RecordingMode, - - #[serde(default = "default_max_recording_duration_secs")] - pub max_recording_duration_secs: u64, - ``` - - Add default functions: - ```rust - fn default_recording_mode() -> RecordingMode { - RecordingMode::HoldToRecord - } - - fn default_max_recording_duration_secs() -> u64 { - 600 // 10 minutes - } - ``` - - Update `Default for AppSettings` impl to include both new fields -- **Dependencies:** None -- **Acceptance criteria:** Existing config files without the new fields deserialize correctly using defaults. New fields round-trip through save/load. - -#### Task 1.2: Update hotkey manager for toggle mode - -- **Files to modify:** - - `src-tauri/src/hotkey/manager.rs` — Modify press/release handlers, use configurable duration -- **Implementation details:** - - Remove the `MAX_RECORDING_DURATION` constant. 
Instead, read from `SETTINGS`: - ```rust - let max_secs = crate::SETTINGS - .read() - .map(|s| s.max_recording_duration_secs) - .unwrap_or(600); - let max_duration = Duration::from_secs(max_secs); - ``` - - Read recording mode from `SETTINGS` in the event loop dispatch: - ```rust - let mode = crate::SETTINGS - .read() - .map(|s| s.recording_mode.clone()) - .unwrap_or(crate::config::schema::RecordingMode::HoldToRecord); - ``` - - **For `HotKeyState::Pressed`:** Branch on mode: - - `HoldToRecord` → call `handle_hotkey_press()` (existing behavior) - - `Toggle` → check if currently recording: - - Not recording → call `handle_hotkey_press()` (start recording) - - Already recording → call `stop_and_transcribe()` (stop and process) - - **For `HotKeyState::Released`:** Branch on mode: - - `HoldToRecord` → call `handle_hotkey_release()` (existing behavior) - - `Toggle` → do nothing (ignore release) - - Extract the stop-and-transcribe logic from `handle_hotkey_release()` into a new function `stop_and_transcribe()` that both `handle_hotkey_release()` and the toggle-press-while-recording path call. This avoids duplicating the pipeline logic. - - Update `handle_hotkey_press()` to pass `max_duration` to the timer thread instead of using the removed constant. - - In toggle mode, the "already recording" guard in `handle_hotkey_press()` must be bypassed — this is handled by checking mode *before* calling `handle_hotkey_press()` in the event loop. 
-- **Dependencies:** Task 1.1 -- **Acceptance criteria:** - - With `recordingMode: "holdToRecord"` in config: behavior identical to current (press starts, release stops) - - With `recordingMode: "toggle"` in config: first press starts, second press stops and transcribes, release events are ignored - - Max duration auto-stop uses the configurable value - - Transcription-in-progress guard works in both modes - - Min duration check works in both modes - -### Phase 2: Frontend — Settings UI - -#### Task 2.1: Update TypeScript types - -- **Files to modify:** - - `src/lib/types.ts` — Add new fields -- **Implementation details:** - ```typescript - export interface AppSettings { - // ... existing fields ... - recordingMode: 'holdToRecord' | 'toggle'; - maxRecordingDurationSecs: number; - } - ``` -- **Dependencies:** Task 1.1 -- **Acceptance criteria:** Types match the Rust schema's JSON serialization - -#### Task 2.2: Add recording mode and max duration to GeneralConfig - -- **Files to modify:** - - `src/components/GeneralConfig.svelte` — Add mode selector and duration input -- **Implementation details:** - - Add a "Recording Mode" section after the existing toggles, using two radio-style buttons (matching the segmented control pattern used in `AudioConfig.svelte` for Opus/WAV): - - **Hold to Record** — description: "Hold the hotkey to record. Release to transcribe and paste." - - **Toggle (Start/Stop)** — description: "Press the hotkey to start recording. Press again to transcribe and paste." - - Add a "Max Recording Duration" number input below the mode selector: - - Label: "Maximum recording duration (seconds)" - - Description: "Recording auto-stops after this duration." 
- - Min: 10, Max: 3600 (1 hour), default: 600 - - On change: `onUpdate({ ...settings, maxRecordingDurationSecs: newValue })` - - Mode change handler: `onUpdate({ ...settings, recordingMode: newMode })` -- **Dependencies:** Task 2.1 -- **Acceptance criteria:** - - Mode selector displays with correct initial value from settings - - Changing mode persists immediately (no restart needed) - - Duration input validates min/max bounds - - Both settings survive app restart - -#### Task 2.3: Dynamic hotkey description text - -- **Files to modify:** - - `src/components/HotkeyConfig.svelte` — Dynamic description based on mode -- **Implementation details:** - - The static text on line 37 (`"Hold this key combination to record, release to transcribe and paste."`) becomes dynamic: - ```svelte -

- <p> - {#if settings.recordingMode === 'toggle'} - Press this key combination to start recording. Press again to transcribe and paste. - {:else} - Hold this key combination to record, release to transcribe and paste. - {/if} - </p> -

- ``` -- **Dependencies:** Task 2.1 -- **Acceptance criteria:** Description text updates when recording mode changes in settings - -## 6. Data Model Changes - -No database changes. Two new fields added to the `AppSettings` JSON config file (`~/.pisum-langue.json`): - -```json -{ - "recordingMode": "holdToRecord", - "maxRecordingDurationSecs": 600 -} -``` - -Both fields use `#[serde(default)]` so existing config files without them will deserialize with defaults (`holdToRecord`, `600`). No migration needed. - -## 7. API Changes - -No new IPC commands. The existing `save_settings` and `load_settings` commands automatically carry the new fields through the `AppSettings` struct. - -## 8. Dependencies & Risks - -- **No new crate dependencies.** The toggle logic is pure state management using existing `AtomicBool` and `Mutex` primitives. -- **Risk: Key repeat events.** On some OS/keyboard configurations, holding a key fires repeated `Pressed` events. In toggle mode, this could rapidly toggle recording on/off. **Mitigation:** Add a debounce guard — ignore `Pressed` events within 200ms of the last `Pressed` event in toggle mode. -- **Risk: Max duration timer race condition.** The existing timer thread calls `handle_hotkey_release()`. After refactoring, it should call `stop_and_transcribe()` directly to work correctly in both modes. This is addressed in Task 1.2. - -## 9. Testing Strategy - -### Manual Test Scenarios - -1. **Hold-to-record default:** Fresh install → hold hotkey → release → verify transcription and paste (regression) -2. **Toggle mode basic:** Switch to toggle → press hotkey → tray icon changes → press again → transcription + paste -3. **Toggle ignores release:** In toggle mode, press and release quickly → recording should continue (not stop on release) -4. **Min duration in toggle:** In toggle mode, press → press again immediately (<50ms) → recording discarded -5. 
**Max duration auto-stop:** Set max duration to 10s → start toggle recording → wait 10s → auto-stops and transcribes -6. **Transcription guard:** Start toggle recording → stop (transcription begins) → press again immediately → should show "transcription in progress" notification -7. **Mode switch persistence:** Set to toggle → close app → reopen → verify mode is still toggle -8. **Key repeat debounce:** In toggle mode, hold the hotkey down → should not rapidly toggle (start once, ignore repeats) -9. **Configurable duration:** Change max duration to 30s → verify both modes respect the new limit - -### Edge Cases - -- Switch mode while recording is active (should not be possible if settings window isn't reachable during recording, but verify gracefully) -- Config file missing new fields (defaults applied correctly) -- Max duration set to minimum (10s) — recording auto-stops correctly - -## 10. Requirement Traceability - -### Functional Requirements - -| PRD Ref | Requirement Summary | Task(s) | Notes | -|---------|-------------------|---------|-------| -| 4 #1 | Recording Mode setting with two options | 1.1, 2.2 | Enum in config + UI selector | -| 4 #2 | Default to Hold to Record | 1.1 | `default_recording_mode()` returns `HoldToRecord` | -| 4 #3 | Persisted across restarts | 1.1 | Serde JSON persistence via existing `save_settings` | -| 4 #4 | Immediate effect, no restart | 1.2 | Mode read from `SETTINGS` on each event | -| 4 #5 | Toggle press starts recording | 1.2 | Toggle branch in event loop | -| 4 #6 | Toggle second press stops and transcribes | 1.2 | `stop_and_transcribe()` called on press-while-recording | -| 4 #7 | Same hotkey for both modes | 1.2 | Single hotkey, mode-based branching | -| 4 #8 | Ignore release in toggle mode | 1.2 | Toggle branch returns early on `Released` | -| 4 #9 | Max duration applies in toggle mode | 1.2 | Timer uses configurable `max_recording_duration_secs` | -| 4 #10 | Min duration check in toggle mode | 1.2 | 
`stop_and_transcribe()` includes min duration guard | -| 4 #11 | Prevent recording during transcription | 1.2 | `IS_TRANSCRIBING` guard unchanged | -| 4 #12 | Tray icon changes on start | — | Existing `tray::set_recording_state(true)` — no changes needed | -| 4 #13 | Tray icon reverts on stop | — | Existing `tray::set_recording_state(false)` — no changes needed | -| 4 Hold #1 | Hold press starts recording | 1.2 | Existing behavior preserved in `HoldToRecord` branch | -| 4 Hold #2 | Hold release stops and transcribes | 1.2 | Existing behavior preserved in `HoldToRecord` branch | -| OQ (resolved) | Max duration configurable | 1.1, 1.2, 2.2 | New `maxRecordingDurationSecs` field + UI input | - -### User Stories - -| PRD Ref | User Story Summary | Implementing Tasks | Fully Covered? | -|---------|-------------------|-------------------|----------------| -| US-1 | Press once to start, again to stop | 1.2 | Yes | -| US-2 | Keep using hold-to-record | 1.1, 1.2 | Yes — default mode unchanged | -| US-3 | Change mode in settings | 2.2 | Yes | -| US-4 | Tray icon shows recording state | — | Yes — existing behavior, no changes needed | - -### Success Metrics - -| Metric | How the Plan Addresses It | -|--------|--------------------------| -| Functional correctness | Task 1.2 implements both modes with shared pipeline; testing strategy covers both | -| No regressions | Hold-to-record is default; existing logic preserved in its branch | -| Setting persistence | Serde default functions ensure backward compatibility; Task 2.2 tests persistence | diff --git a/docs/IMPL-transcription.md b/docs/IMPL-transcription.md deleted file mode 100644 index 9016452..0000000 --- a/docs/IMPL-transcription.md +++ /dev/null @@ -1,1143 +0,0 @@ -# Implementation Plan: AI-Driven Dictation - -> Generated from: `docs/PRD-transcription.md` -> Date: 2026-03-13 - -## 1. Overview - -Pisum Langue is a cross-platform desktop utility (Windows and macOS) that runs as a system tray / menu bar application. 
Users hold a global hotkey to record audio from the default microphone (push-to-talk). On release, the recorded audio is compressed (Opus in OGG container), sent to a configurable AI provider (Gemini by default) with a transcription system prompt, and the transcription result is copied to the clipboard and pasted at the current cursor position. Errors are surfaced via OS-native toast notifications. - -The project is greenfield — no code exists yet. The tech stack is **Tauri 2 (Rust backend) + Svelte 5 (TypeScript frontend)**, based on the proven architecture from the `github-global-hotkey` reference implementation. The Rust backend handles hotkey registration, audio recording/encoding, AI provider communication, clipboard management, and paste simulation. The Svelte 5 frontend provides a settings UI accessible from the system tray. Communication between frontend and backend uses Tauri's IPC command system with automatic JSON serialization via serde. - -### Key Architectural Decisions - -- **Tauri 2 over .NET/Avalonia**: Proven cross-platform approach from the reference repo, smaller binary size, native performance, built-in system tray support -- **Push-to-talk via hotkey hold/release**: Unlike the reference repo's toggle pattern, Pisum Langue uses hold-to-record with a maximum 10-minute duration -- **Gemini API**: Default provider (via API key, `gemini-2.5-flash-lite` model), behind a trait-based abstraction for swappability -- **Prompt presets (roles)**: Named presets with system prompts, selectable from the settings UI. Built-in defaults for common languages; user-created custom presets -- **Round-robin provider load balancing**: Distribute requests across configured providers, falling back on failure - -## 2. 
Architecture & Design - -### High-Level Component Diagram - -```text -┌─────────────────────────────────────────────────────────────┐ -│ Svelte 5 Frontend (UI) │ -│ ┌────────────┐ ┌────────────────┐ ┌───────────────────┐ │ -│ │ Settings │ │ Provider Config │ │ Hotkey Config │ │ -│ │ Page │ │ Panel │ │ Panel │ │ -│ └─────┬──────┘ └───────┬────────┘ └────────┬──────────┘ │ -└────────┼─────────────────┼─────────────────────┼─────────────┘ - │ Tauri IPC (JSON/serde) │ -┌────────┼─────────────────┼─────────────────────┼─────────────┐ -│ ▼ ▼ ▼ │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ Tauri Commands (lib.rs) │ │ -│ └──┬──────────┬──────────┬──────────┬──────────┬──────────┘ │ -│ │ │ │ │ │ │ -│ ▼ ▼ ▼ ▼ ▼ │ -│ ┌──────┐ ┌──────┐ ┌───────┐ ┌───────┐ ┌───────────┐ │ -│ │hotkey│ │audio │ │ ai │ │config │ │ output │ │ -│ │ │ │ │ │ │ │ │ │ │ │ -│ │mgr │ │record│ │gemini │ │schema │ │clipboard │ │ -│ │parse │ │encode│ │provid.│ │manage │ │paste sim │ │ -│ └──┬───┘ └──┬───┘ └───┬───┘ └───┬───┘ └─────┬─────┘ │ -│ │ │ │ │ │ │ -│ ▼ ▼ ▼ ▼ ▼ │ -│ ┌──────────────────────────────────────────────────────┐ │ -│ │ tray.rs (system tray, notifications, recording UI) │ │ -│ └──────────────────────────────────────────────────────┘ │ -│ ┌──────────────────────────────────────────────────────┐ │ -│ │ error.rs (centralized AppError enum via thiserror) │ │ -│ └──────────────────────────────────────────────────────┘ │ -│ Rust Backend (Tauri) │ -└──────────────────────────────────────────────────────────────┘ -``` - -### Data Flow: Push-to-Talk Recording Cycle - -```text -1. User presses hotkey - → global-hotkey event received in background thread - → Start AudioRecorderHandle on dedicated thread (cpal) - → Update tray icon to "recording" state - → Start 10-minute max-duration timer - -2. User releases hotkey (OR timer expires) - → Send Stop command to recorder thread - → Collect f32 samples, sample_rate, channels - -3. 
Encode audio - → Resample to Opus-compatible rate (rubato sinc interpolation) - → Encode to Opus frames (audiopus, 24kbps VoIP) - → Wrap in Ogg container (OpusHead + OpusTags + packets) - → Fallback: encode to WAV if Opus fails - -4. Transcribe - → Get system prompt from active preset - → Select provider via round-robin (skip failed providers) - → POST base64-encoded audio + active preset's system prompt to Gemini API (generateContent) - → Retry on 429/503 with exponential backoff (max 3 retries) - -5. Output - → Copy transcription text to clipboard (arboard) - → Simulate Ctrl+V / Cmd+V paste (enigo) - → Restore tray icon to idle state - → On error: show OS-native toast notification -``` - -### Hotkey Hold/Release Detection - -The reference repo uses a toggle pattern (press to start, press to stop). Pisum Langue requires **push-to-talk** (hold to record, release to stop). The `global-hotkey` crate fires events for both press and release. Implementation: - -```rust -// In hotkey event handler -match event.state() { - HotKeyState::Pressed => start_recording(), - HotKeyState::Released => stop_recording_and_transcribe(), -} -``` - -### Provider Round-Robin with Fallback - -```rust -pub struct ProviderPool { - providers: Vec<Box<dyn TranscriptionProvider>>, - current_index: AtomicUsize, -} - -impl ProviderPool { - pub async fn transcribe(&self, audio: &[u8], mime: &str, prompt: &str) -> Result<String, AppError> { - let len = self.providers.len(); - let start = self.current_index.fetch_add(1, Ordering::Relaxed) % len; - for i in 0..len { - let idx = (start + i) % len; - match self.providers[idx].transcribe(audio, mime, prompt).await { - Ok(text) => return Ok(text), - Err(e) => log::warn!("Provider {} failed: {}", idx, e), - } - } - Err(AppError::Transcription("All providers failed".into())) - } -} -``` - -## 3. Phases & Milestones - -### Phase 1: Project Scaffolding, Logging & System Tray - -**Goal:** Bootable Tauri 2 app running as a system tray/menu bar application with no main window.
File-based logging initialized early so all subsequent phases have logging from the start. -**Deliverable:** App starts minimized to tray, shows right-click menu with "Settings" and "Quit" options. Settings opens a hidden window. Structured logs written to `~/.pisum-langue/logs/`. - -### Phase 2: Global Hotkey & Audio Recording -**Goal:** Register a configurable global hotkey. Hold-to-record captures audio from the default microphone and encodes to Opus/OGG. -**Deliverable:** Press and hold hotkey → tray icon changes → release → Opus file produced (verifiable via debug log / temp file). - -### Phase 3: AI Provider Abstraction & Gemini -**Goal:** Trait-based transcription provider with Gemini as the default implementation. Round-robin load balancing. -**Deliverable:** Audio recorded in Phase 2 is sent to Gemini and returns transcription text. - -### Phase 4: Clipboard & Paste Output -**Goal:** Copy transcription to clipboard and simulate paste at cursor position. -**Deliverable:** End-to-end flow works: hold hotkey → speak → release → text appears at cursor. - -### Phase 5: Settings UI -**Goal:** Svelte 5 settings UI accessible from system tray for configuring hotkey, audio format, AI provider credentials, and prompt presets. Built-in presets for common languages; user-created custom presets. -**Deliverable:** All configuration options from PRD §4.4 are functional and persisted. Preset management (create, edit, delete) works. Active preset selectable from settings UI. - -### Phase 6: Error Handling, Notifications & Auto-Start - -**Goal:** OS-native toast notifications for all error conditions. Auto-start with OS. Recording duration limit. -**Deliverable:** Every failure in the pipeline surfaces a notification. App can auto-start on login. - -## 4. 
Files Overview - -### Files to Create - -| File Path | Purpose | -|-----------|---------| -| `src-tauri/src/main.rs` | Entry point, calls `lib::run()` | -| `src-tauri/src/lib.rs` | Tauri command handlers, app setup, plugin registration | -| `src-tauri/src/error.rs` | Centralized `AppError` enum via `thiserror` | -| `src-tauri/src/tray.rs` | System tray setup, icon management, notifications, recording indicator | -| `src-tauri/src/hotkey/mod.rs` | Module exports | -| `src-tauri/src/hotkey/manager.rs` | Hotkey registration, event loop, push-to-talk state machine | -| `src-tauri/src/hotkey/parse.rs` | Hotkey string parsing (modifiers + key code) | -| `src-tauri/src/hotkey/conflict.rs` | Hotkey conflict detection (app + system hotkeys) | -| `src-tauri/src/audio/mod.rs` | Module exports | -| `src-tauri/src/audio/recorder.rs` | Audio capture via `cpal` on dedicated thread | -| `src-tauri/src/audio/encoder.rs` | Opus/OGG encoding with sinc resampling, WAV fallback | -| `src-tauri/src/ai/mod.rs` | Module exports | -| `src-tauri/src/ai/provider.rs` | `TranscriptionProvider` trait definition | -| `src-tauri/src/ai/gemini.rs` | Gemini API client implementation | -| `src-tauri/src/ai/pool.rs` | Round-robin provider pool with fallback | -| `src-tauri/src/config/mod.rs` | Module exports | -| `src-tauri/src/config/schema.rs` | Configuration data structures (serde) | -| `src-tauri/src/config/manager.rs` | Load/save single settings JSON file | -| `src-tauri/src/logging.rs` | File-based logging setup (`~/.pisum-langue/logs/`) | -| `src-tauri/src/config/presets.rs` | Built-in preset definitions, preset CRUD helpers | -| `src-tauri/src/output/mod.rs` | Module exports | -| `src-tauri/src/output/clipboard.rs` | Clipboard write via `arboard` | -| `src-tauri/src/output/paste.rs` | Paste simulation via `enigo` (Ctrl+V / Cmd+V) | -| `src-tauri/Cargo.toml` | Rust dependencies | -| `src-tauri/build.rs` | Tauri build script | -| `src-tauri/tauri.conf.json` | Tauri app configuration | -| 
`src-tauri/capabilities/default.json` | Tauri 2 capability-based permissions for plugins | -| `src-tauri/icons/` | App icons and tray icons (light/dark variants) | -| `src/App.svelte` | Root Svelte component (settings UI shell) | -| `src/main.ts` | Svelte app entry point | -| `src/app.css` | Global styles (Tailwind) | -| `src/lib/commands.ts` | Typed Tauri IPC command wrappers | -| `src/lib/types.ts` | TypeScript type definitions mirroring Rust schemas | -| `src/components/SettingsPage.svelte` | Main settings page layout | -| `src/components/HotkeyConfig.svelte` | Hotkey configuration panel (capture + display) | -| `src/components/HotkeyRecorder.svelte` | Hotkey capture widget (listens for keydown/keyup, shows modifiers in real-time) | -| `src/components/ProviderConfig.svelte` | AI provider credentials and selection | -| `src/components/AudioConfig.svelte` | Audio format and language settings | -| `src/components/PresetConfig.svelte` | Prompt preset management (list, create, edit, delete) | -| `src/stores/settings.ts` | Svelte reactive store for app settings | -| `package.json` | Node.js dependencies | -| `vite.config.ts` | Vite configuration | -| `svelte.config.js` | Svelte configuration | -| `tsconfig.json` | TypeScript configuration | -| `tailwind.config.js` | Tailwind CSS configuration | -| `postcss.config.js` | PostCSS configuration | - -### Files to Modify - -| File Path | What Changes | -|-----------|-------------| -| `CLAUDE.md` | Already updated to Tauri/Rust/Svelte tech stack | -| `.gitignore` | Add Tauri/Rust/Node build artifacts (`target/`, `node_modules/`, `dist/`) | - -## 5. 
Task Breakdown - -### Phase 1 Tasks: Project Scaffolding, Logging & System Tray - -#### Task 1.1: Initialize Tauri 2 + Svelte 5 Project - -- **Files to create/modify:** - - `package.json` — Node dependencies: Svelte 5, Vite 6, Tailwind CSS 3, `@tauri-apps/api`, `@tauri-apps/plugin-dialog` - - `vite.config.ts` — Vite + Svelte plugin configuration - - `svelte.config.js` — Svelte 5 configuration - - `tsconfig.json` — TypeScript strict mode, Svelte paths - - `tailwind.config.js` — Tailwind content paths - - `postcss.config.js` — PostCSS with Tailwind and Autoprefixer - - `src/main.ts` — Svelte app mount point - - `src/app.css` — Tailwind directives (`@tailwind base/components/utilities`) - - `src/App.svelte` — Root component with minimal "Settings" placeholder - - `.gitignore` — Add `target/`, `node_modules/`, `dist/` -- **Implementation details:** - - Use `npm create tauri-app@latest` structure as reference but create manually for control - - Svelte 5 with TypeScript, Vite 6, Tailwind CSS 3 -- **Dependencies:** None -- **Acceptance criteria:** `npm run dev` starts Vite dev server; `npm run build` produces `dist/` - -#### Task 1.2: Initialize Rust Backend (Cargo + Tauri) - -- **Files to create/modify:** - - `src-tauri/Cargo.toml` — Package definition and initial dependencies: - ```toml - [package] - name = "pisum-langue" - version = "0.1.0" - edition = "2021" - - [lib] - name = "pisum_langue_lib" - crate-type = ["lib", "cdylib", "staticlib"] - - [build-dependencies] - tauri-build = { version = "2", features = [] } - - [dependencies] - tauri = { version = "2", features = ["tray-icon", "macos-private-api", "image-png"] } - tauri-plugin-notification = "2" - tauri-plugin-autostart = "2" - serde = { version = "1", features = ["derive"] } - serde_json = "1" - thiserror = "2" - once_cell = "1.20" - tokio = { version = "1", features = ["rt"] } - image = { version = "0.25", default-features = false, features = ["png"] } - ``` - - `src-tauri/build.rs` — `tauri_build::build()` - 
- `src-tauri/src/main.rs` — Entry point calling `pisum_langue_lib::run()` - - `src-tauri/src/lib.rs` — Minimal `run()` function with Tauri builder - - `src-tauri/src/error.rs` — `AppError` enum with `Config`, `Io`, `Json` variants - - `src-tauri/tauri.conf.json` — App configuration: - ```json - { - "productName": "PisumLangue", - "identifier": "com.pisumlangue.app", - "app": { - "macOSPrivateApi": true, - "windows": [{ - "title": "Pisum Langue - Settings", - "width": 700, "height": 500, - "visible": false, "center": true - }] - } - } - ``` - - `src-tauri/capabilities/default.json` — Tauri 2 capability-based permissions: - ```json - { - "identifier": "default", - "description": "Default capabilities for Pisum Langue", - "windows": ["main"], - "permissions": [ - "core:default", - "notification:default", - "notification:allow-notify", - "notification:allow-request-permission", - "autostart:default", - "autostart:allow-enable", - "autostart:allow-disable", - "autostart:allow-is-enabled" - ] - } - ``` -- **Implementation details:** - - Tauri 2 uses a capability-based permission system. All plugin permissions must be declared in `src-tauri/capabilities/` for the frontend to invoke them via IPC -- **Dependencies:** Task 1.1 -- **Acceptance criteria:** `cargo tauri dev` launches the app; `cargo build` succeeds. Capabilities file includes all required plugin permissions. - -#### Task 1.3: System Tray with Menu - -- **Files to create/modify:** - - `src-tauri/src/tray.rs` — Tray setup and notification helper: - ```rust - use once_cell::sync::Lazy; - use std::sync::RwLock; - use tauri::{AppHandle, Manager}; - - static APP_HANDLE: Lazy<RwLock<Option<AppHandle>>> = Lazy::new(|| RwLock::new(None)); - - pub fn setup_tray(app: &tauri::App) -> Result<(), Box<dyn std::error::Error>> { ... } - pub fn send_notification(title: &str, message: &str) { ... } - pub fn set_recording_state(recording: bool) { ... } - pub fn set_tray_tooltip(preset_name: &str) { ... 
} - ``` - - `src-tauri/src/lib.rs` — Add tray setup in `.setup()` callback - - `src-tauri/icons/` — Create tray icons (idle, recording) for light/dark themes -- **Implementation details:** - - Right-click menu: "Settings" (shows hidden window), separator, "Quit" - - No preset submenu in tray — preset switching is done through the settings UI only - - "Settings" click: `app.get_webview_window("main").unwrap().show()` - - Window close event (`CloseRequested`): hide the window back to tray instead of quitting the app - - Tray icon changes color when recording (Phase 2 will activate this) - - Tray tooltip displays the active preset name (e.g., "Pisum Langue — Transcribe DE"). Updated via `set_tray_tooltip()` on startup and whenever the active preset changes - - Store `AppHandle` in global `APP_HANDLE` for notifications from any module - - macOS: use `iconAsTemplate` for automatic theme adaptation - - Windows: detect dark mode via registry (`AppsUseLightTheme`) and load appropriate icon -- **Dependencies:** Task 1.2 -- **Acceptance criteria:** App starts minimized to tray. Right-click shows "Settings" and "Quit". "Settings" opens window. Closing the settings window hides it (back to tray). "Quit" exits. No main window on launch. Tray tooltip displays the active preset name. - -#### Task 1.4: File-Based Logging - -- **Files to create/modify:** - - `src-tauri/src/logging.rs` — Logging setup: - - Use `tracing` + `tracing-subscriber` + `tracing-appender` for structured file logging - - Log directory: `~/.pisum-langue/logs/` - - Rotating log files (daily rotation, keep 7 days) - - Log levels: ERROR for user-facing failures, WARN for retries/fallbacks, INFO for pipeline events, DEBUG for development - - `src-tauri/src/lib.rs` — Initialize logging in app setup **before** other modules (tray, hotkey, etc.) 
- - `src-tauri/Cargo.toml` — Add `tracing = "0.1"`, `tracing-subscriber = "0.3"`, `tracing-appender = "0.2"` -- **Implementation details:** - - Logging must be initialized as the first step in the `.setup()` callback so that all subsequent module initialization is logged - - Console output in dev mode (`#[cfg(debug_assertions)]`), file-only in release -- **Dependencies:** Task 1.2 -- **Acceptance criteria:** App writes structured logs to `~/.pisum-langue/logs/`. Logs rotate daily. Old logs cleaned up after 7 days. All Phase 2+ work benefits from logging. - -### Phase 2 Tasks: Global Hotkey & Audio Recording - -#### Task 2.1: Hotkey Registration & Push-to-Talk Event Loop - -- **Files to create/modify:** - - `src-tauri/src/hotkey/mod.rs` — Module exports - - `src-tauri/src/hotkey/manager.rs` — Hotkey manager: - ```rust - use global_hotkey::{GlobalHotKeyManager, GlobalHotKeyEvent, HotKeyState}; - use once_cell::sync::Lazy; - use std::cell::RefCell; - use std::sync::Mutex; - - thread_local! { - static MANAGER: RefCell> = const { RefCell::new(None) }; - } - - static REGISTRY: Lazy>> = - Lazy::new(|| Mutex::new(None)); - - pub fn init() { /* create manager on main thread */ } - pub fn register(binding: &HotkeyBinding) -> Result<(), AppError> { ... } - pub fn unregister() -> Result<(), AppError> { ... 
} - fn start_event_loop(app: AppHandle) { /* background thread */ } - ``` - - `src-tauri/src/hotkey/parse.rs` — Parse hotkey binding to `global_hotkey::hotkey::HotKey`: - - Modifier mapping: ctrl/control → Modifiers::CONTROL, alt → ALT, shift → SHIFT, meta/cmd/win/super → META - - Key code mapping: A-Z, 0-9, F1-F12, special keys - - `src-tauri/src/hotkey/conflict.rs` — Hotkey conflict detection: - - Check against app's own registered hotkeys - - Check against known system hotkeys (macOS: Cmd+Q, Cmd+W, Cmd+Tab, Cmd+Space, Cmd+Shift+3/4/5; Windows: Ctrl+Alt+Del, Alt+Tab, Alt+F4, Win+L/D/E/R/Tab, Ctrl+Shift+Esc) - - `src-tauri/src/lib.rs` — Add Tauri commands: `register_hotkey`, `unregister_hotkey`, `get_current_hotkey`, `check_conflict`, `check_system_conflict` - - `src-tauri/Cargo.toml` — Add `global-hotkey = "0.6"` -- **Implementation details:** - - GlobalHotKeyManager is thread-local; registration must happen on main thread via `app.run_on_main_thread()` - - Event loop runs in background thread, listens via `GlobalHotKeyEvent::receiver()` - - Push-to-talk state machine: - - `HotKeyState::Pressed` → call `handle_hotkey_press(app_handle)` - - `HotKeyState::Released` → call `handle_hotkey_release(app_handle)` - - Only one hotkey registered at a time (single configurable hotkey per PRD) -- **Dependencies:** Task 1.3 -- **Acceptance criteria:** Hotkey registers on startup. Press event logged. Release event logged. Hotkey works across all applications. Conflict detection warns about system hotkey clashes. - -#### Task 2.2: Audio Recording with cpal - -- **Files to create/modify:** - - `src-tauri/src/audio/mod.rs` — Module exports - - `src-tauri/src/audio/recorder.rs` — Audio recorder (adapt from reference repo): - ```rust - pub struct AudioRecorderHandle { - command_tx: Sender, - samples: Arc>>, - is_recording: Arc, - sample_rate: u32, - channels: u16, - thread_handle: Option>, - } - - impl AudioRecorderHandle { - pub fn start() -> Result { ... 
} - pub fn stop(mut self) -> Result<(Vec<f32>, u32, u16), AppError> { ... } - pub fn is_recording(&self) -> bool { ... } - } - ``` - - `src-tauri/src/error.rs` — Add `Audio(String)` variant - - `src-tauri/Cargo.toml` — Add `cpal = "0.15"` -- **Implementation details:** - - Dedicated recording thread (cpal stream is not `Send`) - - Support f32, i16, u16 input formats with normalization to f32 [-1.0, 1.0] - - `mpsc::channel` for stop command communication - - `Arc<AtomicBool>` for recording state flag - - Store active recorder in `static ACTIVE_RECORDER: Lazy<Mutex<Option<AudioRecorderHandle>>>` -- **Dependencies:** Task 2.1 -- **Acceptance criteria:** Audio captured from default microphone. Samples accessible after stop. No audio glitches. - -#### Task 2.3: Audio Encoding (Opus/OGG + WAV Fallback) - -- **Files to create/modify:** - - `src-tauri/src/audio/encoder.rs` — Encoding functions (adapt from reference repo): - ```rust - pub fn encode_to_opus(samples: &[f32], sample_rate: u32, channels: u16) -> Result<Vec<u8>, AppError> - pub fn encode_to_wav(samples: &[f32], sample_rate: u32, channels: u16) -> Result<Vec<u8>, AppError> - pub fn opus_mime_type() -> &'static str { "audio/ogg" } - pub fn wav_mime_type() -> &'static str { "audio/wav" } - ``` - - `src-tauri/Cargo.toml` — Add: - ```toml - audiopus = "0.2" - rubato = "0.16" - ogg = "0.9" - hound = "3.5" - ``` -- **Implementation details:** - - The encoder respects the user's `audio_format` setting from config. 
If set to Opus, encode to Opus; if set to WAV, encode to WAV directly (no Opus attempt) - - Runtime fallback: if the selected format's encoding fails (e.g., Opus library unavailable), fall back to the other format and log a warning via `tracing::warn!` - - Resampling pipeline: detect if sample rate is Opus-compatible (8k/12k/16k/24k/48k), resample via `rubato::SincFixedIn` if not - - Sinc parameters: `sinc_len: 256`, `f_cutoff: 0.95`, `oversampling_factor: 256`, `WindowFunction::BlackmanHarris2` - - Opus encoding: `Application::Voip`, 24kbps bitrate, 20ms frames - - Ogg wrapping: OpusHead header, OpusTags header (vendor: "pisum-langue"), audio packets with 48kHz granule positions - - WAV encoding: 16-bit PCM via `hound` -- **Dependencies:** Task 2.2 -- **Acceptance criteria:** Recorded audio encodes to valid Ogg/Opus. File plays correctly in external player. WAV fallback produces valid WAV. - -#### Task 2.4: Integrate Recording with Hotkey (Push-to-Talk Orchestration) - -- **Files to create/modify:** - - `src-tauri/src/hotkey/manager.rs` — Wire press/release to recording: - ```rust - fn handle_hotkey_press(app: &AppHandle) { - // Start recording - // Update tray icon to recording state - // Start max-duration timer (10 min) - } - - fn handle_hotkey_release(app: &AppHandle) { - // Stop recording - // Encode audio - // (Phase 3: send to AI) - // (Phase 4: clipboard + paste) - // Restore tray icon - } - ``` - - `src-tauri/src/tray.rs` — Implement `set_recording_state()` to swap tray icon -- **Implementation details:** - - Max recording duration: 10 minutes (600,000 ms). Spawn a timer thread on press; if it fires before release, auto-stop recording. - - Guard against double-press: if already recording, ignore subsequent press events - - Guard against concurrent transcription: if a transcription API call is already in-flight from a previous recording, ignore the new press event and show a "transcription in progress" tray notification. 
Use an `Arc` flag (`is_transcribing`) to track this state - - On release with no active recording (edge case), do nothing gracefully - - Empty recording (press and immediately release, < 0.5s): skip transcription silently, restore tray icon, no notification - - macOS microphone permission: `cpal` will trigger the system permission prompt on first use. If denied, `default_input_device()` returns `None` → show error notification guiding user to System Settings > Privacy & Security > Microphone -- **Dependencies:** Tasks 2.1, 2.2, 2.3 -- **Acceptance criteria:** Hold hotkey → tray shows recording → release → Opus data produced. Recording auto-stops at 10 minutes. Tray icon updates correctly. Quick press-release (< 0.5s) is silently ignored. - -### Phase 3 Tasks: AI Provider Abstraction & Gemini - -#### Task 3.1: Transcription Provider Trait - -- **Files to create/modify:** - - `src-tauri/src/ai/mod.rs` — Module exports - - `src-tauri/src/ai/provider.rs` — Provider trait: - ```rust - use crate::error::AppError; - - pub struct TranscriptionResult { - pub text: String, - } - - pub trait TranscriptionProvider: Send + Sync { - fn transcribe( - &self, - audio_data: &[u8], - mime_type: &str, - system_prompt: &str, - ) -> impl std::future::Future> + Send; - - fn test_connection( - &self, - ) -> impl std::future::Future> + Send; - - fn provider_name(&self) -> &str; - } - ``` - - `src-tauri/src/error.rs` — Add `Transcription(String)` variant -- **Dependencies:** None (trait is standalone) -- **Acceptance criteria:** Trait compiles. Can be implemented by any provider. 
- -#### Task 3.2: Gemini Provider Implementation - -- **Files to create/modify:** - - `src-tauri/src/ai/gemini.rs` — Gemini provider (adapt from reference repo): - ```rust - const GEMINI_API_BASE: &str = "https://generativelanguage.googleapis.com/v1beta"; - const DEFAULT_MODEL: &str = "gemini-2.5-flash-lite"; - const MAX_RETRIES: u32 = 3; - const RETRY_DELAY_MS: u64 = 1000; - - pub struct GeminiProvider { - client: Client, - api_key: String, - model: String, - } - - impl GeminiProvider { - pub fn new(api_key: String, model: Option) -> Self { ... } - } - - impl TranscriptionProvider for GeminiProvider { - async fn transcribe(&self, audio_data: &[u8], mime_type: &str, - system_prompt: &str) - -> Result { ... } - async fn test_connection(&self) -> Result { ... } - fn provider_name(&self) -> &str { "Gemini" } - } - ``` - - `src-tauri/Cargo.toml` — Add: - ```toml - reqwest = { version = "0.12", features = ["json", "rustls-tls"] } - base64 = "0.22" - ``` -- **Implementation details:** - - Gemini API: `POST https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={API_KEY}` - - Request body: - ```json - { - "system_instruction": { "parts": [{ "text": "" }] }, - "contents": [{ "parts": [ - { "inline_data": { "mime_type": "audio/ogg", "data": "" } } - ]}], - "generationConfig": { "temperature": 0.1, "maxOutputTokens": 8192 } - } - ``` - - Retry logic: max 3 retries, exponential backoff (1s base), retry on HTTP 429/503 or response containing "overloaded"/"rate limit" - - Parse response: extract `candidates[0].content.parts[0].text` - - Auth: API key as query parameter - - Low temperature (0.1) for deterministic transcription output -- **Dependencies:** Task 3.1 -- **Acceptance criteria:** Sending recorded Opus audio returns correct transcription. Retry works on transient errors. Invalid API key returns clear error. 
- -#### Task 3.3: Provider Pool (Round-Robin + Fallback) - -- **Files to create/modify:** - - `src-tauri/src/ai/pool.rs` — Provider pool: - ```rust - pub struct ProviderPool { - providers: Vec>, - current_index: AtomicUsize, - } - - impl ProviderPool { - pub fn new(providers: Vec>) -> Self { ... } - pub async fn transcribe(&self, audio: &[u8], mime: &str, - system_prompt: &str) - -> Result { ... } - pub fn rebuild(&mut self, configs: &[ProviderConfig]) { ... } - } - ``` - - Store as `static PROVIDER_POOL: Lazy>` -- **Implementation details:** - - Round-robin: `AtomicUsize` counter, mod by provider count - - Fallback: on failure, try next provider in sequence until all exhausted - - `rebuild()`: called when settings change, constructs new provider instances from config. Uses a write lock, which will block until any in-flight `transcribe()` read lock is released. This is acceptable because settings changes are infrequent and user-initiated - - Initialized on first launch during app setup (in `lib.rs` `.setup()` callback) from the loaded settings. If no providers are configured (e.g., first launch before API key entry), the pool is empty and `transcribe()` returns an error prompting the user to configure a provider -- **Dependencies:** Task 3.2 -- **Acceptance criteria:** Requests cycle across providers. Failed provider is skipped. All-fail returns aggregated error. Empty pool returns clear "no providers configured" error. - -#### Task 3.4: Wire Transcription into Recording Pipeline - -- **Files to create/modify:** - - `src-tauri/src/hotkey/manager.rs` — In `handle_hotkey_release()`, after encoding: - 1. Load config, find active preset by `active_preset_id`, get its `system_prompt` - 2. Call `PROVIDER_POOL.read().transcribe(audio, mime, &active_preset.system_prompt).await` - 3. 
Pass result to output module (Phase 4) - - `src-tauri/src/lib.rs` — Add `test_provider_connection` Tauri command -- **Dependencies:** Tasks 2.4, 3.3 -- **Acceptance criteria:** End-to-end: hold hotkey → speak → release → transcription text returned from API. - -### Phase 4 Tasks: Clipboard & Paste Output - -#### Task 4.1: Clipboard Write - -- **Files to create/modify:** - - `src-tauri/src/output/mod.rs` — Module exports - - `src-tauri/src/output/clipboard.rs`: - ```rust - use arboard::Clipboard; - use crate::error::AppError; - - pub fn set_clipboard_text(text: &str) -> Result<(), AppError> { - let mut clipboard = Clipboard::new() - .map_err(|e| AppError::Output(format!("Failed to access clipboard: {}", e)))?; - clipboard.set_text(text.to_string()) - .map_err(|e| AppError::Output(format!("Failed to set clipboard: {}", e)))?; - Ok(()) - } - ``` - - `src-tauri/src/error.rs` — Add `Output(String)` variant - - `src-tauri/Cargo.toml` — Add `arboard = "3"` -- **Dependencies:** None -- **Acceptance criteria:** Text set via `set_clipboard_text` is retrievable from system clipboard. 
- -#### Task 4.2: Paste Simulation - -- **Files to create/modify:** - - `src-tauri/src/output/paste.rs`: - ```rust - use enigo::{Direction, Enigo, Key, Keyboard, Settings}; - use crate::error::AppError; - - pub fn simulate_paste() -> Result<(), AppError> { - let mut enigo = Enigo::new(&Settings::default()) - .map_err(|e| AppError::Output(format!("Failed to create input simulator: {}", e)))?; - - #[cfg(target_os = "macos")] - let modifier = Key::Meta; - #[cfg(not(target_os = "macos"))] - let modifier = Key::Control; - - enigo.key(modifier, Direction::Press).map_err(|e| AppError::Output(e.to_string()))?; - enigo.key(Key::Unicode('v'), Direction::Click).map_err(|e| AppError::Output(e.to_string()))?; - enigo.key(modifier, Direction::Release).map_err(|e| AppError::Output(e.to_string()))?; - Ok(()) - } - ``` - - `src-tauri/Cargo.toml` — Add `enigo = "0.3"` -- **Dependencies:** Task 4.1 -- **Acceptance criteria:** After `set_clipboard_text("test")` + `simulate_paste()`, "test" appears at the active cursor position in any application. - -#### Task 4.3: Wire Output into Pipeline - -- **Files to create/modify:** - - `src-tauri/src/hotkey/manager.rs` — In `handle_hotkey_release()`, after transcription: - ```rust - output::clipboard::set_clipboard_text(&result.text)?; - output::paste::simulate_paste()?; - ``` -- **Dependencies:** Tasks 3.4, 4.1, 4.2 -- **Acceptance criteria:** Full end-to-end: hold hotkey → speak → release → text pasted at cursor. Clipboard contains the transcription. 
- -### Phase 5 Tasks: Settings UI - -#### Task 5.1: Configuration Schema & Manager - -- **Files to create/modify:** - - `src-tauri/src/config/mod.rs` — Module exports - - `src-tauri/src/config/schema.rs` — Configuration data structures: - ```rust - #[derive(Debug, Clone, Serialize, Deserialize)] - #[serde(rename_all = "camelCase")] - pub struct AppSettings { - pub start_with_system: bool, - pub show_tray_notifications: bool, - pub hotkey: HotkeyBinding, - pub audio_format: AudioFormat, // Opus or Wav - pub presets: Vec, // Named prompt presets (roles) - pub active_preset_id: String, // ID of the currently active preset - pub providers: Vec, - } - - #[derive(Debug, Clone, Serialize, Deserialize)] - #[serde(rename_all = "camelCase")] - pub struct Preset { - pub id: String, // Unique identifier (e.g., "de-transcribe") - pub name: String, // Display name (e.g., "Transcribe DE") - pub system_prompt: String, // Full prompt sent to AI provider - pub is_builtin: bool, // Built-in presets can be edited but not deleted - } - - #[derive(Debug, Clone, Serialize, Deserialize)] - #[serde(rename_all = "camelCase")] - pub struct ProviderConfig { - pub id: String, - pub provider_type: ProviderType, // Gemini (extensible) - pub api_key: String, - pub model: Option, - pub enabled: bool, - } - - #[derive(Debug, Clone, Serialize, Deserialize)] - pub struct HotkeyBinding { - pub modifiers: Vec, - pub key: String, - } - - pub enum AudioFormat { Opus, Wav } - pub enum ProviderType { Gemini } - ``` - - `src-tauri/src/config/manager.rs` — File I/O: - ```rust - const SETTINGS_FILE: &str = ".pisum-langue.json"; // ~/ - - pub fn init() -> Result<(), AppError> { /* create file with defaults if missing */ } - pub fn load_settings() -> Result { ... } - pub fn save_settings(settings: &AppSettings) -> Result<(), AppError> { ... 
} - ``` - - `src-tauri/Cargo.toml` — Add `dirs = "5"`, `uuid = { version = "1", features = ["v4"] }` -- **Implementation details:** - - Single settings file: `~/.pisum-langue.json` contains all configuration (settings, hotkey, presets, providers) - - No config migration logic — use `#[serde(default)]` on all fields so missing fields get defaults when schema changes - - API keys are stored in plaintext in the settings file (accepted tradeoff) - - Auto-create file with defaults on first run - - Default hotkey: Ctrl+Shift+Space (Windows) / Cmd+Shift+Space (macOS) - - Default audio format: Opus - - Built-in presets loaded from `presets.rs` on first run and merged on subsequent loads (see Task 5.1b) - - Default active preset: `"de-transcribe"` - - Active preset fallback: if `active_preset_id` references a nonexistent preset (e.g., deleted custom preset), fall back to the first built-in preset (`"de-transcribe"`) and persist the corrected setting - - First-run detection: if no settings file exists on startup, this is a first launch. After creating defaults, signal the app to open the settings window automatically and show a notification: "Welcome to Pisum Langue! Please configure an AI provider to get started." -- **Dependencies:** None (can be built in parallel with earlier phases) -- **Acceptance criteria:** Config loads on startup. Defaults created if missing. Save/load roundtrips correctly. Invalid `active_preset_id` falls back to first built-in preset. First launch opens settings window automatically. - -#### Task 5.1b: Built-in Presets - -- **Files to create/modify:** - - `src-tauri/src/config/presets.rs` — Built-in preset definitions: - ```rust - use crate::config::schema::Preset; - - pub fn get_builtin_presets() -> Vec { - vec![ - Preset { - id: "de-transcribe".to_string(), - name: "Transcribe DE".to_string(), - system_prompt: "Transcribe the following German audio accurately. 
\ - Output only the transcription without any additional commentary.".to_string(), - is_builtin: true, - }, - Preset { - id: "en-transcribe".to_string(), - name: "Transcribe EN".to_string(), - system_prompt: "Transcribe the following English audio accurately. \ - Output only the transcription without any additional commentary.".to_string(), - is_builtin: true, - }, - ] - } - ``` - - `src-tauri/src/config/manager.rs` — On load, merge built-in presets: ensure all built-in presets exist in config (add missing ones, preserve user edits to existing ones) -- **Implementation details:** - - Built-in presets have `is_builtin: true` — they can be edited (system_prompt changed) but not deleted - - On config load, call `get_builtin_presets()` and insert any missing built-in presets - - User-created custom presets have `is_builtin: false` and can be fully managed (create, edit, delete) - - Preset IDs are kebab-case strings; custom presets get UUID v4 IDs -- **Dependencies:** Task 5.1 -- **Acceptance criteria:** First launch creates config with built-in presets. Adding a new built-in preset in code appears on next launch. User edits to built-in preset prompts are preserved. - -#### Task 5.2: Tauri Commands for Settings - -- **Files to create/modify:** - - `src-tauri/src/lib.rs` — Add Tauri commands: - ```rust - #[tauri::command] - async fn load_settings() -> Result { ... } - - #[tauri::command] - async fn save_settings(settings: AppSettings) -> Result<(), String> { ... } - - #[tauri::command] - async fn test_provider(provider: ProviderConfig) -> Result { ... } - - #[tauri::command] - async fn get_presets() -> Result, String> { ... } - - #[tauri::command] - async fn set_active_preset(preset_id: String) -> Result<(), String> { ... } - - #[tauri::command] - async fn save_preset(preset: Preset) -> Result<(), String> { ... } - - #[tauri::command] - async fn delete_preset(preset_id: String) -> Result<(), String> { ... 
} - ``` -- **Dependencies:** Task 5.1, 5.1b -- **Acceptance criteria:** Frontend can call each command and receive typed responses. Preset commands correctly CRUD presets. Deleting a built-in preset returns an error. - -#### Task 5.3: TypeScript Types & Command Wrappers - -- **Files to create/modify:** - - `src/lib/types.ts` — Mirror Rust config schema: - ```typescript - export interface AppSettings { - startWithSystem: boolean; - showTrayNotifications: boolean; - hotkey: HotkeyBinding; - audioFormat: 'opus' | 'wav'; - presets: Preset[]; - activePresetId: string; - providers: ProviderConfig[]; - } - export interface Preset { - id: string; - name: string; - systemPrompt: string; - isBuiltin: boolean; - } - export interface ProviderConfig { ... } - export interface HotkeyBinding { modifiers: string[]; key: string; } - ``` - - `src/lib/commands.ts` — Typed invoke wrappers: - ```typescript - import { invoke } from '@tauri-apps/api/core'; - export async function loadSettings(): Promise { return invoke('load_settings'); } - export async function saveSettings(settings: AppSettings): Promise { ... } - export async function testProvider(provider: ProviderConfig): Promise { ... } - export async function getPresets(): Promise { return invoke('get_presets'); } - export async function setActivePreset(presetId: string): Promise { ... } - export async function savePreset(preset: Preset): Promise { ... } - export async function deletePreset(presetId: string): Promise { ... } - export async function checkConflict(binding: HotkeyBinding): Promise { ... } - export async function checkSystemConflict(binding: HotkeyBinding): Promise { ... } - ``` - - `src/stores/settings.ts` — Svelte writable store: - ```typescript - import { writable } from 'svelte/store'; - export const settings = writable(null); - export async function initSettings() { ... } - ``` -- **Dependencies:** Task 5.2 -- **Acceptance criteria:** Types match Rust schemas. Commands compile and communicate correctly. 
- -#### Task 5.4: Settings UI Components - -- **Files to create/modify:** - - `src/App.svelte` — Load config on mount, render SettingsPage - - `src/components/SettingsPage.svelte` — Main layout with sections: Hotkey, Audio, Provider, Presets, General - - `src/components/HotkeyConfig.svelte` — Hotkey display + "Record New Hotkey" button that opens inline HotkeyRecorder - - `src/components/HotkeyRecorder.svelte` — Hotkey capture widget (based on reference repo pattern): - - Enters recording mode on click, captures `onkeydown`/`onkeyup` events - - Tracks modifiers (Ctrl, Alt, Shift, Meta) in real-time, displays them as user presses - - Requires at least one modifier + a non-modifier key to complete capture - - Maps key names (single char → uppercase, `Arrow*` → strip prefix) - - Calls `checkConflict` and `checkSystemConflict` to warn about clashes before saving - - Shows "Press a key combination..." prompt while recording - - `src/components/AudioConfig.svelte` — Audio format toggle (Opus/WAV) - - `src/components/ProviderConfig.svelte` — Provider list: add/remove/edit providers, API key input, model selection, "Test Connection" button - - `src/components/PresetConfig.svelte` — Preset management: - - List of all presets (built-in and custom) with active indicator - - Click to select active preset - - "Add Preset" button to create custom presets (name + system prompt textarea) - - Edit button on each preset (opens inline edit with name + system prompt textarea) - - Delete button on custom presets (built-in presets show delete as disabled) - - Built-in presets show an indicator badge -- **Implementation details:** - - Each component binds to the Svelte store, auto-saves on change (debounced 500ms) - - Tailwind CSS for styling, minimal and functional - - "Test Connection" calls `testProvider` and shows success/failure feedback - - PresetConfig uses `getPresets`, `savePreset`, `deletePreset`, `setActivePreset` commands -- **Dependencies:** Task 5.3 -- **Acceptance 
criteria:** All PRD §4.4 configuration options are present and functional. Changes persist across app restarts. - -### Phase 6 Tasks: Error Handling, Notifications & Auto-Start - -#### Task 6.1: Comprehensive Error Notifications - -- **Files to create/modify:** - - `src-tauri/src/tray.rs` — Ensure `send_notification()` works cross-platform - - `src-tauri/src/hotkey/manager.rs` — Wrap entire recording/transcription pipeline in error handler: - ```rust - fn handle_hotkey_release(app: &AppHandle) { - let result = std::panic::catch_unwind(|| { - // ... full pipeline ... - }); - match result { - Ok(Ok(())) => { /* success, optionally notify */ }, - Ok(Err(e)) => tray::send_notification("Transcription Error", &e.to_string()), - Err(_) => tray::send_notification("Unexpected Error", "An unexpected error occurred"), - } - } - ``` -- **Implementation details:** - - Every error in the pipeline (recording start failure, no microphone, encoding error, network failure, API auth error, quota exceeded, clipboard failure, paste failure) must trigger a notification - - Notification format: title = error category, body = actionable message - - Error categories: "Recording Error", "Encoding Error", "Transcription Error", "Network Error", "Output Error" -- **Dependencies:** Tasks 4.3, 1.3 -- **Acceptance criteria:** Disconnect microphone → notification. Invalid API key → notification. No network → notification. Every error path has a notification. - -#### Task 6.2: Auto-Start with OS - -- **Files to create/modify:** - - `src-tauri/src/lib.rs` — Register `tauri-plugin-autostart`: - ```rust - use tauri_plugin_autostart::MacosLauncher; - - tauri::Builder::default() - .plugin(tauri_plugin_autostart::init( - MacosLauncher::LaunchAgent, None - )) - // ... - ``` - - `src-tauri/src/lib.rs` — Add Tauri commands: - ```rust - #[tauri::command] - async fn set_autostart(enabled: bool, app: AppHandle) -> Result<(), String> { - let manager = app.autolaunch(); - if enabled { manager.enable()? 
} else { manager.disable()? } - Ok(()) - } - ``` - - `src/components/SettingsPage.svelte` — Add auto-start toggle in General section -- **Implementation details:** - - Windows: Registry-based startup via `tauri-plugin-autostart` - - macOS: LaunchAgent via `tauri-plugin-autostart` with `MacosLauncher::LaunchAgent` - - Default: enabled (per PRD). User can disable in settings. -- **Dependencies:** Task 5.4 -- **Acceptance criteria:** Toggle auto-start in settings. Restart OS → app starts in tray (when enabled). Disable → app does not start. - -#### Task 6.3: Maximum Recording Duration Timer - -- **Files to create/modify:** - - `src-tauri/src/hotkey/manager.rs` — Add duration enforcement: - ```rust - const MAX_RECORDING_DURATION: Duration = Duration::from_secs(600); // 10 minutes - - fn handle_hotkey_press(app: &AppHandle) { - // ... start recording ... - let app_clone = app.clone(); - std::thread::spawn(move || { - std::thread::sleep(MAX_RECORDING_DURATION); - if is_still_recording() { - handle_hotkey_release(&app_clone); // Auto-stop - } - }); - } - ``` -- **Dependencies:** Task 2.4 -- **Acceptance criteria:** Recording auto-stops after 10 minutes. Transcription proceeds normally after auto-stop. - -#### Task 6.4: macOS Post-Install Permission Notification - -- **Files to create/modify:** - - `packages/macos/postinstall` — Shell script for macOS installer: - - Display OS notification guiding user to grant Accessibility permissions in System Settings > Privacy & Security > Accessibility - - Uses `osascript -e 'display notification ...'` - - `src-tauri/tauri.conf.json` — Reference postinstall script in macOS bundle config -- **Dependencies:** Task 1.2 -- **Acceptance criteria:** After macOS installation, user sees notification about Accessibility permissions. - -## 6. Data Model Changes - -No database is used. 
All state is persisted in a single JSON file: - -- `~/.pisum-langue.json` — All settings: hotkey, audio format, presets (built-in + custom), active preset ID, provider credentials, auto-start, notifications -- `~/.pisum-langue/logs/` — Rotating log files - -## 7. API Changes - -No HTTP API is exposed. The application communicates with external APIs: - -### Gemini API (outbound) - -- **Endpoint:** `POST https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={API_KEY}` -- **Default model:** `gemini-2.5-flash-lite` -- **Auth:** API key as query parameter -- **Request:** - ```json - { - "system_instruction": { "parts": [{ "text": "<active preset's system prompt>" }] }, - "contents": [{ "parts": [ - { "inline_data": { "mime_type": "audio/ogg", "data": "<base64-encoded audio>" } } - ]}], - "generationConfig": { "temperature": 0.1, "maxOutputTokens": 8192 } - } - ``` -- **Response:** `{ "candidates": [{ "content": { "parts": [{ "text": "..." }] } }] }` - -### Tauri IPC Commands (internal) - -| Command | Direction | Purpose | -|---------|-----------|---------| -| `load_settings` | Frontend → Backend | Load all settings | -| `save_settings` | Frontend → Backend | Persist all settings | -| `register_hotkey` | Frontend → Backend | Register new hotkey | -| `unregister_hotkey` | Frontend → Backend | Remove current hotkey | -| `check_conflict` | Frontend → Backend | Check hotkey conflict with app | -| `check_system_conflict` | Frontend → Backend | Check hotkey conflict with OS | -| `test_provider` | Frontend → Backend | Test API key validity | -| `get_presets` | Frontend → Backend | Get all presets (built-in + custom) | -| `set_active_preset` | Frontend → Backend | Set active preset by ID | -| `save_preset` | Frontend → Backend | Create or update a preset | -| `delete_preset` | Frontend → Backend | Delete a custom preset (rejects built-in) | -| `set_autostart` | Frontend → Backend | Toggle OS auto-start | - -## 8. 
Dependencies & Risks - -### External Dependencies - -| Dependency | Version | Purpose | Risk | -|------------|---------|---------|------| -| `tauri` | 2.x | App framework | Mature, actively maintained | -| `global-hotkey` | 0.6+ | System-wide hotkey registration | Verify latest version before implementation; API may differ from 0.6 | -| `cpal` | 0.15 | Audio I/O | Cross-platform, widely used | -| `audiopus` | 0.2+ | Opus encoding | Verify latest version before implementation; wraps libopus; requires C toolchain | -| `rubato` | 0.16 | Audio resampling | Pure Rust, no system deps | -| `ogg` | 0.9 | Ogg container | Stable | -| `hound` | 3.5 | WAV encoding | Stable fallback | -| `reqwest` | 0.12 | HTTP client | Uses rustls (no OpenSSL dep) | -| `arboard` | 3 | Clipboard access | Cross-platform | -| `enigo` | 0.3+ | Input simulation | Verify latest version before implementation; requires accessibility on macOS | -| `tracing` | 0.1 | Structured logging | Mature, widely used | -| `tracing-subscriber` | 0.3 | Log output formatting | Companion to tracing | -| `tracing-appender` | 0.2 | File-based log output with rotation | Companion to tracing | -| `svelte` | 5.x | UI framework | Latest major version | -| `tailwindcss` | 3.x | CSS framework | Stable | -| Gemini API | v1beta | AI transcription via generateContent | Rate limits, requires billing | - -### Risks & Mitigations - -| Risk | Impact | Mitigation | -|------|--------|------------| -| macOS Accessibility permission required for `enigo` | Paste simulation fails silently | Post-install notification guides user to System Settings > Privacy & Security > Accessibility | -| macOS Microphone permission required for `cpal` | Recording fails | macOS prompts on first use; if denied, show error notification guiding user to System Settings > Privacy & Security > Microphone | -| `audiopus` requires C compiler for libopus | Build fails on clean machines | Document build prerequisites (Visual Studio Build Tools on Windows, Xcode CLI 
Tools + `brew install opus` on macOS); WAV fallback ensures app works even if Opus build fails | -| Gemini API rate limits | Transcription fails under heavy use | Round-robin multiple API keys; exponential backoff; clear error notification | -| Hotkey conflicts with other apps | Hotkey registration fails | Conflict detection against app and system hotkeys; suggest alternative hotkey | -| Large audio files (10 min recording) | API timeout | Opus compression keeps file size manageable (~1.8 MB for 10 min at 24kbps) | - -### Assumptions - -- User has a working microphone connected -- User has network access for API calls -- User has a Google AI Studio API key with Gemini API access - -### Pre-Implementation Checklist - -- **Verify crate versions:** Before starting Phase 2, check `crates.io` for the latest versions of `global-hotkey`, `audiopus`, and `enigo`. These crates have had breaking API changes between minor versions. Pin exact versions in `Cargo.toml` after verification. -- **Minimum Rust version:** Target Rust 1.80+ to use `std::sync::LazyLock` from the standard library instead of `once_cell::sync::Lazy`. If targeting Rust < 1.80, keep the `once_cell` dependency. - -## 9. Testing Strategy - -### Unit Tests (Rust) -- **Audio encoder:** Verify Opus output is valid Ogg/Opus. Verify WAV fallback produces valid WAV. Test resampling from various input rates. -- **Config manager:** Load/save roundtrip. Default creation. Invalid JSON handling. Missing fields get defaults via `#[serde(default)]`. Built-in preset merge on load (missing built-in presets added, user edits preserved). -- **Preset manager:** Create custom preset. Edit preset. Delete custom preset. Reject deletion of built-in preset. Get active preset by ID. Fallback when active preset ID is invalid. -- **Hotkey parser:** Valid hotkey strings parse correctly. Invalid strings return errors. -- **Provider pool:** Round-robin index advances. Fallback skips failed providers. All-fail returns error. 
Empty pool returns "no providers configured" error. -- **Hotkey conflict:** Detect app-level conflicts. Detect system hotkey conflicts on Windows and macOS. - -### Integration Tests (Rust) -- **Gemini provider:** Send known audio file, verify transcription (requires API key; skip in CI without key). -- **Recording + encoding pipeline:** Record silence for 1 second, verify Opus output is non-empty and valid. - -### Manual E2E Test Scenarios -- Hold hotkey → speak "hello world" → release → verify text appears at cursor -- Hold hotkey → speak for > 10 minutes → verify auto-stop and transcription -- Disconnect network → hold hotkey → speak → release → verify error notification -- Use invalid API key → verify error notification on transcription attempt -- Change hotkey in settings → verify old hotkey stops working, new one activates -- Test in multiple apps: browser text field, VS Code editor, Notepad, chat applications -- Switch preset in settings UI → dictate → verify transcription uses new preset's prompt -- Create custom preset in settings → verify it appears in preset list -- Delete custom preset → verify it disappears from preset list -- Edit built-in preset prompt → verify edit persists across restart -- Close settings window → verify it hides to tray (does not quit app) -- First launch (delete config file) → verify settings window opens automatically with welcome notification -- Verify tray tooltip shows active preset name (e.g., "Pisum Langue — Transcribe DE") -- Switch active preset → verify tray tooltip updates -- Press hotkey while transcription is in progress → verify "transcription in progress" notification, no crash -- Delete active custom preset → verify fallback to first built-in preset - -### Edge Cases -- No microphone connected → error notification on hotkey press -- Empty recording (press and immediately release, < 0.5s) → skip silently, no notification -- Multiple rapid press/release cycles → no crash or resource leak -- Very long utterance (10 
min) → encoding and API call succeed -- Non-ASCII transcription results → clipboard and paste handle Unicode correctly - -## 10. Requirement Traceability - -### Functional Requirements - -| PRD Ref | Requirement Summary | Task(s) | Notes | -|---------|-------------------|---------|-------| -| §4.1 #1 | Configurable global hotkey across all apps (Win/macOS) | 2.1, 5.4 | `global-hotkey` crate handles OS-level registration | -| §4.1 #2 | Capture audio from default microphone when hotkey active | 2.2, 2.4 | `cpal` on dedicated thread | -| §4.1 #3 | Encode audio using selected format (Opus default, WAV fallback) | 2.3, 5.1 | User selects format in settings; runtime fallback if encoding fails | -| §4.1 #4 | Stop recording on hotkey release (push-to-talk) | 2.1, 2.4 | Press/release events from `global-hotkey` | -| §4.1 #5 | Max recording duration 10 minutes | 6.3 | Timer thread auto-stops recording | -| §4.1 #6 | Audio/visual feedback for recording state | 1.3, 2.4 | Tray icon changes during recording; tray tooltip shows active preset | -| §4.2 #1 | Send audio + active preset's system prompt to AI provider | 3.2, 3.4 | Gemini with system prompt from active preset | -| §4.2 #2 | Multiple named prompt presets with fallback on invalid active preset | 5.1, 5.1b, 5.4 | `Preset` struct, PresetConfig UI; falls back to first built-in preset | -| §4.2 #3 | Built-in presets for common languages; users can create/edit/delete custom presets | 5.1b, 5.4 | `get_builtin_presets()`, built-in presets editable but not deletable | -| §4.2 #4 | AI provider behind interface (swappable) | 3.1 | `TranscriptionProvider` trait | -| §4.2 #5 | Round-robin distribution with fallback | 3.3 | `ProviderPool` with atomic index | -| §4.3 #1 | Copy transcription to clipboard | 4.1 | `arboard` crate | -| §4.3 #2 | Simulate paste (Ctrl+V / Cmd+V) | 4.2 | `enigo` crate | -| §4.4 #1 | Settings UI from system tray | 1.3, 5.4 | Tray menu → hidden Settings window | -| §4.4 #2 | Persist settings between sessions 
| 5.1 | Single JSON file in user home directory | -| §4.4 #3 | Start minimized to system tray | 1.3 | Window `visible: false` in tauri.conf.json | -| §4.4 #4 | Auto-start with OS (configurable) | 6.2 | `tauri-plugin-autostart` | -| §4.4 #5 | First-run opens settings, guides provider setup | 5.1, 1.3 | Config manager detects first launch; shows welcome notification | -| §4.5 #1 | Detect network unavailable / API failure | 6.1 | reqwest error handling + retry logic | -| §4.5 #2 | OS-native toast notification on error | 6.1 | `tauri-plugin-notification` | -| §4.5 #3 | No silent error discarding | 6.1 | Every pipeline stage wrapped in error handler | - -### User Stories - -| PRD Ref | User Story Summary | Implementing Tasks | Fully Covered? | -|---------|-------------------|-------------------|----------------| -| US-1 | Hold hotkey to dictate without switching apps | 2.1, 2.2, 2.4 | Yes | -| US-2 | Transcribed text appears at cursor position | 4.1, 4.2, 4.3 | Yes | -| US-3 | Transcribed text copied to clipboard as fallback | 4.1 | Yes | -| US-4 | Configure AI provider and model | 5.1, 5.4 | Yes | -| US-5 | Switch between prompt presets for different contexts | 5.1b, 5.4 | Yes | - -### Success Metrics - -| Metric | How the Plan Addresses It | -|--------|--------------------------| -| End-to-end latency < 3s for short utterances | Opus compression minimizes upload size; Gemini flash-lite is fast for short audio; direct clipboard+paste with no intermediate steps | -| Works across common applications | `enigo` for input simulation is application-agnostic; tested in E2E scenarios across browsers, editors, chat apps | -| Swapping AI provider = new class + DI change | `TranscriptionProvider` trait; new provider = implement trait + add to `ProviderPool` | -| Minimal resource usage when idle | Tauri app is lightweight; no background threads when not recording; system tray only | diff --git a/docs/PRD-build-process.md b/docs/PRD-build-process.md deleted file mode 100644 index 
1c7a0bb..0000000 --- a/docs/PRD-build-process.md +++ /dev/null @@ -1,86 +0,0 @@ -# PRD: Build Process with GitHub Actions - -## 1. Introduction/Overview - -Pisum Langue currently has no CI/CD pipeline. Developers must manually build and verify the application on each platform, which is error-prone and time-consuming. This feature introduces GitHub Actions workflows for continuous integration (CI) and automated release builds, ensuring every pull request is verified and releases are produced consistently for macOS and Windows. - -## 2. Goals - -- Automatically verify that the application builds successfully on every push and pull request -- Automate the creation of platform-specific installers (macOS `.app`/`.pkg`, Windows MSI) and publish them as GitHub Releases -- Support both manual (workflow_dispatch) and tag-based (`v*`) release triggers -- Keep the pipeline simple — no code signing, no package manager publishing for now - -## 3. User Stories - -- As a developer, I want every pull request to be automatically built so that I know the code compiles and bundles correctly on both platforms before merging. -- As a maintainer, I want to trigger a release manually with a version bump option so that I can control when new versions are published. -- As a maintainer, I want to push a `v*` tag to trigger a release build so that I have a simple git-based release workflow. -- As a user, I want to download platform-specific installers from GitHub Releases so that I can easily install the application. - -## 4. Functional Requirements - -### CI Workflow (`.github/workflows/ci.yml`) - -1. The CI workflow must trigger on pushes to `main` and on pull requests targeting `main`. -2. The CI workflow must run on both `windows-latest` and `macos-latest` runners. -3. The CI workflow must install Node.js (version 24) and Rust (stable) with appropriate caching for both npm packages and Rust dependencies. -4. 
The CI workflow must install platform-specific dependencies (Opus via Homebrew on macOS). -5. The CI workflow must install frontend dependencies using `npm ci`. -6. The CI workflow must build the frontend (`npm run build`). -7. The CI workflow must build the full Tauri application using `tauri-apps/tauri-action` with platform-specific bundle targets (`.app` on macOS with `aarch64-apple-darwin` target, MSI on Windows). On macOS, a `.pkg` installer must also be produced from the `.app` bundle via a post-build packaging script (see reference project). -8. The CI workflow must fail the pipeline if any build step fails. - -### Release Workflow (`.github/workflows/release.yml`) - -9. The release workflow must trigger on pushes to tags matching `v*` (e.g., `v0.2.0`). -10. The release workflow must also support `workflow_dispatch` with an input to select version bump type (`patch`, `minor`, `major`) or specify an exact version string. -11. When triggered via `workflow_dispatch`, the release workflow must update the version in `package.json`, `src-tauri/Cargo.toml`, and `src-tauri/tauri.conf.json`, commit the changes, and create a git tag. -12. The release workflow must build the Tauri application on both `windows-latest` and `macos-latest` runners in parallel. -13. The release workflow must use the same Node.js, Rust, and dependency setup as the CI workflow. -14. The release workflow must build platform-specific installers: `.app` bundle and `.pkg` installer on macOS (aarch64-apple-darwin), and MSI on Windows. -15. The release workflow must create a GitHub Release as a draft with the version number as the release name. -16. The release workflow must upload the built installers as release assets. -17. The release workflow must publish (un-draft) the GitHub Release after all platform builds and uploads succeed. - -### Shared Concerns - -18. 
Both workflows must cache npm dependencies (via Node.js setup action) and Rust dependencies (via `Swatinem/rust-cache` with workspace set to `src-tauri`). -19. CI build artifacts must be retained for 7 days. -20. The `GITHUB_TOKEN` secret must be passed to the Tauri build action and release creation steps. - -## 5. Non-Goals (Out of Scope) - -- Not included: Code signing or notarization for macOS or Windows -- Not included: Package manager publishing (Homebrew, Chocolatey, etc.) -- Not included: Tauri auto-updater integration -- Not included: Linux builds -- Not included: Linting or type-checking steps (ESLint, Prettier, cargo fmt, clippy, svelte-check) — the pipeline only verifies that the build succeeds -- Not included: Automated testing (no test suite exists yet) - -## 6. Technical Considerations - -- **Tauri Action**: Use `tauri-apps/tauri-action@v0` for building, consistent with the reference project (`github-global-hotkey`) -- **Node.js version**: Pinned to 24 via `.nvmrc` at the repository root; CI workflows must use this version -- **Platform targets**: macOS uses `--target aarch64-apple-darwin --bundles app`, followed by a post-build script to produce a `.pkg` installer (consistent with reference project); Windows uses `--bundles msi` -- **macOS dependency**: The Opus library must be installed via `brew install opus` before building on macOS runners (hard requirement for `audiopus` crate) -- **Version sync**: When bumping versions via workflow_dispatch, three files must be updated in lockstep: `package.json`, `src-tauri/Cargo.toml`, and `src-tauri/tauri.conf.json` -- **Git actor**: Version bump commits should be authored by `github-actions[bot]` to distinguish automated commits from human ones -- **Release creation**: Use `softprops/action-gh-release@v2` for creating GitHub Releases -- **Draft-then-publish pattern**: Create the release as a draft first, upload assets from parallel matrix jobs, then un-draft via `actions/github-script@v7` in a separate 
finalization job (using `needs:` dependencies). Follow the reference project's job structure: `bump-version` → `create-release` → `build-tauri` (matrix) → `publish-release` -- **Workflow permissions**: The release workflow must declare `permissions: contents: write` to allow creating releases and pushing version bump commits -- **Version bump mechanics**: When triggered via `workflow_dispatch`, the version bump job commits to `main`, pushes the commit and tag using `github-actions[bot]` credentials. `Cargo.lock` must also be committed alongside version changes. Follow the reference project's implementation for semver parsing and multi-file updates -- **Concurrency**: No explicit concurrency groups required (consistent with reference project); sequential job ordering via `needs:` is sufficient -- **Opus dependency**: The Opus library (`brew install opus`) is a hard build requirement on macOS, not optional - -## 7. Success Metrics - -- Every push and PR to `main` produces a green/red build status within a reasonable time -- A maintainer can produce a complete set of platform installers (macOS + Windows) by either pushing a tag or using the manual workflow dispatch -- Built installers are downloadable from GitHub Releases and install correctly on their respective platforms - -## 8. Open Questions - -- [x] Should the CI workflow also run on pushes to development branches (e.g., feature branches), or only on PRs targeting `main`? -> only on PRs targeting `main` -- [x] Should the macOS build also produce a `.pkg` installer (like the reference project) in addition to the `.app` bundle? -> like the reference project -- [x] What is the desired release notes format — auto-generated from commits, or manually written? 
-> auto-generated from commits diff --git a/docs/PRD-package-manager-distribution.md b/docs/PRD-package-manager-distribution.md deleted file mode 100644 index d4dbdb3..0000000 --- a/docs/PRD-package-manager-distribution.md +++ /dev/null @@ -1,94 +0,0 @@ -# PRD: Package Manager Distribution (Homebrew & Chocolatey) - -## 1. Introduction/Overview - -Pisum Langue is currently distributed only via GitHub Releases as direct downloads (.pkg for macOS, .msi for Windows). Users must manually find, download, and install updates. This PRD covers adding Homebrew (macOS) and Chocolatey (Windows) package manager support, and wiring both into the existing release workflow so that every release automatically publishes updated packages. - -## 2. Goals - -- Enable macOS users to install and update Pisum Langue via `brew install --cask pisum-langue` -- Enable Windows users to install and update Pisum Langue via `choco install pisum-langue` -- Automate package publishing so every GitHub Release triggers Homebrew cask and Chocolatey package updates with zero manual steps -- Follow the same proven patterns established in the `global-hotkey` reference repo - -## 3. User Stories - -- As a **macOS user**, I want to install Pisum Langue with `brew install --cask pisum-langue` so that I can use my familiar package manager and get updates through `brew upgrade`. -- As a **Windows user**, I want to install Pisum Langue with `choco install pisum-langue` so that I can manage it alongside my other Chocolatey packages. -- As a **maintainer**, I want package manager updates to happen automatically on release so that I don't have to manually update formulas, checksums, or package specs. -- As a **user upgrading**, I want `brew upgrade` or `choco upgrade` to give me the latest version so that I stay current without visiting GitHub. - -## 4. Functional Requirements - -### Homebrew Cask - -1. A separate GitHub repository `mschnecke/homebrew-pisum-langue` must be created to serve as the Homebrew tap. 
-2. The tap repository must contain a cask formula at `Casks/pisum-langue.rb` that installs the `.pkg` artifact from GitHub Releases. -3. The cask must target Apple Silicon (aarch64) only, with `depends_on macos: ">= :catalina"`. -4. The cask must include a `zap` stanza that removes application data from `~/Library/Application Support/com.pisum.langue`, `~/Library/Caches/com.pisum.langue`, `~/Library/Preferences/com.pisum.langue.plist`, and `~/Library/LaunchAgents/com.pisum.langue.plist`. -5. A template cask file must exist in the main repo at `packages/homebrew/pisum-langue.rb` with placeholder values for version and SHA256. -6. Users must be able to install with: `brew tap mschnecke/pisum-langue && brew install --cask pisum-langue`. - -### Chocolatey Package - -7. A Chocolatey package definition must exist at `packages/chocolatey/pisum-langue.nuspec` containing package metadata (id, version, title, authors, project URL, description, tags). -8. An install script must exist at `packages/chocolatey/tools/chocolateyinstall.ps1` that downloads and silently installs the `.msi` from GitHub Releases using `Install-ChocolateyPackage` with SHA256 checksum verification. -9. An uninstall script must exist at `packages/chocolatey/tools/chocolateyuninstall.ps1` that finds and removes the MSI via Windows Registry lookup. -10. The Chocolatey package must be published to the MyGet NuGet feed at `https://www.myget.org/F/mschnecke/api/v3/index.json`. -11. Users must be able to install with: `choco install pisum-langue --source https://www.myget.org/F/mschnecke/api/v3/index.json`. - -### Release Workflow Integration - -12. The existing `release.yml` must be updated to include an `update-homebrew` job that triggers a repository dispatch to `mschnecke/homebrew-pisum-langue` with the new version, passing the version string in the payload. -13. 
The Homebrew tap repository must have a workflow that receives the dispatch event, downloads the `.pkg` artifact, computes the SHA256 checksum, updates the cask formula with the new version and hash, and commits the change. -14. The existing `release.yml` must be updated to include an `update-chocolatey` job that: - - Runs on `windows-latest` after the `publish-release` job - - Downloads the built `.msi` artifact from the GitHub Release - - Computes the SHA256 checksum - - Updates `chocolateyinstall.ps1` with the new download URL and checksum - - Updates `pisum-langue.nuspec` with the new version - - Runs `choco pack` to create the `.nupkg` - - Pushes the package to MyGet using `choco push` with the `MYGET_API_KEY` secret -15. The `bump-version` job in `release.yml` must also update the version in `packages/chocolatey/pisum-langue.nuspec` alongside the existing version files (package.json, Cargo.toml, tauri.conf.json). - -### Secrets & Configuration - -16. A `HOMEBREW_TAP_TOKEN` GitHub secret must be configured with a personal access token that has permission to trigger workflows on the `mschnecke/homebrew-pisum-langue` repository. -17. A `MYGET_API_KEY` GitHub secret must be configured for publishing Chocolatey packages to the MyGet feed. - -## 5. Non-Goals (Out of Scope) - -- Not included: Publishing to the official Chocolatey Community Repository (chocolatey.org) — MyGet is sufficient for now -- Not included: Intel (x64) macOS builds or universal binaries — only aarch64 is supported -- Not included: Linux package managers (apt, snap, flatpak, AUR) -- Not included: Auto-update mechanisms within the app itself (Tauri updater plugin) -- Not included: Code signing for macOS or Windows installers -- Not included: Creating a Homebrew formula (non-cask) — the app is a GUI application, so a cask is appropriate - -## 6. 
Design Considerations - -- The Homebrew tap naming convention follows `homebrew-{name}` so that `brew tap mschnecke/pisum-langue` maps to the `mschnecke/homebrew-pisum-langue` repo. -- The Chocolatey package ID should be `pisum-langue` (lowercase, hyphenated) to follow Chocolatey naming conventions. -- Installation instructions in the GitHub Release body (already generated by `release.yml`) should be updated to include the Homebrew and Chocolatey commands. - -## 7. Technical Considerations - -- **Reference implementation**: The `global-hotkey` repo (`/Users/mschnecke/workspace/github-global-hotkey`) has a working implementation of this exact pattern — use it as the primary reference for workflow structure, scripts, and package definitions. -- **Workflow job dependencies**: `update-homebrew` and `update-chocolatey` must depend on `publish-release` to ensure artifacts are publicly available before package managers reference them. -- **SHA256 checksums**: Both Homebrew and Chocolatey require SHA256 checksums of the installer artifacts. These must be computed in CI after downloading the built artifact — never hardcoded. -- **MyGet feed**: The existing MyGet account (`mschnecke`) and feed should be reused. The Chocolatey install source URL for users is the v3 endpoint. -- **Homebrew tap dispatch**: The dispatch event should include the version in the payload (e.g., `{ "version": "0.1.8" }`) so the tap workflow knows which release to pull. -- **Asset naming**: macOS pkg is `Pisum.Langue_{version}_aarch64.pkg`, Windows MSI is `Pisum.Langue_{version}_x64_en-US.msi` — these names are determined by the existing Tauri build config and `create-macos-pkg.sh`. - -## 8. 
Success Metrics - -- `brew tap mschnecke/pisum-langue && brew install --cask pisum-langue` successfully installs the latest version on macOS (Apple Silicon) -- `choco install pisum-langue --source https://www.myget.org/F/mschnecke/api/v3/index.json` successfully installs the latest version on Windows -- A new release triggered via `release.yml` automatically updates both the Homebrew cask and Chocolatey package within the same workflow run, with no manual intervention -- `brew upgrade pisum-langue` and `choco upgrade pisum-langue` correctly pull the new version after a release - -## 9. Open Questions - -- [x] Does the `mschnecke/homebrew-pisum-langue` tap repository already exist, or does it need to be created? -> already created -- [x] Is the MyGet account and API key already configured, or do they need to be set up? -> already configured -- [x] Should the GitHub Release body template be updated in this effort, or handled separately? -> the GitHub Release body template should be updated diff --git a/docs/PRD-start-stop-recording.md b/docs/PRD-start-stop-recording.md deleted file mode 100644 index 9fd4b27..0000000 --- a/docs/PRD-start-stop-recording.md +++ /dev/null @@ -1,90 +0,0 @@ -# PRD: Start and Stop Recording Mode - -## 1. Introduction/Overview - -Currently, Pisum Langue uses a **hold-to-record** model: the user holds down a hotkey to record speech and releases it to stop recording, which triggers transcription and paste. This works well for short dictations but can be fatiguing for longer recordings since the user must keep the key held down. - -This feature introduces a **toggle recording mode** as an alternative. In toggle mode, the user presses the hotkey once to start recording and presses it again to stop. Both modes (hold-to-record and toggle) will be available, and the user can choose their preferred mode in settings. - -## 2. 
Goals - -- Allow users to record for extended periods without physical strain from holding a key -- Provide a choice between hold-to-record and toggle modes to accommodate different workflows -- Maintain the existing transcription and paste behavior regardless of which recording mode is used - -## 3. User Stories - -1. **As a user who dictates long passages**, I want to press a hotkey once to start recording and again to stop, so that I don't have to hold the key down the entire time. - -2. **As a user who prefers the current behavior**, I want to keep using hold-to-record mode, so that my existing workflow is not disrupted. - -3. **As a user switching between modes**, I want to change the recording mode in settings, so that I can pick the mode that fits my current task. - -4. **As a user in toggle mode**, I want the tray icon to visually indicate that recording is active, so that I have a persistent visual reminder. - -## 4. Functional Requirements - -### Recording Mode Setting - -1. The system must provide a "Recording Mode" setting with two options: **Hold to Record** and **Toggle (Start/Stop)**. -2. The system must default to **Hold to Record** to preserve existing behavior. -3. The setting must be persisted across application restarts. -4. Changing the recording mode must take effect immediately without requiring an app restart. - -### Toggle Mode Behavior - -5. In toggle mode, pressing the hotkey once must **start** audio recording. -6. In toggle mode, pressing the hotkey a second time must **stop** audio recording and trigger transcription. -7. The system must use the **same hotkey** configured for hold-to-record; behavior changes based on the selected mode. -8. The system must ignore the hotkey release event in toggle mode (release should not stop recording). -9. The existing maximum recording duration limit (10 minutes) must still apply in toggle mode — recording auto-stops and transcribes when the limit is reached. -10. 
The existing minimum recording duration check (50ms) must still apply in toggle mode. -11. The system must prevent starting a new recording while transcription is in progress (same guard as hold mode). - -### Visual Feedback - -12. The tray icon must change to the recording state icon when recording starts (same as current behavior, applies to both modes). -13. The tray icon must revert to idle when recording stops (same as current behavior, applies to both modes). - -### Hold-to-Record Mode (Existing — No Changes) - -1. In hold-to-record mode, pressing the hotkey must start recording (existing behavior, unchanged). -2. In hold-to-record mode, releasing the hotkey must stop recording and trigger transcription (existing behavior, unchanged). - -## 5. Non-Goals (Out of Scope) - -- **Not included:** Separate hotkey bindings for each recording mode — the same hotkey is shared, behavior depends on the selected mode. -- **Not included:** A preview or confirmation dialog before pasting — transcription and paste happen automatically in both modes. -- **Not included:** Audible feedback (chimes/sounds) for recording state changes. -- **Not included:** A third "hybrid" mode where short-press toggles and long-press holds — only the two distinct modes are offered. - -## 6. Design Considerations - -### Settings UI - -- Add a "Recording Mode" option to the settings UI, likely in the **General** or **Audio** tab. -- Use a radio group or segmented control with two options: "Hold to Record" and "Toggle (Start/Stop)". -- Include a brief description below each option: - - Hold to Record: "Hold the hotkey to record. Release to transcribe and paste." - - Toggle: "Press the hotkey to start recording. Press again to transcribe and paste." - -### Hotkey Config Label - -- The [HotkeyConfig.svelte](src/components/HotkeyConfig.svelte) description text currently reads "Hold this key combination to record, release to transcribe and paste." 
This must update dynamically based on the selected recording mode. - -## 7. Technical Considerations - -- **Hotkey event handling:** The current implementation in [manager.rs](src-tauri/src/hotkey/manager.rs) uses `HotKeyState::Pressed` and `HotKeyState::Released` events. Toggle mode must track internal state (idle → recording → idle) and only act on `Pressed` events, ignoring `Released`. -- **State management:** A new recording mode field is needed in the app configuration ([config/](src-tauri/src/config/)). The hotkey manager must read this setting to determine which behavior to use. -- **Tray integration:** The tray icon and tooltip updates in [tray.rs](src-tauri/src/tray.rs) already handle recording state transitions — these should work without changes for toggle mode. -- **Max duration timer:** The existing 10-minute auto-stop timer spawned on recording start should work identically in toggle mode. - -## 8. Success Metrics - -- **Functional correctness:** Both recording modes work reliably — hold-to-record behaves identically to current implementation, toggle mode correctly starts/stops on consecutive presses. -- **No regressions:** Existing hold-to-record users experience no change in behavior when the default mode is active. -- **Setting persistence:** Recording mode selection survives app restarts. - -## 9. Open Questions - -- [x] Should the max recording duration be configurable, or remain fixed at 10 minutes for both modes? -> yes diff --git a/docs/PRD-transcription.md b/docs/PRD-transcription.md deleted file mode 100644 index 9f9f3b9..0000000 --- a/docs/PRD-transcription.md +++ /dev/null @@ -1,115 +0,0 @@ -# PRD: AI-Driven Dictation - -## 1. Introduction/Overview - -Pisum Langue is a cross-platform desktop utility (Windows and macOS) that lets users dictate text anywhere on their system. The user holds a global hotkey to record (push-to-talk) and releases it to stop. 
The app records the speech to a compressed audio file (Opus preferred), sends it with a prompt to an AI provider, copies the resulting text to the clipboard, and pastes it at the current cursor position. - -## 2. Goals - -- Provide a system-wide hotkey that triggers speech recording from the default microphone -- Record speech to a compressed audio format (Opus preferred, WAV as fallback) -- Send the recorded audio with a prompt to a configurable AI provider (e.g., Gemini) for transcription -- Copy the transcription result to the system clipboard -- Automatically paste the result at the current cursor position -- Keep the AI provider abstracted so it can be replaced or upgraded independently -- Support both Windows and macOS - -## 3. User Stories - -1. As a user, I want to hold a hotkey and speak so that I can dictate text without switching applications. -2. As a user, I want the transcribed text to appear at my cursor position so that I can dictate directly into any text field or editor. -3. As a user, I want the transcribed text copied to my clipboard so that I can paste it manually if automatic pasting fails. -4. As a user, I want to configure which AI provider and model to use so that I can choose the best option for accuracy, speed, or cost. -5. As a user, I want to switch between prompt presets (e.g., "German transcription", "English meeting notes") so that I can quickly adapt the transcription to different contexts without editing the full prompt each time. - -## 4. Functional Requirements - -### Recording - -1. The system must register a configurable global hotkey that works across all applications on Windows and macOS. -2. The system must capture audio from the default system microphone when the hotkey is activated. -3. The system must encode the captured audio using the format selected in settings (Opus in OGG container or WAV). Opus (OGG_OPUS) is the default for Gemini API compatibility. 
If the selected format's encoding fails at runtime (e.g., Opus library unavailable), the system must fall back to WAV and log a warning. -4. The system must stop recording when the hotkey is released (push-to-talk mode). -5. The system must enforce a maximum recording duration of 10 minutes. Recording auto-stops when the limit is reached. -6. The system must provide audio/visual feedback (e.g., system tray icon change, small overlay) to indicate recording state. - -### Transcription - -1. The system must send the recorded audio file to an AI provider (e.g., Gemini) along with a system prompt that instructs the model to transcribe the audio. The language, vocabulary hints, and formatting instructions are all part of the prompt — there is no separate language parameter. -2. The system must support multiple named prompt presets (roles). Each preset has a name and a system prompt that controls transcription behavior (e.g., target language, vocabulary hints, formatting instructions, output style). The user selects the active preset from the settings UI. If the active preset ID references a deleted or nonexistent preset, the system must fall back to the first built-in preset (e.g., "Transcribe DE") and update the persisted setting. -3. The system must ship with sensible built-in presets (e.g., "Transcribe DE" for German transcription, "Transcribe EN" for English transcription). Users can create, edit, and delete custom presets. Built-in presets cannot be deleted but can be edited. -4. The AI provider must be abstracted behind a trait (`TranscriptionProvider`) so the implementation can be swapped without modifying consuming code. -5. The system must distribute transcription requests across configured providers in round-robin order to balance API rate limits and quotas. If a provider fails, the system must fall back to the next available provider. - -### Output - -1. The system must copy the transcription result to the system clipboard. -2. 
The system must simulate a paste action (Ctrl+V / Cmd+V) to insert the text at the current cursor position. - -### Error Handling & Offline Behavior - -1. The system must detect when the network is unavailable or the transcription API call fails. -2. The system must show an OS-native toast notification (Windows toast notification / macOS NSUserNotification) with a clear error message when transcription fails for any reason (network, auth, quota, etc.). -3. The system must not silently discard errors — every failure in the pipeline (recording, encoding, transcription, pasting) must surface a notification to the user. - -### Configuration - -1. The system must provide a settings UI (accessible from system tray) to configure: hotkey, audio format, AI provider credentials, and prompt presets. -2. The system must persist all settings in a single JSON settings file in the user's home directory. -3. The system must start minimized to the system tray / menu bar. -4. The system must auto-start with the OS (Windows Startup / macOS Login Items) by default. The user can disable auto-start in settings. -5. On first launch (no API key configured), the system must automatically open the settings window and show a notification guiding the user to configure an AI provider. The hotkey must still register but transcription attempts must return a clear "no provider configured" error notification. - -## 5. 
Non-Goals (Out of Scope) - -- Not included: A full windowed UI for reviewing, editing, or exporting transcriptions -- Not included: File upload — the only input is live microphone recording -- Not included: SRT, TXT, or DOCX export -- Not included: Translation -- Not included: Clipboard restoration — the transcription result overwrites the current clipboard content -- Not included: Streaming/real-time transcription during recording (audio is sent after recording stops) -- Not included: User accounts, authentication, or multi-user support -- Not included: Mobile platform support -- Not included: Auto-update mechanism or distribution/installer tooling - -## 6. Design Considerations - -- The app runs as a system tray (Windows) / menu bar (macOS) application with no main window -- A small floating indicator or tray icon color change shows when recording is active -- Settings are accessed via right-click on the tray/menu bar icon -- Closing the settings window hides it back to the system tray instead of quitting the app. The app only exits via the "Quit" option in the tray menu -- The active prompt preset is displayed in the tray tooltip. Preset switching is done through the settings UI -- The interaction should feel instantaneous — minimal latency between stopping recording and text appearing at the cursor -- Errors (network failure, invalid API key, no microphone, etc.) are surfaced as OS-native toast notifications (Windows toast / macOS NSUserNotification) so the user always gets feedback even when no app window is visible - -## 7. Technical Considerations - -- **Cross-platform:** Use Tauri 2 (Rust backend) with Svelte 5 (TypeScript frontend), Vite 6, and Tailwind CSS. Platform-specific behavior is isolated via conditional compilation (`#[cfg(...)]`) in Rust modules. -- **macOS Permissions:** Microphone access requires `NSMicrophoneUsageDescription` in `Info.plist`. macOS prompts the user on first use. 
Accessibility permission is required for paste simulation via `enigo`. A post-install notification guides the user to grant Accessibility permissions in System Settings. -- **Logging:** Use file-based logging in the user's home directory (`~/.pisum-langue/logs/`). Use `tracing` crate with rotating log files. -- **Global Hotkey:** Use the `global-hotkey` crate for cross-platform system-wide hotkey registration. The `GlobalHotKeyManager` runs on the main thread (thread-local), with an event loop in a background thread listening for press/release events. -- **Audio Recording:** Use the `cpal` crate (Cross-Platform Audio Library) to capture from the default microphone on a dedicated thread. Supports f32, i16, and u16 sample formats with normalization to f32. -- **Audio Encoding:** Encode to Opus in an OGG container (OGG_OPUS format) using `audiopus` for Opus encoding, `rubato` for high-quality sinc resampling to Opus-compatible sample rates, and the `ogg` crate for Ogg container wrapping. WAV via `hound` as fallback. -- **Clipboard & Paste Simulation:** Use the `arboard` crate for cross-platform clipboard access. Simulate Ctrl+V / Cmd+V via the `enigo` crate for cross-platform keystroke simulation. The transcription overwrites the current clipboard content (no restore). -- **Notifications:** Use `tauri-plugin-notification` for OS-native toast notifications on both Windows and macOS. -- **AI Provider Abstraction:** Define a `TranscriptionProvider` Rust trait. Implementations are instantiated and managed via a `ProviderPool` that handles round-robin distribution and fallback. -- **AI Provider:** Gemini (Google Generative Language API) is the default provider, accessed via API key. Default model: `gemini-2.5-flash-lite`. Audio is sent as base64-encoded inline data. No service account JSON files. -- **System Tray:** Use Tauri's `TrayIconBuilder` with dynamic icon theming (light/dark detection). macOS uses `iconAsTemplate` for automatic theme adaptation. 
Windows detects dark mode via registry. -- **Configuration:** Single JSON settings file in the user's home directory (`~/.pisum-langue.json`), serialized via `serde`. No config migration — if the schema changes, defaults are used for missing fields. -- **Auto-Start:** Use `tauri-plugin-autostart` for OS startup integration — Windows Startup registry and macOS LaunchAgent. Configurable in settings. - -## 8. Success Metrics - -- End-to-end latency (hotkey release → text pasted) under 3 seconds for short utterances (< 15 seconds of speech) -- Transcription works reliably across common applications (browsers, editors, chat apps, Office) -- Swapping the AI provider requires only adding a new trait implementation and registering it in the provider pool -- The app runs unobtrusively in the system tray with minimal resource usage when idle - -## 9. Open Questions - -- [x] Which cross-platform framework should be used? -> Tauri 2 (Rust backend) + Svelte 5 (TypeScript frontend) -- [x] Should the hotkey default to push-to-talk (hold to record) or toggle (press to start/stop)? -> push-to-talk only -- [x] What is the maximum recording duration to support? -> 10 min -- [x] Which AI provider should be the default implementation (OpenAI Whisper, Azure, Google Gemini)? -> Google Gemini (`gemini-2.5-flash-lite`) -- [x] Should the app support multiple prompt presets (e.g., "medical terminology", "casual conversation")? -> Yes, with built-in defaults and user-defined custom presets. Active preset selectable from the settings UI. -- [x] How should clipboard restoration work — always restore, or make it configurable? -> No clipboard restoration; transcription overwrites clipboard diff --git a/index.html b/index.html index d6f0f5b..a45bb52 100644 --- a/index.html +++ b/index.html @@ -3,7 +3,7 @@ - Pisum Langue - Settings + Pisum Transcript - Settings
diff --git a/package-lock.json b/package-lock.json index cfd71ea..eb1ba61 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "pisum-langue", + "name": "pisum-transcript", "version": "0.1.18", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "pisum-langue", + "name": "pisum-transcript", "version": "0.1.18", "dependencies": { "@tauri-apps/api": "^2.1.1", diff --git a/package.json b/package.json index 7403a89..dd89888 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "pisum-langue", + "name": "pisum-transcript", "private": true, "version": "0.1.18", "type": "module", diff --git a/packages/chocolatey/pisum-langue.nuspec b/packages/chocolatey/pisum-langue.nuspec deleted file mode 100644 index c6279a2..0000000 --- a/packages/chocolatey/pisum-langue.nuspec +++ /dev/null @@ -1,34 +0,0 @@ - - - - pisum-langue - 0.1.18 - Pisum Langue - Pisum Langue Team - mschnecke - https://github.com/mschnecke/langue - https://github.com/mschnecke/langue/blob/main/LICENSE - https://raw.githubusercontent.com/mschnecke/langue/main/src-tauri/icons/icon.png - false - -Pisum Langue is an AI-driven transcription utility. Hold a hotkey to record speech, release to transcribe and paste at the cursor position. 
- -Features: -- Global hotkey-driven speech-to-text transcription -- AI-powered transcription with multiple provider support -- System tray integration with recording state indicators -- Configurable audio settings and hotkey bindings -- Cross-platform support (macOS and Windows) - - AI-driven transcription utility with global hotkey support - https://github.com/mschnecke/langue/releases - Copyright 2025 Pisum Langue Team - transcription dictation speech-to-text ai hotkey - https://github.com/mschnecke/langue/tree/main/packages/chocolatey - https://github.com/mschnecke/langue#readme - https://github.com/mschnecke/langue/issues - - - - - diff --git a/packages/chocolatey/pisum-transcript.nuspec b/packages/chocolatey/pisum-transcript.nuspec new file mode 100644 index 0000000..8d70c95 --- /dev/null +++ b/packages/chocolatey/pisum-transcript.nuspec @@ -0,0 +1,34 @@ + + + + pisum-transcript + 0.1.18 + Pisum Transcript + Pisum Transcript Team + mschnecke + https://github.com/mschnecke/pisum-transcript + https://github.com/mschnecke/pisum-transcript/blob/main/LICENSE + https://raw.githubusercontent.com/mschnecke/pisum-transcript/main/src-tauri/icons/icon.png + false + +Pisum Transcript is an AI-driven transcription utility. Hold a hotkey to record speech, release to transcribe and paste at the cursor position. 
+ +Features: +- Global hotkey-driven speech-to-text transcription +- AI-powered transcription with multiple provider support +- System tray integration with recording state indicators +- Configurable audio settings and hotkey bindings +- Cross-platform support (macOS and Windows) + + AI-driven transcription utility with global hotkey support + https://github.com/mschnecke/pisum-transcript/releases + Copyright 2025 Pisum Transcript Team + transcription dictation speech-to-text ai hotkey + https://github.com/mschnecke/pisum-transcript/tree/main/packages/chocolatey + https://github.com/mschnecke/pisum-transcript#readme + https://github.com/mschnecke/pisum-transcript/issues + + + + + diff --git a/packages/chocolatey/tools/chocolateyinstall.ps1 b/packages/chocolatey/tools/chocolateyinstall.ps1 index 7c09491..c99818c 100644 --- a/packages/chocolatey/tools/chocolateyinstall.ps1 +++ b/packages/chocolatey/tools/chocolateyinstall.ps1 @@ -1,13 +1,13 @@ $ErrorActionPreference = 'Stop' -$packageName = 'pisum-langue' +$packageName = 'pisum-transcript' $toolsDir = "$(Split-Path -Parent $MyInvocation.MyCommand.Definition)" $packageArgs = @{ packageName = $packageName fileType = 'msi' - url64bit = 'https://github.com/mschnecke/langue/releases/download/v0.1.7/Pisum.Langue_0.1.7_x64_en-US.msi' - softwareName = 'Pisum Langue*' + url64bit = 'https://github.com/mschnecke/pisum-transcript/releases/download/v0.1.7/Pisum.Transcript_0.1.7_x64_en-US.msi' + softwareName = 'Pisum Transcript*' checksum64 = 'REPLACE_WITH_ACTUAL_CHECKSUM' checksumType64 = 'sha256' silentArgs = '/qn /norestart' diff --git a/packages/chocolatey/tools/chocolateyuninstall.ps1 b/packages/chocolatey/tools/chocolateyuninstall.ps1 index adc37a7..fcd4ae9 100644 --- a/packages/chocolatey/tools/chocolateyuninstall.ps1 +++ b/packages/chocolatey/tools/chocolateyuninstall.ps1 @@ -1,7 +1,7 @@ $ErrorActionPreference = 'Stop' -$packageName = 'pisum-langue' -$softwareName = 'Pisum Langue*' +$packageName = 'pisum-transcript' 
+$softwareName = 'Pisum Transcript*' $installerType = 'msi' [array]$key = Get-UninstallRegistryKey -SoftwareName $softwareName diff --git a/packages/homebrew/pisum-langue.rb b/packages/homebrew/pisum-langue.rb deleted file mode 100644 index eaaad0c..0000000 --- a/packages/homebrew/pisum-langue.rb +++ /dev/null @@ -1,27 +0,0 @@ -cask "pisum-langue" do - version "0.1.7" - sha256 "REPLACE_WITH_ACTUAL_CHECKSUM" - - url "https://github.com/mschnecke/langue/releases/download/v#{version}/Pisum.Langue_#{version}_aarch64.pkg" - name "Pisum Langue" - desc "AI-driven transcription utility" - homepage "https://github.com/mschnecke/langue" - - livecheck do - url :url - strategy :github_latest - end - - depends_on macos: ">= :catalina" - - pkg "Pisum.Langue_#{version}_aarch64.pkg" - - uninstall pkgutil: "com.pisum.langue.app" - - zap trash: [ - "~/Library/Application Support/com.pisum.langue", - "~/Library/Caches/com.pisum.langue", - "~/Library/Preferences/com.pisum.langue.plist", - "~/Library/LaunchAgents/com.pisum.langue.plist", - ] -end diff --git a/packages/homebrew/pisum-transcript.rb b/packages/homebrew/pisum-transcript.rb new file mode 100644 index 0000000..d625013 --- /dev/null +++ b/packages/homebrew/pisum-transcript.rb @@ -0,0 +1,27 @@ +cask "pisum-transcript" do + version "0.1.7" + sha256 "REPLACE_WITH_ACTUAL_CHECKSUM" + + url "https://github.com/mschnecke/pisum-transcript/releases/download/v#{version}/Pisum.Transcript_#{version}_aarch64.pkg" + name "Pisum Transcript" + desc "AI-driven transcription utility" + homepage "https://github.com/mschnecke/pisum-transcript" + + livecheck do + url :url + strategy :github_latest + end + + depends_on macos: ">= :catalina" + + pkg "Pisum.Transcript_#{version}_aarch64.pkg" + + uninstall pkgutil: "net.pisum.transcript.app" + + zap trash: [ + "~/Library/Application Support/net.pisum.transcript", + "~/Library/Caches/net.pisum.transcript", + "~/Library/Preferences/net.pisum.transcript.plist", + 
"~/Library/LaunchAgents/net.pisum.transcript.plist", + ] +end diff --git a/pisum-langue.code-workspace b/pisum-transcript.code-workspace similarity index 100% rename from pisum-langue.code-workspace rename to pisum-transcript.code-workspace diff --git a/scripts/create-macos-pkg.sh b/scripts/create-macos-pkg.sh index 442c543..6a9a643 100755 --- a/scripts/create-macos-pkg.sh +++ b/scripts/create-macos-pkg.sh @@ -16,9 +16,9 @@ fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -APP_NAME="Pisum Langue" -PKG_PREFIX="Pisum.Langue" -BUNDLE_ID="com.pisum.langue.app" +APP_NAME="Pisum Transcript" +PKG_PREFIX="Pisum.Transcript" +BUNDLE_ID="net.pisum.transcript.app" # Determine source .app location based on architecture if [ "$ARCH" == "aarch64" ]; then diff --git a/scripts/postinstall b/scripts/postinstall index c8ef01c..a77dc5a 100755 --- a/scripts/postinstall +++ b/scripts/postinstall @@ -2,7 +2,7 @@ # scripts/postinstall # Runs after the .pkg installation completes -APP_PATH="/Applications/Pisum Langue.app" +APP_PATH="/Applications/Pisum Transcript.app" # Remove quarantine attribute (allows app to run without Gatekeeper warning) if [ -d "$APP_PATH" ]; then @@ -10,9 +10,9 @@ if [ -d "$APP_PATH" ]; then fi # Notify user about Accessibility permissions -osascript -e 'display notification "Please grant Accessibility permissions in System Settings > Privacy & Security > Accessibility" with title "Pisum Langue Installed" subtitle "One more step needed"' 2>/dev/null || true +osascript -e 'display notification "Please grant Accessibility permissions in System Settings > Privacy & Security > Accessibility" with title "Pisum Transcript Installed" subtitle "One more step needed"' 2>/dev/null || true -# Launch Pisum Langue after installation +# Launch Pisum Transcript after installation if [ -d "$APP_PATH" ]; then CONSOLE_USER=$(stat -f "%Su" /dev/console) if [ -n "$CONSOLE_USER" ] && [ "$CONSOLE_USER" != "root" ]; then diff 
--git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 9d46466..11502fb 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -3388,7 +3388,7 @@ dependencies = [ ] [[package]] -name = "pisum-langue" +name = "pisum-transcript" version = "0.1.18" dependencies = [ "arboard", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index a4af9b9..b25cbb5 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "pisum-langue" +name = "pisum-transcript" version = "0.1.18" edition = "2021" [lib] -name = "pisum_langue_lib" +name = "pisum_transcript_lib" crate-type = ["lib", "cdylib", "staticlib"] [build-dependencies] diff --git a/src-tauri/Info.plist b/src-tauri/Info.plist index 7f845f4..92e2a3a 100644 --- a/src-tauri/Info.plist +++ b/src-tauri/Info.plist @@ -5,6 +5,6 @@ LSUIElement NSMicrophoneUsageDescription - Pisum Langue needs microphone access to record speech for transcription. + Pisum Transcript needs microphone access to record speech for transcription. diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index db92ef4..40b8860 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -1,7 +1,7 @@ { "$schema": "../gen/schemas/desktop-schema.json", "identifier": "default", - "description": "Default capabilities for Pisum Langue", + "description": "Default capabilities for Pisum Transcript", "windows": ["main"], "permissions": [ "core:default", diff --git a/src-tauri/scripts/macos-postinstall.sh b/src-tauri/scripts/macos-postinstall.sh index 2cff13e..1b78102 100644 --- a/src-tauri/scripts/macos-postinstall.sh +++ b/src-tauri/scripts/macos-postinstall.sh @@ -1,3 +1,3 @@ #!/bin/bash # macOS post-install: remind user to grant Accessibility permissions for paste simulation -osascript -e 'display notification "Please grant Accessibility access in System Settings > Privacy & Security > Accessibility to enable paste simulation." 
with title "Pisum Langue Installed"' +osascript -e 'display notification "Please grant Accessibility access in System Settings > Privacy & Security > Accessibility to enable paste simulation." with title "Pisum Transcript Installed"' diff --git a/src-tauri/src/audio/encoder.rs b/src-tauri/src/audio/encoder.rs index 16c46fd..cb5f5d9 100644 --- a/src-tauri/src/audio/encoder.rs +++ b/src-tauri/src/audio/encoder.rs @@ -172,7 +172,7 @@ fn wrap_in_ogg( // OpusTags comment header let mut comment_header = Vec::new(); comment_header.extend_from_slice(b"OpusTags"); - let vendor = b"pisum-langue"; + let vendor = b"pisum-transcript"; comment_header.extend_from_slice(&(vendor.len() as u32).to_le_bytes()); comment_header.extend_from_slice(vendor); comment_header.extend_from_slice(&0u32.to_le_bytes()); // No user comments diff --git a/src-tauri/src/config/manager.rs b/src-tauri/src/config/manager.rs index b5fe362..246ccba 100644 --- a/src-tauri/src/config/manager.rs +++ b/src-tauri/src/config/manager.rs @@ -7,9 +7,9 @@ use crate::error::AppError; use super::presets::get_builtin_presets; use super::schema::AppSettings; -const SETTINGS_FILE: &str = ".pisum-langue.json"; +const SETTINGS_FILE: &str = ".pisum-transcript.json"; -/// Get the settings file path (~/.pisum-langue.json) +/// Get the settings file path (~/.pisum-transcript.json) fn settings_path() -> Result { let home = dirs::home_dir() .ok_or_else(|| AppError::Config("Could not determine home directory".to_string()))?; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 7ab0cdd..fdf5f17 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -284,7 +284,7 @@ async fn apply_settings(settings: &AppSettings, app: &AppHandle) { #[cfg_attr(mobile, tauri::mobile_entry_point)] pub fn run() { logging::init(); - tracing::info!("Starting Pisum Langue v{}", env!("CARGO_PKG_VERSION")); + tracing::info!("Starting Pisum Transcript v{}", env!("CARGO_PKG_VERSION")); tauri::Builder::default() 
.plugin(tauri_plugin_notification::init()) @@ -375,7 +375,7 @@ pub fn run() { } tray::send_notification( - "Welcome to Pisum Langue!", + "Welcome to Pisum Transcript!", "Please configure an AI provider to get started.", ); // Open settings window diff --git a/src-tauri/src/logging.rs b/src-tauri/src/logging.rs index b3f2a20..c4868cb 100644 --- a/src-tauri/src/logging.rs +++ b/src-tauri/src/logging.rs @@ -2,15 +2,15 @@ use std::path::PathBuf; use tracing_appender::rolling; use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; -/// Returns the log directory path: `~/.pisum-langue/logs/` +/// Returns the log directory path: `~/.pisum-transcript/logs/` fn log_dir() -> PathBuf { let home = dirs::home_dir().expect("Failed to determine home directory"); - home.join(".pisum-langue").join("logs") + home.join(".pisum-transcript").join("logs") } /// Initialize file-based logging with daily rotation. /// -/// - Log directory: `~/.pisum-langue/logs/` +/// - Log directory: `~/.pisum-transcript/logs/` /// - Daily rotation, kept for 7 days (tracing-appender handles rotation; /// cleanup of old files is best-effort via a simple sweep on startup) /// - In debug builds, also logs to stdout @@ -21,14 +21,14 @@ pub fn init() { // Clean up log files older than 7 days cleanup_old_logs(&dir, 7); - let file_appender = rolling::daily(&dir, "pisum-langue.log"); + let file_appender = rolling::daily(&dir, "pisum-transcript.log"); let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender); // Leak the guard so the appender lives for the entire process std::mem::forget(_guard); let env_filter = EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new("info,pisum_langue_lib=debug")); + .unwrap_or_else(|_| EnvFilter::new("info,pisum_transcript_lib=debug")); let file_layer = fmt::layer() .with_writer(non_blocking) diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 55009a6..b13548e 100644 --- a/src-tauri/src/main.rs +++ 
b/src-tauri/src/main.rs @@ -2,5 +2,5 @@ #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] fn main() { - pisum_langue_lib::run(); + pisum_transcript_lib::run(); } diff --git a/src-tauri/src/tray.rs b/src-tauri/src/tray.rs index 76a84d0..ebb0bb1 100644 --- a/src-tauri/src/tray.rs +++ b/src-tauri/src/tray.rs @@ -30,7 +30,7 @@ pub fn setup_tray(app: &tauri::App) -> Result<(), Box> { let tray_builder = TrayIconBuilder::with_id("main") .icon(tray_icon) - .tooltip("Pisum Langue") + .tooltip("Pisum Transcript") .menu(&menu); // macOS: mark as template image so the system auto-inverts for dark/light mode @@ -116,7 +116,7 @@ pub fn set_tray_tooltip(preset_name: &str) { let handle = APP_HANDLE.read().unwrap(); if let Some(app) = handle.as_ref() { if let Some(tray) = app.tray_by_id("main") { - let tooltip = format!("Pisum Langue — {}", preset_name); + let tooltip = format!("Pisum Transcript — {}", preset_name); let _ = tray.set_tooltip(Some(&tooltip)); } } diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 1f774f7..c071f3c 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,8 +1,8 @@ { "$schema": "https://raw.githubusercontent.com/nicegui/tauri-v2-schema/main/tauri.conf.json", - "productName": "Pisum Langue", + "productName": "Pisum Transcript", "version": "0.1.18", - "identifier": "com.pisum.langue", + "identifier": "net.pisum.transcript", "build": { "beforeDevCommand": "npm run dev", "devUrl": "http://localhost:1420", @@ -14,7 +14,7 @@ "macOSPrivateApi": true, "windows": [ { - "title": "Pisum Langue", + "title": "Pisum Transcript", "width": 700, "height": 530, "minWidth": 530, diff --git a/src/components/GeneralConfig.svelte b/src/components/GeneralConfig.svelte index f7a96b9..304ac0a 100644 --- a/src/components/GeneralConfig.svelte +++ b/src/components/GeneralConfig.svelte @@ -38,7 +38,7 @@