From bf335da697a2ff26effaed5731b68c8c371d214c Mon Sep 17 00:00:00 2001 From: baiqing Date: Fri, 1 May 2026 10:10:23 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=BF=BB=E8=AF=91=E6=A8=A1=E5=BC=8F=20?= =?UTF-8?q?+=20Codex=20=E5=90=8E=E5=AE=A1=E4=BF=AE=E5=A4=8D=20+=20?= =?UTF-8?q?=E8=83=B6=E5=9B=8A=E7=BF=BB=E8=AF=91=E6=8C=87=E7=A4=BA=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 翻译模式(issue #4) 按住录音键时随时按一下 Shift(任一侧)→ 录音结束后走"翻译输出"管线, 按"翻译"页中选定的目标语言翻译后插入到光标。 - hotkey.rs 三平台(CGEventTap / Win32 hook / rdev)都监听 Shift down 边沿, 通过新增的 HotkeyEvent::TranslationModifierPressed 上报 - coordinator.rs 加 translation_modifier_seen 标志位(begin_session 重置); end_session 路径根据它 + 非空 target_language 选 polish 或 translate - polish.rs 抽 chat_completion 共享通道;新增 translate_to + translate_system_prompt; working_languages 作为 system prompt 头部前提注入两条管线 - types.rs::UserPreferences 加 working_languages(默认 ["简体中文"])+ translation_target_language(默认空) - Settings.tsx 删除原来的录音 section 翻译配置;新建 Translation.tsx 一级页(在风格下面), 含工作语言多选 + 目标语言下拉 + 5 步使用指南 - 新增 capsule.tsx 翻译徽章:"● 正在翻译" 蓝色药丸,绝对定位在 pill 上方 8px, opacity + translateY transition 平滑出入;CapsulePayload 加 translation: bool - capsule 窗口高度 96 → 110 给 badge 让出空间 - 内置 SUPPORTED_LANGUAGES 列表(15 种自然语言原生名) Codex 后审修复(基于 4f45dee main 上跑的整体审查) - tauri.conf.json 主窗口 decorations 由 #108 设的 false 还原为 true, Windows-only 在 lib.rs::setup 里 set_decorations(false),让 macOS 拿回原生红黄绿 - coordinator.rs::SessionState 加 session_id;spawn_recorder_error_monitor 捕获当前 id,迟到的 cpal 错误若 id 不匹配直接 drop,不会误中止后续 session - volcengine.rs 把 spawn-per-chunk 模式改成单 mpsc worker 串行 send, 彻底消除跨 consume 调用的 writer 锁 race(之前的 fix 仅在 burst flush 内部有效) 附带修复 - insertion.rs Windows/Linux 粘贴后剪贴板恢复加用户开关 restore_clipboard_after_paste (issue #111),默认 true 保留现行行为;关掉后听写文本留剪贴板,paste 失败时可手动 Ctrl+V 找回 - SettingsModal 内容区改为 flex column + overflow:hidden,X 按钮 + 标题固定, 只 section 内容区滚动;Settings.tsx 嵌入模式右栏独立 scroll - Settings → 提供商:火山引擎 "App Key" 文案改为 "App ID" (keychain account 
name 不变,老用户凭据不丢) - i18n 增加 capsule.translating / nav.translation / translation.* 完整命名空间 --- .../app/src-tauri/src/asr/volcengine.rs | 90 +++++---- openless-all/app/src-tauri/src/coordinator.rs | 118 +++++++++++- openless-all/app/src-tauri/src/hotkey.rs | 53 +++++- openless-all/app/src-tauri/src/insertion.rs | 15 +- openless-all/app/src-tauri/src/lib.rs | 6 + openless-all/app/src-tauri/src/polish.rs | 166 +++++++++++++--- openless-all/app/src-tauri/src/types.rs | 22 +++ openless-all/app/src-tauri/tauri.conf.json | 4 +- openless-all/app/src/components/Capsule.tsx | 49 +++++ .../app/src/components/FloatingShell.tsx | 2 + openless-all/app/src/components/Icon.tsx | 1 + .../app/src/components/SettingsModal.tsx | 25 ++- openless-all/app/src/i18n/en.ts | 34 +++- openless-all/app/src/i18n/zh-CN.ts | 34 +++- openless-all/app/src/lib/ipc.ts | 3 + openless-all/app/src/lib/types.ts | 28 +++ openless-all/app/src/pages/Settings.tsx | 32 +++- openless-all/app/src/pages/Translation.tsx | 180 ++++++++++++++++++ openless-all/app/src/state/useAppState.ts | 2 +- 19 files changed, 776 insertions(+), 88 deletions(-) create mode 100644 openless-all/app/src/pages/Translation.tsx diff --git a/openless-all/app/src-tauri/src/asr/volcengine.rs b/openless-all/app/src-tauri/src/asr/volcengine.rs index 60010d44..f212dca9 100644 --- a/openless-all/app/src-tauri/src/asr/volcengine.rs +++ b/openless-all/app/src-tauri/src/asr/volcengine.rs @@ -13,7 +13,7 @@ use parking_lot::Mutex as ParkingMutex; use serde_json::{json, Value}; use tokio::net::TcpStream; use tokio::runtime::Handle; -use tokio::sync::{oneshot, Mutex as AsyncMutex, Notify}; +use tokio::sync::{mpsc, oneshot, Mutex as AsyncMutex, Notify}; use tokio_tungstenite::tungstenite::client::IntoClientRequest; use tokio_tungstenite::tungstenite::http::header::HeaderValue; use tokio_tungstenite::tungstenite::Message; @@ -88,7 +88,12 @@ pub struct VolcengineStreamingASR { /// of the lifetime of any particular `&self` borrow. 
writer: SharedWriter, final_rx: ParkingMutex>>>, - /// 在飞的 audio 帧 spawn 数。consume_pcm_chunk +1,spawn 内 send 完成 -1。 + /// 单 worker 模式:consume_pcm_chunk 把 (seq, chunk) 入队这个 channel, + /// open_session 里 spawn 出的唯一 worker 串行 recv + send_binary, + /// 保证 seq 顺序严格等于实际发送顺序。session 结束时 take() 掉这个 sender, + /// worker 的 recv() 返回 None 自动退出。 + audio_tx: ParkingMutex)>>>, + /// 队列里 + worker 在飞的 audio 帧总数。consume +N,worker send 完一帧 -1。 /// send_last_frame 必须等它降到 0 才能安全发末帧,否则末帧可能被服务端先收到 /// 而把后续 chunk 当成「stream 已结束」之后的多余数据丢弃 → 尾句丢失。 pending_sends: Arc, @@ -103,6 +108,7 @@ impl VolcengineStreamingASR { state: ParkingMutex::new(SyncState::default()), writer: Arc::new(AsyncMutex::new(None)), final_rx: ParkingMutex::new(None), + audio_tx: ParkingMutex::new(None), pending_sends: Arc::new(AtomicUsize::new(0)), send_done: Arc::new(Notify::new()), } @@ -170,6 +176,33 @@ impl VolcengineStreamingASR { *self.final_rx.lock() = Some(rx); *self.writer.lock().await = Some(write); + // 起一个唯一的 audio worker:consume_pcm_chunk 把 (seq, chunk) 推到 audio_tx, + // worker 这边 FIFO recv 然后串行 send_binary。session 结束后调用方 + // (cancel / handle_frame error / fallback_to_partial_or_error) 会 take 掉 + // self.audio_tx,channel 关闭,worker 自然退出。 + let (audio_tx, mut audio_rx) = mpsc::unbounded_channel::<(i32, Vec)>(); + *self.audio_tx.lock() = Some(audio_tx); + let writer_for_worker = Arc::clone(&self.writer); + let pending_for_worker = Arc::clone(&self.pending_sends); + let notify_for_worker = Arc::clone(&self.send_done); + tokio::spawn(async move { + while let Some((seq, chunk)) = audio_rx.recv().await { + let frame = frame::build( + MessageType::AudioOnlyRequest, + Flags::PositiveSequence, + Serialization::None, + &chunk, + Some(seq), + ); + if let Err(e) = send_binary(&writer_for_worker, frame).await { + log::error!("[asr] audio frame seq={} send 失败: {}", seq, e); + } + if pending_for_worker.fetch_sub(1, Ordering::SeqCst) == 1 { + notify_for_worker.notify_waiters(); + } + } + }); + // Send the first frame: full 
client request with seq=1. let payload_json = self.build_first_frame_payload(&connect_id); let payload_bytes = serde_json::to_vec(&payload_json) @@ -318,6 +351,8 @@ impl VolcengineStreamingASR { st.pending_audio.clear(); st.runtime.clone() }; + // Drop audio sender → worker.recv() 返回 None → worker 退出,不再 hold writer。 + *self.audio_tx.lock() = None; if let Some(runtime) = runtime { // Close the writer asynchronously so the receive loop sees EOF. let writer = Arc::clone(&self.writer); @@ -384,6 +419,7 @@ impl VolcengineStreamingASR { code, body ))); self.state.lock().is_connected = false; + *self.audio_tx.lock() = None; return false; } @@ -447,6 +483,7 @@ impl VolcengineStreamingASR { }; self.signal_success(transcript); self.state.lock().is_connected = false; + *self.audio_tx.lock() = None; return false; } true @@ -492,16 +529,16 @@ impl VolcengineStreamingASR { self.signal_error(err); } self.state.lock().is_connected = false; + *self.audio_tx.lock() = None; } } impl AudioConsumer for VolcengineStreamingASR { fn consume_pcm_chunk(&self, pcm: &[u8]) { - // 一次性把就绪 chunk 全部 drain 出来(同一把 state 锁内分配 seq,保证 seq 单调)。 - // 然后 spawn 一个串行 send 的 task —— 不要每块一个 spawn,否则 burst flush 时多 - // 个 task 异步竞争 writer 锁,发送顺序和 seq 顺序对不上,服务端会报 - // "autoAssignedSequence (N) mismatch sequence in request (N+1)" 直接断连。 - let (runtime, chunks) = { + // 单 worker 串行 send 模式:在 state 锁内 drain 并分配 seq(seq 单调), + // 然后把 (seq, chunk) push 进 mpsc。worker 端按入队顺序 send, + // 哪怕跨多个 consume 调用、多个 spawn 也不会再有 writer 锁竞争。 + let chunks: Vec<(i32, Vec)> = { let mut st = self.state.lock(); if !st.is_connected { return; @@ -517,41 +554,30 @@ impl AudioConsumer for VolcengineStreamingASR { st.frames_sent += 1; out.push((seq, chunk)); } - (st.runtime.clone(), out) + out }; if chunks.is_empty() { return; } - let Some(runtime) = runtime else { + let Some(tx) = self.audio_tx.lock().as_ref().cloned() else { return; }; - // pending_sends + Notify 让 send_last_frame 知道何时所有 chunk 都已发出。 - // 单 task 内串行 send,所以一次性 +N、收尾 -N。 - 
let count = chunks.len(); - self.pending_sends.fetch_add(count, Ordering::SeqCst); - let writer = Arc::clone(&self.writer); - let pending = Arc::clone(&self.pending_sends); - let notify = Arc::clone(&self.send_done); - runtime.spawn(async move { - for (seq, chunk) in chunks { - let frame = frame::build( - MessageType::AudioOnlyRequest, - Flags::PositiveSequence, - Serialization::None, - &chunk, - Some(seq), - ); - if let Err(e) = send_binary(&writer, frame).await { - // 把丢帧错误顶到日志里,定位"为什么服务端只收到 100ms" - log::error!("[asr] audio frame seq={} send 失败: {}", seq, e); + for entry in chunks { + // pending_sends 必须在 tx.send 之前 +1:否则 worker 可能先 recv + 发送 + + // 减 1,把 usize 计数器 underflow。 + self.pending_sends.fetch_add(1, Ordering::SeqCst); + if tx.send(entry).is_err() { + // worker 已退出(cancel / 错误路径里 audio_tx 被 take)。 + // 撤销刚才的 +1,避免 send_last_frame 的 wait 永远等不到 0。 + if self.pending_sends.fetch_sub(1, Ordering::SeqCst) == 1 { + self.send_done.notify_waiters(); } + log::warn!("[asr] audio queue closed; dropping subsequent frames"); + return; } - if pending.fetch_sub(count, Ordering::SeqCst) == count { - notify.notify_waiters(); - } - }); + } } } diff --git a/openless-all/app/src-tauri/src/coordinator.rs b/openless-all/app/src-tauri/src/coordinator.rs index cff009a9..a6f46d93 100644 --- a/openless-all/app/src-tauri/src/coordinator.rs +++ b/openless-all/app/src-tauri/src/coordinator.rs @@ -60,6 +60,10 @@ struct SessionState { /// 跳过 history.append。issue #52。 cancelled: bool, focus_target: Option, + /// 单调递增的 session id。begin_session 自增。 + /// recorder error monitor 持有 captured id,处理时若与当前不等说明 + /// 是上一 session 的迟到错误,必须 drop,不要 abort 当前 active session。 + session_id: u64, } impl Default for SessionState { @@ -70,6 +74,7 @@ impl Default for SessionState { pending_stop: false, cancelled: false, focus_target: None, + session_id: 0, } } } @@ -90,6 +95,11 @@ struct Inner { hotkey: Mutex>, hotkey_status: Mutex, hotkey_trigger_held: AtomicBool, + /// 翻译模式触发标志。每次 begin_session 重置为 
false;hotkey 监听器在 + /// Listening / Starting 阶段看到 Shift down 边沿时 set true。 + /// end_session 在调 polish/translate 前读这个 flag + translation_target_language + /// 决定走哪条管线。详见 issue #4。 + translation_modifier_seen: AtomicBool, } impl Coordinator { @@ -114,6 +124,7 @@ impl Coordinator { hotkey: Mutex::new(None), hotkey_status: Mutex::new(HotkeyStatus::default()), hotkey_trigger_held: AtomicBool::new(false), + translation_modifier_seen: AtomicBool::new(false), }), } } @@ -197,7 +208,8 @@ impl Coordinator { pub async fn repolish(&self, raw_text: String, mode: PolishMode) -> Result { let hotwords = enabled_phrases(&self.inner); - polish_text(&raw_text, mode, &hotwords) + let working_languages = self.inner.prefs.get().working_languages; + polish_text(&raw_text, mode, &hotwords, &working_languages) .await .map_err(|e| e.to_string()) } @@ -275,6 +287,18 @@ fn hotkey_bridge_loop(inner: Arc, rx: mpsc::Receiver) { HotkeyEvent::Cancelled => { cancel_session(&inner_cloned); } + HotkeyEvent::TranslationModifierPressed => { + // 仅在 Starting / Listening 阶段把 Shift 边沿计入"翻译模式触发"。 + // Idle 阶段按 Shift 不应该影响下一段录音;Processing/Inserting 已经过了 + // 决定走哪条管线的检查点,再 set 也没意义。 + let phase = inner_cloned.state.lock().phase; + if matches!(phase, SessionPhase::Starting | SessionPhase::Listening) { + inner_cloned + .translation_modifier_seen + .store(true, Ordering::SeqCst); + log::info!("[coord] translation modifier seen during {phase:?}"); + } + } } } } @@ -437,7 +461,12 @@ async fn begin_session(inner: &Arc) -> Result<(), String> { state.pending_stop = false; state.cancelled = false; state.focus_target = capture_focus_target(); + // 自增 session_id;spawn 出去的 recorder error monitor 会捕获这个值, + // 如果迟到错误到达时 id 已不匹配就 drop,不会误中止后续 session。 + state.session_id = state.session_id.wrapping_add(1); } + // 翻译模式标志重置;hotkey 监听器在 Shift down 时再 set true。 + inner.translation_modifier_seen.store(false, Ordering::SeqCst); #[cfg(any(debug_assertions, test))] if hotkey_injection_dry_run_enabled() { @@ -618,11 +647,24 @@ fn 
start_recorder_for_starting( } fn spawn_recorder_error_monitor(inner: &Arc, rx: mpsc::Receiver) { + // 捕获当前 session_id:err 来时若 id 已经不一致说明是上一 session 的迟到事件, + // 不能去 abort 当前 active 的新 session(它录得好好的)。 + let captured_session_id = inner.state.lock().session_id; let inner = Arc::clone(inner); std::thread::Builder::new() .name("openless-recorder-error-monitor".into()) .spawn(move || { if let Ok(err) = rx.recv() { + let current_session_id = inner.state.lock().session_id; + if captured_session_id != current_session_id { + log::warn!( + "[coord] recorder error from stale session {} dropped (current={}, err={})", + captured_session_id, + current_session_id, + err + ); + return; + } log::error!("[coord] recorder runtime error: {err}"); abort_recording_with_error(&inner, format!("录音中断: {err}")); } @@ -831,7 +873,20 @@ async fn end_session(inner: &Arc) -> Result<(), String> { let prefs = inner.prefs.get(); let mode = prefs.default_mode; let hotword_strs = enabled_phrases(inner); - let (polished, polish_error) = polish_or_passthrough(&raw, mode, &hotword_strs).await; + let working_languages = prefs.working_languages.clone(); + let translation_target = prefs.translation_target_language.trim().to_string(); + let translation_active = inner.translation_modifier_seen.load(Ordering::SeqCst) + && !translation_target.is_empty(); + let (polished, polish_error) = if translation_active { + log::info!( + "[coord] translation mode → target=\u{300C}{}\u{300D} working={:?}", + translation_target, + working_languages + ); + translate_or_passthrough(&raw, &translation_target, &working_languages).await + } else { + polish_or_passthrough(&raw, mode, &hotword_strs, &working_languages).await + }; // 原子化最后一次 cancel 检查 + 转 Inserting: // 在同一 lock 内决定「丢弃」还是「进入 Inserting」。一旦设到 Inserting, @@ -857,7 +912,8 @@ async fn end_session(inner: &Arc) -> Result<(), String> { let focus_target = inner.state.lock().focus_target; restore_focus_target_if_possible(focus_target); - let status = 
inner.inserter.insert(&polished); + let restore_clipboard = inner.prefs.get().restore_clipboard_after_paste; + let status = inner.inserter.insert(&polished, restore_clipboard); let inserted_chars = polished.chars().count() as u32; // 累计每条 enabled 词条在最终文本中的命中次数。 @@ -1041,11 +1097,12 @@ async fn polish_or_passthrough( raw: &RawTranscript, mode: PolishMode, hotwords: &[String], + working_languages: &[String], ) -> (String, Option) { if mode == PolishMode::Raw { return (raw.text.clone(), None); } - match polish_text(&raw.text, mode, hotwords).await { + match polish_text(&raw.text, mode, hotwords, working_languages).await { Ok(s) => (s, None), Err(e) => { let reason = e.to_string(); @@ -1055,7 +1112,53 @@ async fn polish_or_passthrough( } } -async fn polish_text(raw: &str, mode: PolishMode, hotwords: &[String]) -> anyhow::Result { +async fn polish_text( + raw: &str, + mode: PolishMode, + hotwords: &[String], + working_languages: &[String], +) -> anyhow::Result { + let api_key = CredentialsVault::get(CredentialAccount::ArkApiKey)?.unwrap_or_default(); + if api_key.is_empty() { + anyhow::bail!("ark api key missing"); + } + let model = CredentialsVault::get(CredentialAccount::ArkModelId)? + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "deepseek-v3-2".to_string()); + let endpoint = CredentialsVault::get(CredentialAccount::ArkEndpoint)? + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "https://ark.cn-beijing.volces.com/api/v3/chat/completions".to_string()); + let base_url = endpoint + .trim_end_matches("/chat/completions") + .trim_end_matches('/') + .to_string(); + + let config = OpenAICompatibleConfig::new("ark", "Doubao Ark", base_url, api_key, model); + let provider = OpenAICompatibleLLMProvider::new(config); + Ok(provider.polish(raw, mode, hotwords, working_languages).await?) 
+} + +/// 翻译路径——和 polish 一样失败时返回原文 + 失败原因,避免"不丢字"约定被违反(CLAUDE.md)。 +async fn translate_or_passthrough( + raw: &RawTranscript, + target_language: &str, + working_languages: &[String], +) -> (String, Option) { + match translate_text(&raw.text, target_language, working_languages).await { + Ok(s) => (s, None), + Err(e) => { + let reason = e.to_string(); + log::error!("[coord] translate failed, falling back to raw: {reason}"); + (raw.text.clone(), Some(reason)) + } + } +} + +async fn translate_text( + raw: &str, + target_language: &str, + working_languages: &[String], +) -> anyhow::Result { let api_key = CredentialsVault::get(CredentialAccount::ArkApiKey)?.unwrap_or_default(); if api_key.is_empty() { anyhow::bail!("ark api key missing"); @@ -1073,7 +1176,9 @@ async fn polish_text(raw: &str, mode: PolishMode, hotwords: &[String]) -> anyhow let config = OpenAICompatibleConfig::new("ark", "Doubao Ark", base_url, api_key, model); let provider = OpenAICompatibleLLMProvider::new(config); - Ok(provider.polish(raw, mode, hotwords).await?) + Ok(provider + .translate_to(raw, target_language, working_languages) + .await?) 
} fn read_whisper_credentials() -> (String, String, String) { @@ -1419,6 +1524,7 @@ fn emit_capsule( elapsed_ms, message, inserted_chars, + translation: inner.translation_modifier_seen.load(Ordering::SeqCst), }; let show_capsule = inner.prefs.get().show_capsule; diff --git a/openless-all/app/src-tauri/src/hotkey.rs b/openless-all/app/src-tauri/src/hotkey.rs index 39522ae1..8eb27b25 100644 --- a/openless-all/app/src-tauri/src/hotkey.rs +++ b/openless-all/app/src-tauri/src/hotkey.rs @@ -24,6 +24,9 @@ pub enum HotkeyEvent { Pressed, Released, Cancelled, + /// Shift(或未来配置项指定的修饰键)按下边沿。可在录音过程中任何时刻产生; + /// 上层据此切换到翻译输出管线。详见 issue #4。 + TranslationModifierPressed, } pub trait HotkeyAdapter: Send + Sync { @@ -36,6 +39,9 @@ struct Shared { binding: RwLock, /// 触发键当前是否处于"按住"状态。OS 自动重复事件用此去重。 trigger_held: AtomicBool, + /// Shift(翻译修饰键)当前是否按住。用于在 FLAGS_CHANGED 上识别 down 边沿 + /// (只在 false → true 时往上层发 TranslationModifierPressed)。详见 issue #4。 + translation_modifier_held: AtomicBool, } pub struct HotkeyMonitor { @@ -108,6 +114,7 @@ where let shared = Arc::new(Shared { binding: RwLock::new(binding), trigger_held: AtomicBool::new(false), + translation_modifier_held: AtomicBool::new(false), }); let thread_shared = Arc::clone(&shared); @@ -217,6 +224,7 @@ mod platform { const KEYBOARD_EVENT_KEYCODE: CgEventField = 9; + const FLAG_MASK_SHIFT: CgEventFlags = 0x0002_0000; const FLAG_MASK_CONTROL: CgEventFlags = 0x0004_0000; const FLAG_MASK_ALTERNATE: CgEventFlags = 0x0008_0000; const FLAG_MASK_COMMAND: CgEventFlags = 0x0010_0000; @@ -335,13 +343,24 @@ mod platform { } fn handle_flags_changed(ctx: &CallbackContext, event: CgEventRef) { + let flags = unsafe { CGEventGetFlags(event) }; + + // Shift 是翻译模式修饰键 — 与触发键的 keycode 检查独立,任何时刻按 Shift 都生效。 + let shift_active = (flags & FLAG_MASK_SHIFT) != 0; + let shift_was_held = ctx.shared.translation_modifier_held.load(Ordering::SeqCst); + if shift_active && !shift_was_held { + ctx.shared.translation_modifier_held.store(true, Ordering::SeqCst); + 
send_or_log(&ctx.tx, HotkeyEvent::TranslationModifierPressed); + } else if !shift_active && shift_was_held { + ctx.shared.translation_modifier_held.store(false, Ordering::SeqCst); + } + let keycode = unsafe { CGEventGetIntegerValueField(event, KEYBOARD_EVENT_KEYCODE) }; let trigger = ctx.shared.binding.read().trigger; let expected_keycode = trigger_to_keycode(trigger); if keycode != expected_keycode { return; } - let flags = unsafe { CGEventGetFlags(event) }; let mask = trigger_to_flag_mask(trigger); let is_active = (flags & mask) != 0; let was_held = ctx.shared.trigger_held.load(Ordering::SeqCst); @@ -414,6 +433,9 @@ mod platform { const WM_SYSKEYUP: usize = 0x0105; const VK_ESCAPE: u32 = 0x1B; + const VK_SHIFT: u32 = 0x10; + const VK_LSHIFT: u32 = 0xA0; + const VK_RSHIFT: u32 = 0xA1; const VK_LCONTROL: u32 = 0xA2; const VK_RCONTROL: u32 = 0xA3; const VK_RMENU: u32 = 0xA5; @@ -556,6 +578,23 @@ mod platform { return; } + // Shift(任一侧)= 翻译模式修饰键。在录音过程中任意时刻按下都生效。详见 issue #4。 + if matches!(vk_code, VK_SHIFT | VK_LSHIFT | VK_RSHIFT) { + match message { + WM_KEYDOWN | WM_SYSKEYDOWN => { + let was_held = ctx.shared.translation_modifier_held.swap(true, Ordering::SeqCst); + if !was_held { + send_or_log(&ctx.tx, HotkeyEvent::TranslationModifierPressed); + } + } + WM_KEYUP | WM_SYSKEYUP => { + ctx.shared.translation_modifier_held.store(false, Ordering::SeqCst); + } + _ => {} + } + return; + } + let trigger = ctx.shared.binding.read().trigger; if vk_code != trigger_to_vk_code(trigger) { return; @@ -683,6 +722,14 @@ mod platform { let _ = tx.send(HotkeyEvent::Cancelled); return; } + // Shift(任一侧)= 翻译模式修饰键。详见 issue #4。 + if matches!(key, Key::ShiftLeft | Key::ShiftRight) { + let was_held = shared.translation_modifier_held.swap(true, Ordering::SeqCst); + if !was_held { + let _ = tx.send(HotkeyEvent::TranslationModifierPressed); + } + return; + } if key == trigger_to_rdev_key(trigger) { let was_held = shared.trigger_held.swap(true, Ordering::SeqCst); if !was_held { @@ -691,6 
+738,10 @@ mod platform { } } EventType::KeyRelease(key) => { + if matches!(key, Key::ShiftLeft | Key::ShiftRight) { + shared.translation_modifier_held.store(false, Ordering::SeqCst); + return; + } if key == trigger_to_rdev_key(trigger) { let was_held = shared.trigger_held.swap(false, Ordering::SeqCst); if was_held { diff --git a/openless-all/app/src-tauri/src/insertion.rs b/openless-all/app/src-tauri/src/insertion.rs index 0a1d4b6f..bbb84f14 100644 --- a/openless-all/app/src-tauri/src/insertion.rs +++ b/openless-all/app/src-tauri/src/insertion.rs @@ -25,17 +25,19 @@ impl TextInserter { } /// Insert `text` at the current cursor position. + /// `restore_clipboard_after_paste` 仅在 Windows/Linux 路径下决定 paste 之后是否恢复 + /// 用户原剪贴板。macOS 走 AX 直写,参数被忽略。详见 issue #111。 #[cfg(not(target_os = "macos"))] - pub fn insert(&self, text: &str) -> InsertStatus { + pub fn insert(&self, text: &str, restore_clipboard_after_paste: bool) -> InsertStatus { if text.is_empty() { return InsertStatus::CopiedFallback; } - insert_with_clipboard_restore(text) + insert_with_clipboard_restore(text, restore_clipboard_after_paste) } /// Insert `text` at the current cursor position. 
#[cfg(target_os = "macos")] - pub fn insert(&self, text: &str) -> InsertStatus { + pub fn insert(&self, text: &str, _restore_clipboard_after_paste: bool) -> InsertStatus { if text.is_empty() { return InsertStatus::CopiedFallback; } @@ -101,7 +103,7 @@ fn copy_to_clipboard_with_restore_plan(text: &str) -> Result InsertStatus { +fn insert_with_clipboard_restore(text: &str, restore_clipboard_after_paste: bool) -> InsertStatus { let restore_plan = match copy_to_clipboard_with_restore_plan(text) { Ok(plan) => plan, Err(err) => { @@ -115,7 +117,10 @@ fn insert_with_clipboard_restore(text: &str) -> InsertStatus { return InsertStatus::CopiedFallback; } - maybe_restore_clipboard(restore_plan); + if restore_clipboard_after_paste { + maybe_restore_clipboard(restore_plan); + } + // 关掉 → 听写文本留在剪贴板里,simulate_paste 没真正落地时用户能手动 Ctrl+V 找回。 insertion_success_status() } diff --git a/openless-all/app/src-tauri/src/lib.rs b/openless-all/app/src-tauri/src/lib.rs index 5f45e571..4d7b9f10 100644 --- a/openless-all/app/src-tauri/src/lib.rs +++ b/openless-all/app/src-tauri/src/lib.rs @@ -62,6 +62,9 @@ pub fn run() { // 主窗口磨砂:macOS 用 NSVisualEffectView,Windows 用 Mica。 // 没这一层的话 transparent: true 让窗口透明 → 背后只是空,不是磨砂。 + // + // decorations 留给运行时分平台决定:macOS 默认 true 用系统红黄绿; + // Windows 这里关掉 native chrome 让 React 端 WinTitleBar 接管。 if let Some(main) = app.get_webview_window("main") { #[cfg(target_os = "macos")] { @@ -83,6 +86,9 @@ pub fn run() { if let Err(e) = apply_mica(&main, None) { log::warn!("[main] mica failed: {e}"); } + if let Err(e) = main.set_decorations(false) { + log::warn!("[main] disable native decorations failed: {e}"); + } } } diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index 1dba4a90..e3122354 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -89,15 +89,42 @@ impl OpenAICompatibleLLMProvider { raw_text: &str, mode: PolishMode, hotwords: &[String], + working_languages: 
&[String], + ) -> Result { + let mut system_prompt = compose_system_prompt(mode, hotwords); + if let Some(premise) = working_languages_premise(working_languages) { + system_prompt = format!("{}\n\n{}", premise, system_prompt); + } + let user_prompt = prompts::user_prompt(raw_text); + self.chat_completion(&system_prompt, &user_prompt).await + } + + /// 把转写翻译成 `target_language`(前端从内置语言列表里选出来的原生名)。 + /// `working_languages` 作为前提注入头部。详见 issue #4。 + pub async fn translate_to( + &self, + raw_text: &str, + target_language: &str, + working_languages: &[String], + ) -> Result { + let mut system_prompt = prompts::translate_system_prompt(target_language); + if let Some(premise) = working_languages_premise(working_languages) { + system_prompt = format!("{}\n\n{}", premise, system_prompt); + } + let user_prompt = prompts::user_prompt(raw_text); + self.chat_completion(&system_prompt, &user_prompt).await + } + + async fn chat_completion( + &self, + system_prompt: &str, + user_prompt: &str, ) -> Result { if self.config.api_key.trim().is_empty() { return Err(LLMError::MissingCredentials); } let url = chat_completions_url(&self.config.base_url); - let system_prompt = compose_system_prompt(mode, hotwords); - let user_prompt = prompts::user_prompt(raw_text); - let body = json!({ "model": self.config.model, "stream": false, @@ -177,6 +204,23 @@ fn chat_completions_url(base_url: &str) -> String { format!("{}/chat/completions", without_trailing) } +/// 把 working_languages 拼成 system prompt 头部前提:"# 上下文\n用户的工作语言:A、B、C"。 +/// 列表为空或全空白返回 None,调用方就不拼前缀。 +fn working_languages_premise(working_languages: &[String]) -> Option { + let cleaned: Vec<&str> = working_languages + .iter() + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect(); + if cleaned.is_empty() { + return None; + } + Some(format!( + "# 上下文\n用户的工作语言:{}。处理任何文本时请把这一前提带进考虑(识别专名、判定语气、决定写法)。", + cleaned.join("、") + )) +} + fn compose_system_prompt(mode: PolishMode, hotwords: &[String]) -> String { let base = 
prompts::system_prompt(mode); let cleaned: Vec = hotwords @@ -447,43 +491,71 @@ pub mod prompts { 出:我觉得这个方案大概可以,但性能上还要再看看。", PolishMode::Structured => "# 任务(清晰结构)\n\ - 把口述整理为脉络清晰、可直接用作 AI prompt 或工作文档的结构化文本。\n\ + 把口述整理为脉络清晰、可直接复制走的结构化文本:保留用户的口语引子(润色后作为首行过渡),\ + 主动按语义把扁平事项归类成 2\u{2013}4 个主题,用双层格式呈现,尾巴查询用自然收尾句。\n\ \n\ - 内容涉及 \u{2265}2 个主题、步骤或要求时,使用两层层级:\n\ - - 第一层(主题):行首用 \"1.\" \"2.\" \"3.\" \u{2026},每个主题一行短标题;\n\ - - 第二层(要点):另起一行,行首用 \"a.\" \"b.\" \"c.\" \u{2026},每条一句。\n\ - \u{4E0D}使用带括号的中间层(如 \"1)\" \"2)\")。\n\ + 双层格式(主清单标准写法):\n\ + - 第一层(主题):行首用 \"1.\" \"2.\" \"3.\" \u{2026},每个主题一行短标题(4\u{2013}8 字最佳);\n\ + - 第二层(子项):另起一行,行首用 \"(a)\" \"(b)\" \"(c)\" \u{2026},每条一句完整陈述。\n\ + 顶层\u{4E0D}使用半括号写法(如 \"1)\" \"2)\");不在子项内再嵌第三层。\n\ \n\ - 即使原文没有显式说\u{201C}第一/第二\u{201D},只要可以归并到 \u{2265}2 个主题,也要自动归类。\n\ 单一简短主题 \u{2192} 直接输出连贯段落,\u{4E0D}硬塞层级。\n\ - 任务密集、顺序混乱、带大量口头连接词的请求 \u{2192} 去掉\u{201C}呃/那个啥/然后还有/对了/顺便/别忘了/一起\u{201D}等噪声,\ - 合并重复意图,保留每个可执行事项,输出紧凑编号清单。\n\ + 事项 \u{2265}4 条 \u{2192} 必须按语义归类(典型如\u{201C}代码与功能 / 文档与配置 / 界面与交互 / 项目清理\u{201D}),\u{4E0D}要扁平堆成一长串编号。\n\ + 合并意图相近的条目(如\u{201C}上传代码 + 修复闪退\u{201D}合成一条 (a)),但\u{4E0D}丢失任何一件事。\n\ + \n\ + # 保留口语引子并润色成自然首行\n\ + 原话开头出现\u{201C}帮我给 X 提个请求 / 帮我列个清单 / 帮我整理一下 / 帮我跟团队说\u{201D}等口语引子时,\ + 保留这层语义并润色成自然书面语,作为输出首行 + 过渡。例:\n\ + - \u{201C}呃那个啥帮我给 GitHub 提个请求啊\u{2026}\u{201D} \u{2192} \u{201C}帮忙给 GitHub 提个请求,主要包含以下内容:\u{201D}\n\ + - \u{201C}帮我列个发布前要做的事\u{201D} \u{2192} \u{201C}发布前需要完成以下事项:\u{201D}\n\ + 清理\u{201C}呃 / 啊 / 那个啥 / 就是 / 然后还有 / 别忘了\u{201D}等口癖;\ + \u{4E0D}替用户做执行决策(OpenLess 是输入法,\u{4E0D}主动\u{201C}打开 GitHub 帮你建 issue\u{201D})。\n\ + \n\ + # 尾巴查询用自然收尾句\n\ + 原话结尾以\u{201C}对了 / 顺便 / 还有 / 检查一下 / 帮我看下\u{201D}起头、且性质是\u{201C}查询 / 列出 / 确认\u{201D}\ + (与前面陈述事项的性质不同)的句子,作为收尾段单独成行,\ + 用\u{201C}最后再\u{2026}\u{201D}\u{201C}另外还需要\u{2026}\u{201D}等自然句过渡,\u{4E0D}用\u{201C}另外:\u{2026}\u{201D}标签写法。\ + 同一句连说两遍只算一次。\n\ + 若性质与前面事项一致(如再补一句\u{201C}还有把缓存改一改\u{201D}),则归入主清单的对应主题。\n\ + \n\ 开发协作语境中的 GitHub、README、issue/issues、接口、路由、缓存策略、依赖包、分支冲突等术语按原意保留,\ 
\u{4E0D}翻译成别的产品名或系统名,\u{4E0D}补充用户没说过的实现方案。\n\ \n\ # 示例 1\n\ 原:发布前要做几件事,第一是回归测试,要测登录页和支付页,第二是文档要更新,要改 README 和 changelog\n\ 出:\n\ + 发布前需要完成以下事项:\n\ + \n\ 1. 回归测试\n\ - a. 登录页。\n\ - b. 支付页。\n\ + (a) 登录页。\n\ + (b) 支付页。\n\ 2. 文档更新\n\ - a. 更新 README。\n\ - b. 更新 changelog。\n\ + (a) 更新 README。\n\ + (b) 更新 changelog。\n\ \n\ - # 示例 2\n\ + # 示例 2(口语引子 + 主题归类 + 自然尾巴)\n\ 原:呃那个啥帮我给GitHub提个请求啊就是首先我要上传代码还有修复一下之前那个页面闪退的bug然后还有新增一个暗色模式的功能好像还有接口请求超时的问题也得改一改对了顺便把README文档更新一下里面的安装步骤写错了还有依赖包版本要降级一下不然跑不起来另外还有侧边栏排版错乱、手机端适配有问题也一起处理下然后还有日志打印太多冗余信息要精简掉还有那个头像上传格式限制没做好还要加个校验哦对了还有合并一下分支冲突的代码别忘了还有把没用的注释全部删掉清理一下项目垃圾文件还有新增两个接口路由优化一下加载速度缓存策略也改一改 检查一下有哪些 issues。检查一下有哪些 issues。\n\ 出:\n\ - 1. 上传代码并修复页面闪退的 bug\n\ - 2. 新增暗色模式功能\n\ - 3. 解决接口请求超时的问题\n\ - 4. 更新 README 文档(修正安装步骤中的错误)\n\ - 5. 降级依赖包版本,确保程序正常运行\n\ - 6. 修复侧边栏排版混乱,并完成手机端适配\n\ - 7. 精简日志打印,删除冗余信息\n\ - 8. 完善头像上传的格式限制,增加校验功能\n\ - 9. 合并冲突的分支\n\ - 10. 检查一下还有哪些 issues", + 帮忙给 GitHub 提个请求,主要包含以下内容:\n\ + \n\ + 1. 代码与功能优化\n\ + (a) 上传最新代码,修复页面闪退的 bug\n\ + (b) 新增暗色模式功能\n\ + (c) 解决接口请求超时的问题\n\ + (d) 优化路由以及加载的缓存策略\n\ + (e) 清理冗余日志打印,精简信息\n\ + 2. 文档与配置调整\n\ + (a) 更新 README 文档,修正安装步骤错误\n\ + (b) 降级依赖包版本,确保程序正常运行\n\ + 3. 界面与交互修复\n\ + (a) 修复侧边栏排版混乱及手机端适配问题\n\ + (b) 完善头像上传功能,增加格式限制与校验\n\ + 4. 
项目清理与合并\n\ + (a) 合并分支冲突\n\ + (b) 删除无用注释,清理项目垃圾文件\n\ + (c) 处理新增的两个接口\n\ + \n\ + 最后再检查一下还有哪些 issue 需要处理。", PolishMode::Formal => "# 任务(正式表达)\n\ 输出适合工作沟通和邮件的正式表达。\n\ @@ -513,6 +585,26 @@ pub mod prompts { escaped ) } + + /// 翻译模式 system prompt — 用户在 Settings 里填的任意自然语言文本作为目标语言, + /// 直接拼进来。LLM 自己理解("繁体中文"/"English"/"美式英文,正式邮件风格" 都行)。 + pub fn translate_system_prompt(target_language: &str) -> String { + format!( + "# 任务(翻译输出)\n\ + 你刚收到一段语音转写。请把它翻译成 \u{300C}{}\u{300D},\ + 保持原意、语气和必要的标点;不增不减、不解释、不加任何前缀或后缀。\n\ + \n\ + # 行为\n\ + - 直接输出翻译结果。\n\ + - 如果转写里夹杂多种语言,统一翻译到目标语言。\n\ + - 转写本来就是目标语言时,做最小润色(补标点、去口癖)后输出。\n\ + - 不要带 \u{300C}翻译:\u{300D}\u{300C}译文:\u{300D}之类前缀。\n\ + \n\ + # 输出\n\ + 只输出翻译后的文本正文。", + target_language + ) + } } #[cfg(test)] @@ -562,9 +654,25 @@ mod tests { fn structured_prompt_includes_dense_github_request_example() { let prompt = prompts::system_prompt(PolishMode::Structured); - assert!(prompt.contains("任务密集、顺序混乱、带大量口头连接词的请求")); + // 任务段:必须教会模型保留口语引子、按主题归类、用 (a) 子项、自然尾巴 + assert!(prompt.contains("# 保留口语引子并润色成自然首行")); + assert!(prompt.contains("# 尾巴查询用自然收尾句")); + assert!(prompt.contains("\"(a)\" \"(b)\" \"(c)\"")); + assert!(prompt.contains("代码与功能 / 文档与配置 / 界面与交互 / 项目清理")); assert!(prompt.contains("GitHub、README、issue/issues")); - assert!(prompt.contains("上传代码并修复页面闪退的 bug")); - assert!(prompt.contains("检查一下还有哪些 issues")); + + // 示例 1:双层格式必须用 (a) (b),且带首行过渡。 + assert!(prompt.contains("发布前需要完成以下事项:")); + assert!(prompt.contains("(a) 登录页。")); + + // 示例 2:必须呈现"引子润色 + 4 主题归类 + 自然尾巴"的目标输出。 + assert!(prompt.contains("帮忙给 GitHub 提个请求,主要包含以下内容:")); + assert!(prompt.contains("1. 代码与功能优化")); + assert!(prompt.contains("(a) 上传最新代码,修复页面闪退的 bug")); + assert!(prompt.contains("4. 
项目清理与合并")); + assert!(prompt.contains("最后再检查一下还有哪些 issue 需要处理。")); + + // 防回归:旧版"另外:"标签写法不能再出现在示例输出里。 + assert!(!prompt.contains("另外:检查一下当前还有哪些 issues")); } } diff --git a/openless-all/app/src-tauri/src/types.rs b/openless-all/app/src-tauri/src/types.rs index 5762f907..e5622378 100644 --- a/openless-all/app/src-tauri/src/types.rs +++ b/openless-all/app/src-tauri/src/types.rs @@ -86,6 +86,22 @@ pub struct UserPreferences { pub show_capsule: bool, pub active_asr_provider: String, // "volcengine" | "apple-speech" | ... pub active_llm_provider: String, // "ark" | "openai" | ... + /// Windows/Linux 粘贴成功后是否恢复用户原剪贴板。默认 true 跟历史行为一致; + /// 关掉就把听写文本留在剪贴板,让 simulate_paste 实际没生效时用户能 Ctrl+V 找回。 + /// macOS 走 AX 直写,不受这个开关影响。详见 issue #111。 + pub restore_clipboard_after_paste: bool, + /// 用户的工作语言(多选,原生名)。会作为前提注入 LLM polish/translate 的 system prompt 头部, + /// 让模型知道该用户在哪些语言间工作。详见 issue #4。 + #[serde(default = "default_working_languages")] + pub working_languages: Vec, + /// 翻译输出的目标语言(单选,原生名)。空串 = 不启用翻译模式(Shift 组合键无效)。 + /// 由前端从内置语言列表中选择,后端只接收最终的原生名字符串拼进 prompt。详见 issue #4。 + #[serde(default)] + pub translation_target_language: String, +} + +fn default_working_languages() -> Vec { + vec!["简体中文".into()] } impl Default for UserPreferences { @@ -103,6 +119,9 @@ impl Default for UserPreferences { show_capsule: true, active_asr_provider: "volcengine".into(), active_llm_provider: "ark".into(), + restore_clipboard_after_paste: true, + working_languages: default_working_languages(), + translation_target_language: String::new(), } } } @@ -322,6 +341,9 @@ pub struct CapsulePayload { pub elapsed_ms: u64, pub message: Option, pub inserted_chars: Option, + /// 当前 session 是否处于翻译模式(用户按过 Shift)。前端用它在胶囊顶部 + /// 渲染"正在翻译"标签,让用户立刻知道这次输出会走翻译管线。详见 issue #4。 + pub translation: bool, } /// Snapshot of credentials read from vault — only what the UI needs to know diff --git a/openless-all/app/src-tauri/tauri.conf.json b/openless-all/app/src-tauri/tauri.conf.json index 1f2be52a..83099a0f 100644 --- 
a/openless-all/app/src-tauri/tauri.conf.json +++ b/openless-all/app/src-tauri/tauri.conf.json @@ -21,7 +21,7 @@ "minWidth": 980, "minHeight": 640, "resizable": true, - "decorations": false, + "decorations": true, "transparent": true, "shadow": true, "hiddenTitle": true, @@ -34,7 +34,7 @@ "url": "index.html?window=capsule", "title": "OpenLess Capsule", "width": 220, - "height": 96, + "height": 110, "decorations": false, "transparent": true, "shadow": false, diff --git a/openless-all/app/src/components/Capsule.tsx b/openless-all/app/src/components/Capsule.tsx index 7b0eac95..8e518fef 100644 --- a/openless-all/app/src/components/Capsule.tsx +++ b/openless-all/app/src/components/Capsule.tsx @@ -257,11 +257,13 @@ function Pill({ os, state, level, insertedChars, message, onCancel, onConfirm }: } export function Capsule() { + const { t } = useTranslation(); const os = detectOS(); const [state, setState] = useState(isTauri ? 'idle' : 'recording'); const [level, setLevel] = useState(isTauri ? 0 : 0.6); const [insertedChars, setInsertedChars] = useState(0); const [message, setMessage] = useState(); + const [translation, setTranslation] = useState(false); useEffect(() => { if (!isTauri) return; @@ -275,6 +277,7 @@ export function Capsule() { setLevel(p.level ?? 0); setMessage(p.message ?? undefined); if (p.insertedChars != null) setInsertedChars(p.insertedChars); + setTranslation(p.translation === true); }); if (cancelled) handle(); else unlisten = handle; @@ -285,6 +288,7 @@ export function Capsule() { }; }, []); + const onCancel = () => { void invokeOrMock('cancel_dictation', undefined, () => undefined); }; @@ -302,6 +306,7 @@ export function Capsule() { style={{ width: '100%', height: '100%', + position: 'relative', display: 'flex', alignItems: 'center', justifyContent: 'center', @@ -309,6 +314,50 @@ export function Capsule() { animation: os === 'win' ? 
'none' : 'capsule-in .22s cubic-bezier(.2,.9,.3,1.1)', }} > + {/* "正在翻译" 徽章 — 嵌套两层: + 外层只负责"绝对定位 + 水平居中(translateX(-50%))",不参与动画; + 内层只负责"垂直位移 + 渐变透明度"——这样不会跟 translateX(-50%) 冲突, + 也不存在 keyframe 与 inline transform 互相覆盖导致的视觉跳变。 */} +
+
+ + {t('capsule.translating')} +
+
> = [ { id: 'history', icon: 'history', cmp: History }, { id: 'vocab', icon: 'vocab', cmp: Vocab }, { id: 'style', icon: 'style', cmp: Style }, + { id: 'translation', icon: 'translate', cmp: Translation }, ]; const RELEASE_NOTES_URL = 'https://github.com/appergb/openless/releases'; diff --git a/openless-all/app/src/components/Icon.tsx b/openless-all/app/src/components/Icon.tsx index 22efcdb8..29caa1ae 100644 --- a/openless-all/app/src/components/Icon.tsx +++ b/openless-all/app/src/components/Icon.tsx @@ -8,6 +8,7 @@ export const ICONS: Record = { history: 'M3 12a9 9 0 1 0 3-6.7M3 4v4h4', vocab: 'M5 4h11a2 2 0 0 1 2 2v13l-3-2-3 2-3-2-3 2V6a2 2 0 0 1 2-2zM8 9h7M8 13h5', style: 'M12 3a9 9 0 1 0 0 18 3 3 0 0 0 3-3v-1a2 2 0 0 1 2-2h1a3 3 0 0 0 3-3 9 9 0 0 0-9-9z', + translate:'M3 5h7M6 4v2M5 8c0 4 3 6 5 6M9 8c0 4-3 6-5 6M13 20l4-10 4 10M14.5 17h5', settings:'M12 9.5a2.5 2.5 0 1 0 0 5 2.5 2.5 0 0 0 0-5zM19.4 15a1.7 1.7 0 0 0 .3 1.8l.1.1a2 2 0 1 1-2.8 2.8l-.1-.1a1.7 1.7 0 0 0-1.8-.3 1.7 1.7 0 0 0-1 1.5V21a2 2 0 1 1-4 0v-.1a1.7 1.7 0 0 0-1.1-1.5 1.7 1.7 0 0 0-1.8.3l-.1.1a2 2 0 1 1-2.8-2.8l.1-.1a1.7 1.7 0 0 0 .3-1.8 1.7 1.7 0 0 0-1.5-1H3a2 2 0 1 1 0-4h.1a1.7 1.7 0 0 0 1.5-1.1 1.7 1.7 0 0 0-.3-1.8l-.1-.1A2 2 0 1 1 7 4.9l.1.1a1.7 1.7 0 0 0 1.8.3H9a1.7 1.7 0 0 0 1-1.5V3a2 2 0 1 1 4 0v.1a1.7 1.7 0 0 0 1 1.5 1.7 1.7 0 0 0 1.8-.3l.1-.1a2 2 0 1 1 2.8 2.8l-.1.1a1.7 1.7 0 0 0-.3 1.8V9a1.7 1.7 0 0 0 1.5 1H21a2 2 0 1 1 0 4h-.1a1.7 1.7 0 0 0-1.5 1z', help: 'M9.1 9a3 3 0 0 1 5.8 1c0 2-3 3-3 3M12 17h.01M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0z', mic: 'M12 2a3 3 0 0 0-3 3v6a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3zM19 11a7 7 0 0 1-14 0M12 18v3M8 21h8', diff --git a/openless-all/app/src/components/SettingsModal.tsx b/openless-all/app/src/components/SettingsModal.tsx index 79933f20..20911c5d 100644 --- a/openless-all/app/src/components/SettingsModal.tsx +++ b/openless-all/app/src/components/SettingsModal.tsx @@ -134,12 +134,14 @@ export function SettingsModal({ os: _os, onClose, initialSettingsSection }: 
Sett ))} - {/* content */} -
+ {/* content — 父容器 overflow:hidden + 列向 flex;X 和 h2 固定在头部, + 只有最里层的 scroll wrapper 真正滚动。这样模态左 sidebar、关闭按钮、 + section 标题都不会跟着内容一起飘。 */} +
-

{t(`modal.sections.${section}`)}

+

{t(`modal.sections.${section}`)}

- {section === 'settings' && } - {section === 'personalize' && } - {section === 'about' && } + {section === 'settings' ? ( + // SettingsContent 自己接管 flex:1 + 内部右栏 scroll,外层不能再加 overflow:auto。 +
+ +
+ ) : ( + // personalize / about 短内容:单一 scroll wrapper,超出时本块滚动。 +
+ {section === 'personalize' && } + {section === 'about' && } +
+ )}
diff --git a/openless-all/app/src/i18n/en.ts b/openless-all/app/src/i18n/en.ts index 089b15ca..fa335452 100644 --- a/openless-all/app/src/i18n/en.ts +++ b/openless-all/app/src/i18n/en.ts @@ -32,12 +32,14 @@ export const en: typeof zhCN = { cancelled: 'Cancelled', error: 'Something went wrong', inserted: 'Inserted {{count}}', + translating: 'Translating', }, nav: { overview: 'Overview', history: 'History', vocab: 'Vocabulary', style: 'Style', + translation: 'Translation', }, shell: { shortcutLabel: 'Recording shortcut', @@ -161,6 +163,34 @@ export const en: typeof zhCN = { formal: { name: 'Formal', desc: 'Email and workplace tone — more complete, more professional.', sample: 'Detects greetings/sign-offs in email contexts; avoids empty pleasantries.' }, }, }, + translation: { + kicker: 'TRANSLATION', + title: 'Translation', + desc: 'Translate the dictation into a target language before insertion. Target, working languages and trigger are configured here.', + statusEnabled: 'Enabled', + statusDisabled: 'Disabled', + working: { + title: 'Working languages', + desc: 'Pick the languages you regularly use (multi-select). These are passed to the LLM as a premise so polish and translation know which spellings, tone, and conventions you expect.', + }, + target: { + title: 'Translation target language', + desc: 'Pick a language and pressing Shift any time during recording will translate the transcript into it before insertion. Pick "Disabled" to make Shift a no-op (regular polish runs instead).', + disabled: 'Disabled (Shift does nothing)', + }, + howto: { + title: 'How to use', + step1: 'Place the text cursor in another app (Notes, mail, chat — anything with a text field).', + step2: 'Press the recording hotkey (currently {{trigger}}) to start dictation.', + step3: 'Press Shift any time during the recording — once is enough, no need to hold it. 
You can press it before you start talking, mid-sentence, or right before stopping.', + step4: 'Press the recording hotkey again to stop.', + step5: 'The transcript is sent to the LLM, translated into the target language above, then inserted at the original cursor position.', + indicatorTitle: 'How to confirm translation mode is on', + indicatorDesc: 'The moment you press Shift, a blue "● Translating" pill floats above the recording capsule at the bottom of the screen. It stays visible until insertion completes, so you always know this run goes through the translation pipeline.', + fallbackTitle: 'Safety fallbacks', + fallbackDesc: 'When the target is "Disabled", Shift is a no-op. If the LLM call fails mid-translation, the raw transcript is inserted directly so nothing gets lost. See issue #4.', + }, + }, settings: { kicker: 'SETTINGS', title: 'Settings', @@ -187,6 +217,8 @@ export const en: typeof zhCN = { migrationNoticeDesc: 'If you changed the hotkey trigger mode before, please confirm it here once. This update changes both the default value and the preference-reading path; if you prefer push-to-talk, switch it back manually.', capsuleLabel: 'Recording capsule', capsuleDesc: 'Show a translucent capsule at the bottom of the screen while recording / transcribing.', + restoreClipboardLabel: 'Restore clipboard after insert', + restoreClipboardDesc: 'Windows / Linux only: restore your original clipboard after a successful paste (default on). Turn off to keep the dictation text in the clipboard so you can manually Ctrl+V if the simulated paste did not actually land. 
See issue #111.', }, providers: { llmTitle: 'LLM (polishing)', @@ -211,7 +243,7 @@ export const en: typeof zhCN = { apiKeyLabel: 'API Key', baseUrlLabel: 'Base URL', modelLabel: 'Model', - appKeyLabel: 'App Key', + appIdLabel: 'App ID', accessKeyLabel: 'Access Key', resourceIdLabel: 'Resource ID', }, diff --git a/openless-all/app/src/i18n/zh-CN.ts b/openless-all/app/src/i18n/zh-CN.ts index d927f708..408ba0f3 100644 --- a/openless-all/app/src/i18n/zh-CN.ts +++ b/openless-all/app/src/i18n/zh-CN.ts @@ -30,12 +30,14 @@ export const zhCN = { cancelled: '已取消', error: '出错了', inserted: '已插入 {{count}}', + translating: '正在翻译', }, nav: { overview: '概览', history: '历史', vocab: '词汇表', style: '风格', + translation: '翻译', }, shell: { shortcutLabel: '录音快捷键', @@ -159,6 +161,34 @@ export const zhCN = { formal: { name: '正式表达', desc: '工作沟通和邮件场景,更专业更完整。', sample: '邮件场景自动识别问候 / 落款;不引入空泛客套。' }, }, }, + translation: { + kicker: 'TRANSLATION', + title: '翻译', + desc: '把口述的内容自动翻译成目标语言后再插入。目标语言、工作语言、触发方式都在这里配置。', + statusEnabled: '已启用', + statusDisabled: '未启用', + working: { + title: '工作语言', + desc: '勾选你日常会用到的语言(多选)。这组语言会作为前提注入 LLM 的 system prompt 头部,影响润色与翻译的判断(专名拼写、语气、行文习惯)。', + }, + target: { + title: '翻译目标语言', + desc: '选了某个语言后,录音过程中任意时刻按一下 Shift,停止后就会把转写翻译成该语言再插入到光标位置。选「不启用」则 Shift 没有任何效果,走普通润色管线。', + disabled: '不启用(Shift 按下不触发翻译)', + }, + howto: { + title: '使用方法', + step1: '在另一个 app 的输入框里聚焦光标(备忘录、邮件、聊天窗口都行)。', + step2: '按一下"录音快捷键"(当前是 {{trigger}}),开始录音。', + step3: '在录音过程中任意时刻按一下 Shift——按一下即可,不需要按住,可以在开口前、说到一半、快说完时按。', + step4: '再按一下"录音快捷键"停止录音。', + step5: '系统会把转写交给大模型翻译成上面选的目标语言,然后插入到一开始那个输入框光标位置。', + indicatorTitle: '怎么知道翻译模式生效了', + indicatorDesc: '一旦按下 Shift,屏幕底部录音胶囊的上方会立刻悬浮一个蓝色"● 正在翻译"小药丸——它会一直显示到本次插入完成,让你确认这次输出会走翻译管线。', + fallbackTitle: '安全兜底', + fallbackDesc: '翻译模式选「不启用」时 Shift 是没作用的;翻译过程中如果大模型调用失败,会回退到把原始中文转写直接插入,不会丢字。详见 issue #4。', + }, + }, settings: { kicker: 'SETTINGS', title: '设置', @@ -185,6 +215,8 @@ export const zhCN = { migrationNoticeDesc: 
'如果你之前改过快捷键触发方式,请在这里手动确认一次。本次更新调整了快捷键方式的默认值与读取逻辑;如果你更习惯按住说话,可以重新切回“按住说话”。', capsuleLabel: '录音胶囊', capsuleDesc: '录音 / 转写时在屏幕底部显示半透明胶囊。', + restoreClipboardLabel: '插入后恢复剪贴板', + restoreClipboardDesc: '仅 Windows / Linux:粘贴成功后恢复你原来的剪贴板内容(默认开)。关掉就把听写文本留在剪贴板,模拟粘贴没真正落地时可以手动 Ctrl+V 找回。详见 issue #111。', }, providers: { llmTitle: 'LLM 模型(润色)', @@ -209,7 +241,7 @@ export const zhCN = { apiKeyLabel: 'API 密钥', baseUrlLabel: '接口地址', modelLabel: '模型', - appKeyLabel: 'App Key(应用密钥)', + appIdLabel: 'App ID(应用 ID)', accessKeyLabel: 'Access Key(访问密钥)', resourceIdLabel: '资源 ID', }, diff --git a/openless-all/app/src/lib/ipc.ts b/openless-all/app/src/lib/ipc.ts index 2e506ac4..a1cabd6b 100644 --- a/openless-all/app/src/lib/ipc.ts +++ b/openless-all/app/src/lib/ipc.ts @@ -43,6 +43,9 @@ const mockSettings: UserPreferences = { showCapsule: true, activeAsrProvider: 'volcengine', activeLlmProvider: 'ark', + restoreClipboardAfterPaste: true, + workingLanguages: ['简体中文'], + translationTargetLanguage: '', }; const mockHotkeyCapability: HotkeyCapability = { diff --git a/openless-all/app/src/lib/types.ts b/openless-all/app/src/lib/types.ts index 2c618669..8f350564 100644 --- a/openless-all/app/src/lib/types.ts +++ b/openless-all/app/src/lib/types.ts @@ -79,8 +79,34 @@ export interface UserPreferences { showCapsule: boolean; activeAsrProvider: string; activeLlmProvider: string; + /** 仅 Windows/Linux:粘贴成功后是否恢复用户原剪贴板。默认 true。详见 issue #111。 */ + restoreClipboardAfterPaste: boolean; + /** 用户的工作语言(多选,原生名);作为前提注入 LLM polish/translate prompt 头部。 */ + workingLanguages: string[]; + /** 翻译模式目标语言(单选,原生名);空串 = 不启用 Shift 翻译。详见 issue #4。 */ + translationTargetLanguage: string; } +/** 内置语言列表 — 前端 Settings UI 用,后端只接收原生名字符串拼 prompt。 + * 添加新语言时直接在这里加一项(原生名),无需修改后端。 */ +export const SUPPORTED_LANGUAGES: readonly string[] = [ + '简体中文', + '繁体中文', + 'English', + '日本語', + '한국어', + 'Français', + 'Deutsch', + 'Español', + 'Italiano', + 'Português', + 'Русский', + 'العربية', + 'Tiếng Việt', + 'ไทย', + 'हिन्दी', +] as const; 
+ export type CapsuleState = | 'idle' | 'recording' @@ -96,6 +122,8 @@ export interface CapsulePayload { elapsedMs: number; message: string | null; insertedChars: number | null; + /** 当前 session 是否处于翻译模式(用户已按过 Shift)。详见 issue #4。 */ + translation: boolean; } export interface CredentialsStatus { diff --git a/openless-all/app/src/pages/Settings.tsx b/openless-all/app/src/pages/Settings.tsx index 20111704..08f70b28 100644 --- a/openless-all/app/src/pages/Settings.tsx +++ b/openless-all/app/src/pages/Settings.tsx @@ -64,7 +64,18 @@ export function Settings({ embedded = false, initialSection = 'recording' }: Set desc={t('settings.desc')} /> )} -
+ {/* embedded(在 SettingsModal 里)模式下:mini-sidebar 固定,仅右栏 scroll。 + 外层 flex:1 minHeight:0 让 grid 拿到确定高度;gridTemplateRows: minmax(0, 1fr) + 强制行高等于容器高度,否则 grid 默认 auto rows 会跟内容长,右栏 overflow:auto + 就退化成"没东西需要 scroll",于是大家照旧一起飘。 */} +
{SECTION_ORDER.map(s => (
-
+
{section === 'recording' && } {section === 'providers' && } {section === 'shortcuts' && } @@ -132,6 +150,8 @@ function RecordingSection() { savePrefs({ ...prefs, hotkey: { ...prefs.hotkey, mode } }); const onShowCapsuleChange = (showCapsule: boolean) => savePrefs({ ...prefs, showCapsule }); + const onRestoreClipboardChange = (restoreClipboardAfterPaste: boolean) => + savePrefs({ ...prefs, restoreClipboardAfterPaste }); const choices: Array<[HotkeyMode, string]> = [ ['toggle', t('settings.recording.modeToggle')], @@ -203,6 +223,12 @@ function RecordingSection() { + + + {capability.statusHint && (
{capability.statusHint} @@ -338,7 +364,7 @@ function ProvidersSection() { {asrProvider === 'volcengine' ? ( <> - + diff --git a/openless-all/app/src/pages/Translation.tsx b/openless-all/app/src/pages/Translation.tsx new file mode 100644 index 00000000..74d9759c --- /dev/null +++ b/openless-all/app/src/pages/Translation.tsx @@ -0,0 +1,180 @@ +// Translation.tsx — 独立的"翻译"页,从 Settings → 录音 中拆出来。 +// 用户在这里: +// - 勾选自己的工作语言(多选,用作 LLM polish/translate prompt 的前提) +// - 选一个翻译目标语言(单选;选"不启用"则 Shift 不触发翻译) +// - 看完整使用说明(怎么触发、按钮位置、胶囊显示) + +import { useTranslation } from 'react-i18next'; +import { Card, PageHeader } from './_atoms'; +import { SUPPORTED_LANGUAGES } from '../lib/types'; +import { useHotkeySettings } from '../state/HotkeySettingsContext'; +import { getHotkeyTriggerLabel } from '../lib/hotkey'; + +export function Translation() { + const { t } = useTranslation(); + const { prefs, updatePrefs: savePrefs, hotkey } = useHotkeySettings(); + + if (!prefs) { + return ( + <> + + +
{t('common.loading')}
+
+ + ); + } + + const onWorkingLanguagesChange = (workingLanguages: string[]) => + savePrefs({ ...prefs, workingLanguages }); + const toggleWorkingLanguage = (lang: string) => { + const next = prefs.workingLanguages.includes(lang) + ? prefs.workingLanguages.filter(l => l !== lang) + : [...prefs.workingLanguages, lang]; + onWorkingLanguagesChange(next); + }; + const onTargetChange = (translationTargetLanguage: string) => + savePrefs({ ...prefs, translationTargetLanguage }); + + const triggerLabel = getHotkeyTriggerLabel(hotkey?.trigger); + const enabled = prefs.translationTargetLanguage.trim() !== ''; + + return ( + <> + + +
+ + {/* 1. 工作语言 */} + +
{t('translation.working.title')}
+
+ {t('translation.working.desc')} +
+
+ {SUPPORTED_LANGUAGES.map(lang => { + const checked = prefs.workingLanguages.includes(lang); + return ( + + ); + })} +
+
+ + {/* 2. 翻译目标语言 */} + +
+
{t('translation.target.title')}
+ + {enabled ? t('translation.statusEnabled') : t('translation.statusDisabled')} + +
+
+ {t('translation.target.desc')} +
+ +
+ + {/* 3. 使用方法 */} + +
{t('translation.howto.title')}
+
    +
  1. {t('translation.howto.step1')}
  2. +
  3. {t('translation.howto.step2', { trigger: triggerLabel })}
  4. +
  5. {t('translation.howto.step3')}
  6. +
  7. {t('translation.howto.step4')}
  8. +
  9. {t('translation.howto.step5')}
  10. +
+ +
+
{t('translation.howto.indicatorTitle')}
+ {t('translation.howto.indicatorDesc')} +
+ +
+
{t('translation.howto.fallbackTitle')}
+ {t('translation.howto.fallbackDesc')} +
+
+
+ + ); +} diff --git a/openless-all/app/src/state/useAppState.ts b/openless-all/app/src/state/useAppState.ts index 34611c97..f0249b24 100644 --- a/openless-all/app/src/state/useAppState.ts +++ b/openless-all/app/src/state/useAppState.ts @@ -2,7 +2,7 @@ import { useState } from 'react'; -export type AppTab = 'overview' | 'history' | 'vocab' | 'style'; +export type AppTab = 'overview' | 'history' | 'vocab' | 'style' | 'translation'; export interface AppState { currentTab: AppTab;