Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 135 additions & 11 deletions openless-all/app/src-tauri/src/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ struct SessionState {
/// recorder error monitor 持有 captured id,处理时若与当前不等说明
/// 是上一 session 的迟到错误,必须 drop,不要 abort 当前 active session。
session_id: u64,
/// 用户开始 dictation 时所处的前台 app 标签("Mail (com.apple.mail)" / Windows 窗口标题)。
/// 用作 LLM polish/translate 的上下文前提,让模型按 app 调风格。详见 issue #116。
front_app: Option<String>,
}

impl Default for SessionState {
Expand All @@ -75,6 +78,7 @@ impl Default for SessionState {
cancelled: false,
focus_target: None,
session_id: 0,
front_app: None,
}
}
}
Expand Down Expand Up @@ -209,9 +213,18 @@ impl Coordinator {
pub async fn repolish(&self, raw_text: String, mode: PolishMode) -> Result<String, String> {
let hotwords = enabled_phrases(&self.inner);
let working_languages = self.inner.prefs.get().working_languages;
polish_text(&raw_text, mode, &hotwords, &working_languages)
.await
.map_err(|e| e.to_string())
// repolish 是历史记录里手动重新润色,不再绑定原 session 的前台 app;
// 当下用户调起的 app 才是相关上下文(如果可拿)。
let front_app = capture_frontmost_app();
polish_text(
&raw_text,
mode,
&hotwords,
&working_languages,
front_app.as_deref(),
)
.await
.map_err(|e| e.to_string())
}
}

Expand Down Expand Up @@ -464,6 +477,10 @@ async fn begin_session(inner: &Arc<Inner>) -> Result<(), String> {
// 自增 session_id;spawn 出去的 recorder error monitor 会捕获这个值,
// 如果迟到错误到达时 id 已不匹配就 drop,不会误中止后续 session。
state.session_id = state.session_id.wrapping_add(1);
state.front_app = capture_frontmost_app();
if let Some(label) = state.front_app.as_deref() {
log::info!("[coord] front_app captured: {label}");
}
}
// 翻译模式标志重置;hotkey 监听器在 Shift down 时再 set true。
inner.translation_modifier_seen.store(false, Ordering::SeqCst);
Expand Down Expand Up @@ -874,18 +891,33 @@ async fn end_session(inner: &Arc<Inner>) -> Result<(), String> {
let mode = prefs.default_mode;
let hotword_strs = enabled_phrases(inner);
let working_languages = prefs.working_languages.clone();
let front_app = inner.state.lock().front_app.clone();
let translation_target = prefs.translation_target_language.trim().to_string();
let translation_active = inner.translation_modifier_seen.load(Ordering::SeqCst)
&& !translation_target.is_empty();
let (polished, polish_error) = if translation_active {
log::info!(
"[coord] translation mode → target=\u{300C}{}\u{300D} working={:?}",
"[coord] translation mode → target=\u{300C}{}\u{300D} working={:?} front_app={:?}",
translation_target,
working_languages
working_languages,
front_app
);
translate_or_passthrough(&raw, &translation_target, &working_languages).await
translate_or_passthrough(
&raw,
&translation_target,
&working_languages,
front_app.as_deref(),
)
.await
} else {
polish_or_passthrough(&raw, mode, &hotword_strs, &working_languages).await
polish_or_passthrough(
&raw,
mode,
&hotword_strs,
&working_languages,
front_app.as_deref(),
)
.await
};

// 原子化最后一次 cancel 检查 + 转 Inserting:
Expand Down Expand Up @@ -1098,11 +1130,12 @@ async fn polish_or_passthrough(
mode: PolishMode,
hotwords: &[String],
working_languages: &[String],
front_app: Option<&str>,
) -> (String, Option<String>) {
if mode == PolishMode::Raw {
return (raw.text.clone(), None);
}
match polish_text(&raw.text, mode, hotwords, working_languages).await {
match polish_text(&raw.text, mode, hotwords, working_languages, front_app).await {
Ok(s) => (s, None),
Err(e) => {
let reason = e.to_string();
Expand All @@ -1117,6 +1150,7 @@ async fn polish_text(
mode: PolishMode,
hotwords: &[String],
working_languages: &[String],
front_app: Option<&str>,
) -> anyhow::Result<String> {
let api_key = CredentialsVault::get(CredentialAccount::ArkApiKey)?.unwrap_or_default();
if api_key.is_empty() {
Expand All @@ -1135,16 +1169,19 @@ async fn polish_text(

let config = OpenAICompatibleConfig::new("ark", "Doubao Ark", base_url, api_key, model);
let provider = OpenAICompatibleLLMProvider::new(config);
Ok(provider.polish(raw, mode, hotwords, working_languages).await?)
Ok(provider
.polish(raw, mode, hotwords, working_languages, front_app)
.await?)
}

/// 翻译路径——和 polish 一样失败时返回原文 + 失败原因,避免"不丢字"约定被违反(CLAUDE.md)。
async fn translate_or_passthrough(
raw: &RawTranscript,
target_language: &str,
working_languages: &[String],
front_app: Option<&str>,
) -> (String, Option<String>) {
match translate_text(&raw.text, target_language, working_languages).await {
match translate_text(&raw.text, target_language, working_languages, front_app).await {
Ok(s) => (s, None),
Err(e) => {
let reason = e.to_string();
Expand All @@ -1158,6 +1195,7 @@ async fn translate_text(
raw: &str,
target_language: &str,
working_languages: &[String],
front_app: Option<&str>,
) -> anyhow::Result<String> {
let api_key = CredentialsVault::get(CredentialAccount::ArkApiKey)?.unwrap_or_default();
if api_key.is_empty() {
Expand All @@ -1177,7 +1215,7 @@ async fn translate_text(
let config = OpenAICompatibleConfig::new("ark", "Doubao Ark", base_url, api_key, model);
let provider = OpenAICompatibleLLMProvider::new(config);
Ok(provider
.translate_to(raw, target_language, working_languages)
.translate_to(raw, target_language, working_languages, front_app)
.await?)
}

Expand Down Expand Up @@ -1436,6 +1474,92 @@ fn capture_focus_target() -> Option<usize> {
None
}

/// Captures a label for the application that is frontmost when dictation starts,
/// formatted as `"localizedName (bundle.id)"`. The label is handed to the LLM
/// polish/translate step as context so the model can adapt its tone to the app
/// (see issue #116).
///
/// Platform behavior: macOS asks `NSWorkspace` for `frontmostApplication`
/// (a public API that needs no extra permission); Windows reuses the foreground
/// HWND and reads its window title; Linux and other platforms return `None`.
///
/// Returns `None` when the `NSWorkspace` class cannot be resolved, when the shared
/// workspace or the frontmost application pointer is null, or when neither a name
/// nor a bundle identifier could be read. If only one of the two is available,
/// that one is returned on its own.
#[cfg(target_os = "macos")]
fn capture_frontmost_app() -> Option<String> {
    use objc2::msg_send;
    use objc2::runtime::{AnyClass, AnyObject};

    // SAFETY: every object pointer obtained from a message send is null-checked
    // before it is messaged again; `nsstring_to_string` performs its own null
    // check on the two string pointers.
    // NOTE(review): assumes each selector returns an object pointer, as the
    // AppKit headers declare — confirm against the objc2 version in use.
    let (display_name, bundle_id) = unsafe {
        let workspace_class = AnyClass::get("NSWorkspace")?;

        let shared: *mut AnyObject = msg_send![workspace_class, sharedWorkspace];
        if shared.is_null() {
            return None;
        }

        let frontmost: *mut AnyObject = msg_send![shared, frontmostApplication];
        if frontmost.is_null() {
            return None;
        }

        let raw_name: *mut AnyObject = msg_send![frontmost, localizedName];
        let raw_bundle: *mut AnyObject = msg_send![frontmost, bundleIdentifier];
        (nsstring_to_string(raw_name), nsstring_to_string(raw_bundle))
    };

    // Prefer "name (bundle)"; degrade to whichever half is present.
    match display_name {
        Some(n) => Some(match bundle_id {
            Some(b) => format!("{n} ({b})"),
            None => n,
        }),
        None => bundle_id,
    }
}

/// Copies the contents of an Objective-C string object into an owned Rust `String`.
///
/// Returns `None` when `ns_string` is null, when its `UTF8String` pointer is null,
/// or when the resulting text is empty. Bytes that are not valid UTF-8 are replaced
/// lossily rather than causing a failure.
///
/// # Safety
///
/// `ns_string` must be null or point to a live object that responds to the
/// `UTF8String` selector by returning a NUL-terminated C string (presumably an
/// `NSString` — the caller is responsible for guaranteeing this).
#[cfg(target_os = "macos")]
unsafe fn nsstring_to_string(ns_string: *mut objc2::runtime::AnyObject) -> Option<String> {
    use objc2::msg_send;

    if ns_string.is_null() {
        return None;
    }

    let bytes: *const std::os::raw::c_char = unsafe { msg_send![ns_string, UTF8String] };
    if bytes.is_null() {
        return None;
    }

    // Copy right away: the text is owned by the Objective-C side, so we do not
    // hold on to the borrowed pointer past this statement.
    let text = unsafe { std::ffi::CStr::from_ptr(bytes) }
        .to_string_lossy()
        .into_owned();

    // An empty label carries no information; report it as absent.
    Some(text).filter(|t| !t.is_empty())
}

#[cfg(target_os = "windows")]
fn capture_frontmost_app() -> Option<String> {
use windows::Win32::UI::WindowsAndMessaging::{
GetForegroundWindow, GetWindowTextLengthW, GetWindowTextW,
};

unsafe {
let hwnd = GetForegroundWindow();
if hwnd.0.is_null() {
return None;
}
let len = GetWindowTextLengthW(hwnd);
if len <= 0 {
return None;
}
let mut buf = vec![0u16; (len + 1) as usize];
let copied = GetWindowTextW(hwnd, &mut buf);
if copied <= 0 {
return None;
}
let title = String::from_utf16_lossy(&buf[..copied as usize]);
if title.is_empty() {
None
} else {
Some(title)
Comment on lines +1549 to +1553
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Replace window title with stable app identifier

On Windows, capture_frontmost_app stores the foreground window caption from GetWindowTextW, which commonly contains document/tab names and other user content rather than just the app name. That value is then propagated into LLM context and logs, so sensitive text can be exfiltrated to the provider and persisted in telemetry even though this feature is meant to pass app context. Capture a stable app identity (process/app name) or aggressively reduce the caption to a safe app label before storing it.

Useful? React with 👍 / 👎.

}
}
}

/// Fallback for platforms other than macOS and Windows: no frontmost-app
/// detection is implemented here, so this always returns `None`.
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
fn capture_frontmost_app() -> Option<String> {
None
}

#[cfg(target_os = "windows")]
fn restore_focus_target_if_possible(target: Option<usize>) {
use std::ffi::c_void;
Expand Down
85 changes: 62 additions & 23 deletions openless-all/app/src-tauri/src/polish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,27 @@ impl OpenAICompatibleLLMProvider {
mode: PolishMode,
hotwords: &[String],
working_languages: &[String],
front_app: Option<&str>,
) -> Result<String, LLMError> {
let mut system_prompt = compose_system_prompt(mode, hotwords);
if let Some(premise) = working_languages_premise(working_languages) {
if let Some(premise) = context_premise(working_languages, front_app) {
system_prompt = format!("{}\n\n{}", premise, system_prompt);
}
let user_prompt = prompts::user_prompt(raw_text);
self.chat_completion(&system_prompt, &user_prompt).await
}

/// 把转写翻译成 `target_language`(前端从内置语言列表里选出来的原生名)。
/// `working_languages` 作为前提注入头部。详见 issue #4。
/// `working_languages` 与 `front_app` 作为前提注入头部。详见 issue #4 与 #116
pub async fn translate_to(
&self,
raw_text: &str,
target_language: &str,
working_languages: &[String],
front_app: Option<&str>,
) -> Result<String, LLMError> {
let mut system_prompt = prompts::translate_system_prompt(target_language);
if let Some(premise) = working_languages_premise(working_languages) {
if let Some(premise) = context_premise(working_languages, front_app) {
system_prompt = format!("{}\n\n{}", premise, system_prompt);
}
let user_prompt = prompts::user_prompt(raw_text);
Expand Down Expand Up @@ -204,21 +206,39 @@ fn chat_completions_url(base_url: &str) -> String {
format!("{}/chat/completions", without_trailing)
}

/// 把 working_languages 拼成 system prompt 头部前提:"# 上下文\n用户的工作语言:A、B、C"。
/// 列表为空或全空白返回 None,调用方就不拼前缀。
fn working_languages_premise(working_languages: &[String]) -> Option<String> {
let cleaned: Vec<&str> = working_languages
/// 把 working_languages + front_app 拼成 system prompt 头部前提:
/// # 上下文
/// 用户的工作语言:…
/// 当前前台应用:…(请按这个 app 的常见沟通风格调整语气)
///
/// 两个字段都空时返回 None,调用方就不拼前缀。详见 issue #4 / #116。
fn context_premise(working_languages: &[String], front_app: Option<&str>) -> Option<String> {
let langs: Vec<&str> = working_languages
.iter()
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.collect();
if cleaned.is_empty() {
let app = front_app
.map(str::trim)
.filter(|s| !s.is_empty());

if langs.is_empty() && app.is_none() {
return None;
}
Some(format!(
"# 上下文\n用户的工作语言:{}。处理任何文本时请把这一前提带进考虑(识别专名、判定语气、决定写法)。",
cleaned.join("、")
))

let mut lines = vec!["# 上下文".to_string()];
if !langs.is_empty() {
lines.push(format!(
"用户的工作语言:{}。处理任何文本时请把这一前提带进考虑(识别专名、判定语气、决定写法)。",
langs.join("、")
));
}
if let Some(name) = app {
lines.push(format!(
"当前前台应用:{name}。请按这个应用的常见沟通风格调整语气——例如邮件类 app 偏正式、聊天类 app 偏口语、IDE / 文档类 app 偏技术或结构化。\u{4E0D}主动加入与用户原意无关的客套话。"
));
Comment on lines +236 to +239
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Sanitize front-app text before prompt composition

context_premise interpolates front_app directly into the system prompt. In the Windows path this string is window-title text that can be influenced by untrusted content (for example, a browser page title) and can include newlines/instruction-like payloads, which allows prompt-injection-style overrides of polish/translation behavior. Normalize/escape control characters and quote or constrain this field before embedding it in the prompt.

Useful? React with 👍 / 👎.

}
Some(lines.join("\n"))
}

fn compose_system_prompt(mode: PolishMode, hotwords: &[String]) -> String {
Expand Down Expand Up @@ -586,23 +606,42 @@ pub mod prompts {
)
}

/// 翻译模式 system prompt — 用户在 Settings 里填的任意自然语言文本作为目标语言,
/// 直接拼进来。LLM 自己理解("繁体中文"/"English"/"美式英文,正式邮件风格" 都行)。
/// 翻译模式 system prompt — 用户在「翻译」页选定的目标语言(内置 15 种自然语言原生名)。
/// LLM 自己理解("繁体中文"/"English"/"美式英文"/"日本語" 都行)。
/// 此 prompt 之上还有 context_premise 拼出的"# 上下文"前提。
pub fn translate_system_prompt(target_language: &str) -> String {
format!(
"# 任务(翻译输出)\n\
你刚收到一段语音转写。请把它翻译成 \u{300C}{}\u{300D},\
保持原意、语气和必要的标点;不增不减、不解释、不加任何前缀或后缀。\n\
把下面收到的一段语音转写翻译成 \u{300C}{lang}\u{300D}。\n\
这是用户对着语音输入工具说的话——他正在某个 app 的输入框前,\
转译结果会直接被插入到光标位置。\n\
\n\
# 翻译规则\n\
## 必须保留原文(不要翻译)\n\
- 人名、地名、品牌名(OpenAI、Tauri、字节跳动、张三 等)。\n\
- 代码标识符、技术术语(useState、async/await、HTTP、Rust crate 名 等)。\n\
- URL、邮箱、文件路径、命令行片段。\n\
- 说话人**故意**用源语言夹进来的英文/技术词,按原样保留,\u{4E0D}替换为目标语言对应词。\n\
\n\
## 主体翻译\n\
- 句子骨架、动作、形容、连接词翻译成 \u{300C}{lang}\u{300D}。\n\
- **保持原说话语气**:口语就维持口语化(\u{4E0D}强行正式化),书面就维持书面。\n\
- **保持原意**:不增不减、不解释、不扩写、不替用户做决策。\
如\"我想给老板发个邮件说今天我们要推迟发布\"应翻译成\"I want to email my boss saying we need to delay the release today\",\
\u{800C}\u{4E0D}\u{662F}主动生成邮件正文。\n\
- 数字、日期、时间用目标语言地区常见写法(\"5月1日下午两点\" → \"May 1, 2 PM\";\
\"明天上午十点\" → \"tomorrow at 10 AM\";\"100块\" → \"100 yuan\")。\n\
- 转写已经是目标语言时:去明显口癖(嗯、那个、就是、um、you know)+ 补必要标点,\u{4E0D}做风格改写。\n\
\n\
# 行为\n\
- 直接输出翻译结果。\n\
- 如果转写里夹杂多种语言,统一翻译到目标语言。\n\
- 转写本来就是目标语言时,做最小润色(补标点、去口癖)后输出。\n\
- 不要带 \u{300C}翻译:\u{300D}\u{300C}译文:\u{300D}之类前缀。\n\
## 边界 case\n\
- 转写非常短(一两个字)也照译,\u{4E0D}因为短就硬补内容。\n\
- 转写是命令式(\"加个空格 / 删除最后一行\")时,照原意翻译,\u{4E0D}改成陈述句。\n\
- 转写全是 fillers(\"嗯嗯啊那个\")时,输出空字符串。\n\
\n\
# 输出\n\
只输出翻译后的文本正文。",
target_language
只输出翻译后的正文,\u{4E0D}带 \u{300C}翻译:\u{300D}\u{300C}译文:\u{300D}\u{300C}Translation:\u{300D}之类前缀,\
\u{4E0D}加引号、\u{4E0D}加 markdown 围栏。",
lang = target_language
)
}
}
Expand Down
Loading