Skip to content

Commit c66517b

Browse files
committed
Merge branch 'main' into pr/1679
2 parents a134903 + 91645f1 commit c66517b

183 files changed

Lines changed: 14626 additions & 5773 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 34 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/cli/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ impl Export {
169169
compression: cap_export::mp4::ExportCompression::Maximum,
170170
custom_bpp: None,
171171
force_ffmpeg_decoder: false,
172+
optimize_filesize: false,
172173
}
173174
.export(exporter_base, move |_f| {
174175
// print!("\rrendered frame {f}");

apps/desktop/src-tauri/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cap-desktop"
3-
version = "0.4.8"
3+
version = "0.4.82"
44
description = "Beautiful screen recordings, owned by you."
55
authors = ["you"]
66
edition = "2024"

apps/desktop/src-tauri/src/captions.rs

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextPar
2222

2323
pub use cap_project::{CaptionSegment, CaptionSettings, CaptionWord};
2424

25-
use crate::http_client;
25+
use crate::{general_settings::GeneralSettingsStore, http_client};
2626

2727
#[derive(Debug, Serialize, Deserialize, Type, Clone)]
2828
pub struct CaptionData {
@@ -529,6 +529,7 @@ fn process_with_whisper(
529529
audio_path: &PathBuf,
530530
context: Arc<WhisperContext>,
531531
language: &str,
532+
transcription_hints: &[String],
532533
) -> Result<CaptionData, String> {
533534
log::info!("=== WHISPER TRANSCRIPTION START ===");
534535
log::info!("Processing audio file: {audio_path:?}");
@@ -544,6 +545,10 @@ fn process_with_whisper(
544545
params.set_language(Some(if language == "auto" { "auto" } else { language }));
545546
params.set_max_len(i32::MAX);
546547

548+
if let Some(initial_prompt) = build_initial_prompt(transcription_hints) {
549+
params.set_initial_prompt(&initial_prompt);
550+
}
551+
547552
log::info!("Whisper params - translate: false, token_timestamps: true, max_len: MAX");
548553

549554
let mut audio_file = File::open(audio_path)
@@ -783,10 +788,32 @@ fn process_with_whisper(
783788
})
784789
}
785790

791+
/// Builds the initial prompt handed to Whisper from the user's transcription hints.
///
/// Every hint has its NUL characters removed and its surrounding whitespace trimmed.
/// Hints that are empty after cleaning, or that exactly match an earlier cleaned hint
/// (case-sensitive comparison), are dropped; the remaining hints keep their original
/// order.
///
/// Returns `None` when no usable hint is left, otherwise a fixed preamble followed by
/// the hints joined with `"; "`.
fn build_initial_prompt(transcription_hints: &[String]) -> Option<String> {
    let unique_hints = transcription_hints
        .iter()
        .map(|raw| raw.replace('\0', "").trim().to_owned())
        .filter(|hint| !hint.is_empty())
        .fold(Vec::<String>::new(), |mut seen, hint| {
            // Linear scan keeps first-seen order; hint lists are expected to be short.
            if !seen.contains(&hint) {
                seen.push(hint);
            }
            seen
        });

    if unique_hints.is_empty() {
        return None;
    }

    let joined = unique_hints.join("; ");
    Some(format!(
        "Preferred spellings, names, and capitalization for this transcript: {joined}"
    ))
}
811+
786812
#[tauri::command]
787813
#[specta::specta]
788814
#[instrument]
789815
pub async fn transcribe_audio(
816+
app: AppHandle,
790817
video_path: String,
791818
model_path: String,
792819
language: String,
@@ -843,11 +870,18 @@ pub async fn transcribe_audio(
843870
}
844871
};
845872

873+
let transcription_hints = GeneralSettingsStore::get(&app)
874+
.ok()
875+
.flatten()
876+
.map(|settings| settings.transcription_hints)
877+
.unwrap_or_default();
878+
846879
log::info!("Starting Whisper transcription in blocking task...");
847-
let whisper_result =
848-
tokio::task::spawn_blocking(move || process_with_whisper(&audio_path, context, &language))
849-
.await
850-
.map_err(|e| format!("Whisper task panicked: {e}"))?;
880+
let whisper_result = tokio::task::spawn_blocking(move || {
881+
process_with_whisper(&audio_path, context, &language, &transcription_hints)
882+
})
883+
.await
884+
.map_err(|e| format!("Whisper task panicked: {e}"))?;
851885

852886
match whisper_result {
853887
Ok(captions) => {

apps/desktop/src-tauri/src/export.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ pub async fn generate_export_preview(
293293
.iter()
294294
.map(|s| RenderSegment {
295295
cursor: s.cursor.clone(),
296+
keyboard: s.keyboard.clone(),
296297
decoders: s.decoders.clone(),
297298
})
298299
.collect();

apps/desktop/src-tauri/src/frame_ws.rs

Lines changed: 27 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -27,45 +27,6 @@ fn pack_frame_data(
2727
data
2828
}
2929

30-
fn pack_nv12_frame_ref(
31-
data: &[u8],
32-
width: u32,
33-
height: u32,
34-
y_stride: u32,
35-
frame_number: u32,
36-
target_time_ns: u64,
37-
) -> Vec<u8> {
38-
let metadata_size = 28;
39-
let mut output = Vec::with_capacity(data.len() + metadata_size);
40-
output.extend_from_slice(data);
41-
output.extend_from_slice(&y_stride.to_le_bytes());
42-
output.extend_from_slice(&height.to_le_bytes());
43-
output.extend_from_slice(&width.to_le_bytes());
44-
output.extend_from_slice(&frame_number.to_le_bytes());
45-
output.extend_from_slice(&target_time_ns.to_le_bytes());
46-
output.extend_from_slice(&NV12_FORMAT_MAGIC.to_le_bytes());
47-
output
48-
}
49-
50-
fn pack_frame_data_ref(
51-
data: &[u8],
52-
stride: u32,
53-
height: u32,
54-
width: u32,
55-
frame_number: u32,
56-
target_time_ns: u64,
57-
) -> Vec<u8> {
58-
let metadata_size = 24;
59-
let mut output = Vec::with_capacity(data.len() + metadata_size);
60-
output.extend_from_slice(data);
61-
output.extend_from_slice(&stride.to_le_bytes());
62-
output.extend_from_slice(&height.to_le_bytes());
63-
output.extend_from_slice(&width.to_le_bytes());
64-
output.extend_from_slice(&frame_number.to_le_bytes());
65-
output.extend_from_slice(&target_time_ns.to_le_bytes());
66-
output
67-
}
68-
6930
#[derive(Clone, Copy, PartialEq, Eq)]
7031
pub enum WSFrameFormat {
7132
Rgba,
@@ -85,25 +46,33 @@ pub struct WSFrame {
8546
pub created_at: Instant,
8647
}
8748

88-
fn pack_ws_frame_ref(frame: &WSFrame) -> Vec<u8> {
49+
fn pack_ws_frame(frame: &WSFrame) -> Vec<u8> {
50+
let metadata_size = match frame.format {
51+
WSFrameFormat::Nv12 => 28usize,
52+
WSFrameFormat::Rgba => 24,
53+
};
54+
let mut buf = Vec::with_capacity(frame.data.len() + metadata_size);
55+
buf.extend_from_slice(&frame.data);
56+
8957
match frame.format {
90-
WSFrameFormat::Nv12 => pack_nv12_frame_ref(
91-
&frame.data,
92-
frame.width,
93-
frame.height,
94-
frame.stride,
95-
frame.frame_number,
96-
frame.target_time_ns,
97-
),
98-
WSFrameFormat::Rgba => pack_frame_data_ref(
99-
&frame.data,
100-
frame.stride,
101-
frame.height,
102-
frame.width,
103-
frame.frame_number,
104-
frame.target_time_ns,
105-
),
58+
WSFrameFormat::Nv12 => {
59+
buf.extend_from_slice(&frame.stride.to_le_bytes());
60+
buf.extend_from_slice(&frame.height.to_le_bytes());
61+
buf.extend_from_slice(&frame.width.to_le_bytes());
62+
buf.extend_from_slice(&frame.frame_number.to_le_bytes());
63+
buf.extend_from_slice(&frame.target_time_ns.to_le_bytes());
64+
buf.extend_from_slice(&NV12_FORMAT_MAGIC.to_le_bytes());
65+
}
66+
WSFrameFormat::Rgba => {
67+
buf.extend_from_slice(&frame.stride.to_le_bytes());
68+
buf.extend_from_slice(&frame.height.to_le_bytes());
69+
buf.extend_from_slice(&frame.width.to_le_bytes());
70+
buf.extend_from_slice(&frame.frame_number.to_le_bytes());
71+
buf.extend_from_slice(&frame.target_time_ns.to_le_bytes());
72+
}
10673
}
74+
75+
buf
10776
}
10877

10978
pub async fn create_watch_frame_ws(
@@ -138,7 +107,7 @@ pub async fn create_watch_frame_ws(
138107
{
139108
let packed = {
140109
let borrowed = camera_rx.borrow();
141-
borrowed.as_deref().map(pack_ws_frame_ref)
110+
borrowed.as_deref().map(pack_ws_frame)
142111
};
143112
if let Some(packed) = packed
144113
&& let Err(e) = socket.send(Message::Binary(packed)).await
@@ -173,7 +142,7 @@ pub async fn create_watch_frame_ws(
173142
WSFrameFormat::Rgba => "RGBA",
174143
};
175144

176-
let packed = pack_ws_frame_ref(frame);
145+
let packed = pack_ws_frame(frame);
177146
let packed_len = packed.len();
178147

179148
match socket.send(Message::Binary(packed)).await {

apps/desktop/src-tauri/src/general_settings.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ pub struct GeneralSettingsStore {
121121
skip_serializing_if = "no"
122122
)]
123123
pub enable_native_camera_preview: bool,
124-
#[serde(default)]
124+
#[serde(default = "default_true")]
125125
pub auto_zoom_on_clicks: bool,
126+
#[serde(default = "default_true")]
127+
pub capture_keyboard_events: bool,
126128
#[serde(default)]
127129
pub post_deletion_behaviour: PostDeletionBehaviour,
128130
#[serde(default = "default_excluded_windows")]
@@ -137,6 +139,8 @@ pub struct GeneralSettingsStore {
137139
pub crash_recovery_recording: bool,
138140
#[serde(default = "default_max_fps")]
139141
pub max_fps: u32,
142+
#[serde(default = "default_transcription_hints")]
143+
pub transcription_hints: Vec<String>,
140144
#[serde(default)]
141145
pub editor_preview_quality: EditorPreviewQuality,
142146
#[serde(default)]
@@ -145,6 +149,8 @@ pub struct GeneralSettingsStore {
145149
pub camera_window_position: Option<WindowPosition>,
146150
#[serde(default)]
147151
pub camera_window_positions_by_monitor_name: BTreeMap<String, WindowPosition>,
152+
#[serde(default = "default_true")]
153+
pub has_completed_onboarding: bool,
148154
}
149155

150156
fn default_enable_native_camera_preview() -> bool {
@@ -167,6 +173,15 @@ fn default_max_fps() -> u32 {
167173
60
168174
}
169175

176+
/// Returns the built-in list of transcription hints.
///
/// Referenced by the `#[serde(default = "default_transcription_hints")]` attribute on
/// `GeneralSettingsStore::transcription_hints` and by that struct's `Default` impl.
fn default_transcription_hints() -> Vec<String> {
    ["Cap", "TypeScript", "My Brand Name", "mywebsite.com"]
        .iter()
        .map(|hint| (*hint).to_owned())
        .collect()
}
184+
170185
fn default_server_url() -> String {
171186
std::option_env!("VITE_SERVER_URL")
172187
.unwrap_or("https://cap.so")
@@ -203,17 +218,20 @@ impl Default for GeneralSettingsStore {
203218
recording_countdown: Some(3),
204219
enable_native_camera_preview: default_enable_native_camera_preview(),
205220
auto_zoom_on_clicks: false,
221+
capture_keyboard_events: true,
206222
post_deletion_behaviour: PostDeletionBehaviour::DoNothing,
207223
excluded_windows: default_excluded_windows(),
208224
delete_instant_recordings_after_upload: false,
209225
instant_mode_max_resolution: 1920,
210226
default_project_name_template: None,
211227
crash_recovery_recording: true,
212228
max_fps: 60,
229+
transcription_hints: default_transcription_hints(),
213230
editor_preview_quality: EditorPreviewQuality::Half,
214231
main_window_position: None,
215232
camera_window_position: None,
216233
camera_window_positions_by_monitor_name: BTreeMap::new(),
234+
has_completed_onboarding: false,
217235
}
218236
}
219237
}

apps/desktop/src-tauri/src/import.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
506506
mic: None,
507507
system_audio: None,
508508
cursor: None,
509+
keyboard: None,
509510
}],
510511
cursors: Cursors::default(),
511512
status: Some(StudioRecordingStatus::InProgress),
@@ -599,6 +600,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
599600
mic: None,
600601
system_audio,
601602
cursor: None,
603+
keyboard: None,
602604
}],
603605
cursors: Cursors::default(),
604606
status: Some(StudioRecordingStatus::Complete),

0 commit comments

Comments
 (0)