CapSoftware · Brendonovich · Oct 13, 2025 · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -47,3 +47,7 @@ tauri.windows.conf.json
 # Cursor
 .cursor
 .env*.local
+
+# Audio bug fix working files
+docs/memory-bank/
+claude-audio-fix.md
diff --git a/crates/media-info/src/lib.rs b/crates/media-info/src/lib.rs
@@ -24,7 +24,7 @@
 }
 
 impl AudioInfo {
-    pub const MAX_AUDIO_CHANNELS: u16 = 2;
+    pub const MAX_AUDIO_CHANNELS: u16 = 8;
 
     pub const fn new(
         sample_format: Sample,
@@ -62,7 +62,12 @@
             SupportedBufferSize::Unknown => 1024,
         };
 
-        let channels = config.channels().clamp(1, Self::MAX_AUDIO_CHANNELS);
+        let raw_channels = config.channels();
+        let channels = if Self::channel_layout_raw(raw_channels).is_some() {
+            raw_channels
+        } else {
+            raw_channels.min(Self::MAX_AUDIO_CHANNELS).max(1)
+        };
 
         Self {
             sample_format,
@@ -92,6 +97,12 @@
         Some(match channels {
             1 => ChannelLayout::MONO,
             2 => ChannelLayout::STEREO,
+            3 => ChannelLayout::SURROUND,
+            4 => ChannelLayout::QUAD,
+            5 => ChannelLayout::_5POINT0,
+            6 => ChannelLayout::_5POINT1,
+            7 => ChannelLayout::_6POINT1,
+            8 => ChannelLayout::_7POINT1,
             _ => return None,
         })
     }

diff --git a/crates/recording/src/feeds/microphone.rs b/crates/recording/src/feeds/microphone.rs
@@ -126,35 +126,85 @@ impl MicrophoneFeed {
 }
 
 fn get_usable_device(device: Device) -> Option<(String, Device, SupportedStreamConfig)> {
-    device
+    let device_name_for_logging = device.name().ok();
+
+    let result = device
         .supported_input_configs()
         .map_err(|error| {
             error!(
-                "Error getting supported input configs for device: {}",
-                error
+                "Error getting supported input configs for device {:?}: {}",
+                device_name_for_logging, error
             );
             error
         })
         .ok()
         .and_then(|configs| {
             let mut configs = configs.collect::<Vec<_>>();
+
+            // Log all configs for debugging
+            if let Some(ref name) = device_name_for_logging {
+                info!("Device '{}' available configs:", name);
+                for config in &configs {
+                    info!("  Format: {:?}, Min rate: {}, Max rate: {}, Sample size: {}",
+                        config.sample_format(),
+                        config.min_sample_rate().0,
+                        config.max_sample_rate().0,
+                        config.sample_format().sample_size()
+                    );
+                }
+            }
+
             configs.sort_by(|a, b| {
                 b.sample_format()
                     .sample_size()
                     .cmp(&a.sample_format().sample_size())
-                    .then(b.max_sample_rate().cmp(&a.max_sample_rate()))
+                    .then(a.max_sample_rate().cmp(&b.max_sample_rate()))
             });
-            configs
+
+            let selected = configs
                 .into_iter()
-                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 <= 48000)
-                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some())
-        })
-        .and_then(|config| {
-            device
-                .name()
-                .ok()
-                .map(|name| (name, device, config.with_max_sample_rate()))
-        })
+                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 >= 48000)
+                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some());
+
-            let selected = configs
-                .into_iter()
-                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 <= 48000)
-                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some())
-        })
-        .and_then(|config| {
-            device
-                .name()
-                .ok()
-                .map(|name| (name, device, config.with_max_sample_rate()))
-        })
-                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 >= 48000)
-                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some());
+            let selected = configs
+                .into_iter()
+                .filter(|c| {
+                    c.min_sample_rate().0 <= 48_000
+                        && c.max_sample_rate().0 >= 48_000
+                        && c.channels() >= 1
+                        && c.channels() <= AudioInfo::MAX_AUDIO_CHANNELS
+                })
+                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some());
-            let selected = configs
-                .into_iter()
-                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 <= 48000)
-                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some())
-        })
-        .and_then(|config| {
-            device
-                .name()
-                .ok()
-                .map(|name| (name, device, config.with_max_sample_rate()))
-        })
-                .filter(|c| c.min_sample_rate().0 <= 48000 && c.max_sample_rate().0 >= 48000)
-                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some());
+            let selected = configs
+                .into_iter()
+                .filter(|c| {
+                    c.min_sample_rate().0 <= 48_000
+                        && c.max_sample_rate().0 >= 48_000
+                        && c.channels() >= 1
+                        && c.channels() <= AudioInfo::MAX_AUDIO_CHANNELS
+                })
+                .find(|c| ffmpeg_sample_format_for(c.sample_format()).is_some());
+            if let Some(ref config) = selected {
+                if let Ok(device_name) = device.name() {
+                    info!("Selected config for '{}': Format={:?}, Min={}, Max={}",
+                        device_name,
+                        config.sample_format(),
+                        config.min_sample_rate().0,
+                        config.max_sample_rate().0
+                    );
+                }
+            }
+
+            selected
+        });
+
+    if result.is_some() {
+        if let Some(ref name) = device_name_for_logging {
+            info!("✓ Device '{}' is usable", name);
+        }
+    } else {
+        if let Some(ref name) = device_name_for_logging {
+            warn!("✗ Device '{}' rejected - no suitable config found", name);
+        }
+    }
+
+    result.and_then(|config| {
+        let final_config = config.with_sample_rate(cpal::SampleRate(48000));
+        device
+            .name()
+            .ok()
+            .map(|name| {
+                info!("Final config for '{}': sample_rate={}, channels={}, format={:?}",
+                    name,
+                    final_config.sample_rate().0,
+                    final_config.channels(),
+                    final_config.sample_format()
+                );
+                (name, device, final_config)
+            })
+    })
 }
 
 #[derive(Reply)]
@@ -290,13 +340,30 @@ impl Message<SetInput> for MicrophoneFeed {
 
         std::thread::spawn({
             let config = config.clone();
+            let device_name_for_log = device.name().ok();
             move || {
+                info!("🎤 Building stream for '{:?}' with config: rate={}, channels={}, format={:?}",
+                    device_name_for_log,
+                    config.sample_rate().0,
+                    config.channels(),
+                    sample_format
+                );
+
                 let stream = match device.build_input_stream_raw(
                     &config.into(),
                     sample_format,
                     {
                         let actor_ref = actor_ref.clone();
+                        let mut callback_count = 0u64;
                         move |data, info| {
+                            if callback_count == 0 {
+                                info!("🎤 First audio callback - data size: {} bytes, format: {:?}",
+                                    data.bytes().len(),
+                                    data.sample_format()
+                                );
+                            }
+                            callback_count += 1;
+
                             let _ = actor_ref
                                 .tell(MicrophoneSamples {
                                     data: data.bytes().to_vec(),

diff --git a/docs/audio-device-status.md b/docs/audio-device-status.md
@@ -0,0 +1,68 @@
+# Current Audio Device Status
+
+## System Audio Configuration (as of initial investigation)
+
+### Polar 2 (BlackStar Audio Interface)
+- **Status**: Default Input Device: Yes, Default Output Device: Yes
+- **Current Sample Rate**: 48000 Hz (48 kHz)
+- **Input Channels**: 4
+- **Output Channels**: 4
+- **Transport**: USB
+- **Manufacturer**: Blackstar
+
+### Insta360 Link 2 (Webcam with Microphone)
+- **Current Sample Rate**: 48000 Hz (48 kHz)
+- **Input Channels**: 1
+- **Transport**: USB
+- **Manufacturer**: Insta360
+
+### Other Detected Audio Devices
+- LG ULTRAWIDE (HDMI audio - 48 kHz)
+- LEN T23i-20 (HDMI audio - 48 kHz)
+- USB PnP Audio Device (2x devices - 48 kHz)
+- Mac mini Speakers (Built-in - 48 kHz)
+
+## Key Observations
+
+1. ✅ **Both peripheral audio devices are at 48 kHz** - the standard sample rate
+2. ✅ **No obvious sample rate mismatch at the system level**
+3. 🤔 **Polar 2 is set as BOTH default input and output device**
+4. 📝 **Multiple audio input sources available** - need to verify which Cap uses
+
+## Implications for Investigation
+
+Since both devices are at 48 kHz, the audio slowdown issue is likely NOT a simple system-level sample rate configuration problem. Possible causes to investigate:
+
+### Hypothesis 1: Audio Metadata Misreading
+- Cap may be incorrectly detecting or storing the audio sample rate
+- Audio recorded at 48 kHz but metadata incorrectly labels it as different rate
+- During export, encoder uses wrong metadata → incorrect playback
+
+### Hypothesis 2: Timestamp/Clock Issues
+- Audio frame timestamps may be incorrect during recording
+- Could cause temporal misalignment during export
+- Common with USB audio interfaces that don't sync perfectly with system clock
+
+### Hypothesis 3: Buffer Size / Frame Rate Mismatch
+- Recording buffer size not matching expected frame duration
+- Could accumulate timing errors over the recording
+- Would explain the slowed-down, lower-pitched playback symptoms
+
+### Hypothesis 4: FFmpeg Encoding Parameters
+- Export logic may have hard-coded assumptions about audio parameters
+- Even if source is 48 kHz, export settings might be using different rate
+- Need to check FFmpeg command-line arguments in export code
+
+## Next Steps
+1. ✅ Confirmed current sample rates (both at 48 kHz)
+2. ⏳ Investigate Cap audio recording code (crates/recording/)
+3. ⏳ Check export/encoding logic (crates/export/)
+4. ⏳ Create test recording and analyze with ffprobe
+5. ⏳ Look for timestamp handling and audio sync code
+6. ⏳ Examine FFmpeg parameter generation for export
+
+## Testing Strategy
+Even though devices are at 48 kHz, still test with:
+- 96 kHz setting (to rule out any dynamic rate switching)
+- 192 kHz setting (extreme case)
+- Monitor if sample rate changes during Cap recording session
diff --git a/docs/hardware-specs.md b/docs/hardware-specs.md
@@ -0,0 +1,93 @@
+# Hardware Specifications - Audio Bug Investigation
+
+This document details the hardware specifications for the peripheral devices being used on the Mac Mini where audio export issues occur.
+
+## Problem Configuration (Mac Mini)
+
+### RODE PodMic - Microphone
+- **Type**: Dynamic broadcast microphone (analog XLR output)
+- **Frequency Response**: 20 Hz - 20 kHz
+- **Output**: XLR analog signal (no digital sample rate)
+- **Sensitivity**: -57 dB re 1 Volt/Pascal (1.60mV @ 94 dB SPL) ± 2 dB @ 1kHz
+- **Output Impedance**: 320 Ω
+- **Connection**: XLR cable to BlackStar Polar 2 audio interface
+- **Notes**:
+  - Analog microphone - sample rate is determined by the audio interface
+  - Tailored frequency response for speech and broadcast applications
+  - Not a USB microphone - requires audio interface
+
+### BlackStar Polar 2 - Audio Interface
+- **Type**: 2-channel USB audio interface
+- **A/D Conversion**: 24-bit / up to 192kHz
+- **Supported Sample Rates**:
+  - 44.1 kHz
+  - 48 kHz
+  - 88.2 kHz
+  - 96 kHz
+  - 176.4 kHz
+  - **192 kHz** (maximum)
+- **Input Channels**: 2 high headroom FET inputs
+- **Connection**: USB to Mac Mini (class-compliant, no drivers needed)
+- **Platform Support**: macOS, Windows, iOS (with adapters), Android (with adapters)
+- **Notes**:
+  - Class-compliant interface (works without drivers on macOS)
+  - Can operate at multiple sample rates - configured in macOS Audio MIDI Setup
+  - **CRITICAL**: Current configured sample rate unknown - needs verification
+
+### Insta360 Link 2 - Webcam
+- **Type**: 4K PTZ webcam with built-in microphone
+- **Video**: 4K resolution, 1/2" sensor
+- **Audio**:
+  - AI noise-canceling microphone
+  - Pickup range: up to 3 meters (optimal within 1.5m)
+  - Three audio modes: Voice Focus, Voice Suppression, Music Balance
+  - **Sample Rate**: Not officially documented (likely 48 kHz based on industry standard)
+- **Connection**: USB to Mac Mini
+- **Notes**:
+  - Built-in microphone may have different sample rate than BlackStar Polar 2
+  - If both audio sources are being used, potential for sample rate conflicts
+
+## Working Configuration (MacBook Air)
+- **Camera**: Built-in FaceTime HD camera
+- **Microphone**: Built-in microphone
+- **Audio Interface**: Internal (Apple T2/M1/M2 chip)
+- **Sample Rate**: Likely standardized at 48 kHz
+- **Result**: ✅ No audio issues
+
+## Investigation Focus
+
+### Primary Hypothesis: Sample Rate Mismatch
+The audio slowdown and pitch reduction suggest that audio is being recorded at one sample rate but exported/played back assuming a different rate.
+
+**Example scenario:**
+- Audio recorded at 96 kHz (BlackStar Polar 2 setting)
+- Exported/encoded assuming 48 kHz
+- Result: Playback at 50% speed, with pitch lowered by one octave (all frequencies halved)
+
+**Verification needed:**
+1. Check current BlackStar Polar 2 sample rate in Audio MIDI Setup
+2. Examine Cap's audio capture code for sample rate detection
+3. Check export/encoding logic for hard-coded sample rate assumptions
+4. Test with various sample rates to identify pattern
+
+### Secondary Consideration: Multiple Audio Sources
+If both the Insta360 Link 2 microphone AND the RODE PodMic (via BlackStar Polar 2) are active:
+- Two different audio devices with potentially different sample rates
+- Need to verify which audio source Cap is actually using during recording
+- Potential for audio device selection issues
+
+## Next Steps
+1. ✅ Document hardware specifications (this file)
+2. ⏳ Check Audio MIDI Setup for current BlackStar Polar 2 configuration
+3. ⏳ Investigate Cap audio recording code
+4. ⏳ Create diagnostic test recording
+5. ⏳ Analyze audio file metadata (ffprobe)
+6. ⏳ Implement fix for sample rate handling
+
+## Testing Protocol
+For each fix attempt, test with:
+- BlackStar Polar 2 at 48 kHz (should match "expected" rate)
+- BlackStar Polar 2 at 96 kHz (likely current problematic setting)
+- BlackStar Polar 2 at 192 kHz (maximum, most extreme test case)
+
+Expected outcome: Audio export should maintain correct speed/pitch regardless of input device sample rate.