27 changes: 21 additions & 6 deletions .github/workflows/quick-test.yml
@@ -22,17 +22,32 @@ jobs:
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
key: ${{ runner.os }}-pip-quick-${{ hashFiles('**/requirements-ci.txt') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-pip-quick-

- name: Free up disk space
run: |
# Remove unnecessary packages and clean up
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov
# Install only essential dependencies for quick tests
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install transformers jiwer librosa soundfile
pip install --no-cache-dir pytest pytest-cov
# Install only essential dependencies for quick tests with CPU-only PyTorch
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
pip install --no-cache-dir transformers jiwer librosa soundfile

- name: Clean up pip cache
run: |
pip cache purge || true
df -h

- name: Run quick unit tests
run: |
46 changes: 40 additions & 6 deletions .github/workflows/tests.yml
@@ -26,20 +26,38 @@ jobs:
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
key: ${{ runner.os }}-pip-ci-${{ hashFiles('**/requirements-ci.txt') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-pip-ci-

- name: Free up disk space
run: |
# Remove unnecessary packages and clean up
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg libsndfile1
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*

- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest pytest-cov pytest-mock
# Use CI requirements with CPU-only PyTorch to save space
pip install --no-cache-dir -r requirements-ci.txt
pip install --no-cache-dir pytest pytest-cov pytest-mock

- name: Clean up pip cache
run: |
pip cache purge || true
df -h

- name: Run unit tests
run: |
@@ -123,11 +141,27 @@ jobs:
with:
python-version: '3.9'

- name: Free up disk space
run: |
# Remove unnecessary packages and clean up
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest pytest-mock requests
# Use CI requirements with CPU-only PyTorch to save space
pip install --no-cache-dir -r requirements-ci.txt
pip install --no-cache-dir pytest pytest-mock requests

- name: Clean up pip cache
run: |
pip cache purge || true
df -h

- name: Start API server in background
run: |
22 changes: 20 additions & 2 deletions .github/workflows/weekly-full-test.yml
@@ -20,16 +20,34 @@ jobs:
with:
python-version: '3.9'

- name: Free up disk space
run: |
# Remove unnecessary packages and clean up
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg libsndfile1
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*

- name: Install all dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest pytest-cov pytest-html pytest-xdist
# Use CI requirements with CPU-only PyTorch to save space
pip install --no-cache-dir -r requirements-ci.txt
pip install --no-cache-dir pytest pytest-cov pytest-html pytest-xdist

- name: Clean up pip cache
run: |
pip cache purge || true
df -h

- name: Run all tests in parallel
run: |
Binary file added Project Planning.pdf
Binary file not shown.
59 changes: 59 additions & 0 deletions requirements-ci.txt
@@ -0,0 +1,59 @@
# CI-specific requirements with CPU-only PyTorch to save disk space
# Use CPU-only PyTorch (much smaller than CUDA version)
--extra-index-url https://download.pytorch.org/whl/cpu
torch>=2.0.0
torchvision>=0.15.0
torchaudio>=2.0.0

# Core ML libraries
transformers>=4.35.0
accelerate>=0.24.0
datasets>=2.14.0
# Skip bitsandbytes and peft for CI (not needed for most tests)

# Audio processing
librosa>=0.10.0
soundfile>=0.12.0
pydub>=0.25.0
audioread>=3.0.0

# Evaluation
jiwer>=3.0.0

# Google Cloud (optional for CI, but keep for compatibility)
google-cloud-storage>=2.10.0
gcsfs>=2023.6.0

# Data processing
pandas>=2.0.0
numpy>=1.24.0
scikit-learn>=1.3.0
scipy>=1.11.0

# Visualization (minimal for CI)
matplotlib>=3.7.0
seaborn>=0.12.0

# Experiment Tracking (optional for CI)
wandb>=0.16.0

# Utilities
tqdm>=4.65.0
pyyaml>=6.0
python-dotenv>=1.0.0

# Ollama for LLM inference (Llama 2/3)
ollama>=0.1.0

# Development
pytest>=7.4.0
black>=23.0.0
flake8>=6.0.0

fastapi>=0.104.0
uvicorn>=0.24.0
--only-binary=:all: numba
python-multipart>=0.0.5

importlib-metadata
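
Since the whole point of this file is pulling CPU-only wheels from the extra index, a quick sanity check in CI can confirm the right build landed. This is a hedged sketch, not part of the PR; the "+cpu" version suffix is what the wheels on download.pytorch.org/whl/cpu typically report.

# Hypothetical sanity check for the CPU-only PyTorch install; not part of this PR.
import torch

print(torch.__version__)  # CPU wheels typically report a "+cpu" suffix, e.g. "2.1.0+cpu"
assert not torch.cuda.is_available(), "CUDA build of torch was installed unexpectedly"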

42 changes: 28 additions & 14 deletions src/agent/adaptive_scheduler.py
@@ -441,24 +441,38 @@ def _load_history(self):
self.error_samples_collected = data.get('error_samples_collected', 0)

# Load fine-tuning history
self.fine_tuning_history = [
FineTuningEvent(**event_data)
for event_data in data.get('fine_tuning_history', [])
]
self.fine_tuning_history = []
for event_data in data.get('fine_tuning_history', []):
try:
event = FineTuningEvent(**event_data)
self.fine_tuning_history.append(event)
except (TypeError, ValueError) as e:
logger.warning(f"Failed to load fine-tuning event: {e}")
continue

# Load performance history (limited to window size)
performance_data = data.get('performance_history', [])
for metric_data in performance_data[-self.performance_window_size:]:
metric = PerformanceMetrics(
timestamp=datetime.fromisoformat(metric_data['timestamp']),
error_count=metric_data['error_count'],
accuracy=metric_data['accuracy'],
wer=metric_data.get('wer'),
cer=metric_data.get('cer'),
inference_time=metric_data.get('inference_time', 0.0),
cost_per_inference=metric_data.get('cost_per_inference', 0.0)
)
self.performance_history.append(metric)
try:
# Validate timestamp before parsing
timestamp_str = metric_data.get('timestamp')
if not timestamp_str:
logger.warning("Missing timestamp in performance metric, skipping")
continue

metric = PerformanceMetrics(
timestamp=datetime.fromisoformat(timestamp_str),
error_count=metric_data['error_count'],
accuracy=metric_data['accuracy'],
wer=metric_data.get('wer'),
cer=metric_data.get('cer'),
inference_time=metric_data.get('inference_time', 0.0),
cost_per_inference=metric_data.get('cost_per_inference', 0.0)
)
self.performance_history.append(metric)
except (TypeError, ValueError, KeyError) as e:
logger.warning(f"Failed to load performance metric: {e}")
continue

logger.info(f"Loaded scheduler history from {self.history_path}")
except Exception as e:
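
The guard above turns a single malformed history record into a logged skip instead of a failed load. A minimal, self-contained sketch of that behaviour follows (the field names mirror the snippet above; the values are invented):

# Illustrative sketch of the defensive loading pattern above; the record shapes are assumptions.
import logging
from datetime import datetime

logger = logging.getLogger(__name__)

raw_history = [
    {"timestamp": "2024-05-01T12:00:00", "error_count": 3, "accuracy": 0.91},
    {"timestamp": None, "error_count": 2, "accuracy": 0.93},  # malformed: missing timestamp
]

loaded = []
for record in raw_history:
    try:
        ts = record.get("timestamp")
        if not ts:
            logger.warning("Missing timestamp in performance metric, skipping")
            continue
        loaded.append((datetime.fromisoformat(ts), record["error_count"], record["accuracy"]))
    except (TypeError, ValueError, KeyError) as exc:
        logger.warning(f"Failed to load performance metric: {exc}")
        continue

print(len(loaded))  # 1 -- the malformed entry is skipped rather than aborting the whole load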
50 changes: 25 additions & 25 deletions src/agent/agent.py
@@ -59,16 +59,17 @@ def __init__(
self.llm_corrector = LlamaLLMCorrector(
model_name=llm_model_name or "llama3.2:3b",
use_quantization=False, # Not used for Ollama
fast_mode=True # Kept for compatibility
fast_mode=True, # Kept for compatibility
raise_on_error=False # Don't raise, just mark as unavailable
)
if self.llm_corrector.is_available():
logger.info("✅ LLM corrector initialized successfully")
else:
logger.warning("⚠️ LLM not available, using rule-based correction only")
self.llm_corrector = None
# Keep llm_corrector object but it will be unavailable
except Exception as e:
logger.error(f"Failed to initialize LLM: {e}")
raise # Fail and alert if Ollama is not available
logger.warning(f"⚠️ Failed to initialize LLM: {e}. Continuing without LLM correction.")
self.llm_corrector = None

# Initialize adaptive scheduler and fine-tuner (Week 3)
self.enable_adaptive_fine_tuning = enable_adaptive_fine_tuning
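
Because initialization failures no longer raise, call sites are expected to check availability before using the corrector. A minimal sketch of that guard; only is_available() is confirmed by the code above, and correct_text() is a hypothetical method name:

# Sketch of how call sites can guard on LLM availability; correct_text() is a hypothetical API.
def apply_llm_correction(llm_corrector, transcript: str) -> str:
    if llm_corrector is not None and llm_corrector.is_available():
        return llm_corrector.correct_text(transcript)  # hypothetical LlamaLLMCorrector call
    return transcript  # fall back to the raw transcript when Ollama is unreachable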
@@ -194,31 +195,30 @@ def transcribe_with_agent(

# Record corrections for learning
for error in errors:
if error.suggested_correction or correction_method.startswith("llama"):
self.self_learner.record_error(
error_type=error.error_type,
transcript=transcript,
context={
'audio_length': audio_length_seconds,
'confidence': baseline_result.get('confidence'),
'correction_method': correction_method
},
correction=corrected_transcript if correction_method.startswith("llama") else error.suggested_correction
)

# Step 5: Record errors for learning (even if not corrected)
error_count = len(errors)
for error in errors:
self.self_learner.record_error(
error_type=error.error_type,
transcript=transcript,
context={
# Build comprehensive context
context = {
'audio_path': audio_path, # Store path for fine-tuning
'audio_length': audio_length_seconds,
'confidence': baseline_result.get('confidence'),
'error_confidence': error.confidence
}
)

# Add correction info if available
correction = None
if error.suggested_correction or correction_method.startswith("llama"):
context['correction_method'] = correction_method
correction = corrected_transcript if correction_method.startswith("llama") else error.suggested_correction

# Record error with all context (single recording per error)
self.self_learner.record_error(
error_type=error.error_type,
transcript=transcript,
context=context,
correction=correction
)

# Step 5: Calculate error count for metrics
error_count = len(errors)

# Step 6: Update adaptive scheduler and check for fine-tuning trigger (Week 3)
fine_tuning_triggered = False
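
For reference, an illustrative sketch of the single per-error record the consolidated loop now builds (all values invented):

# Illustrative shape of the one record now logged per error; every value here is made up.
context = {
    "audio_path": "data/clips/sample_001.wav",  # kept so later fine-tuning can revisit the audio
    "audio_length": 12.4,
    "confidence": 0.62,
    "error_confidence": 0.71,
    "correction_method": "llama_correction",    # only added when a correction was produced
}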
@@ -264,7 +264,7 @@ def transcribe_with_agent(
'error_threshold': self.error_detector.min_confidence_threshold,
'auto_correction_enabled': enable_auto_correction,
'correction_method': correction_method,
'llm_available': self.llm_corrector.is_available() if self.llm_corrector else False,
'llm_available': self.llm_corrector.is_available() if (self.llm_corrector and hasattr(self.llm_corrector, 'is_available')) else False,
'adaptive_fine_tuning_enabled': self.enable_adaptive_fine_tuning,
'fine_tuning_triggered': fine_tuning_triggered
}
4 changes: 2 additions & 2 deletions src/agent/error_detector.py
@@ -109,9 +109,9 @@ def detect_errors(

# 3. Length anomaly detection (based on audio length)
if audio_length_seconds:
expected_length = audio_length_seconds * 2.5 # ~2.5 chars per second average
expected_length = max(1, audio_length_seconds * 2.5) # ~2.5 chars per second average, minimum 1
actual_length = len(transcript)
ratio = actual_length / expected_length if expected_length > 0 else 1.0
ratio = actual_length / expected_length

if ratio > self.max_length_ratio:
errors.append(ErrorSignal(
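
A short worked example of the guarded length check; max_length_ratio=3.0 is an assumed threshold, the real value comes from the detector's configuration:

# Worked example of the length-anomaly check above; max_length_ratio=3.0 is an assumed threshold.
audio_length_seconds = 0.2          # very short clip: 0.2 * 2.5 = 0.5, clamped up to 1
transcript = "hello hello hello"    # 17 characters

expected_length = max(1, audio_length_seconds * 2.5)  # the clamp replaces the explicit zero check
ratio = len(transcript) / expected_length              # 17 / 1 = 17.0

max_length_ratio = 3.0
print(ratio > max_length_ratio)  # True -> flagged as a length anomaly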
8 changes: 4 additions & 4 deletions src/agent/fine_tuner.py
@@ -87,10 +87,10 @@ def __getitem__(self, idx):
# Process text labels
with self.processor.as_target_processor():
label_ids = self.processor(
sample['corrected_transcript'],
padding=True,
return_tensors="pt"
)
sample['corrected_transcript'],
padding=True,
return_tensors="pt"
)

return {
'input_values': inputs.input_values.squeeze(0),