thebtf · thebtf · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/internal/db/gorm/migrations.go b/internal/db/gorm/migrations.go
@@ -1433,6 +1433,98 @@ func runMigrations(db *gorm.DB, embeddingDims int) error {
 			return nil
 		},
 	},
+	// Migration 043: purge garbage observations extracted by the SDK tool-output pipeline.
+	// Everything targeted here was written before v1.3.4 introduced whitelist mode:
+	// trivially discoverable facts, tool errors, status transitions, and cross-project
+	// noise that pollute semantic search and degrade agent performance.
+	{
+		ID: "043_radical_observation_cleanup",
+		Migrate: func(tx *gorm.DB) error {
+			titlePatterns := []string{
+				// Tool mechanics: discoverable at runtime, nothing worth remembering
+				"Tool%Query Pattern%",
+				"Tool%Search%Pattern%",
+				"Tool%Naming Convention%",
+				"Tool%Selection%Pattern%",
+				"Tool Search%Found%",
+				"Tool%Match%Found%",
+				"Memory Store Tool%",
+				"Deferred Tool%",
+				"Exact Tool Match%",
+
+				// Task status transitions: repeated 20+ times and carrying no value
+				"Task Status%Transition%",
+				"Task%Completion%Confirmed%",
+				"Status Transition%",
+				"Status%Discrepancy%",
+				"No Work Available%",
+
+				// Job tracking chatter
+				"Job Status%",
+				"Job-Session ID%",
+
+				// Artifacts of captured process output
+				"Process Output%",
+				"Stderr%Handling%",
+
+				// Meta-observations that merely restate the system prompt
+				"Claude Anti-Sycophancy%",
+				"User Interaction Guidelines%",
+				"User Communication Guidelines%",
+				"Strict Verification Guidelines%",
+				"Copyright Enforcement%",
+				"Critical Reminders%",
+				"Search Scaling by%",
+				"Past Conversation Search%",
+				"System Prompt Access%",
+				"Anti-Sycophancy%",
+				"Keyword Extraction Guidelines%",
+				"Tone Consistency%",
+				"Zero-confirmation Rule%",
+				"Plugin Configuration Warnings%",
+				"Prioritize Internal Tools%",
+
+				// Generic discoveries that change no behavior
+				"Brace%Discrepancy%",
+				"Brace%Detection%",
+				"Content Structure Pattern%",
+				"Severity Classification%",
+				"Pre-commit Check%",
+				"Commit Message%Convention%",
+				"Commit Message Structure%",
+				"File Size Monitoring%",
+
+				// iSCSI debugging noise (originating in the nvmdfs project)
+				"iSCSI%",
+
+				// Titles that are really timestamps emitted by the subtitle parser
+				"00:%",
+
+				// Leftover test observations (exact title match, no wildcard)
+				"type test",
+
+				// Transient Robocopy/npm noise
+				"Robocopy%",
+				"npm install completion%",
+			}
+
+			var removed int64 // rows deleted across all patterns that succeeded
+			for _, like := range titlePatterns {
+				res := tx.Exec("DELETE FROM observations WHERE title LIKE ?", like)
+				if err := res.Error; err != nil {
+					log.Warn().Err(err).Str("pattern", like).Msg("migration 043: delete failed")
+					continue // best effort: keep going with the remaining patterns
+				}
+				removed += res.RowsAffected
+			}
+
+			log.Info().Int64("total_deleted", removed).Msg("migration 043: radical observation cleanup complete")
+			return nil // success is reported even if individual patterns failed above
+		},
+		Rollback: func(tx *gorm.DB) error {
+			return nil // no-op: the deleted rows are not restored
+		},
+	},
 	})
 	if err := m.Migrate(); err != nil {
 		return fmt.Errorf("run gormigrate migrations: %w", err)

diff --git a/internal/search/manager.go b/internal/search/manager.go
@@ -119,6 +119,55 @@ func (m *SearchMetrics) GetStats() map[string]any {
 	}
 }
 
+// ApplyCompositeScoring re-weights similarityScores in place using multi-signal scoring.
+// Formula: score = similarity × recencyDecay × typeWeight × max(importance, 0.3)
+// A nil score map or nil observation is skipped; future timestamps count as age zero.
+func ApplyCompositeScoring(observations []*models.Observation, similarityScores map[int64]float64) {
+	if similarityScores == nil {
+		return // writing to a nil map would panic, and there is nothing to re-weight
+	}
+	now := time.Now()
+
+	// Type weights: decisions and patterns have higher behavioral impact
+	typeWeights := map[models.ObservationType]float64{
+		"decision":  1.4,
+		"bugfix":    1.3,
+		"feature":   1.2,
+		"pattern":   1.2,
+		"discovery": 0.8,
+		"change":    0.7,
+		"refactor":  0.9,
+	}
+
+	for _, obs := range observations {
+		if obs == nil {
+			continue
+		}
+		sim := similarityScores[obs.ID]
+		if sim == 0 {
+			sim = 0.5 // default when the score is missing (or exactly zero)
+		}
+
+		// Recency decay: half-life of 7 days. CreatedAtEpoch is treated as milliseconds.
+		// Age is clamped at zero so a timestamp ahead of the clock cannot boost above 1.
+		ageDays := math.Max(now.Sub(time.Unix(obs.CreatedAtEpoch/1000, 0)).Hours()/24.0, 0)
+		// Floor at 0.05 so old but very important observations don't disappear
+		recency := math.Max(math.Pow(0.5, ageDays/7.0), 0.05)
+
+		// Types missing from the table keep a neutral weight of 1.0.
+		tw := 1.0
+		if w, ok := typeWeights[obs.Type]; ok {
+			tw = w
+		}
+
+		// Importance (floor at 0.3 so unscored observations aren't penalized to zero)
+		imp := math.Max(obs.ImportanceScore, 0.3)
+
+		// Composite score replaces raw similarity
+		similarityScores[obs.ID] = sim * recency * tw * imp
+	}
+}
+
 // Manager provides unified search across PostgreSQL and pgvector.
 type Manager struct {
 	ctx              context.Context

diff --git a/internal/worker/handlers_context.go b/internal/worker/handlers_context.go
@@ -234,7 +234,6 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
 	}
 
 	// Apply cross-encoder reranking if available
-	var reranked bool
 	if s.reranker != nil && len(freshObservations) > 0 && usedVector {
 		// Build candidates from observations with their bi-encoder scores
 		candidates := make([]reranking.Candidate, len(freshObservations))
@@ -291,7 +290,6 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
 				}
 			}
 			freshObservations = reorderedObs
-			reranked = true
 
 			log.Debug().
 				Int("candidates", len(candidates)).
@@ -304,8 +302,14 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
 	clusteredObservations := clusterObservations(freshObservations, s.config.ClusteringThreshold)
 	duplicatesRemoved := len(freshObservations) - len(clusteredObservations)
 
-	// Sort by similarity score (highest first) if we have scores and didn't rerank
-	if len(similarityScores) > 0 && len(clusteredObservations) > 0 && !reranked {
+	// Apply composite scoring (recency × type × importance) as a post-processing step.
+	// This re-weights scores already computed by vector search or cross-encoder reranking.
+	if len(clusteredObservations) > 0 {
+		search.ApplyCompositeScoring(clusteredObservations, similarityScores)
+	}
+
+	// Sort by composite score (highest first)
+	if len(similarityScores) > 0 && len(clusteredObservations) > 0 {
 		sort.Slice(clusteredObservations, func(i, j int) bool {
 			scoreI := similarityScores[clusteredObservations[i].ID]
 			scoreJ := similarityScores[clusteredObservations[j].ID]