Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions internal/db/gorm/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,98 @@ func runMigrations(db *gorm.DB, embeddingDims int) error {
return nil
},
},
// Migration 043: Radical cleanup of garbage SDK-extracted observations.
// These observations were created by the SDK tool output extraction pipeline before v1.3.4
// (whitelist mode). They are trivially discoverable facts, tool errors, status transitions,
// and cross-project noise that pollute semantic search and degrade agent performance.
{
ID: "043_radical_observation_cleanup",
Migrate: func(tx *gorm.DB) error {
garbagePatterns := []string{
// Tool mechanics (trivially discoverable at runtime)
"Tool%Query Pattern%",
"Tool%Search%Pattern%",
"Tool%Naming Convention%",
"Tool%Selection%Pattern%",
"Tool Search%Found%",
"Tool%Match%Found%",
"Memory Store Tool%",
"Deferred Tool%",
"Exact Tool Match%",

// Task status transitions (repeated 20+ times, zero value)
"Task Status%Transition%",
"Task%Completion%Confirmed%",
"Status Transition%",
"Status%Discrepancy%",
"No Work Available%",

// Job tracking noise
"Job Status%",
"Job-Session ID%",

// Process output artifacts
"Process Output%",
"Stderr%Handling%",

// System prompt meta-observations
"Claude Anti-Sycophancy%",
"User Interaction Guidelines%",
"User Communication Guidelines%",
"Strict Verification Guidelines%",
"Copyright Enforcement%",
"Critical Reminders%",
"Search Scaling by%",
"Past Conversation Search%",
"System Prompt Access%",
"Anti-Sycophancy%",
"Keyword Extraction Guidelines%",
"Tone Consistency%",
"Zero-confirmation Rule%",
"Plugin Configuration Warnings%",
"Prioritize Internal Tools%",

// Generic discoveries with no behavioral impact
"Brace%Discrepancy%",
"Brace%Detection%",
"Content Structure Pattern%",
"Severity Classification%",
"Pre-commit Check%",
"Commit Message%Convention%",
"Commit Message Structure%",
"File Size Monitoring%",

// iSCSI debug noise (from nvmdfs project)
"iSCSI%",

// Timestamp-based titles from subtitle parser
"00:%",

// Test observations
"type test",

// Robocopy/npm transient noise
"Robocopy%",
"npm install completion%",
}

var totalDeleted int64
for _, pattern := range garbagePatterns {
result := tx.Exec("DELETE FROM observations WHERE title LIKE ?", pattern)
if result.Error != nil {
log.Warn().Err(result.Error).Str("pattern", pattern).Msg("migration 043: delete failed")
continue
}
totalDeleted += result.RowsAffected
}
Comment on lines +1512 to +1519

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This loop executes a separate DELETE query for each of the ~45 patterns. This results in many database round trips and can be inefficient, especially on a large observations table. Consider combining these into a single DELETE statement using OR conditions to improve performance. For example: DELETE FROM observations WHERE title LIKE ? OR title LIKE ? .... This would make the operation atomic and significantly faster.


log.Info().Int64("total_deleted", totalDeleted).Msg("migration 043: radical observation cleanup complete")
return nil
},
Rollback: func(tx *gorm.DB) error {
return nil
},
},
})
if err := m.Migrate(); err != nil {
return fmt.Errorf("run gormigrate migrations: %w", err)
Expand Down
49 changes: 49 additions & 0 deletions internal/search/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,55 @@ func (m *SearchMetrics) GetStats() map[string]any {
}
}

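// ApplyCompositeScoring overwrites each observation's entry in similarityScores with a
// multi-signal composite score. It does not reorder the slice; callers sort by the updated scores.
// Formula: score = similarity × max(recencyDecay, 0.05) × typeWeight × max(importance, 0.3),
// where recencyDecay = 0.5^(ageDays/7), similarity falls back to 0.5 when the map has no
// (or a zero) entry for the observation, and typeWeight is 1.0 for types not listed below.
// This is intended to rank recent, high-importance decisions above old generic discoveries.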
func ApplyCompositeScoring(observations []*models.Observation, similarityScores map[int64]float64) {
now := time.Now()

// Type weights: decisions and patterns have higher behavioral impact
typeWeights := map[models.ObservationType]float64{
"decision": 1.4,
"bugfix": 1.3,
"feature": 1.2,
"pattern": 1.2,
"discovery": 0.8,
"change": 0.7,
"refactor": 0.9,
}
Comment on lines +129 to +137
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Поиск констант ObservationType в models
rg -n "ObsType\w*\s*=|ObservationType\s*=" --type=go

Repository: thebtf/engram

Length of output: 1054


🏁 Script executed:

cd internal/search && cat -n manager.go | sed -n '125,145p'

Repository: thebtf/engram

Length of output: 769


🏁 Script executed:

cat -n pkg/models/observation.go | head -30

Repository: thebtf/engram

Length of output: 1012


Используйте константы типов вместо строковых литералов для типобезопасности.

Карта typeWeights должна использовать константы из пакета models вместо строковых литералов. Однако имеется проблема: для типа "pattern" отсутствует соответствующая константа ObsType* в models.ObservationType, хотя для всех остальных типов они определены.

Требуется либо:

  1. Добавить ObsTypePattern в pkg/models/observation.go
  2. Либо удалить "pattern" из этой карты, если это не поддерживаемый тип наблюдения

После этого заменить все строковые литералы на константы:

  • "decision" → models.ObsTypeDecision
  • "bugfix" → models.ObsTypeBugfix
  • "feature" → models.ObsTypeFeature
  • "discovery" → models.ObsTypeDiscovery
  • "change" → models.ObsTypeChange
  • "refactor" → models.ObsTypeRefactor
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/search/manager.go` around lines 129 - 137, The map typeWeights in
internal/search/manager.go currently uses string literals for keys and includes
"pattern" which has no corresponding models constant; either add a new constant
models.ObsTypePattern in pkg/models/observation.go (matching the ObservationType
type and existing ObsType* pattern) or remove the "pattern" entry from the map
if that observation type is not supported, and then replace all remaining string
keys with the corresponding constants (models.ObsTypeDecision,
models.ObsTypeBugfix, models.ObsTypeFeature, models.ObsTypeDiscovery,
models.ObsTypeChange, models.ObsTypeRefactor) to ensure type-safety.

Comment on lines +129 to +137

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The typeWeights map is re-initialized on every call to ApplyCompositeScoring. Since its contents are static, consider defining it as a package-level variable to avoid repeated memory allocations. This can improve performance, as this function is part of the search path.


for _, obs := range observations {
sim := similarityScores[obs.ID]
if sim == 0 {
sim = 0.5 // default if no similarity score
}

// Recency decay: half-life of 7 days
ageDays := now.Sub(time.Unix(obs.CreatedAtEpoch/1000, 0)).Hours() / 24.0
recency := math.Pow(0.5, ageDays/7.0)
// Floor at 0.05 so old but very important observations don't disappear
if recency < 0.05 {
recency = 0.05
}

// Type weight
tw := 1.0
if w, ok := typeWeights[obs.Type]; ok {
tw = w
}

// Importance (floor at 0.3 so unscored observations aren't penalized to zero)
imp := obs.ImportanceScore
if imp < 0.3 {
imp = 0.3
}

// Composite score replaces raw similarity
compositeScore := sim * recency * tw * imp
similarityScores[obs.ID] = compositeScore
}
}

// Manager provides unified search across PostgreSQL and pgvector.
type Manager struct {
ctx context.Context
Expand Down
12 changes: 8 additions & 4 deletions internal/worker/handlers_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
}

// Apply cross-encoder reranking if available
var reranked bool
if s.reranker != nil && len(freshObservations) > 0 && usedVector {
// Build candidates from observations with their bi-encoder scores
candidates := make([]reranking.Candidate, len(freshObservations))
Expand Down Expand Up @@ -291,7 +290,6 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
}
}
freshObservations = reorderedObs
reranked = true

log.Debug().
Int("candidates", len(candidates)).
Expand All @@ -304,8 +302,14 @@ func (s *Service) handleSearchByPrompt(w http.ResponseWriter, r *http.Request) {
clusteredObservations := clusterObservations(freshObservations, s.config.ClusteringThreshold)
duplicatesRemoved := len(freshObservations) - len(clusteredObservations)

// Sort by similarity score (highest first) if we have scores and didn't rerank
if len(similarityScores) > 0 && len(clusteredObservations) > 0 && !reranked {
// Apply composite scoring (recency × type × importance) as a post-processing step.
// This re-weights scores already computed by vector search or cross-encoder reranking.
if len(clusteredObservations) > 0 {
search.ApplyCompositeScoring(clusteredObservations, similarityScores)
}

// Sort by composite score (highest first)
if len(similarityScores) > 0 && len(clusteredObservations) > 0 {
sort.Slice(clusteredObservations, func(i, j int) bool {
scoreI := similarityScores[clusteredObservations[i].ID]
scoreJ := similarityScores[clusteredObservations[j].ID]
Expand Down
Loading