Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions internal/db/gorm/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,80 @@ func runMigrations(db *gorm.DB, embeddingDims int) error {
return nil
},
},
// Migration 040: One-time cleanup of garbage observations created by SDK tool output extraction.
// Deletes observations whose titles match known garbage patterns (PowerShell errors, auth failures,
// stdin terminal checks, etc.), then deletes observation_vectors rows whose metadata sqlite_id
// matches no remaining observation.
{
ID: "040_cleanup_garbage_observations",
Migrate: func(tx *gorm.DB) error {
// Delete garbage observations by title pattern
garbagePatterns := []string{
"PowerShell%Error%",
"PowerShell%Anomaly%",
"PowerShell Dot-Source%",
"Stdin Terminal%",
"Authorization Header Missing%",
"FINDSTR%Cannot%",
"Missing Authentication%",
"JavaScript Property Setting%",
"Incorrect FINDSTR%",
"Invalid Argument in Child%",
"bufio Over-read%",
"Stdin Terminal Check%",
"File Lock Handling%",
"Upstream Connection%",
"TRACE Logging Removal%",
"npm install completion%",
"Stderr Input Handling%",
"Status Discrepancy Detection%",
"Job-Session ID Synchronization%",
"Incorrect Redirection Syntax%",
"Rename node_modules%",
"Case Sensitivity in Format%",
"Cleanup Function%Parameter%",
"Cleanup by startedAt%",
"Null%Numeric Properties%",
"User Cancellation Handling%",
}
Comment on lines +1335 to +1362

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The garbagePatterns list is hardcoded directly within the migration. That works for this one-time cleanup, but if similar cleanups are likely in the future, consider externalizing such lists into a configuration file or a dedicated database table. The patterns could then be updated without a new code deployment and migration, which improves maintainability.

var totalDeleted int64
for _, pattern := range garbagePatterns {
result := tx.Exec(`DELETE FROM observations WHERE title LIKE ?`, pattern)
if result.Error != nil {
log.Warn().Err(result.Error).Str("pattern", pattern).Msg("migration 040: delete pattern failed")
continue
}
totalDeleted += result.RowsAffected
}

// Delete orphan vectors: observation_vectors entries whose sqlite_id
// (stored in metadata) doesn't match any existing observation.
orphanResult := tx.Exec(`
DELETE FROM observation_vectors
WHERE id IN (
SELECT ov.id FROM observation_vectors ov
LEFT JOIN observations o ON ov.metadata->>'sqlite_id' = o.id::text
WHERE o.id IS NULL
)
`)
orphanCount := int64(0)
if orphanResult.Error != nil {
// observation_vectors table might not exist or have different schema — not fatal
log.Warn().Err(orphanResult.Error).Msg("migration 040: orphan vector cleanup failed (non-fatal)")
} else {
orphanCount = orphanResult.RowsAffected
}

log.Info().
Int64("garbage_deleted", totalDeleted).
Int64("orphan_vectors_deleted", orphanCount).
Msg("migration 040: garbage cleanup complete")
return nil
},
Rollback: func(tx *gorm.DB) error {
// One-time cleanup — no rollback possible
return nil
},
},
})
if err := m.Migrate(); err != nil {
return fmt.Errorf("run gormigrate migrations: %w", err)
Expand Down
8 changes: 5 additions & 3 deletions internal/graph/falkordb/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,13 @@ func (s *FalkorDBGraphStore) GetNeighbors(_ context.Context, obsID int64, maxHop
limit = 20
}

// Variable-length path query
// Variable-length path query — use named path to avoid FalkorDB
// "Type mismatch: expected List or Null but was Path" on relationship list ops.
query := fmt.Sprintf(
"MATCH (a:Observation {id: $id})-[r:REL*1..%d]-(b:Observation) "+
"MATCH p = (a:Observation {id: $id})-[:REL*1..%d]-(b:Observation) "+
"WHERE b.id <> $id "+
"RETURN DISTINCT b.id, length(r) as hops, head([x IN r | x.type]) as rel_type "+
"WITH DISTINCT b, length(p)-1 as hops, relationships(p) as rels "+
"RETURN b.id, hops, type(rels[0]) as rel_type "+
"ORDER BY hops "+
"LIMIT %d",
maxHops, limit,
Expand Down
Loading