diff --git a/src/app/runtime-container.ts b/src/app/runtime-container.ts index f332af5..7e494f7 100644 --- a/src/app/runtime-container.ts +++ b/src/app/runtime-container.ts @@ -108,6 +108,9 @@ export interface CoreRuntimeConfig { queryExpansionMinSimilarity: number; recencyBinBoostEnabled: boolean; recencyBinBoostWeight: number; + /** EXP-21: per-entity temporal linkage (ingest write + retrieval boost). */ + perEntityTemporalLinkageEnabled: boolean; + perEntityTemporalLinkageBoostWeight: number; repairConfidenceFloor: number; repairDeltaThreshold: number; repairLoopEnabled: boolean; diff --git a/src/config.ts b/src/config.ts index 6655f20..4f64825 100644 --- a/src/config.ts +++ b/src/config.ts @@ -140,6 +140,15 @@ export interface RuntimeConfig { temporalQueryConstraintBoost: number; recencyBinBoostEnabled: boolean; recencyBinBoostWeight: number; + /** + * EXP-21: when true, ingest writes per-entity temporal linkage rows for + * every fact stored, and retrieval traverses the per-entity timeline to + * boost candidates by their chronological position. Defaults-off per + * Sprint 2 rule. See `src/services/entity-temporal-linkage.ts`. + */ + perEntityTemporalLinkageEnabled: boolean; + /** EXP-21: additive score boost applied per linkage-list rank. */ + perEntityTemporalLinkageBoostWeight: number; eventBoundaryExtractionEnabled: boolean; eventBoundaryRetrievalBoost: number; retrievalConfidenceGateEnabled: boolean; @@ -398,6 +407,10 @@ export const config: RuntimeConfig = { temporalQueryConstraintBoost: parseFloat(optionalEnv('TEMPORAL_QUERY_CONSTRAINT_BOOST') ?? '2'), recencyBinBoostEnabled: (optionalEnv('RECENCY_BIN_BOOST_ENABLED') ?? 'false') === 'true', recencyBinBoostWeight: parseFloat(optionalEnv('RECENCY_BIN_BOOST_WEIGHT') ?? '0.10'), + perEntityTemporalLinkageEnabled: + (optionalEnv('PER_ENTITY_TEMPORAL_LINKAGE_ENABLED') ?? 'false') === 'true', + perEntityTemporalLinkageBoostWeight: + parseFloat(optionalEnv('PER_ENTITY_TEMPORAL_LINKAGE_BOOST_WEIGHT') ?? '0.15'), eventBoundaryExtractionEnabled: (optionalEnv('EVENT_BOUNDARY_EXTRACTION_ENABLED') ?? 'false') === 'true', eventBoundaryRetrievalBoost: parseFloat(optionalEnv('EVENT_BOUNDARY_RETRIEVAL_BOOST') ?? '0.4'), retrievalConfidenceGateEnabled: (optionalEnv('RETRIEVAL_CONFIDENCE_GATE_ENABLED') ?? 'false') === 'true', @@ -549,6 +562,8 @@ export const INTERNAL_POLICY_CONFIG_FIELDS = [ 'temporalQueryConstraintEnabled', 'temporalQueryConstraintBoost', // Recency-bin boost (EXP-12) 'recencyBinBoostEnabled', 'recencyBinBoostWeight', + // Per-entity temporal linkage (EXP-21) + 'perEntityTemporalLinkageEnabled', 'perEntityTemporalLinkageBoostWeight', // Event boundary extraction (EXP-13) 'eventBoundaryExtractionEnabled', 'eventBoundaryRetrievalBoost', // Retrieval confidence gate (EXP-14) diff --git a/src/db/memory-repository.ts b/src/db/memory-repository.ts index 45d0402..58672f7 100644 --- a/src/db/memory-repository.ts +++ b/src/db/memory-repository.ts @@ -71,6 +71,11 @@ import { type StoreAtomicFactInput, type StoreForesightInput, } from './repository-representations.js'; +import { + listEntityTemporalLinks, + storeEntityTemporalLinks, + type StoreTemporalLinkInput, +} from './repository-entity-temporal-links.js'; export type { AgentScope, AtomicFactRow, @@ -299,6 +304,14 @@ export class MemoryRepository { return listForesightForMemory(this.pool, userId, parentMemoryId); } + async storeEntityTemporalLinks(links: StoreTemporalLinkInput[]) { + return storeEntityTemporalLinks(this.pool, links); + } + + async listEntityTemporalLinks(userId: string, entityId: string, limit: number) { + return listEntityTemporalLinks(this.pool, userId, entityId, limit); + } + async deleteBySource(userId: string, sourceSite: string) { return deleteBySource(this.pool, userId, sourceSite); } diff --git a/src/db/pg-representation-store.ts b/src/db/pg-representation-store.ts index 9aa66e8..35b0c7e 100644 --- a/src/db/pg-representation-store.ts +++ b/src/db/pg-representation-store.ts @@ -15,6 +15,11 @@ import { type StoreAtomicFactInput, type StoreForesightInput, } from './repository-representations.js'; +import { + listEntityTemporalLinks, + storeEntityTemporalLinks, + type StoreTemporalLinkInput, +} from './repository-entity-temporal-links.js'; export class PgRepresentationStore implements RepresentationStore { constructor(private pool: pg.Pool) {} @@ -25,4 +30,8 @@ export class PgRepresentationStore implements RepresentationStore { async listForesightForMemory(userId: string, parentMemoryId: string) { return listForesightForMemory(this.pool, userId, parentMemoryId); } async replaceAtomicFactsForMemory(userId: string, parentMemoryId: string, facts: StoreAtomicFactInput[]) { return replaceAtomicFactsForMemory(this.pool, userId, parentMemoryId, facts); } async replaceForesightForMemory(userId: string, parentMemoryId: string, entries: StoreForesightInput[]) { return replaceForesightForMemory(this.pool, userId, parentMemoryId, entries); } + async storeEntityTemporalLinks(links: StoreTemporalLinkInput[]) { return storeEntityTemporalLinks(this.pool, links); } + async listEntityTemporalLinks(userId: string, entityId: string, limit: number) { + return listEntityTemporalLinks(this.pool, userId, entityId, limit); + } } diff --git a/src/db/repository-entity-temporal-links.ts b/src/db/repository-entity-temporal-links.ts new file mode 100644 index 0000000..e37f317 --- /dev/null +++ b/src/db/repository-entity-temporal-links.ts @@ -0,0 +1,81 @@ +/** + * EXP-21: Per-entity temporal linkage repository. + * + * Insert one row per (entity, fact) pair so retrieval can walk a sparse + * per-entity timeline ordered by `created_at`. The table is keyed by + * lowercase entity name (TEXT) so the writer doesn't need to consult the + * `entities` table — entity extraction in `extraction.ts` already produces + * canonical names, and we want the linkage to work even when the entity + * graph is disabled. + */ + +import type pg from 'pg'; + +type Queryable = Pick | pg.PoolClient; + +export interface StoreTemporalLinkInput { + userId: string; + entityId: string; + factId: string; + createdAt?: Date; +} + +export interface EntityTemporalLinkRow { + fact_id: string; + parent_memory_id: string; + created_at: Date; +} + +/** Insert one linkage row per input. Caller dedupes (entity, fact) pairs. */ +export async function storeEntityTemporalLinks( + queryable: Queryable, + links: StoreTemporalLinkInput[], +): Promise { + if (links.length === 0) return 0; + let inserted = 0; + for (const link of links) { + const result = await queryable.query( + `INSERT INTO atomic_entity_temporal_links (user_id, entity_id, fact_id, created_at) + VALUES ($1, $2, $3, $4)`, + [ + link.userId, + link.entityId, + link.factId, + (link.createdAt ?? new Date()).toISOString(), + ], + ); + inserted += result.rowCount ?? 0; + } + return inserted; +} + +/** + * Fetch the per-entity temporal link list for a single entity. Joined to + * `memory_atomic_facts` to surface the `parent_memory_id`, which is the + * id used as the SearchResult key by the rest of the pipeline. + * + * Ordered by `created_at ASC` so position 0 is the chronologically first + * fact mentioning this entity. Callers that want most-recent-first can + * reverse — the index supports both directions. + */ +export async function listEntityTemporalLinks( + queryable: Queryable, + userId: string, + entityId: string, + limit: number, +): Promise { + const result = await queryable.query( + `SELECT l.fact_id, f.parent_memory_id, l.created_at + FROM atomic_entity_temporal_links l + JOIN memory_atomic_facts f ON f.id = l.fact_id + WHERE l.user_id = $1 AND l.entity_id = $2 + ORDER BY l.created_at ASC + LIMIT $3`, + [userId, entityId, limit], + ); + return result.rows.map((row) => ({ + fact_id: String(row.fact_id), + parent_memory_id: String(row.parent_memory_id), + created_at: row.created_at instanceof Date ? row.created_at : new Date(row.created_at), + })); +} diff --git a/src/db/schema.sql b/src/db/schema.sql index b5332bd..ba52bf7 100644 --- a/src/db/schema.sql +++ b/src/db/schema.sql @@ -432,3 +432,33 @@ CREATE INDEX IF NOT EXISTS idx_memory_atomic_facts_workspace ON memory_atomic_facts (workspace_id) WHERE workspace_id IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_memory_foresight_workspace ON memory_foresight (workspace_id) WHERE workspace_id IS NOT NULL; + +-- --------------------------------------------------------------------------- +-- EXP-21: Per-entity temporal linkage list. +-- +-- Sparse linked-list per entity sorted by `created_at`. At ingest time, when +-- the per-entity temporal linkage flag is on, every fact stored emits one row +-- per entity it mentions. At retrieval time, the search pipeline walks this +-- list in chronological order to boost facts by their position so event- +-- ordering (BEAM EO) and multi-session reasoning (BEAM MR) queries surface +-- entity-scoped chronology. +-- +-- `entity_id` is the lowercase canonical entity name (matching the extraction +-- stage's casing). `fact_id` references `memory_atomic_facts(id)` — the +-- atomic-fact projection produced by the same ingest path. ON DELETE CASCADE +-- keeps the linkage in sync with fact lifecycle (UPDATE/SUPERSEDE deletes +-- replace the projection rows, and so the linkage rows too). +-- --------------------------------------------------------------------------- + +CREATE TABLE IF NOT EXISTS atomic_entity_temporal_links ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id TEXT NOT NULL, + entity_id TEXT NOT NULL, + fact_id UUID NOT NULL REFERENCES memory_atomic_facts(id) ON DELETE CASCADE, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_atomic_entity_temporal_links_traversal + ON atomic_entity_temporal_links (user_id, entity_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_atomic_entity_temporal_links_fact + ON atomic_entity_temporal_links (fact_id); diff --git a/src/db/stores.ts b/src/db/stores.ts index 072671f..e6b38b1 100644 --- a/src/db/stores.ts +++ b/src/db/stores.ts @@ -23,6 +23,7 @@ import type { } from './repository-types.js'; import type { CandidateRow } from './repository-vector-search.js'; import type { StoreAtomicFactInput, StoreForesightInput } from './repository-representations.js'; +import type { StoreTemporalLinkInput, EntityTemporalLinkRow } from './repository-entity-temporal-links.js'; import type { MemoryLink } from './repository-links.js'; // StoreMemoryInput is shared with the repository write path; re-exported @@ -114,6 +115,10 @@ export interface RepresentationStore { listForesightForMemory(userId: string, parentMemoryId: string): Promise; replaceAtomicFactsForMemory(userId: string, parentMemoryId: string, facts: StoreAtomicFactInput[]): Promise; replaceForesightForMemory(userId: string, parentMemoryId: string, entries: StoreForesightInput[]): Promise; + /** EXP-21: append per-entity temporal linkage rows. */ + storeEntityTemporalLinks(links: StoreTemporalLinkInput[]): Promise; + /** EXP-21: fetch the per-entity link list ordered by created_at ASC. */ + listEntityTemporalLinks(userId: string, entityId: string, limit: number): Promise; } // --------------------------------------------------------------------------- diff --git a/src/services/__tests__/entity-temporal-linkage.test.ts b/src/services/__tests__/entity-temporal-linkage.test.ts new file mode 100644 index 0000000..d97b15c --- /dev/null +++ b/src/services/__tests__/entity-temporal-linkage.test.ts @@ -0,0 +1,240 @@ +/** + * EXP-21: per-entity temporal linkage retrieval boost. + * + * Covers: + * - flag-off → no-op (returns input reference, no DB calls). + * - 3 facts about Stripe at different timestamps → ordered chronologically. + * - query without entities → no-op + empty matchedEntities. + * - candidates without matching links → applied=false but flag still on. + * - earliest mention across multiple matched entities wins the rank. + * - boost weight=0 short-circuits. + * - normalizeEntityId lowercases + collapses whitespace. + */ + +import { describe, expect, it, vi } from 'vitest'; +import { + applyEntityTemporalLinkageBoost, + normalizeEntityId, + type EntityTemporalLinkageConfig, +} from '../entity-temporal-linkage.js'; +import { createSearchResult } from './test-fixtures.js'; +import type { RepresentationStore } from '../../db/stores.js'; +import type { EntityTemporalLinkRow } from '../../db/repository-entity-temporal-links.js'; + +const USER = 'user-1'; + +function buildConfig(overrides: Partial = {}): EntityTemporalLinkageConfig { + return { + perEntityTemporalLinkageEnabled: true, + perEntityTemporalLinkageBoostWeight: 0.6, + ...overrides, + }; +} + +/** + * Minimal RepresentationStore stub backed by a fixed link map. We only + * exercise `listEntityTemporalLinks`; everything else throws to surface + * accidental dependencies on the rest of the surface. + */ +function buildRepresentation( + linksByEntity: Map, +): { store: RepresentationStore; calls: string[] } { + const calls: string[] = []; + const store: RepresentationStore = { + storeAtomicFacts: async () => { throw new Error('not used'); }, + storeForesight: async () => { throw new Error('not used'); }, + listAtomicFactsForMemory: async () => { throw new Error('not used'); }, + listForesightForMemory: async () => { throw new Error('not used'); }, + replaceAtomicFactsForMemory: async () => { throw new Error('not used'); }, + replaceForesightForMemory: async () => { throw new Error('not used'); }, + storeEntityTemporalLinks: async () => { throw new Error('not used'); }, + listEntityTemporalLinks: vi.fn(async (_userId: string, entityId: string, _limit: number) => { + calls.push(entityId); + return linksByEntity.get(entityId) ?? []; + }), + }; + return { store, calls }; +} + +function row(id: string, ms: number): EntityTemporalLinkRow { + return { fact_id: `f-${id}`, parent_memory_id: id, created_at: new Date(ms) }; +} + +describe('applyEntityTemporalLinkageBoost', () => { + it('returns the input unchanged when the flag is off', async () => { + const candidates = [ + createSearchResult({ id: 'a', score: 1.0 }), + createSearchResult({ id: 'b', score: 0.5 }), + ]; + const { store, calls } = buildRepresentation(new Map()); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'tell me about Stripe', + candidates, + userId: USER, + representation: store, + config: buildConfig({ perEntityTemporalLinkageEnabled: false }), + }); + + expect(out.applied).toBe(false); + expect(out.results).toBe(candidates); + expect(calls).toEqual([]); + }); + + it('orders three Stripe facts chronologically — earliest first', async () => { + // Three facts about Stripe persisted at different times. + const facts = [ + createSearchResult({ id: 'late', score: 1.0, content: 'Stripe later' }), + createSearchResult({ id: 'early', score: 1.0, content: 'Stripe earliest' }), + createSearchResult({ id: 'mid', score: 1.0, content: 'Stripe middle' }), + ]; + // Linkage list: earliest -> mid -> late. + const links = new Map([ + ['stripe', [row('early', 1_000), row('mid', 2_000), row('late', 3_000)]], + ]); + const { store } = buildRepresentation(links); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'when did we first onboard Stripe', + candidates: facts, + userId: USER, + representation: store, + config: buildConfig(), + }); + + expect(out.applied).toBe(true); + expect(out.matchedEntities).toEqual(['stripe']); + // 'early' (rank 0/2 → factor 1.0) and 'mid' (rank 1/2 → factor 0.5) + // both get a boost; 'late' (rank 2/2 → factor 0) keeps its base score. + // The chronological order still wins because the boost makes 'early' + // strictly higher than 'mid', and both higher than 'late'. + expect(out.boostedCount).toBe(2); + expect(out.results.map((r) => r.id)).toEqual(['early', 'mid', 'late']); + const byId = new Map(out.results.map((r) => [r.id, r.score])); + expect(byId.get('early')).toBeCloseTo(1.6, 5); + expect(byId.get('mid')).toBeCloseTo(1.3, 5); + expect(byId.get('late')).toBeCloseTo(1.0, 5); + }); + + it('is a no-op when the query mentions no entity', async () => { + const candidates = [createSearchResult({ id: 'a', score: 0.7 })]; + const { store, calls } = buildRepresentation(new Map()); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'what was decided?', + candidates, + userId: USER, + representation: store, + config: buildConfig(), + }); + + expect(out.applied).toBe(false); + expect(out.matchedEntities).toEqual([]); + expect(calls).toEqual([]); + }); + + it('applied=false when no candidate matches a link list entry', async () => { + const candidates = [createSearchResult({ id: 'unrelated', score: 0.7 })]; + const links = new Map([ + ['stripe', [row('other', 1_000)]], + ]); + const { store } = buildRepresentation(links); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'what about Stripe', + candidates, + userId: USER, + representation: store, + config: buildConfig(), + }); + + // Linkage exists but has no overlap with the candidate set, so we + // produced no rank deltas — count is zero, applied=false. + expect(out.boostedCount).toBe(0); + expect(out.applied).toBe(false); + expect(out.results).toEqual(candidates); + }); + + it('uses the strongest (smallest) rank when multiple entities link the same memory', async () => { + const candidates = [ + createSearchResult({ id: 'shared', score: 1.0 }), + createSearchResult({ id: 'only-stripe', score: 1.0 }), + ]; + // 'shared' is rank 5 in Stripe's list but rank 0 in Acme's. We expect + // rank 0 to win, giving 'shared' the full weight boost. + const stripeLinks = Array.from({ length: 6 }, (_, i) => row(`s${i}`, i * 100)); + stripeLinks[5] = row('shared', 500); + const acmeLinks = [row('shared', 50), row('only-stripe', 60)]; + + const links = new Map([ + ['stripe', stripeLinks], + ['acme', acmeLinks], + ]); + const { store } = buildRepresentation(links); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'compare Stripe and Acme', + candidates, + userId: USER, + representation: store, + config: buildConfig({ perEntityTemporalLinkageBoostWeight: 1.0 }), + }); + + expect(out.applied).toBe(true); + // 'shared' got rank 0 in Acme list (full weight=1.0) → score 2.0. + // 'only-stripe' got rank 1 of 2 in Acme list (factor=0) → score 1.0. + const byId = new Map(out.results.map((r) => [r.id, r.score])); + expect(byId.get('shared')).toBeCloseTo(2.0, 5); + expect(byId.get('only-stripe')).toBeCloseTo(1.0, 5); + }); + + it('respects a weight of 0 — short-circuits before the DB call', async () => { + const candidates = [createSearchResult({ id: 'a', score: 1.0 })]; + const { store, calls } = buildRepresentation( + new Map([['stripe', [row('a', 1_000)]]]), + ); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'about Stripe', + candidates, + userId: USER, + representation: store, + config: buildConfig({ perEntityTemporalLinkageBoostWeight: 0 }), + }); + + expect(out.applied).toBe(false); + expect(out.results).toBe(candidates); + expect(calls).toEqual([]); + }); + + it('handles empty candidates without touching the store', async () => { + const { store, calls } = buildRepresentation(new Map()); + + const out = await applyEntityTemporalLinkageBoost({ + query: 'about Stripe', + candidates: [], + userId: USER, + representation: store, + config: buildConfig(), + }); + + expect(out.applied).toBe(false); + expect(out.results).toEqual([]); + expect(calls).toEqual([]); + }); +}); + +describe('normalizeEntityId', () => { + it('lowercases the input', () => { + expect(normalizeEntityId('Stripe')).toBe('stripe'); + expect(normalizeEntityId('ACME')).toBe('acme'); + }); + + it('collapses interior whitespace', () => { + expect(normalizeEntityId(' New York ')).toBe('new york'); + }); + + it('preserves single spaces between words', () => { + expect(normalizeEntityId('Acme Corp')).toBe('acme corp'); + }); +}); diff --git a/src/services/entity-temporal-linkage.ts b/src/services/entity-temporal-linkage.ts new file mode 100644 index 0000000..2d0fc42 --- /dev/null +++ b/src/services/entity-temporal-linkage.ts @@ -0,0 +1,176 @@ +/** + * Per-entity temporal linkage retrieval boost (EXP-21). + * + * Targets BEAM EO (event ordering) and MR (multi-session reasoning): + * current bag-of-facts retrieval has no entity-graph-aware ordering, so + * questions like "what happened first?" lose chronology. + * + * Ingest side (see `memory-storage.ts`): when the flag is on, every stored + * fact emits one row per mentioned entity into `atomic_entity_temporal_links` + * — a sparse linked list per entity sorted by `created_at`. + * + * Retrieval side (this module): when the query mentions an entity AND the + * flag is on, walk the per-entity link list in chronological order and boost + * each fact's score by `weight * (1 - normalizedRank)` where rank=0 is the + * earliest mention. Earlier mentions surface first, which matches the + * "what happened first" framing the EO benchmark exercises. + * + * Defaults-off. Stage is wired AFTER current-state-ranking but BEFORE the + * final RRF rerank (same insertion site as the EXP-12 recency-bin boost). + */ + +import type { SearchResult } from '../db/repository-types.js'; +import type { RepresentationStore } from '../db/stores.js'; +import { extractNamedEntityCandidates } from './query-expansion.js'; + +export interface EntityTemporalLinkageConfig { + /** Master flag. When false, the stage is a strict no-op. */ + perEntityTemporalLinkageEnabled: boolean; + /** Maximum additive boost applied to the chronologically first fact. */ + perEntityTemporalLinkageBoostWeight: number; +} + +export interface EntityTemporalLinkageInput { + query: string; + candidates: SearchResult[]; + userId: string; + representation: RepresentationStore; + config: EntityTemporalLinkageConfig; + /** + * Cap on how many links per entity we fetch to avoid unbounded scans on + * pathologically chatty entities. The retrieval boost only needs the + * head of the timeline; tail facts wouldn't cross any score threshold. + */ + perEntityFetchLimit?: number; +} + +export interface EntityTemporalLinkageResult { + applied: boolean; + matchedEntities: string[]; + boostedCount: number; + results: SearchResult[]; +} + +const DEFAULT_FETCH_LIMIT = 64; + +const NO_OP = (candidates: SearchResult[]): EntityTemporalLinkageResult => ({ + applied: false, + matchedEntities: [], + boostedCount: 0, + results: candidates, +}); + +/** + * Apply the per-entity temporal-linkage retrieval boost. + * + * - Returns the candidate list unchanged when the flag is off, the query + * has no extractable entity, or the candidate set is empty. + * - Otherwise returns a re-sorted copy. The input array is not mutated. + */ +export async function applyEntityTemporalLinkageBoost( + input: EntityTemporalLinkageInput, +): Promise { + const { query, candidates, userId, representation, config } = input; + if (!config.perEntityTemporalLinkageEnabled) return NO_OP(candidates); + if (candidates.length === 0) return NO_OP(candidates); + if (!Number.isFinite(config.perEntityTemporalLinkageBoostWeight)) return NO_OP(candidates); + if (config.perEntityTemporalLinkageBoostWeight === 0) return NO_OP(candidates); + + const queryEntities = extractQueryEntities(query); + if (queryEntities.length === 0) return NO_OP(candidates); + + const fetchLimit = input.perEntityFetchLimit ?? DEFAULT_FETCH_LIMIT; + const rankByMemoryId = await buildLinkageRanks( + representation, userId, queryEntities, fetchLimit, + ); + if (rankByMemoryId.size === 0) { + return { applied: false, matchedEntities: queryEntities, boostedCount: 0, results: candidates }; + } + + const adjusted = rescoreByLinkageRank( + candidates, rankByMemoryId, config.perEntityTemporalLinkageBoostWeight, + ); + return { + applied: adjusted.boostedCount > 0, + matchedEntities: queryEntities, + boostedCount: adjusted.boostedCount, + results: adjusted.results, + }; +} + +/** + * Resolve the canonical entity ids the query refers to. We mirror ingest's + * lowercase normalization so retrieval looks up the same key the writer + * inserted under. + */ +function extractQueryEntities(query: string): string[] { + const candidates = extractNamedEntityCandidates(query); + const normalized = new Set(); + for (const candidate of candidates) { + const id = normalizeEntityId(candidate); + if (id.length >= 2) normalized.add(id); + } + return [...normalized]; +} + +/** Lowercase + collapse whitespace; used by both writer and reader. */ +export function normalizeEntityId(name: string): string { + return name.trim().toLowerCase().replace(/\s+/g, ' '); +} + +/** + * Walk the per-entity timeline for each query entity. For each linked + * memory, keep the strongest (smallest) rank seen across entities — facts + * mentioned earliest in the timeline of any matching entity get the + * largest boost. + */ +async function buildLinkageRanks( + representation: RepresentationStore, + userId: string, + entities: string[], + fetchLimit: number, +): Promise> { + const ranks = new Map(); + for (const entityId of entities) { + const links = await representation.listEntityTemporalLinks(userId, entityId, fetchLimit); + if (links.length === 0) continue; + const total = links.length; + for (let i = 0; i < links.length; i++) { + const memoryId = links[i].parent_memory_id; + const existing = ranks.get(memoryId); + if (!existing || i < existing.rank) { + ranks.set(memoryId, { rank: i, total }); + } + } + } + return ranks; +} + +interface RescoreResult { + results: SearchResult[]; + boostedCount: number; +} + +/** + * Add `weight * positionFactor` to each candidate where positionFactor = + * `1 - rank / max(total - 1, 1)`. The chronologically first fact gets the + * full weight; later facts decay linearly. Re-sort by score descending. + */ +function rescoreByLinkageRank( + candidates: SearchResult[], + rankByMemoryId: Map, + weight: number, +): RescoreResult { + let boostedCount = 0; + const adjusted = candidates.map((result) => { + const ranking = rankByMemoryId.get(result.id); + if (!ranking) return result; + const denom = Math.max(ranking.total - 1, 1); + const factor = 1 - (ranking.rank / denom); + if (factor <= 0) return result; + boostedCount += 1; + return { ...result, score: result.score + weight * factor }; + }); + adjusted.sort((left, right) => right.score - left.score); + return { results: adjusted, boostedCount }; +} diff --git a/src/services/memory-search.ts b/src/services/memory-search.ts index 40d0791..5207e05 100644 --- a/src/services/memory-search.ts +++ b/src/services/memory-search.ts @@ -69,7 +69,14 @@ async function executeSearchStep( trace.stage('as-of-search', memories, { asOf }); return { memories, activeTrace: trace, retrievalConfidence: null }; } - const pipelineStores = { search: deps.stores.search, link: deps.stores.link, memory: deps.stores.memory, entity: deps.stores.entity, pool: deps.stores.pool }; + const pipelineStores = { + search: deps.stores.search, + link: deps.stores.link, + memory: deps.stores.memory, + entity: deps.stores.entity, + representation: deps.stores.representation, + pool: deps.stores.pool, + }; const pipelineResult = await runSearchPipelineWithTrace(pipelineStores, userId, query, effectiveLimit, sourceSite, referenceTime, { namespaceScope, retrievalMode: retrievalOptions?.retrievalMode, diff --git a/src/services/memory-storage.ts b/src/services/memory-storage.ts index 5e2b2e7..07d1e28 100644 --- a/src/services/memory-storage.ts +++ b/src/services/memory-storage.ts @@ -10,6 +10,7 @@ import { classifyNetwork } from './memory-network.js'; import { buildRelationClaimSlot } from './claim-slotting.js'; import { extractConflictKeywords, mergeCandidates, type CandidateMemory } from './conflict-policy.js'; import { buildAtomicFactProjection, buildForesightProjections } from './memcell-projection.js'; +import { normalizeEntityId } from './entity-temporal-linkage.js'; import { inferNamespace, classifyNamespace } from './namespace-retrieval.js'; import { generateL1Overview } from './tiered-context.js'; import { emitAuditEvent } from './audit-events.js'; @@ -134,7 +135,7 @@ export async function storeProjection( }); const atomicFact = buildAtomicFactProjection(fact, embedding); - await deps.stores.representation.storeAtomicFacts([{ + const factIds = await deps.stores.representation.storeAtomicFacts([{ userId, parentMemoryId: memoryId, factText: atomicFact.factText, embedding: atomicFact.embedding, factType: atomicFact.factType, importance: atomicFact.importance, @@ -142,6 +143,7 @@ export async function storeProjection( keywords: atomicFact.keywords.join(' '), metadata: atomicFact.metadata, workspaceId: options.workspace?.workspaceId, agentId: options.workspace?.agentId, }]); + await maybeWriteEntityTemporalLinks(deps, userId, fact.entities, factIds, createdAt); const foresight = buildForesightProjections(fact, embedding); if (foresight.length > 0) { @@ -295,6 +297,36 @@ async function storeRelations( } } +/** + * EXP-21: emit one per-entity temporal-link row for every entity mentioned + * in the just-stored fact. No-op when the flag is off, the fact has zero + * extracted entities, or no atomic-fact projection ids were returned (e.g. + * extraction produced an empty fact). Errors are logged and propagated — + * fail closed per the Sprint 2 rule. + */ +async function maybeWriteEntityTemporalLinks( + deps: MemoryServiceDeps, + userId: string, + entities: ExtractedEntity[], + factIds: string[], + createdAt: Date, +): Promise { + if (!deps.config.perEntityTemporalLinkageEnabled) return; + if (entities.length === 0 || factIds.length === 0) return; + const factId = factIds[0]; + const seen = new Set(); + const links = entities + .map((entity) => normalizeEntityId(entity.name)) + .filter((entityId) => { + if (entityId.length < 2 || seen.has(entityId)) return false; + seen.add(entityId); + return true; + }) + .map((entityId) => ({ userId, entityId, factId, createdAt })); + if (links.length === 0) return; + await deps.stores.representation.storeEntityTemporalLinks(links); +} + /** Ensure a claim target exists for the given memory, creating one if needed. */ export async function ensureClaimTarget(deps: MemoryServiceDeps, userId: string, memoryId: string): Promise { const memory = await deps.stores.memory.getMemoryIncludingDeleted(memoryId, userId); diff --git a/src/services/search-pipeline.ts b/src/services/search-pipeline.ts index 4769c7c..d94ff81 100644 --- a/src/services/search-pipeline.ts +++ b/src/services/search-pipeline.ts @@ -9,7 +9,7 @@ import { config } from '../config.js'; import type pg from 'pg'; import type { CoreRuntimeConfig } from '../app/runtime-container.js'; import type { SearchResult } from '../db/repository-types.js'; -import type { SearchStore, SemanticLinkStore, MemoryStore, EntityStore } from '../db/stores.js'; +import type { SearchStore, SemanticLinkStore, MemoryStore, EntityStore, RepresentationStore } from '../db/stores.js'; import { embedText } from './embedding.js'; import { rewriteQuery } from './extraction.js'; import { @@ -38,6 +38,7 @@ import { applyCurrentStateRanking } from './current-state-ranking.js'; import { applyConcisenessPenalty } from './conciseness-preference.js'; import { applyInstructionBoost } from './instruction-boost.js'; import { applyRecencyBinBoost } from './recency-bin-ranking.js'; +import { applyEntityTemporalLinkageBoost } from './entity-temporal-linkage.js'; import { applyEventBoundaryBoost } from './event-boundary-ranking.js'; import { protectLiteralListAnswerCandidates } from './literal-list-protection.js'; import { applyTemporalQueryConstraints } from './temporal-query-constraints.js'; @@ -88,6 +89,8 @@ export type SearchPipelineRuntimeConfig = Pick< | 'repairLoopMinSimilarity' | 'recencyBinBoostEnabled' | 'recencyBinBoostWeight' + | 'perEntityTemporalLinkageEnabled' + | 'perEntityTemporalLinkageBoostWeight' | 'eventBoundaryExtractionEnabled' | 'eventBoundaryRetrievalBoost' | 'rerankSkipMinGap' @@ -141,6 +144,8 @@ export interface SearchPipelineStores { link: SemanticLinkStore; memory: MemoryStore; entity: EntityStore | null; + /** EXP-21: optional representation store for per-entity temporal linkage. */ + representation?: RepresentationStore; /** Raw pool access — only used by personalizedPageRank. */ pool: pg.Pool; } @@ -721,10 +726,14 @@ async function applyExpansionAndReranking( referenceTime, ); + const linkageBoosted = await applyEntityTemporalLinkageStage( + stores, userId, query, ranked.candidates, trace, policyConfig, + ); + return selectAndExpandCandidates( stores, userId, - ranked.candidates, + linkageBoosted, queryEmbedding, limit, referenceTime, @@ -734,6 +743,42 @@ async function applyExpansionAndReranking( ); } +/** + * EXP-21: per-entity temporal linkage boost. Sits between the protection + * stages and `selectAndExpandCandidates` so it runs after current-state- + * ranking / recency-bin / event-boundary but before MMR + link expansion. + * No-op when the flag is off, the representation store isn't wired, or + * the query has no extractable entity. + */ +async function applyEntityTemporalLinkageStage( + stores: SearchPipelineStores, + userId: string, + query: string, + candidates: SearchResult[], + trace: TraceCollector, + policyConfig: SearchPipelineRuntimeConfig, +): Promise { + if (!policyConfig.perEntityTemporalLinkageEnabled) return candidates; + if (!stores.representation) return candidates; + const boost = await applyEntityTemporalLinkageBoost({ + query, + candidates, + userId, + representation: stores.representation, + config: { + perEntityTemporalLinkageEnabled: policyConfig.perEntityTemporalLinkageEnabled, + perEntityTemporalLinkageBoostWeight: policyConfig.perEntityTemporalLinkageBoostWeight, + }, + }); + if (!boost.applied) return candidates; + trace.stage('entity-temporal-linkage', boost.results, { + matchedEntities: boost.matchedEntities, + boostedCount: boost.boostedCount, + weight: policyConfig.perEntityTemporalLinkageBoostWeight, + }); + return boost.results; +} + interface RankedCandidateState { candidates: SearchResult[]; protectedFingerprints: string[];