diff --git a/src/graph/community.ts b/src/graph/community.ts index 4382fec..e2a1b24 100755 --- a/src/graph/community.ts +++ b/src/graph/community.ts @@ -24,8 +24,15 @@ * - kg_stats 展示社区分布 */ +import { createHash } from "node:crypto"; import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite"; -import { updateCommunities } from "../store/store.ts"; +import { + getCommunitySummary, + getCommunitySummaryBySignature, + pruneCommunitySummaries, + updateCommunities, + upsertCommunitySummary, +} from "../store/store.ts"; export interface CommunityResult { labels: Map; @@ -168,7 +175,6 @@ export function getCommunityPeers(db: DatabaseSyncInstance, nodeId: string, limi import type { CompleteFn } from "../engine/llm.ts"; import type { EmbedFn } from "../engine/embed.ts"; -import { upsertCommunitySummary, pruneCommunitySummaries } from "../store/store.ts"; const COMMUNITY_SUMMARY_SYS = `你是知识图谱摘要引擎。根据节点列表,用简短的描述概括这组节点的主题领域。 要求: @@ -176,6 +182,10 @@ const COMMUNITY_SUMMARY_SYS = `你是知识图谱摘要引擎。根据节点列 - 描述涵盖的工具/技术/任务领域 - 不要使用"社区"这个词`; +function buildCommunityMemberSignature(memberIds: string[]): string { + return createHash("sha1").update([...memberIds].sort().join(",")).digest("hex"); +} + /** * 为所有社区生成 LLM 摘要描述 + embedding 向量 * @@ -192,6 +202,25 @@ export async function summarizeCommunities( for (const [communityId, memberIds] of communities) { if (memberIds.length === 0) continue; + const memberSignature = buildCommunityMemberSignature(memberIds); + + const current = getCommunitySummary(db, communityId); + if (current?.memberSignature === memberSignature && current.summary.trim()) { + continue; + } + + const reusable = getCommunitySummaryBySignature(db, memberSignature); + if (reusable?.summary.trim()) { + upsertCommunitySummary( + db, + communityId, + reusable.summary, + memberIds.length, + reusable.embedding, + memberSignature, + ); + continue; + } const placeholders = memberIds.map(() => "?").join(","); const members = db.prepare(` @@ -238,7 +267,7 @@ export async function summarizeCommunities( } } - upsertCommunitySummary(db, communityId, cleaned, memberIds.length, embedding); + upsertCommunitySummary(db, communityId, cleaned, memberIds.length, embedding, memberSignature); generated++; } catch (err) { console.log(` [WARN] community summary failed for ${communityId}: ${err}`); @@ -246,4 +275,4 @@ export async function summarizeCommunities( } return generated; -} \ No newline at end of file +} diff --git a/src/store/db.ts b/src/store/db.ts index 60b24cd..81f3aab 100755 --- a/src/store/db.ts +++ b/src/store/db.ts @@ -5,6 +5,7 @@ * Email: Wywelljob@gmail.com */ +import { createHash } from "node:crypto"; import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite"; import { mkdirSync } from "fs"; import { homedir } from "os"; @@ -51,7 +52,16 @@ export function closeDb(): void { function migrate(db: DatabaseSyncInstance): void { db.exec(`CREATE TABLE IF NOT EXISTS _migrations (v INTEGER PRIMARY KEY, at INTEGER NOT NULL)`); const cur = (db.prepare("SELECT MAX(v) as v FROM _migrations").get() as any)?.v ?? 0; - const steps = [m1_core, m2_messages, m3_signals, m4_fts5, m5_vectors, m6_communities]; + const steps = [ + m1_core, + m2_messages, + m3_signals, + m4_fts5, + m5_vectors, + m6_communities, + m7_community_signature, + m8_backfill_community_signatures, + ]; for (let i = cur; i < steps.length; i++) { steps[i](db); db.prepare("INSERT INTO _migrations (v,at) VALUES (?,?)").run(i + 1, Date.now()); @@ -180,12 +190,49 @@ function m5_vectors(db: DatabaseSyncInstance): void { function m6_communities(db: DatabaseSyncInstance): void { db.exec(` CREATE TABLE IF NOT EXISTS gm_communities ( - id TEXT PRIMARY KEY, - summary TEXT NOT NULL, - node_count INTEGER NOT NULL DEFAULT 0, - embedding BLOB, - created_at INTEGER NOT NULL, - updated_at INTEGER NOT NULL + id TEXT PRIMARY KEY, + summary TEXT NOT NULL, + node_count INTEGER NOT NULL DEFAULT 0, + embedding BLOB, + member_signature TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL ); `); } + +function m7_community_signature(db: DatabaseSyncInstance): void { + const cols = db.prepare("PRAGMA table_info(gm_communities)").all() as Array<{ name?: string }>; + const hasMemberSignature = cols.some((col) => col.name === "member_signature"); + if (!hasMemberSignature) { + db.exec("ALTER TABLE gm_communities ADD COLUMN member_signature TEXT"); + } + db.exec("CREATE INDEX IF NOT EXISTS ix_gm_communities_member_signature ON gm_communities(member_signature)"); +} + +function m8_backfill_community_signatures(db: DatabaseSyncInstance): void { + const missing = db.prepare(` + SELECT id FROM gm_communities + WHERE member_signature IS NULL OR member_signature='' + `).all() as Array<{ id: string }>; + + for (const row of missing) { + const members = db.prepare(` + SELECT id FROM gm_nodes + WHERE community_id=? AND status='active' + ORDER BY id + `).all(row.id) as Array<{ id: string }>; + + if (!members.length) continue; + + const memberSignature = createHash("sha1") + .update(members.map((member) => member.id).join(",")) + .digest("hex"); + + db.prepare(` + UPDATE gm_communities + SET member_signature=?, updated_at=updated_at + WHERE id=? + `).run(memberSignature, row.id); + } +} diff --git a/src/store/store.ts b/src/store/store.ts index ec472a9..97f8712 100755 --- a/src/store/store.ts +++ b/src/store/store.ts @@ -531,39 +531,85 @@ export interface CommunitySummary { id: string; summary: string; nodeCount: number; + memberSignature: string | null; createdAt: number; updatedAt: number; } export function upsertCommunitySummary( - db: DatabaseSyncInstance, id: string, summary: string, nodeCount: number, embedding?: number[], + db: DatabaseSyncInstance, + id: string, + summary: string, + nodeCount: number, + embedding?: number[] | Uint8Array, + memberSignature?: string, ): void { const now = Date.now(); - const blob = embedding ? new Uint8Array(new Float32Array(embedding).buffer) : null; + const blob = embedding + ? embedding instanceof Uint8Array + ? embedding + : new Uint8Array(new Float32Array(embedding).buffer) + : null; const ex = db.prepare("SELECT id FROM gm_communities WHERE id=?").get(id) as any; if (ex) { if (blob) { - db.prepare("UPDATE gm_communities SET summary=?, node_count=?, embedding=?, updated_at=? WHERE id=?") - .run(summary, nodeCount, blob, now, id); + db.prepare("UPDATE gm_communities SET summary=?, node_count=?, embedding=?, member_signature=?, updated_at=? WHERE id=?") + .run(summary, nodeCount, blob, memberSignature ?? null, now, id); } else { - db.prepare("UPDATE gm_communities SET summary=?, node_count=?, updated_at=? WHERE id=?") - .run(summary, nodeCount, now, id); + db.prepare("UPDATE gm_communities SET summary=?, node_count=?, member_signature=?, updated_at=? WHERE id=?") + .run(summary, nodeCount, memberSignature ?? null, now, id); } } else { - db.prepare("INSERT INTO gm_communities (id, summary, node_count, embedding, created_at, updated_at) VALUES (?,?,?,?,?,?)") - .run(id, summary, nodeCount, blob, now, now); + db.prepare("INSERT INTO gm_communities (id, summary, node_count, embedding, member_signature, created_at, updated_at) VALUES (?,?,?,?,?,?,?)") + .run(id, summary, nodeCount, blob, memberSignature ?? null, now, now); } } export function getCommunitySummary(db: DatabaseSyncInstance, id: string): CommunitySummary | null { const r = db.prepare("SELECT * FROM gm_communities WHERE id=?").get(id) as any; if (!r) return null; - return { id: r.id, summary: r.summary, nodeCount: r.node_count, createdAt: r.created_at, updatedAt: r.updated_at }; + return { + id: r.id, + summary: r.summary, + nodeCount: r.node_count, + memberSignature: r.member_signature ?? null, + createdAt: r.created_at, + updatedAt: r.updated_at, + }; +} + +export function getCommunitySummaryBySignature( + db: DatabaseSyncInstance, + memberSignature: string, +): (CommunitySummary & { embedding?: Uint8Array }) | null { + const r = db.prepare(` + SELECT * FROM gm_communities + WHERE member_signature=? + ORDER BY updated_at DESC + LIMIT 1 + `).get(memberSignature) as any; + if (!r) return null; + return { + id: r.id, + summary: r.summary, + nodeCount: r.node_count, + memberSignature: r.member_signature ?? null, + embedding: r.embedding ? (r.embedding as Uint8Array) : undefined, + createdAt: r.created_at, + updatedAt: r.updated_at, + }; } export function getAllCommunitySummaries(db: DatabaseSyncInstance): CommunitySummary[] { return (db.prepare("SELECT * FROM gm_communities ORDER BY node_count DESC").all() as any[]) - .map(r => ({ id: r.id, summary: r.summary, nodeCount: r.node_count, createdAt: r.created_at, updatedAt: r.updated_at })); + .map(r => ({ + id: r.id, + summary: r.summary, + nodeCount: r.node_count, + memberSignature: r.member_signature ?? null, + createdAt: r.created_at, + updatedAt: r.updated_at, + })); } export type ScoredCommunity = { id: string; summary: string; score: number; nodeCount: number }; @@ -640,4 +686,4 @@ export function pruneCommunitySummaries(db: DatabaseSyncInstance): number { ) `).run(); return result.changes; -} \ No newline at end of file +} diff --git a/test/graph.test.ts b/test/graph.test.ts index d70dbae..6d16482 100755 --- a/test/graph.test.ts +++ b/test/graph.test.ts @@ -11,7 +11,7 @@ import { describe, it, expect, beforeEach } from "vitest"; import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite"; import { createTestDb, insertNode, insertEdge } from "./helpers.ts"; import { personalizedPageRank, computeGlobalPageRank, invalidateGraphCache } from "../src/graph/pagerank.ts"; -import { detectCommunities, getCommunityPeers } from "../src/graph/community.ts"; +import { detectCommunities, getCommunityPeers, summarizeCommunities } from "../src/graph/community.ts"; import { detectDuplicates, dedup } from "../src/graph/dedup.ts"; import { runMaintenance } from "../src/graph/maintenance.ts"; import { saveVector } from "../src/store/store.ts"; @@ -194,6 +194,26 @@ describe("Community Detection", () => { const { count } = detectCommunities(db); expect(count).toBe(0); }); + + it("reuses existing summaries when a community is unchanged", async () => { + const a = insertNode(db, { name: "docker-build" }); + const b = insertNode(db, { name: "docker-push" }); + insertEdge(db, { fromId: a, toId: b }); + + const { communities } = detectCommunities(db); + let llmCalls = 0; + const llm = async () => { + llmCalls += 1; + return "docker deployment skills"; + }; + + const first = await summarizeCommunities(db, communities, llm); + const second = await summarizeCommunities(db, communities, llm); + + expect(first).toBe(1); + expect(second).toBe(0); + expect(llmCalls).toBe(1); + }); }); // ═══════════════════════════════════════════════════════════════ @@ -298,4 +318,4 @@ describe("runMaintenance", () => { expect(result.pagerank.topK).toHaveLength(0); expect(result.community.count).toBe(0); }); -}); \ No newline at end of file +}); diff --git a/test/helpers.ts b/test/helpers.ts index aa3eace..90ebe29 100755 --- a/test/helpers.ts +++ b/test/helpers.ts @@ -113,6 +113,19 @@ export function createTestDb(): DatabaseSyncInstance { ); `); + // m6: 社区摘要 + db.exec(` + CREATE TABLE IF NOT EXISTS gm_communities ( + id TEXT PRIMARY KEY, + summary TEXT NOT NULL, + node_count INTEGER NOT NULL DEFAULT 0, + embedding BLOB, + member_signature TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL + ); + `); + return db; } @@ -176,4 +189,4 @@ export function insertEdge( "test-session", Date.now(), ); -} \ No newline at end of file +}