Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 33 additions & 4 deletions src/graph/community.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@
* - kg_stats 展示社区分布
*/

import { createHash } from "node:crypto";
import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite";
import { updateCommunities } from "../store/store.ts";
import {
getCommunitySummary,
getCommunitySummaryBySignature,
pruneCommunitySummaries,
updateCommunities,
upsertCommunitySummary,
} from "../store/store.ts";

export interface CommunityResult {
labels: Map<string, string>;
Expand Down Expand Up @@ -168,14 +175,17 @@ export function getCommunityPeers(db: DatabaseSyncInstance, nodeId: string, limi

import type { CompleteFn } from "../engine/llm.ts";
import type { EmbedFn } from "../engine/embed.ts";
import { upsertCommunitySummary, pruneCommunitySummaries } from "../store/store.ts";

const COMMUNITY_SUMMARY_SYS = `你是知识图谱摘要引擎。根据节点列表,用简短的描述概括这组节点的主题领域。
要求:
- 只返回短语本身,不要解释
- 描述涵盖的工具/技术/任务领域
- 不要使用"社区"这个词`;

function buildCommunityMemberSignature(memberIds: string[]): string {
return createHash("sha1").update([...memberIds].sort().join(",")).digest("hex");
}

/**
* 为所有社区生成 LLM 摘要描述 + embedding 向量
*
Expand All @@ -192,6 +202,25 @@ export async function summarizeCommunities(

for (const [communityId, memberIds] of communities) {
if (memberIds.length === 0) continue;
const memberSignature = buildCommunityMemberSignature(memberIds);

const current = getCommunitySummary(db, communityId);
if (current?.memberSignature === memberSignature && current.summary.trim()) {
continue;
}

const reusable = getCommunitySummaryBySignature(db, memberSignature);
if (reusable?.summary.trim()) {
upsertCommunitySummary(
db,
communityId,
reusable.summary,
memberIds.length,
reusable.embedding,
memberSignature,
);
continue;
}

const placeholders = memberIds.map(() => "?").join(",");
const members = db.prepare(`
Expand Down Expand Up @@ -238,12 +267,12 @@ export async function summarizeCommunities(
}
}

upsertCommunitySummary(db, communityId, cleaned, memberIds.length, embedding);
upsertCommunitySummary(db, communityId, cleaned, memberIds.length, embedding, memberSignature);
generated++;
} catch (err) {
console.log(` [WARN] community summary failed for ${communityId}: ${err}`);
}
}

return generated;
}
}
61 changes: 54 additions & 7 deletions src/store/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Email: Wywelljob@gmail.com
*/

import { createHash } from "node:crypto";
import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite";
import { mkdirSync } from "fs";
import { homedir } from "os";
Expand Down Expand Up @@ -51,7 +52,16 @@ export function closeDb(): void {
function migrate(db: DatabaseSyncInstance): void {
db.exec(`CREATE TABLE IF NOT EXISTS _migrations (v INTEGER PRIMARY KEY, at INTEGER NOT NULL)`);
const cur = (db.prepare("SELECT MAX(v) as v FROM _migrations").get() as any)?.v ?? 0;
const steps = [m1_core, m2_messages, m3_signals, m4_fts5, m5_vectors, m6_communities];
const steps = [
m1_core,
m2_messages,
m3_signals,
m4_fts5,
m5_vectors,
m6_communities,
m7_community_signature,
m8_backfill_community_signatures,
];
for (let i = cur; i < steps.length; i++) {
steps[i](db);
db.prepare("INSERT INTO _migrations (v,at) VALUES (?,?)").run(i + 1, Date.now());
Expand Down Expand Up @@ -180,12 +190,49 @@ function m5_vectors(db: DatabaseSyncInstance): void {
function m6_communities(db: DatabaseSyncInstance): void {
db.exec(`
CREATE TABLE IF NOT EXISTS gm_communities (
id TEXT PRIMARY KEY,
summary TEXT NOT NULL,
node_count INTEGER NOT NULL DEFAULT 0,
embedding BLOB,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
id TEXT PRIMARY KEY,
summary TEXT NOT NULL,
node_count INTEGER NOT NULL DEFAULT 0,
embedding BLOB,
member_signature TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
`);
}

function m7_community_signature(db: DatabaseSyncInstance): void {
const cols = db.prepare("PRAGMA table_info(gm_communities)").all() as Array<{ name?: string }>;
const hasMemberSignature = cols.some((col) => col.name === "member_signature");
if (!hasMemberSignature) {
db.exec("ALTER TABLE gm_communities ADD COLUMN member_signature TEXT");
}
db.exec("CREATE INDEX IF NOT EXISTS ix_gm_communities_member_signature ON gm_communities(member_signature)");
}

function m8_backfill_community_signatures(db: DatabaseSyncInstance): void {
const missing = db.prepare(`
SELECT id FROM gm_communities
WHERE member_signature IS NULL OR member_signature=''
`).all() as Array<{ id: string }>;

for (const row of missing) {
const members = db.prepare(`
SELECT id FROM gm_nodes
WHERE community_id=? AND status='active'
ORDER BY id
`).all(row.id) as Array<{ id: string }>;

if (!members.length) continue;

const memberSignature = createHash("sha1")
.update(members.map((member) => member.id).join(","))
.digest("hex");

db.prepare(`
UPDATE gm_communities
SET member_signature=?, updated_at=updated_at
WHERE id=?
`).run(memberSignature, row.id);
}
}
68 changes: 57 additions & 11 deletions src/store/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,39 +531,85 @@ export interface CommunitySummary {
id: string;
summary: string;
nodeCount: number;
memberSignature: string | null;
createdAt: number;
updatedAt: number;
}

export function upsertCommunitySummary(
db: DatabaseSyncInstance, id: string, summary: string, nodeCount: number, embedding?: number[],
db: DatabaseSyncInstance,
id: string,
summary: string,
nodeCount: number,
embedding?: number[] | Uint8Array,
memberSignature?: string,
): void {
const now = Date.now();
const blob = embedding ? new Uint8Array(new Float32Array(embedding).buffer) : null;
const blob = embedding
? embedding instanceof Uint8Array
? embedding
: new Uint8Array(new Float32Array(embedding).buffer)
: null;
const ex = db.prepare("SELECT id FROM gm_communities WHERE id=?").get(id) as any;
if (ex) {
if (blob) {
db.prepare("UPDATE gm_communities SET summary=?, node_count=?, embedding=?, updated_at=? WHERE id=?")
.run(summary, nodeCount, blob, now, id);
db.prepare("UPDATE gm_communities SET summary=?, node_count=?, embedding=?, member_signature=?, updated_at=? WHERE id=?")
.run(summary, nodeCount, blob, memberSignature ?? null, now, id);
} else {
db.prepare("UPDATE gm_communities SET summary=?, node_count=?, updated_at=? WHERE id=?")
.run(summary, nodeCount, now, id);
db.prepare("UPDATE gm_communities SET summary=?, node_count=?, member_signature=?, updated_at=? WHERE id=?")
.run(summary, nodeCount, memberSignature ?? null, now, id);
}
} else {
db.prepare("INSERT INTO gm_communities (id, summary, node_count, embedding, created_at, updated_at) VALUES (?,?,?,?,?,?)")
.run(id, summary, nodeCount, blob, now, now);
db.prepare("INSERT INTO gm_communities (id, summary, node_count, embedding, member_signature, created_at, updated_at) VALUES (?,?,?,?,?,?,?)")
.run(id, summary, nodeCount, blob, memberSignature ?? null, now, now);
}
}

export function getCommunitySummary(db: DatabaseSyncInstance, id: string): CommunitySummary | null {
const r = db.prepare("SELECT * FROM gm_communities WHERE id=?").get(id) as any;
if (!r) return null;
return { id: r.id, summary: r.summary, nodeCount: r.node_count, createdAt: r.created_at, updatedAt: r.updated_at };
return {
id: r.id,
summary: r.summary,
nodeCount: r.node_count,
memberSignature: r.member_signature ?? null,
createdAt: r.created_at,
updatedAt: r.updated_at,
};
}

export function getCommunitySummaryBySignature(
db: DatabaseSyncInstance,
memberSignature: string,
): (CommunitySummary & { embedding?: Uint8Array }) | null {
const r = db.prepare(`
SELECT * FROM gm_communities
WHERE member_signature=?
ORDER BY updated_at DESC
LIMIT 1
`).get(memberSignature) as any;
if (!r) return null;
return {
id: r.id,
summary: r.summary,
nodeCount: r.node_count,
memberSignature: r.member_signature ?? null,
embedding: r.embedding ? (r.embedding as Uint8Array) : undefined,
createdAt: r.created_at,
updatedAt: r.updated_at,
};
}

export function getAllCommunitySummaries(db: DatabaseSyncInstance): CommunitySummary[] {
return (db.prepare("SELECT * FROM gm_communities ORDER BY node_count DESC").all() as any[])
.map(r => ({ id: r.id, summary: r.summary, nodeCount: r.node_count, createdAt: r.created_at, updatedAt: r.updated_at }));
.map(r => ({
id: r.id,
summary: r.summary,
nodeCount: r.node_count,
memberSignature: r.member_signature ?? null,
createdAt: r.created_at,
updatedAt: r.updated_at,
}));
}

export type ScoredCommunity = { id: string; summary: string; score: number; nodeCount: number };
Expand Down Expand Up @@ -640,4 +686,4 @@ export function pruneCommunitySummaries(db: DatabaseSyncInstance): number {
)
`).run();
return result.changes;
}
}
24 changes: 22 additions & 2 deletions test/graph.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { describe, it, expect, beforeEach } from "vitest";
import { DatabaseSync, type DatabaseSyncInstance } from "@photostructure/sqlite";
import { createTestDb, insertNode, insertEdge } from "./helpers.ts";
import { personalizedPageRank, computeGlobalPageRank, invalidateGraphCache } from "../src/graph/pagerank.ts";
import { detectCommunities, getCommunityPeers } from "../src/graph/community.ts";
import { detectCommunities, getCommunityPeers, summarizeCommunities } from "../src/graph/community.ts";
import { detectDuplicates, dedup } from "../src/graph/dedup.ts";
import { runMaintenance } from "../src/graph/maintenance.ts";
import { saveVector } from "../src/store/store.ts";
Expand Down Expand Up @@ -194,6 +194,26 @@ describe("Community Detection", () => {
const { count } = detectCommunities(db);
expect(count).toBe(0);
});

it("reuses existing summaries when a community is unchanged", async () => {
const a = insertNode(db, { name: "docker-build" });
const b = insertNode(db, { name: "docker-push" });
insertEdge(db, { fromId: a, toId: b });

const { communities } = detectCommunities(db);
let llmCalls = 0;
const llm = async () => {
llmCalls += 1;
return "docker deployment skills";
};

const first = await summarizeCommunities(db, communities, llm);
const second = await summarizeCommunities(db, communities, llm);

expect(first).toBe(1);
expect(second).toBe(0);
expect(llmCalls).toBe(1);
});
});

// ═══════════════════════════════════════════════════════════════
Expand Down Expand Up @@ -298,4 +318,4 @@ describe("runMaintenance", () => {
expect(result.pagerank.topK).toHaveLength(0);
expect(result.community.count).toBe(0);
});
});
});
15 changes: 14 additions & 1 deletion test/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,19 @@ export function createTestDb(): DatabaseSyncInstance {
);
`);

// m6: 社区摘要
db.exec(`
CREATE TABLE IF NOT EXISTS gm_communities (
id TEXT PRIMARY KEY,
summary TEXT NOT NULL,
node_count INTEGER NOT NULL DEFAULT 0,
embedding BLOB,
member_signature TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
`);

return db;
}

Expand Down Expand Up @@ -176,4 +189,4 @@ export function insertEdge(
"test-session",
Date.now(),
);
}
}