diff --git a/packages/query-tools/package.json b/packages/query-tools/package.json
index 9c3dc9c87..7775dfca2 100644
--- a/packages/query-tools/package.json
+++ b/packages/query-tools/package.json
@@ -34,6 +34,7 @@
     "build-esm": "tsc --module esnext --outDir dist/esm",
     "build-commonjs": "tsc --module commonjs --outDir dist/cjs",
     "dev": "concurrently 'npm:build-esm -- --watch' 'npm:build-commonjs -- --watch'",
-    "clean": "rm -rf {dist,tsconfig.tsbuildinfo}"
+    "clean": "rm -rf {dist,tsconfig.tsbuildinfo}",
+    "test": "vitest"
   }
 }
diff --git a/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.test.ts b/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.test.ts
new file mode 100644
index 000000000..4c5d8d5be
--- /dev/null
+++ b/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.test.ts
@@ -0,0 +1,253 @@
+import type { Record } from 'neo4j-driver';
+import { Node, Path, PathSegment, Relationship } from 'neo4j-driver';
+
+import { describe, expect, test } from 'vitest';
+import { extractUniqueNodesAndRels } from './extract-unique-nodes-and-relationships';
+
+describe('extractUniqueNodesAndRels', () => {
+  test('should map bolt records with a path to nodes and relationships', () => {
+    const startNode = new Node(
+      1,
+      ['Person'],
+      {
+        prop1: 'prop1',
+      },
+      'node1',
+    );
+    const endNode = new Node(
+      2,
+      ['Movie'],
+      {
+        prop2: 'prop2',
+      },
+      'node2',
+    );
+
+    const relationship = new Relationship(
+      3,
+      1,
+      2,
+      'ACTED_IN',
+      {},
+      'rel1',
+      'node1',
+      'node2',
+    );
+    const pathSegment = new PathSegment(startNode, relationship, endNode);
+    const path = new Path(startNode, endNode, [pathSegment]);
+
+    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
+    const boltRecord = {
+      keys: ['p'],
+      get: () => path,
+    } as unknown as Record;
+
+    const { nodes, relationships } = extractUniqueNodesAndRels([boltRecord]);
+
+    const [graphNodeStart] = nodes.filter(
+      (node) => node.elementId.toString() === 'node1',
+    );
+    const [graphNodeEnd] = nodes.filter(
+      (node) => node.elementId.toString() === 'node2',
+    );
+    const [firstRel] = relationships;
+
+    if (
+      graphNodeStart === undefined ||
+      graphNodeEnd === undefined ||
+      firstRel === undefined
+    ) {
+      throw new Error('Error in test data, got undefined');
+    }
+
+    expect(nodes.length).toBe(2);
+
+    expect(graphNodeStart.labels).toEqual(['Person']);
+    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });
+
+    expect(graphNodeEnd.labels).toEqual(['Movie']);
+    expect(graphNodeEnd.properties).toEqual({ prop2: 'prop2' });
+    expect(relationships.length).toBe(1);
+
+    expect(firstRel.elementId.toString()).toEqual('rel1');
+    expect(firstRel.startNodeElementId.toString()).toEqual('node1');
+    expect(firstRel.endNodeElementId.toString()).toEqual('node2');
+    expect(firstRel.type).toEqual('ACTED_IN');
+    expect(firstRel.properties).toEqual({});
+  });
+
+  test('should deduplicate bolt records based on node id and filter out dangling relationships', () => {
+    const node1 = new Node(
+      1,
+      ['Person'],
+      {
+        prop1: 'prop1',
+      },
+      'node1',
+    );
+    const node2 = new Node(
+      1,
+      ['Person'],
+      {
+        prop1: 'prop1',
+      },
+      'node1',
+    );
+    const relationship = new Relationship(
+      2,
+      1,
+      34,
+      'ACTED_IN',
+      {},
+      'rel1',
+      'node1',
+      'node34',
+    );
+
+    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
+    const boltRecord = {
+      keys: ['n'],
+      get: () => [node1, node2, relationship],
+    } as unknown as Record;
+
+    const { nodes, relationships, limitHit } = extractUniqueNodesAndRels([
+      boltRecord,
+    ]);
+    expect(limitHit).toBe(false);
+    expect(nodes.length).toBe(1);
+    expect(relationships.length).toBe(0);
+  });
+
+  test('should respect the max nodes limit and filter out dangling relations', () => {
+    const startNode = new Node(
+      1,
+      ['Person'],
+      {
+        prop1: 'prop1',
+      },
+      'node1',
+    );
+    const endNode = new Node(
+      2,
+      ['Movie'],
+      {
+        prop2: 'prop2',
+      },
+      'node2',
+    );
+    const relationship = new Relationship(
+      3,
+      1,
+      2,
+      'ACTED_IN',
+      {},
+      'rel1',
+      'node1',
+      'node2',
+    );
+    const pathSegment = new PathSegment(startNode, relationship, endNode);
+    const path = new Path(startNode, endNode, [pathSegment]);
+
+    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
+    const boltRecord = {
+      keys: ['p'],
+      get: () => path,
+    } as unknown as Record;
+
+    const { nodes, relationships, limitHit } = extractUniqueNodesAndRels(
+      [boltRecord],
+      { nodeLimit: 1 },
+    );
+    expect(limitHit).toBe(true);
+    expect(nodes.length).toBe(1);
+    const [graphNodeStart] = nodes;
+    expect(graphNodeStart).toBeDefined();
+    if (graphNodeStart === undefined) {
+      throw new Error('Error in test data, got undefined');
+    }
+    expect(graphNodeStart.labels).toEqual(['Person']);
+    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });
+    expect(relationships.length).toBe(0);
+  });
+
+  test('should respect the max nodes limit and not filter out dangling relations when asked to keep them', () => {
+    const startNode = new Node(
+      1,
+      ['Person'],
+      {
+        prop1: 'prop1',
+      },
+      'node1',
+    );
+    const endNode = new Node(
+      2,
+      ['Movie'],
+      {
+        prop2: 'prop2',
+      },
+      'node2',
+    );
+    const relationship = new Relationship(
+      3,
+      1,
+      2,
+      'ACTED_IN',
+      {},
+      'rel1',
+      'node1',
+      'node2',
+    );
+    const pathSegment = new PathSegment(startNode, relationship, endNode);
+    const path = new Path(startNode, endNode, [pathSegment]);
+
+    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
+    const boltRecord = {
+      keys: ['p'],
+      get: () => path,
+    } as unknown as Record;
+
+    const { nodes, relationships, limitHit } = extractUniqueNodesAndRels(
+      [boltRecord],
+      {
+        nodeLimit: 1,
+        keepDanglingRels: true,
+      },
+    );
+    expect(limitHit).toBe(true);
+    expect(nodes.length).toBe(1);
+    const [graphNodeStart] = nodes;
+    expect(graphNodeStart).toBeDefined();
+    if (graphNodeStart === undefined) {
+      throw new Error('Error in test data, got undefined');
+    }
+
+    expect(graphNodeStart.labels).toEqual(['Person']);
+    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });
+    expect(relationships.length).toBe(1);
+  });
+
+  test('should handle empty results', () => {
+    const { nodes, relationships, limitHit } = extractUniqueNodesAndRels([]);
+    expect(limitHit).toBe(false);
+    expect(nodes.length).toBe(0);
+    expect(relationships.length).toBe(0);
+  });
+
+  test('should return no nodes and report the limit as hit when the node limit is zero', () => {
+    const node = new Node(1, ['Person'], { prop1: 'prop1' }, 'node1');
+
+    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
+    const boltRecord = {
+      keys: ['n'],
+      get: () => node,
+    } as unknown as Record;
+
+    const { nodes, relationships, limitHit } = extractUniqueNodesAndRels(
+      [boltRecord],
+      { nodeLimit: 0 },
+    );
+    expect(limitHit).toBe(true);
+    expect(nodes.length).toBe(0);
+    expect(relationships.length).toBe(0);
+  });
+});
diff --git a/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.ts b/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.ts
new file mode 100644
index 000000000..1bb0338fc
--- /dev/null
+++ b/packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.ts
@@ -0,0 +1,139 @@
+import type {
+  Integer,
+  Node,
+  Path,
+  Record,
+  RecordShape,
+  Relationship,
+} from 'neo4j-driver';
+import { isNode, isPath, isRelationship } from 'neo4j-driver';
+import { CypherProperty } from '../data-types/cypher-data-types';
+
+export type Properties = RecordShape;
+
+/**
+ * Result type containing deduplicated nodes and relationships extracted from Neo4j records.
+ */
+export type DeduplicatedNodesAndRels = {
+  /** Array of unique nodes found in the records */
+  nodes: Node[];
+  /** Array of unique relationships found in the records */
+  relationships: Relationship[];
+  /** Whether the max node limit was reached during extraction */
+  limitHit: boolean;
+};
+
+/**
+ * Extracts and deduplicates nodes and relationships from Neo4j query records.
+ *
+ * This function processes Neo4j records to find all nodes and relationships,
+ * removing duplicates based on their element IDs. It can handle various data
+ * structures including individual nodes/relationships, paths, arrays, and
+ * nested objects.
+ *
+ * Nodes returned directly (or nested in arrays/objects) are collected before
+ * the nodes that only appear inside paths, so they take priority when
+ * `nodeLimit` is in effect.
+ *
+ * @param records - Array of Neo4j records to process
+ * @param options - Configuration options for extraction
+ * @param options.nodeLimit - Maximum number of unique nodes to extract (optional).
+ *   Once reached, further nodes are ignored; a limit of 0 or less yields no nodes.
+ * @param options.keepDanglingRels - Whether to keep relationships whose start/end nodes are missing (default: false)
+ *
+ * @returns The {@link DeduplicatedNodesAndRels} containing unique nodes and relationships
+ */
+export const extractUniqueNodesAndRels = (
+  records: Record[],
+  {
+    nodeLimit,
+    keepDanglingRels = false,
+  }: { nodeLimit?: number; keepDanglingRels?: boolean } = {},
+): DeduplicatedNodesAndRels => {
+  // A limit of 0 (or less) leaves no room for any node, so it is reached
+  // before anything has been collected.
+  let limitHit = typeof nodeLimit === 'number' && nodeLimit <= 0;
+
+  const items = new Set<unknown>();
+
+  for (const record of records) {
+    for (const key of record.keys) {
+      items.add(record.get(key));
+    }
+  }
+
+  // Paths are expanded only after the direct entities have been collected.
+  const paths: Path[] = [];
+
+  const nodeMap = new Map<string, Node>();
+  function addNode(n: Node) {
+    if (limitHit) {
+      return;
+    }
+
+    const id = n.elementId.toString();
+    if (!nodeMap.has(id)) {
+      nodeMap.set(id, n);
+    }
+    // `>=` rather than `===` so the cap still triggers for limits the size
+    // can never equal exactly (e.g. a fractional value).
+    if (typeof nodeLimit === 'number' && nodeMap.size >= nodeLimit) {
+      limitHit = true;
+    }
+  }
+
+  const relMap = new Map<string, Relationship>();
+  function addRel(r: Relationship) {
+    const id = r.elementId.toString();
+    if (!relMap.has(id)) {
+      relMap.set(id, r);
+    }
+  }
+
+  const findAllEntities = (item: unknown) => {
+    if (typeof item !== 'object' || !item) {
+      return;
+    }
+
+    if (isRelationship(item)) {
+      addRel(item);
+    } else if (isNode(item)) {
+      addNode(item);
+    } else if (isPath(item)) {
+      paths.push(item);
+    } else if (Array.isArray(item)) {
+      item.forEach(findAllEntities);
+    } else {
+      Object.values(item).forEach(findAllEntities);
+    }
+  };
+
+  findAllEntities(Array.from(items));
+
+  for (const path of paths) {
+    addNode(path.start);
+    addNode(path.end);
+    for (const segment of path.segments) {
+      addNode(segment.start);
+      addNode(segment.end);
+      addRel(segment.relationship);
+    }
+  }
+
+  const nodes = Array.from(nodeMap.values());
+
+  const relationships = Array.from(relMap.values()).filter((item) => {
+    if (keepDanglingRels) {
+      return true;
+    }
+
+    // We'd get dangling relationships from
+    // match ()-[a:ACTED_IN]->() return a;
+    // or from hitting the node limit
+    const start = item.startNodeElementId.toString();
+    const end = item.endNodeElementId.toString();
+    return nodeMap.has(start) && nodeMap.has(end);
+  });
+
+  return { nodes, relationships, limitHit };
+};
diff --git a/packages/query-tools/src/index.ts b/packages/query-tools/src/index.ts
index 2bc4ebfa6..1a624195a 100644
--- a/packages/query-tools/src/index.ts
+++ b/packages/query-tools/src/index.ts
@@ -1,5 +1,6 @@
 export { FRIENDLY_ERROR_MESSAGES } from './connectionErrorHandler';
 export type { ConnectionError } from './connectionErrorHandler';
+export * from './cypher-execution/extract-unique-nodes-and-relationships';
 export {
   deserializeTypeAnnotations,
   serializeTypeAnnotations,
@@ -16,6 +17,8 @@ export type {
 } from './metadataPoller';
 export type { Neo4jConnection, QueryResultWithLimit } from './neo4jConnection';
 export type { Database } from './queries/databases';
+export { graphResultTransformer } from './result-transformers/graph-result-transformer';
+export type { GraphResult } from './result-transformers/graph-result-transformer';
 export { Neo4jSchemaPoller } from './schemaPoller';
 export type { ConnnectionResult } from './schemaPoller';
 export type { CypherDataType } from './types/cypher-data-types';
diff --git a/packages/query-tools/src/result-transformers/graph-result-transformer.ts b/packages/query-tools/src/result-transformers/graph-result-transformer.ts
new file mode 100644
index 000000000..22b0bebce
--- /dev/null
+++ b/packages/query-tools/src/result-transformers/graph-result-transformer.ts
@@ -0,0 +1,58 @@
+import {
+  Integer,
+  Record,
+  ResultSummary,
+  resultTransformers,
+} from 'neo4j-driver';
+
+import {
+  DeduplicatedNodesAndRels,
+  extractUniqueNodesAndRels,
+} from '../cypher-execution/extract-unique-nodes-and-relationships';
+
+/**
+ * Result type for graph queries that includes deduplicated nodes and relationships
+ * along with the original records and query summary.
+ *
+ * See {@link DeduplicatedNodesAndRels} for the type of the nodes and relationships.
+ */
+export type GraphResult = DeduplicatedNodesAndRels & {
+  /** Original Neo4j records returned by the query */
+  records: Record[];
+  /** Result summary passed through unchanged from the driver */
+  summary: ResultSummary;
+};
+
+/**
+ * A result transformer that processes Neo4j query results into a graph format
+ * with deduplicated nodes and relationships.
+ *
+ * This transformer extracts unique nodes and relationships from query records
+ * while preserving the original records and summary information. It's particularly
+ * useful for graph visualization and analysis where duplicate entities need to be
+ * consolidated.
+ *
+ * @example
+ * ```typescript
+ * const result: GraphResult = await driver.executeQuery(
+ *   'MATCH p=(:Person {name: $name})-[]->() RETURN p',
+ *   { name: 'John' },
+ *   { resultTransformer: graphResultTransformer },
+ * );
+ *
+ * console.log(result.nodes, result.relationships);
+ * ```
+ *
+ * @returns ResultTransformer producing {@link GraphResult}
+ */
+export const graphResultTransformer =
+  resultTransformers.mappedResultTransformer({
+    map(record) {
+      return record;
+    },
+    collect(records, summary): GraphResult {
+      const { nodes, relationships, limitHit } =
+        extractUniqueNodesAndRels(records);
+      return { nodes, relationships, limitHit, records, summary };
+    },
+  });