-
Notifications
You must be signed in to change notification settings - Fork 4
ENG-1230 Backlinks in JSON-LD export #659
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ export const jsonLdContext = (baseUrl: string): Record<string, string> => ({ | |
| prov: "http://www.w3.org/ns/prov#", | ||
| sioc: "http://rdfs.org/sioc/ns#", | ||
| dgb: "https://discoursegraphs.com/schema/dg_base", | ||
| dg: "https://discoursegraphs.com/schema/dg_core", | ||
| subClassOf: "rdfs:subClassOf", | ||
| title: "dc:title", | ||
| label: "rdfs:label", | ||
|
|
@@ -32,6 +33,7 @@ export const jsonLdContext = (baseUrl: string): Record<string, string> => ({ | |
| relationDef: "dgb:RelationDef", | ||
| relationInstance: "dgb:RelationInstance", | ||
| inverseOf: "owl:inverseOf", | ||
| backlink: "dg:backlink", | ||
| pages: `${baseUrl}/page/`, | ||
| }); | ||
|
|
||
|
|
@@ -105,7 +107,10 @@ export const getJsonLdData = async ({ | |
| nodeLabelByType: Record<string, string>; | ||
| updateExportProgress: (progress: number) => Promise<void>; | ||
| }): Promise< | ||
| Record<string, string | Record<string, string> | Record<string, string>[]> | ||
| Record< | ||
| string, | ||
| string | Record<string, string> | Record<string, string | string[]>[] | ||
| > | ||
| > => { | ||
| const roamUrl = canonicalRoamUrl(); | ||
| const getRelationData = () => | ||
|
|
@@ -131,40 +136,71 @@ export const getJsonLdData = async ({ | |
| .filter((s) => s.content !== undefined) | ||
| .map((node) => [node.label, node["@id"]]), | ||
| ); | ||
| const nodeSet = new Set(pageData.map((n) => n.uid)); | ||
|
|
||
| await Promise.all( | ||
| const nodes = await Promise.all( | ||
| pageData.map(async (page: Result) => { | ||
| const r = await pageToMarkdown(page, { | ||
| const md = await pageToMarkdown(page, { | ||
| ...settings, | ||
| allNodes, | ||
| linkType: "roam url", | ||
| }); | ||
| page.content = r.content; | ||
| const { content } = md; | ||
| page.content = content; | ||
| const { text, uid, type } = page; | ||
| const { date, displayName, modified } = getPageMetadata(text); | ||
| const nodeType = nodeSchemaUriByName[type as string]; | ||
| if (!nodeType) { | ||
| internalError({ | ||
| error: `Unknown node type "${type as string}" for page "${text}"`, | ||
| }); | ||
| } | ||
| const directBacklinks = ( | ||
| await (window.roamAlphaAPI.data.backend.q( | ||
| `[:find ?uid | ||
| :where | ||
| [?page :block/uid "${uid}"] | ||
| [?refBlock :block/refs ?page] | ||
| [?refBlock :block/page ?refPage] | ||
| [?refPage :block/uid ?uid] | ||
| ]`, | ||
| ) as Promise<Array<[string]>>) | ||
| ) | ||
| .map((x) => x[0]) | ||
| .filter((x) => nodeSet.has(x)); | ||
| const indirectBacklinks = ( | ||
| await (window.roamAlphaAPI.data.backend.q( | ||
| `[:find ?uid | ||
| :where | ||
| [?page :block/uid "${uid}"] | ||
| [?block :block/page ?page] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That clause requires the target page to have at least one block, which might not always be the case.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. I tried to do this with an or-join, but or-join seems irremediably broken: The results are always much more than either clause separately. |
||
| [?refBlock :block/refs ?block] | ||
| [?refBlock :block/page ?refPage] | ||
| [?refPage :block/uid ?uid] | ||
| ]`, | ||
| ) as Promise<Array<[string]>>) | ||
| ) | ||
| .map((x) => x[0]) | ||
| .filter((x) => nodeSet.has(x)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we filtering here? This means that queries that only return a subset of nodes will not return all backlinks.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Precisely to ensure we only mention backlinks from discourse graph nodes (node-specific filtering, requested above.)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That isn't what this is achieving though. This export can be called (and often is) on arbitrary queries. So if a user calls it on a subset of dgraph nodes, the backlinks will be inaccurate. |
||
| directBacklinks.push(...indirectBacklinks); | ||
| const backlinks = [...new Set(directBacklinks)]; | ||
| const r: Record<string, string | string[]> = { | ||
| "@id": `pages:${uid}`, // eslint-disable-line @typescript-eslint/naming-convention | ||
| "@type": nodeType, // eslint-disable-line @typescript-eslint/naming-convention | ||
| title: text, | ||
| content, | ||
| modified: modified?.toJSON(), | ||
| created: date.toJSON(), | ||
| creator: displayName, | ||
| }; | ||
| if (backlinks.length > 0) { | ||
| r["backlink"] = backlinks.map((x) => `pages:${x}`); | ||
| } | ||
| numTreatedPages += 1; | ||
| await updateExportProgress(0.1 + (numTreatedPages / numPages) * 0.75); | ||
| return r; | ||
| }), | ||
| ); | ||
|
|
||
| const nodes = pageData.map(({ text, uid, content, type }) => { | ||
| const { date, displayName, modified } = getPageMetadata(text); | ||
| const nodeType = nodeSchemaUriByName[type]; | ||
| if (!nodeType) { | ||
| internalError({ | ||
| error: `Unknown node type "${type}" for page "${text}"`, | ||
| }); | ||
| } | ||
| const r = { | ||
| "@id": `pages:${uid}`, // eslint-disable-line @typescript-eslint/naming-convention | ||
| "@type": nodeType ?? "nodeSchema", // eslint-disable-line @typescript-eslint/naming-convention | ||
| title: text, | ||
| content: content as string, | ||
| modified: modified?.toJSON(), | ||
| created: date.toJSON(), | ||
| creator: displayName, | ||
| }; | ||
| return r; | ||
| }); | ||
| const nodeSet = new Set(pageData.map((n) => n.uid)); | ||
| const rels = await getRelationData(); | ||
| await updateExportProgress(1); | ||
| const relations = uniqJsonArray( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Potential performance concern running this on every single link. Consider batching.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're doing this on every page, not on every link.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, that's what I meant. This would still be in the thousands.