Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 61 additions & 25 deletions apps/roam/src/utils/jsonld.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export const jsonLdContext = (baseUrl: string): Record<string, string> => ({
prov: "http://www.w3.org/ns/prov#",
sioc: "http://rdfs.org/sioc/ns#",
dgb: "https://discoursegraphs.com/schema/dg_base",
dg: "https://discoursegraphs.com/schema/dg_core",
subClassOf: "rdfs:subClassOf",
title: "dc:title",
label: "rdfs:label",
Expand All @@ -32,6 +33,7 @@ export const jsonLdContext = (baseUrl: string): Record<string, string> => ({
relationDef: "dgb:RelationDef",
relationInstance: "dgb:RelationInstance",
inverseOf: "owl:inverseOf",
backlink: "dg:backlink",
pages: `${baseUrl}/page/`,
});

Expand Down Expand Up @@ -105,7 +107,10 @@ export const getJsonLdData = async ({
nodeLabelByType: Record<string, string>;
updateExportProgress: (progress: number) => Promise<void>;
}): Promise<
Record<string, string | Record<string, string> | Record<string, string>[]>
Record<
string,
string | Record<string, string> | Record<string, string | string[]>[]
>
> => {
const roamUrl = canonicalRoamUrl();
const getRelationData = () =>
Expand All @@ -131,40 +136,71 @@ export const getJsonLdData = async ({
.filter((s) => s.content !== undefined)
.map((node) => [node.label, node["@id"]]),
);
const nodeSet = new Set(pageData.map((n) => n.uid));

await Promise.all(
const nodes = await Promise.all(
pageData.map(async (page: Result) => {
const r = await pageToMarkdown(page, {
const md = await pageToMarkdown(page, {
...settings,
allNodes,
linkType: "roam url",
});
page.content = r.content;
const { content } = md;
page.content = content;
const { text, uid, type } = page;
const { date, displayName, modified } = getPageMetadata(text);
const nodeType = nodeSchemaUriByName[type as string];
if (!nodeType) {
internalError({
error: `Unknown node type "${type as string}" for page "${text}"`,
});
}
const directBacklinks = (
await (window.roamAlphaAPI.data.backend.q(
`[:find ?uid
:where
[?page :block/uid "${uid}"]
[?refBlock :block/refs ?page]
[?refBlock :block/page ?refPage]
[?refPage :block/uid ?uid]
]`,
) as Promise<Array<[string]>>)
)
.map((x) => x[0])
.filter((x) => nodeSet.has(x));
const indirectBacklinks = (
await (window.roamAlphaAPI.data.backend.q(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential performance concern running this on every single link. Consider batching.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're doing this on every page, not on every link.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, that's what I meant. This would still be in the thousands.

`[:find ?uid
:where
[?page :block/uid "${uid}"]
[?block :block/page ?page]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That clause requires the target page to have at least one block, which might not always be the case.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I tried to do this with an or-join, but or-join seems irremediably broken: it always returns far more results than either clause does on its own.
I will do two queries.

[?refBlock :block/refs ?block]
[?refBlock :block/page ?refPage]
[?refPage :block/uid ?uid]
]`,
) as Promise<Array<[string]>>)
)
.map((x) => x[0])
.filter((x) => nodeSet.has(x));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we filtering here? This means that queries that only return a subset of nodes will not return all backlinks.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Precisely to ensure we only mention backlinks from discourse graph nodes (node-specific filtering, requested above.)

Copy link
Contributor

@mdroidian mdroidian Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That isn't what this is achieving though. This export can be called (and often is) on arbitrary queries. So if a user calls it on a subset of dgraph nodes, the backlinks will be inaccurate.

directBacklinks.push(...indirectBacklinks);
const backlinks = [...new Set(directBacklinks)];
const r: Record<string, string | string[]> = {
"@id": `pages:${uid}`, // eslint-disable-line @typescript-eslint/naming-convention
"@type": nodeType, // eslint-disable-line @typescript-eslint/naming-convention
title: text,
content,
modified: modified?.toJSON(),
created: date.toJSON(),
creator: displayName,
};
if (backlinks.length > 0) {
r["backlink"] = backlinks.map((x) => `pages:${x}`);
}
numTreatedPages += 1;
await updateExportProgress(0.1 + (numTreatedPages / numPages) * 0.75);
return r;
}),
);

const nodes = pageData.map(({ text, uid, content, type }) => {
const { date, displayName, modified } = getPageMetadata(text);
const nodeType = nodeSchemaUriByName[type];
if (!nodeType) {
internalError({
error: `Unknown node type "${type}" for page "${text}"`,
});
}
const r = {
"@id": `pages:${uid}`, // eslint-disable-line @typescript-eslint/naming-convention
"@type": nodeType ?? "nodeSchema", // eslint-disable-line @typescript-eslint/naming-convention
title: text,
content: content as string,
modified: modified?.toJSON(),
created: date.toJSON(),
creator: displayName,
};
return r;
});
const nodeSet = new Set(pageData.map((n) => n.uid));
const rels = await getRelationData();
await updateExportProgress(1);
const relations = uniqJsonArray(
Expand Down
5 changes: 0 additions & 5 deletions apps/website/public/schema/dg_base.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,6 @@ dgb:destination a dgb:Role ;
rdfs:range dgb:NodeSchema ;
rdfs:comment "The destination of a binary relation"@en .

dgb:textRefersToNode a owl:ObjectProperty;
rdfs:domain dgb:NodeSchema;
rdfs:range dgb:NodeSchema;
rdfs:comment "The text of a node refers to another node"@en .


# examples

Expand Down
23 changes: 21 additions & 2 deletions apps/website/public/schema/dg_core.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@
@prefix : <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix vs: <http://www.w3.org/2003/06/sw-vocab-status/ns#> .
@prefix sioc: <http://rdfs.org/sioc/ns#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix dgb: <https://discoursegraphs.com/schema/dg_base#> .
@prefix dg: <https://discoursegraphs.com/schema/dg_core#> .
# http://purl.org/spar/po
@prefix po: <http://www.essepuntato.it/2008/12/pattern#> .

# Ontology header: declares the dg_core vocabulary itself as an owl:Ontology
# and attaches its label, description, date and (tentative) version.
<https://discoursegraphs.com/schema/dg_core#> a owl:Ontology ;
    rdfs:label "DiscourseGraph core vocabulary"@en ;
    rdfs:comment "DiscourseGraph core vocabulary"@en ;
    dc:date "2025-12-31" ;
    owl:versionInfo "0 (tentative)" .

dg:Question a dgb:NodeSchema;
rdfs:label "Question"@en;
Expand Down Expand Up @@ -90,3 +97,15 @@ dg:curatedFrom a dgb:RelationDef;
rdfs:label "Curated from"@en;
rdfs:range dg:Evidence;
rdfs:domain dg:Source.

# `hasPart` and `references` are DCMI Terms properties; they do not exist in
# the DC Elements 1.1 namespace that the `dc:` prefix of this file is bound to,
# so they are referenced through a dedicated `dcterms:` prefix.
@prefix dcterms: <http://purl.org/dc/terms/> .

# Transitive containment: super-property of the structural containment
# properties below, so that nested parts are reachable in a single step.
dg:containsRec a owl:TransitiveProperty.
po:contains rdfs:subPropertyOf dg:containsRec.
dcterms:hasPart rdfs:subPropertyOf dg:containsRec.

# X dg:containsRef Z is entailed whenever X (transitively) contains some Y
# and Y dcterms:references Z.
# NOTE(review): the chain alone does not cover a reference made directly by X
# itself (no contained part involved) — confirm whether that case must be
# asserted separately.
dg:containsRef a owl:ObjectProperty;
    owl:propertyChainAxiom (dg:containsRec dcterms:references);
    rdfs:label "Contains a reference to"@en.

# Inverse direction of dg:containsRef: links a resource to the resources
# whose content refers to it.
dg:backlink a owl:ObjectProperty;
    owl:inverseOf dg:containsRef;
    rdfs:label "is referred by"@en.