Skip to content

Commit

Permalink
data-driven listing and abstract pages for research papers from arxiv…
Browse files Browse the repository at this point in the history
…, also includes github repos where available
  • Loading branch information
akollegger committed Dec 2, 2024
1 parent 225824c commit 6fd3768
Show file tree
Hide file tree
Showing 13 changed files with 1,290 additions and 20 deletions.
857 changes: 846 additions & 11 deletions package-lock.json

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,22 @@
"start": "astro dev",
"build": "astro check && astro build",
"preview": "astro preview",
"astro": "astro"
"astro": "astro",
"test": "vitest"
},
"dependencies": {
"@astrojs/check": "^0.9.3",
"@astrojs/starlight": "^0.26.1",
"@astrojs/starlight-tailwind": "^2.0.3",
"@astrojs/tailwind": "^5.1.2",
"@effect/platform-node": "^0.65.0",
"astro": "^4.14.5",
"date-fns": "^4.1.0",
"effect": "^3.11.0",
"sharp": "^0.33.5",
"tailwindcss": "^3.4.15",
"typescript": "^5.5.4"
"typescript": "^5.5.4",
"vitest": "^2.1.7",
"xml-js": "^1.6.11"
}
}
82 changes: 82 additions & 0 deletions src/components/GrArxivPage.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@

---
import { format } from 'date-fns';

import {extractArxivID, getPaper} from '../lib/arxiv'

import StarlightPage from '@astrojs/starlight/components/StarlightPage.astro';

// The rendering page passes the arXiv entry in as the `arxiv` prop.
const { arxiv: arxivEntry } = Astro.props;

// `arxivEntry.id` is also used as the link target in the template below;
// extractArxivID derives the identifier shown after "arXiv:".
const arxivid = extractArxivID(arxivEntry.id);

// Local record for this paper (may be absent); the template reads its `github` field.
const paper = getPaper(arxivid);

---
{/*
  Abstract page for a single arXiv paper: byline (publication date, authors,
  arXiv id with categories, optional GitHub repository) followed by the abstract.
  The frontmatter expression must be a balanced `{{ ... }}` pair.
*/}
<StarlightPage
  frontmatter={{ title: arxivEntry.title, editUrl: false, tableOfContents: false }}
>
  <p class="byline text-xs">
    <time itemprop="published" datetime={format(arxivEntry.published, 'yyyy-MM-dd')}>
      Published {format(arxivEntry.published, 'MMMM do, yyyy')}
    </time>
    <address class="author text-xs">By
      {arxivEntry.author.map( (author:any, i:number) => (
        <span>{(i ? ', ' : '')}<a rel="author" class="url fn n">{author.name}</a></span>
      ))}
    </address>
    <cite class="arxivid text-xs">
      <a href={arxivEntry.id}>arXiv:{arxivid}</a>
      [ {arxivEntry.category.join(", ")}
      ]
    </cite>
    <cite class="github text-xs">
      {/* Only link to GitHub when the paper record carries a non-empty repo slug. */}
      {paper?.github !== undefined && paper.github !== "" ?
        (<a href={"https://github.com/" + paper?.github}>github:{paper?.github}</a>)
        : ''
      }

    </cite>
  </p>

  <h2 id="quote">Abstract</h2>
  <blockquote><p>{arxivEntry.summary}</p></blockquote>

</StarlightPage>



<!-- <StarlightPage
frontmatter={{ title: `${entry.title.text}`, editUrl:false, tableOfContents:false} }}
>
<p class="byline text-xs">
<time pubdate={format(pubDate, 'yyyy-MM-dd')} title="August 28th, 2011">Published {format(pubDate, 'MMMM do, yyyy')}</time>
<address class="author text-xs">By
{authors.map( (author:any, i:number) => (
<span key={i}>{(i ? ', ' : '')}
<a rel="author" class="url fn n">{author.name.text}</a>
</span>
))}
</address>
<cite class="arxivid text-xs">
<a href={entry.id.text}>arXiv:{arxivid}</a>
[ {categories.map( (category:any) => category._attributes.term).join(", ")}
]
</cite>
<cite class="github text-xs">
{paper?.github !== undefined && paper.github !== "" ?
(<a href={"https://github.com/" + paper?.github}>github:{paper?.github}</a>)
: ''
}
</cite>
</p>
<h2 id="quote">Abstract</h2>
<blockquote><p>{entry.summary.text}</p></blockquote>
</StarlightPage> -->
11 changes: 10 additions & 1 deletion src/content/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import { defineCollection } from 'astro:content';
import { z, defineCollection } from 'astro:content';
import { docsSchema } from '@astrojs/starlight/schema';

/** Shape of one entry in the `research` data collection. */
const researchSchema = z.object({
  title: z.string(),
  // Must parse as a URL, not just any string.
  canonicalURL: z.string().url(),
});

/** Data-only collection (no rendered body) validated against `researchSchema`. */
const researchCollection = defineCollection({
  type: 'data',
  schema: researchSchema,
});

/** Starlight documentation pages, validated with Starlight's own schema. */
const docsCollection = defineCollection({ schema: docsSchema() });

/** Content collections registered with Astro, keyed by collection name. */
export const collections = {
  docs: docsCollection,
  research: researchCollection,
};
10 changes: 5 additions & 5 deletions src/content/docs/appendices/notation.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ The GraphRAG pattern catalog uses a data notation called `gram` to describe
logical graph structures called patterns that are composed of nodes, relationships
and subjects.

The Gram notation is intended to be self-descriptive and explicit, able to
represent data and structures that are often implicit in physical graph models.
For example, paths are present in any connected graph, however storing path-level
information isn't normally supported. You can find paths, even store paths, but
there is no way to "say something" about a path.
> The Gram notation is intended to be self-descriptive and explicit, able to
> represent data and structures that are often implicit in physical graph models.
> For example, paths are present in any connected graph, however storing path-level
> information isn't normally supported. You can find paths, even store paths, but
> there is no way to "say something" about a path.

Gram starts with a notion of "subjects" as a self-describing data structure
in two parts:
Expand Down
2 changes: 2 additions & 0 deletions src/content/docs/appendices/research.mdx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
---
title: GraphRAG Papers
description: Foundational research papers about GraphRAG and Knowledge Graphs
editUrl: false
tableOfContents: false
---

- [Graph Retrieval-Augmented Generation: A Survey](https://arxiv.org/abs/2408.08921)
Expand Down
86 changes: 86 additions & 0 deletions src/data/papers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
[
{
"arxivid": "2408.08921",
"github": "pengboci/GraphRAG-Survey"
},
{
"arxivid": "2312.16890",
"github": "HKUDS/DiffKG"
},
{
"arxivid": "2306.08302",
"github": ""
},
{
"arxivid": "2310.04560",
"github": "google-research/talk-like-a-graph"
},
{
"arxivid": "2311.07509",
"github": "datadotworld/cwd-benchmark-data"
},
{
"arxivid": "2402.07630",
"github": "XiaoxinHe/G-Retriever"
},
{
"arxivid": "2404.12491",
"github": "urchade/GraphER"
},
{
"arxivid": "2404.16130",
"github": "microsoft/graphrag"
},
{
"arxivid": "2404.17723",
"github": ""
},
{
"arxivid": "2408.04948",
"github": ""
},
{
"arxivid": "2406.14550",
"github": ""
},
{
"arxivid": "2410.05779",
"github": "HKUDS/LightRAG"
},
{
"arxivid": "2410.08815",
"github": "Li-Z-Q/StructRAG"
},
{
"arxivid": "2307.07697",
"github": "IDEA-FinAI/ToG"
},
{
"arxivid": "2405.14831",
"github": "OSU-NLP-Group/HippoRAG"
},
{
"arxivid": "2408.04187",
"github": "MedicineToken/Medical-Graph-RAG"
},
{
"arxivid": "2405.18414",
"github": ""
},
{
"arxivid": "2405.16506",
"github": "HuieL/GRAG"
},
{
"arxivid": "2410.23875",
"github": "liyichen-cly/PoG"
},
{
"arxivid": "2410.18415",
"github": ""
},
{
"arxivid": "2404.07103",
"github": "PeterGriffinJin/Graph-CoT"
}
]
48 changes: 48 additions & 0 deletions src/lib/arxiv.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { assert, expect, test } from 'vitest';

import { Effect, Either } from "effect"
import { FetchHttpClient } from "@effect/platform"

import { getArxivDetails } from './arxiv'
import { type ArxivEntry } from './arxiv'

// Fetches a single, well-known paper and checks that its title comes back.
// NOTE(review): FetchHttpClient.layer supplies a real fetch(), so this presumably
// performs a live network request — confirm before running in offline CI.
test('arxiv fetch well-known entry', async () => {
  const arxivid = '2402.07630';

  const program = getArxivDetails(arxivid).pipe(
    Effect.scoped,
    Effect.provide(FetchHttpClient.layer) // provide a real implementation of fetch()
  );

  const result = await Effect.runPromise(program)

  // expect() without a matcher asserts nothing; pin the Right outcome explicitly.
  expect(Either.isRight(result)).toBe(true)

  expect(Either.getOrThrow(result).title).toBe("G-Retriever: Retrieval-Augmented Generation for Textual Graph Understanding and Question Answering")

  // DEBUG
  // Either.match(result,
  //   {
  //     onLeft: (e) => console.error(e),
  //     onRight: (result) => console.log(result.feed.entry[0])
  //   }
  // )

});

// Fetches several papers and checks that every lookup succeeded.
// NOTE(review): like the single-entry test, this presumably hits the network
// through the real fetch() layer — confirm before running in offline CI.
test('arxiv get many entries', async () => {
  const arxivids = ['2402.07630', '2311.07509', '2306.08302'];

  const program = Effect.forEach(arxivids, (arxivid) =>
    getArxivDetails(arxivid)
  ).pipe(
    Effect.map( Either.all ), // gather all the successes into one array
    Effect.scoped,
    Effect.provide(FetchHttpClient.layer)
  )

  const result = await Effect.runPromise(program)

  // Without a matcher this expectation could never fail; require a Right
  // so that a Left from any single lookup fails the test.
  expect(Either.isRight(result)).toBe(true)

})
Loading

0 comments on commit 6fd3768

Please sign in to comment.