diff --git a/README.zh-CN.md b/README.zh-CN.md index 67be5867e..4da6a6b38 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -271,6 +271,7 @@ npm link | **devto** | `top` `tag` `user` | 公开 | | **dictionary** | `search` `synonyms` `examples` | 公开 | | **arxiv** | `search` `paper` | 公开 | +| **pubmed** | `search` `article` `author` `citations` `related` | 公开 | | **openreview** | `search` `venue` `paper` `reviews` | 公开 | | **paperreview** | `submit` `review` `feedback` | 公开 | | **wikipedia** | `search` `summary` `random` `trending` | 公开 | diff --git a/cli-manifest.json b/cli-manifest.json index f4355459e..a78177cb0 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -15181,6 +15181,329 @@ "modulePath": "producthunt/today.js", "sourceFile": "producthunt/today.js" }, + { + "site": "pubmed", + "name": "article", + "aliases": [ + "paper", + "read" + ], + "description": "Get detailed information for a PubMed article by PMID", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "pmid", + "type": "str", + "required": true, + "positional": true, + "help": "PubMed ID, e.g. 37780221" + }, + { + "name": "full-abstract", + "type": "boolean", + "default": false, + "required": false, + "help": "Do not truncate the abstract in table output" + } + ], + "columns": [ + "field", + "value" + ], + "type": "js", + "modulePath": "pubmed/article.js", + "sourceFile": "pubmed/article.js" + }, + { + "site": "pubmed", + "name": "author", + "description": "Search PubMed articles by author name and optional affiliation", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "name", + "type": "str", + "required": true, + "positional": true, + "help": "Author name, e.g. 
\"Smith J\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "affiliation", + "type": "str", + "required": false, + "help": "Filter by author affiliation" + }, + { + "name": "position", + "type": "str", + "default": "any", + "required": false, + "help": "Author position: any, first, or last", + "choices": [ + "any", + "first", + "last" + ] + }, + { + "name": "year-from", + "type": "int", + "required": false, + "help": "Filter publication year from" + }, + { + "name": "year-to", + "type": "int", + "required": false, + "help": "Filter publication year to" + }, + { + "name": "sort", + "type": "str", + "default": "date", + "required": false, + "help": "Sort by date or relevance", + "choices": [ + "date", + "relevance" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/author.js", + "sourceFile": "pubmed/author.js" + }, + { + "site": "pubmed", + "name": "citations", + "description": "Get PubMed citation relationships for an article", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "pmid", + "type": "str", + "required": true, + "positional": true, + "help": "PubMed ID, e.g. 
37780221" + }, + { + "name": "direction", + "type": "str", + "default": "citedby", + "required": false, + "help": "citedby or references", + "choices": [ + "citedby", + "references" + ] + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/citations.js", + "sourceFile": "pubmed/citations.js" + }, + { + "site": "pubmed", + "name": "related", + "description": "Find articles related to a PubMed article", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "pmid", + "type": "str", + "required": true, + "positional": true, + "help": "PubMed ID, e.g. 37780221" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "score", + "type": "boolean", + "default": false, + "required": false, + "help": "Show similarity scores when available" + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "score", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/related.js", + "sourceFile": "pubmed/related.js" + }, + { + "site": "pubmed", + "name": "search", + "description": "Search PubMed articles with advanced filters", + "access": "read", + "domain": "pubmed.ncbi.nlm.nih.gov", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "query", + "type": "str", + "required": true, + "positional": true, + "help": "Search query, e.g. 
\"machine learning cancer\"" + }, + { + "name": "limit", + "type": "int", + "default": 20, + "required": false, + "help": "Max results (1-100)" + }, + { + "name": "author", + "type": "str", + "required": false, + "help": "Filter by author name" + }, + { + "name": "journal", + "type": "str", + "required": false, + "help": "Filter by journal name" + }, + { + "name": "year-from", + "type": "int", + "required": false, + "help": "Filter publication year from" + }, + { + "name": "year-to", + "type": "int", + "required": false, + "help": "Filter publication year to" + }, + { + "name": "article-type", + "type": "str", + "required": false, + "help": "Filter by publication type, e.g. Review or Clinical Trial" + }, + { + "name": "has-abstract", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include articles with abstracts" + }, + { + "name": "free-full-text", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include free full text articles" + }, + { + "name": "humans-only", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include human studies" + }, + { + "name": "english-only", + "type": "boolean", + "default": false, + "required": false, + "help": "Only include English articles" + }, + { + "name": "sort", + "type": "str", + "default": "relevance", + "required": false, + "help": "Sort by relevance, date, author, or journal", + "choices": [ + "relevance", + "date", + "author", + "journal" + ] + } + ], + "columns": [ + "rank", + "pmid", + "title", + "authors", + "journal", + "year", + "article_type", + "doi", + "url" + ], + "type": "js", + "modulePath": "pubmed/search.js", + "sourceFile": "pubmed/search.js" + }, { "site": "quark", "name": "ls", diff --git a/clis/pubmed/article.js b/clis/pubmed/article.js new file mode 100644 index 000000000..9dd74b50a --- /dev/null +++ b/clis/pubmed/article.js @@ -0,0 +1,50 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { 
CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { eutilsFetch, parseArticleXml, requirePmid, truncateText } from './utils.js'; + +cli({ + site: 'pubmed', + name: 'article', + aliases: ['paper', 'read'], + access: 'read', + description: 'Get detailed information for a PubMed article by PMID', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'pmid', positional: true, required: true, help: 'PubMed ID, e.g. 37780221' }, + { name: 'full-abstract', type: 'boolean', default: false, help: 'Do not truncate the abstract in table output' }, + ], + columns: ['field', 'value'], + func: async (args) => { + const pmid = requirePmid(args.pmid); + const xml = await eutilsFetch('efetch', { + id: pmid, + rettype: 'abstract', + }, { retmode: 'xml', label: 'pubmed article' }); + const article = parseArticleXml(xml, pmid); + if (!article) { + throw new EmptyResultError('pubmed article', `No article found for PMID ${pmid}.`); + } + if (!article.title) { + throw new CommandExecutionError(`pubmed article ${pmid} did not include a title`, 'PubMed EFetch response shape may have changed.'); + } + const abstract = args['full-abstract'] ? 
article.abstract : truncateText(article.abstract, 500); + return [ + { field: 'PMID', value: article.pmid }, + { field: 'Title', value: article.title }, + { field: 'Authors', value: article.authors.join(', ') }, + { field: 'Journal', value: article.journal }, + { field: 'Year', value: article.year }, + { field: 'Date', value: article.date }, + { field: 'Article Type', value: article.article_type }, + { field: 'Language', value: article.language }, + { field: 'DOI', value: article.doi || null }, + { field: 'PMC ID', value: article.pmc || null }, + { field: 'MeSH Terms', value: article.mesh_terms || null }, + { field: 'Keywords', value: article.keywords || null }, + { field: 'Abstract', value: abstract || null }, + { field: 'URL', value: article.url }, + ]; + }, +}); diff --git a/clis/pubmed/author.js b/clis/pubmed/author.js new file mode 100644 index 000000000..3a67e08b3 --- /dev/null +++ b/clis/pubmed/author.js @@ -0,0 +1,64 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + LINK_COLUMNS, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, + requireYear, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'author', + access: 'read', + description: 'Search PubMed articles by author name and optional affiliation', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'name', positional: true, required: true, help: 'Author name, e.g. 
"Smith J"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'affiliation', help: 'Filter by author affiliation' }, + { name: 'position', default: 'any', choices: ['any', 'first', 'last'], help: 'Author position: any, first, or last' }, + { name: 'year-from', type: 'int', help: 'Filter publication year from' }, + { name: 'year-to', type: 'int', help: 'Filter publication year to' }, + { name: 'sort', default: 'date', choices: ['date', 'relevance'], help: 'Sort by date or relevance' }, + ], + columns: LINK_COLUMNS, + func: async (args) => { + const name = requireText(args.name, 'author'); + const limit = requireBoundedInt(args.limit, 20, 100); + const position = requireChoice(args.position, ['any', 'first', 'last'], 'position', 'any'); + const sort = requireChoice(args.sort, ['date', 'relevance'], 'sort', 'date'); + const yearFrom = requireYear(args['year-from'], 'year-from'); + const yearTo = requireYear(args['year-to'], 'year-to'); + const authorTag = position === 'first' ? '1au' : position === 'last' ? 'lastau' : 'au'; + const terms = [`${name}[${authorTag}]`]; + if (args.affiliation) terms.push(`${requireText(args.affiliation, 'affiliation')}[ad]`); + if (yearFrom || yearTo) { + const from = yearFrom || 1800; + const to = yearTo || new Date().getFullYear(); + if (from > to) { + throw new ArgumentError('pubmed year-from must be <= year-to'); + } + terms.push(`${from}:${to}[PDAT]`); + } + const esearch = await eutilsFetch('esearch', { + term: terms.join(' AND '), + retmax: limit, + usehistory: 'y', + sort: sort === 'date' ? 
'pub_date' : '', + }, { label: 'pubmed author' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed author did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed author', `No articles found for author "${name}".`); + } + return fetchSummaryRows(pmids, 'pubmed author summary'); + }, +}); diff --git a/clis/pubmed/citations.js b/clis/pubmed/citations.js new file mode 100644 index 000000000..17ab58f03 --- /dev/null +++ b/clis/pubmed/citations.js @@ -0,0 +1,36 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { EmptyResultError } from '@jackwener/opencli/errors'; +import { LINK_COLUMNS, eutilsFetch, fetchSummaryRows, requireBoundedInt, requireChoice, requirePmid } from './utils.js'; + +cli({ + site: 'pubmed', + name: 'citations', + access: 'read', + description: 'Get PubMed citation relationships for an article', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'pmid', positional: true, required: true, help: 'PubMed ID, e.g. 37780221' }, + { name: 'direction', default: 'citedby', choices: ['citedby', 'references'], help: 'citedby or references' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + ], + columns: LINK_COLUMNS, + func: async (args) => { + const pmid = requirePmid(args.pmid); + const direction = requireChoice(args.direction, ['citedby', 'references'], 'direction', 'citedby'); + const limit = requireBoundedInt(args.limit, 20, 100); + const linkname = direction === 'citedby' ? 
'pubmed_pubmed_citedin' : 'pubmed_pubmed_refs'; + const result = await eutilsFetch('elink', { + id: pmid, + dbfrom: 'pubmed', + cmd: 'neighbor', + linkname, + }, { label: 'pubmed citations' }); + const links = result?.linksets?.[0]?.linksetdbs?.[0]?.links; + if (!Array.isArray(links) || links.length === 0) { + throw new EmptyResultError('pubmed citations', `No ${direction} links found for PMID ${pmid}.`); + } + return fetchSummaryRows(links.slice(0, limit).map(String), 'pubmed citations summary'); + }, +}); diff --git a/clis/pubmed/pubmed.test.js b/clis/pubmed/pubmed.test.js new file mode 100644 index 000000000..bd21ae0e5 --- /dev/null +++ b/clis/pubmed/pubmed.test.js @@ -0,0 +1,276 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { getRegistry } from '@jackwener/opencli/registry'; +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + LINK_COLUMNS, + RELATED_COLUMNS, + SEARCH_COLUMNS, + buildEutilsUrl, + buildSearchQuery, + parseArticleXml, + requireBoundedInt, + requirePmid, +} from './utils.js'; +import './search.js'; +import './article.js'; +import './author.js'; +import './citations.js'; +import './related.js'; + +const SUMMARY_RESULT = { + result: { + uids: ['123', '456'], + 123: { + uid: '123', + title: 'Cancer machine learning.', + authors: [{ name: 'Alice A' }, { name: 'Bob B' }, { name: 'Carol C' }, { name: 'Dan D' }], + fulljournalname: 'Journal of Tests', + pubdate: '2024 Jan', + pubtype: ['Journal Article', 'Review'], + articleids: [{ idtype: 'doi', value: '10.1000/test' }], + }, + 456: { + uid: '456', + title: 'Second article.', + authors: [{ name: 'Eve E' }], + source: 'Test Source', + pubdate: '2023', + pubtype: ['Journal Article'], + articleids: [], + }, + }, +}; + +const ARTICLE_XML = ` + + +
+ + Journal of Tests + 2024Jan02 + + Detailed PubMed article & title. + Background text.Conclusion text. + + AliceExample + BobB + + eng + Review +
+ Neoplasms + machine learning +
+ 10.1000/detailPMC123 +
`; + +function jsonResponse(body, ok = true, status = 200) { + return { + ok, + status, + json: vi.fn().mockResolvedValue(body), + text: vi.fn().mockResolvedValue(typeof body === 'string' ? body : JSON.stringify(body)), + }; +} + +function xmlResponse(body, ok = true, status = 200) { + return { + ok, + status, + json: vi.fn().mockRejectedValue(new Error('not json')), + text: vi.fn().mockResolvedValue(body), + }; +} + +afterEach(() => { + vi.unstubAllGlobals(); +}); + +describe('pubmed adapter registration', () => { + it('registers five public read commands with expected listing columns', () => { + const registry = getRegistry(); + for (const name of ['search', 'article', 'author', 'citations', 'related']) { + const command = registry.get(`pubmed/${name}`); + expect(command).toBeDefined(); + expect(command.strategy).toBe('public'); + expect(command.browser).toBe(false); + expect(command.access).toBe('read'); + } + expect(registry.get('pubmed/search').columns).toEqual(SEARCH_COLUMNS); + expect(registry.get('pubmed/author').columns).toEqual(LINK_COLUMNS); + expect(registry.get('pubmed/citations').columns).toEqual(LINK_COLUMNS); + expect(registry.get('pubmed/related').columns).toEqual(RELATED_COLUMNS); + }); +}); + +describe('pubmed utility contracts', () => { + it('rejects invalid PMIDs and silently-clamped limits', () => { + expect(requirePmid('37780221')).toBe('37780221'); + expect(() => requirePmid('PMID:37780221')).toThrow(ArgumentError); + expect(requireBoundedInt(undefined, 20, 100)).toBe(20); + expect(requireBoundedInt('100', 20, 100)).toBe(100); + expect(() => requireBoundedInt('2abc', 20, 100)).toThrow(ArgumentError); + expect(() => requireBoundedInt(101, 20, 100)).toThrow(/<= 100/); + expect(() => requireBoundedInt(0, 20, 100)).toThrow(ArgumentError); + }); + + it('builds E-utilities URLs with optional NCBI metadata', () => { + vi.stubEnv('NCBI_API_KEY', 'key-1'); + vi.stubEnv('NCBI_EMAIL', 'dev@example.com'); + const url = buildEutilsUrl('esearch', { term: 
'cancer', retmax: 5 }); + expect(url).toContain('/esearch.fcgi?'); + expect(url).toContain('db=pubmed'); + expect(url).toContain('api_key=key-1'); + expect(url).toContain('email=dev%40example.com'); + expect(url).toContain('term=cancer'); + }); + + it('composes search filters without dropping date boundaries', () => { + expect(buildSearchQuery('cancer', { + author: 'Smith J', + journal: 'Nature', + yearFrom: 2020, + yearTo: 2024, + articleType: 'Review', + hasAbstract: true, + hasFullText: true, + humanOnly: true, + englishOnly: true, + })).toBe('cancer AND Smith J[Author] AND Nature[Journal] AND 2020:2024[PDAT] AND Review[PT] AND hasabstract[text] AND free full text[sb] AND humans[mesh] AND english[lang]'); + expect(() => buildSearchQuery('cancer', { yearFrom: 2025, yearTo: 2020 })).toThrow(ArgumentError); + }); + + it('parses EFetch XML into article details', () => { + const article = parseArticleXml(ARTICLE_XML, '123'); + expect(article.title).toBe('Detailed PubMed article & title.'); + expect(article.abstract).toBe('Background text. 
Conclusion text.'); + expect(article.authors).toEqual(['Alice Example', 'Bob B']); + expect(article.journal).toBe('Journal of Tests'); + expect(article.doi).toBe('10.1000/detail'); + expect(article.mesh_terms).toBe('Neoplasms'); + }); +}); + +describe('pubmed search command', () => { + it('returns summary rows for ESearch ids', async () => { + vi.stubGlobal('fetch', vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123', '456'] } })) + .mockResolvedValueOnce(jsonResponse(SUMMARY_RESULT))); + const rows = await getRegistry().get('pubmed/search').func({ query: 'cancer', limit: 2, sort: 'date' }); + expect(rows).toHaveLength(2); + expect(rows[0]).toMatchObject({ rank: 1, pmid: '123', title: 'Cancer machine learning', article_type: 'Review', doi: '10.1000/test' }); + expect(rows[0].url).toBe('https://pubmed.ncbi.nlm.nih.gov/123/'); + }); + + it('rejects bad query, limit, sort, and year args before fetch', async () => { + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + const command = getRegistry().get('pubmed/search'); + await expect(command.func({ query: ' ', limit: 2 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'cancer', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'cancer', sort: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ query: 'cancer', 'year-from': 2025, 'year-to': 2020 })).rejects.toBeInstanceOf(ArgumentError); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('maps empty and API error envelopes to typed errors', async () => { + const command = getRegistry().get('pubmed/search'); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ query: 'nothing' })).rejects.toBeInstanceOf(EmptyResultError); + + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { errorlist: { phrasesnotfound: ['bad 
field'] } } }))); + await expect(command.func({ query: 'bad' })).rejects.toBeInstanceOf(CommandExecutionError); + }); + + it('maps HTTP, fetch, JSON, and partial summary failures to CommandExecutionError', async () => { + const command = getRegistry().get('pubmed/search'); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({}, false, 500))); + await expect(command.func({ query: 'cancer' })).rejects.toBeInstanceOf(CommandExecutionError); + + vi.stubGlobal('fetch', vi.fn().mockRejectedValueOnce(new Error('network down'))); + await expect(command.func({ query: 'cancer' })).rejects.toBeInstanceOf(CommandExecutionError); + + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce({ ok: true, status: 200, json: vi.fn().mockRejectedValue(new Error('bad json')) })); + await expect(command.func({ query: 'cancer' })).rejects.toBeInstanceOf(CommandExecutionError); + + vi.stubGlobal('fetch', vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123', '456'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } }))); + await expect(command.func({ query: 'cancer' })).rejects.toBeInstanceOf(CommandExecutionError); + }); +}); + +describe('pubmed article command', () => { + it('returns field/value rows for a valid article', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(xmlResponse(ARTICLE_XML))); + const rows = await getRegistry().get('pubmed/article').func({ pmid: '123' }); + expect(rows).toContainEqual({ field: 'PMID', value: '123' }); + expect(rows).toContainEqual({ field: 'DOI', value: '10.1000/detail' }); + expect(rows.find(row => row.field === 'Abstract').value).toContain('Background text'); + }); + + it('rejects invalid or missing articles with typed errors', async () => { + const command = getRegistry().get('pubmed/article'); + await expect(command.func({ pmid: 'abc' })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(xmlResponse('not 
found'))); + await expect(command.func({ pmid: '123' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + +describe('pubmed author command', () => { + it('searches author position and affiliation filters', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: ['123'] } })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } })); + vi.stubGlobal('fetch', fetchMock); + const rows = await getRegistry().get('pubmed/author').func({ name: 'Smith J', position: 'first', affiliation: 'Harvard', limit: 1 }); + expect(rows[0].pmid).toBe('123'); + const url = fetchMock.mock.calls[0][0]; + expect(url).toContain('Smith+J%5B1au%5D'); + expect(url).toContain('Harvard%5Bad%5D'); + }); + + it('rejects invalid author filters and empty results', async () => { + const command = getRegistry().get('pubmed/author'); + await expect(command.func({ name: '', position: 'any' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ name: 'Smith', position: 'middle' })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ esearchresult: { idlist: [] } }))); + await expect(command.func({ name: 'Smith' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + +describe('pubmed citations and related commands', () => { + it('returns cited-by summary rows with PMID round-trip ids', async () => { + vi.stubGlobal('fetch', vi.fn() + .mockResolvedValueOnce(jsonResponse({ linksets: [{ linksetdbs: [{ links: ['123'] }] }] })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } }))); + const rows = await getRegistry().get('pubmed/citations').func({ pmid: '999', direction: 'citedby', limit: 1 }); + expect(rows).toHaveLength(1); + expect(rows[0].pmid).toBe('123'); + }); + + it('rejects invalid citation args and empty relationships', async () => { + const command = getRegistry().get('pubmed/citations'); + await 
expect(command.func({ pmid: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ pmid: '999', direction: 'sideways' })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ linksets: [{ linksetdbs: [] }] }))); + await expect(command.func({ pmid: '999', direction: 'citedby' })).rejects.toBeInstanceOf(EmptyResultError); + }); + + it('returns related rows with optional score', async () => { + vi.stubGlobal('fetch', vi.fn() + .mockResolvedValueOnce(jsonResponse({ linksets: [{ linksetdbs: [{ links: [{ id: '999', score: 999 }, { id: '123', score: 42 }] }] }] })) + .mockResolvedValueOnce(jsonResponse({ result: { 123: SUMMARY_RESULT.result[123] } }))); + const rows = await getRegistry().get('pubmed/related').func({ pmid: '999', score: true, limit: 1 }); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ pmid: '123', score: 42 }); + }); + + it('rejects invalid related args and empty related links', async () => { + const command = getRegistry().get('pubmed/related'); + await expect(command.func({ pmid: 'bad' })).rejects.toBeInstanceOf(ArgumentError); + await expect(command.func({ pmid: '999', limit: 101 })).rejects.toBeInstanceOf(ArgumentError); + vi.stubGlobal('fetch', vi.fn().mockResolvedValueOnce(jsonResponse({ linksets: [{ linksetdbs: [{ links: [{ id: '999' }] }] }] }))); + await expect(command.func({ pmid: '999' })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); diff --git a/clis/pubmed/related.js b/clis/pubmed/related.js new file mode 100644 index 000000000..257af646a --- /dev/null +++ b/clis/pubmed/related.js @@ -0,0 +1,45 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { EmptyResultError } from '@jackwener/opencli/errors'; +import { RELATED_COLUMNS, eutilsFetch, fetchSummaryRows, requireBoundedInt, requirePmid } from './utils.js'; + +cli({ + site: 'pubmed', + name: 'related', + access: 'read', + description: 'Find articles related to a 
PubMed article', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'pmid', positional: true, required: true, help: 'PubMed ID, e.g. 37780221' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'score', type: 'boolean', default: false, help: 'Show similarity scores when available' }, + ], + columns: RELATED_COLUMNS, + func: async (args) => { + const pmid = requirePmid(args.pmid); + const limit = requireBoundedInt(args.limit, 20, 100); + const result = await eutilsFetch('elink', { + id: pmid, + dbfrom: 'pubmed', + cmd: 'neighbor_score', + linkname: 'pubmed_pubmed', + }, { label: 'pubmed related' }); + const rawLinks = result?.linksets?.[0]?.linksetdbs?.[0]?.links; + if (!Array.isArray(rawLinks) || rawLinks.length === 0) { + throw new EmptyResultError('pubmed related', `No related articles found for PMID ${pmid}.`); + } + const links = rawLinks + .map(link => typeof link === 'string' ? { id: link, score: null } : { id: String(link?.id ?? ''), score: Number.isFinite(Number(link?.score)) ? Number(link.score) : null }) + .filter(link => link.id && link.id !== pmid) + .slice(0, limit); + if (links.length === 0) { + throw new EmptyResultError('pubmed related', `No related articles found for PMID ${pmid}.`); + } + const rows = await fetchSummaryRows(links.map(link => link.id), 'pubmed related summary'); + return rows.map((row, index) => ({ + ...row, + score: args.score ? 
links[index].score : null, + })); + }, +}); diff --git a/clis/pubmed/search.js b/clis/pubmed/search.js new file mode 100644 index 000000000..fb7847466 --- /dev/null +++ b/clis/pubmed/search.js @@ -0,0 +1,75 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + SEARCH_COLUMNS, + buildSearchQuery, + eutilsFetch, + fetchSummaryRows, + requireBoundedInt, + requireChoice, + requireText, + requireYear, +} from './utils.js'; + +cli({ + site: 'pubmed', + name: 'search', + access: 'read', + description: 'Search PubMed articles with advanced filters', + domain: 'pubmed.ncbi.nlm.nih.gov', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'query', positional: true, required: true, help: 'Search query, e.g. "machine learning cancer"' }, + { name: 'limit', type: 'int', default: 20, help: 'Max results (1-100)' }, + { name: 'author', help: 'Filter by author name' }, + { name: 'journal', help: 'Filter by journal name' }, + { name: 'year-from', type: 'int', help: 'Filter publication year from' }, + { name: 'year-to', type: 'int', help: 'Filter publication year to' }, + { name: 'article-type', help: 'Filter by publication type, e.g. 
Review or Clinical Trial' }, + { name: 'has-abstract', type: 'boolean', default: false, help: 'Only include articles with abstracts' }, + { name: 'free-full-text', type: 'boolean', default: false, help: 'Only include free full text articles' }, + { name: 'humans-only', type: 'boolean', default: false, help: 'Only include human studies' }, + { name: 'english-only', type: 'boolean', default: false, help: 'Only include English articles' }, + { name: 'sort', default: 'relevance', choices: ['relevance', 'date', 'author', 'journal'], help: 'Sort by relevance, date, author, or journal' }, + ], + columns: SEARCH_COLUMNS, + func: async (args) => { + const query = requireText(args.query, 'query'); + const limit = requireBoundedInt(args.limit, 20, 100); + const yearFrom = requireYear(args['year-from'], 'year-from'); + const yearTo = requireYear(args['year-to'], 'year-to'); + const sort = requireChoice(args.sort, ['relevance', 'date', 'author', 'journal'], 'sort', 'relevance'); + const sortMap = { + relevance: '', + date: 'pub_date', + author: 'Author', + journal: 'JournalName', + }; + const searchQuery = buildSearchQuery(query, { + author: args.author, + journal: args.journal, + yearFrom, + yearTo, + articleType: args['article-type'], + hasAbstract: args['has-abstract'], + hasFullText: args['free-full-text'], + humanOnly: args['humans-only'], + englishOnly: args['english-only'], + }); + const esearch = await eutilsFetch('esearch', { + term: searchQuery, + retmax: limit, + usehistory: 'y', + sort: sortMap[sort], + }, { label: 'pubmed search' }); + const pmids = esearch?.esearchresult?.idlist; + if (!Array.isArray(pmids)) { + throw new CommandExecutionError('pubmed search did not return an id list', 'PubMed ESearch response shape may have changed.'); + } + if (pmids.length === 0) { + throw new EmptyResultError('pubmed search', `No articles matched "${query}".`); + } + return fetchSummaryRows(pmids, 'pubmed search summary'); + }, +}); diff --git a/clis/pubmed/utils.js 
// Shared helpers for the PubMed CLI commands (clis/pubmed/utils.js).
// Relies on the file-level import of ArgumentError and CommandExecutionError
// from '@jackwener/opencli/errors'.

/** Base URL of the NCBI E-utilities endpoints. */
export const EUTILS_BASE = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';
/** Column order for search-style listings. */
export const SEARCH_COLUMNS = ['rank', 'pmid', 'title', 'authors', 'journal', 'year', 'article_type', 'doi', 'url'];
/** Column order for citation-link listings. */
export const LINK_COLUMNS = ['rank', 'pmid', 'title', 'authors', 'journal', 'year', 'article_type', 'doi', 'url'];
/** Column order for related-article listings (adds `score`). */
export const RELATED_COLUMNS = ['rank', 'pmid', 'title', 'authors', 'journal', 'year', 'article_type', 'score', 'doi', 'url'];

// Earliest timestamp (ms) at which the next outgoing request may be sent.
let nextRequestSlotAt = 0;

/**
 * Trim a value and reject it when nothing is left.
 * @param {unknown} value - Raw argument value.
 * @param {string} label - Argument name used in the error message.
 * @returns {string} The trimmed text.
 * @throws {ArgumentError} When the trimmed text is empty.
 */
export function requireText(value, label) {
  const text = String(value ?? '').trim();
  if (!text) {
    throw new ArgumentError(`pubmed ${label} cannot be empty`);
  }
  return text;
}

/**
 * Validate that a value is a digits-only PubMed ID.
 * @param {unknown} value - Raw argument value.
 * @param {string} [label='pmid'] - Argument name used in error messages.
 * @returns {string} The PMID as a string.
 * @throws {ArgumentError} When the value is empty or not purely numeric.
 */
export function requirePmid(value, label = 'pmid') {
  const pmid = requireText(value, label);
  if (!/^\d+$/.test(pmid)) {
    throw new ArgumentError(`pubmed ${label} must be a numeric PMID`, 'Example: 37780221');
  }
  return pmid;
}

/**
 * Parse a positive integer argument and enforce an upper bound.
 * @param {unknown} value - Raw argument value; `null`/`undefined` selects `defaultValue`.
 * @param {number} defaultValue - Fallback used when `value` is nullish.
 * @param {number} maxValue - Largest accepted value (inclusive).
 * @param {string} [label='limit'] - Argument name used in error messages.
 * @returns {number} The validated integer (>= 1 and <= maxValue).
 * @throws {ArgumentError} When the value is not a positive integer or exceeds `maxValue`.
 */
export function requireBoundedInt(value, defaultValue, maxValue, label = 'limit') {
  const text = String(value ?? defaultValue).trim();
  if (!/^\d+$/.test(text)) {
    throw new ArgumentError(`pubmed ${label} must be a positive integer`);
  }
  const n = Number(text);
  if (!Number.isSafeInteger(n) || n < 1) {
    throw new ArgumentError(`pubmed ${label} must be a positive integer`);
  }
  if (n > maxValue) {
    throw new ArgumentError(`pubmed ${label} must be <= ${maxValue}`);
  }
  return n;
}

/**
 * Validate an optional publication-year argument.
 * @param {unknown} value - Raw argument value; `undefined`, `null` and `''` mean "not given".
 * @param {string} label - Argument name used in error messages.
 * @returns {number|undefined} The year (1800-3000), or `undefined` when not given.
 * @throws {ArgumentError} When the value is present but outside 1800-3000 or not an integer.
 */
export function requireYear(value, label) {
  if (value === undefined || value === null || value === '') {
    return undefined;
  }
  // The default (1900) is never used here because `value` is known to be non-nullish.
  const year = requireBoundedInt(value, 1900, 3000, label);
  if (year < 1800) {
    throw new ArgumentError(`pubmed ${label} must be >= 1800`);
  }
  return year;
}

/**
 * Validate that a value is one of a fixed set of choices.
 * @param {unknown} value - Raw argument value; nullish selects `defaultValue`.
 * @param {string[]} choices - Accepted values.
 * @param {string} label - Argument name used in the error message.
 * @param {string} [defaultValue] - Fallback used when `value` is nullish.
 * @returns {string} The accepted choice.
 * @throws {ArgumentError} When the value is not among `choices`.
 */
export function requireChoice(value, choices, label, defaultValue) {
  const text = String(value ?? defaultValue).trim();
  if (!choices.includes(text)) {
    throw new ArgumentError(`pubmed ${label} must be one of: ${choices.join(', ')}`);
  }
  return text;
}

/**
 * Build an E-utilities request URL for the `pubmed` database.
 * `retmode` defaults to `json`; `NCBI_API_KEY` and `NCBI_EMAIL` are appended
 * when set in the environment. Empty, null and undefined params are skipped.
 * @param {string} tool - E-utility name, e.g. `esearch`.
 * @param {Record<string, unknown>} [params={}] - Query parameters.
 * @returns {string} The absolute request URL.
 */
export function buildEutilsUrl(tool, params = {}) {
  const searchParams = new URLSearchParams();
  searchParams.set('db', 'pubmed');
  if (!params.retmode) {
    searchParams.set('retmode', 'json');
  }
  if (process.env.NCBI_API_KEY) {
    searchParams.set('api_key', process.env.NCBI_API_KEY);
  }
  if (process.env.NCBI_EMAIL) {
    searchParams.set('email', process.env.NCBI_EMAIL);
  }
  for (const [key, value] of Object.entries(params)) {
    if (value !== undefined && value !== null && value !== '') {
      searchParams.set(key, String(value));
    }
  }
  return `${EUTILS_BASE}/${tool}.fcgi?${searchParams.toString()}`;
}

/**
 * Space outgoing requests at least 360 ms apart (110 ms with an API key).
 * The slot is reserved synchronously, before awaiting, so callers that start
 * concurrently queue up behind each other instead of all waking at once.
 * Skipped entirely when NODE_ENV is `test`.
 */
async function waitForRateLimit() {
  if (process.env.NODE_ENV === 'test') {
    return;
  }
  const delayMs = process.env.NCBI_API_KEY ? 110 : 360;
  const now = Date.now();
  const slotAt = Math.max(now, nextRequestSlotAt);
  nextRequestSlotAt = slotAt + delayMs;
  if (slotAt > now) {
    await new Promise(resolve => setTimeout(resolve, slotAt - now));
  }
}

/** Render a caught value as a short detail string. */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Call an E-utilities endpoint and return the parsed payload.
 * @param {string} tool - E-utility name, e.g. `esummary`.
 * @param {Record<string, unknown>} [params={}] - Query parameters.
 * @param {{retmode?: string, label?: string}} [options] - `retmode` is `json` (default) or `xml`;
 *   `label` prefixes error messages.
 * @returns {Promise<any|string>} Parsed JSON, or the raw body text when `retmode` is `xml`.
 * @throws {CommandExecutionError} On network failure, non-2xx status, unreadable body,
 *   invalid JSON, or an E-utilities error envelope.
 */
export async function eutilsFetch(tool, params = {}, { retmode = 'json', label = 'PubMed E-utilities' } = {}) {
  const url = buildEutilsUrl(tool, { ...params, retmode });
  await waitForRateLimit();
  let response;
  try {
    response = await fetch(url);
  } catch (error) {
    throw new CommandExecutionError(`${label} request failed`, describeError(error));
  }
  if (!response.ok) {
    throw new CommandExecutionError(`${label} HTTP ${response.status}`, 'Check NCBI availability, request parameters, and optional NCBI_API_KEY.');
  }
  if (retmode === 'xml') {
    try {
      return await response.text();
    } catch (error) {
      // A dropped connection while streaming the body should surface as a typed error too.
      throw new CommandExecutionError(`${label} response could not be read`, describeError(error));
    }
  }
  let json;
  try {
    json = await response.json();
  } catch (error) {
    throw new CommandExecutionError(`${label} returned invalid JSON`, describeError(error));
  }
  assertNoEutilsError(json, label);
  return json;
}

/**
 * Throw when an E-utilities JSON payload carries an error.
 * Checks the top-level `error` field and the esearch `errorlist`
 * (`phrasesnotfound`, `fieldsnotfound`).
 * @param {any} json - Parsed E-utilities response.
 * @param {string} [label='PubMed E-utilities'] - Prefix for the error message.
 * @throws {CommandExecutionError} When an error is present.
 */
export function assertNoEutilsError(json, label = 'PubMed E-utilities') {
  const error = json?.error
    || json?.esearchresult?.errorlist?.phrasesnotfound?.join(', ')
    || json?.esearchresult?.errorlist?.fieldsnotfound?.join(', ');
  if (error) {
    throw new CommandExecutionError(`${label} returned an error`, String(error));
  }
}

/**
 * Build the canonical PubMed page URL for a PMID.
 * @param {string|number} pmid - PubMed ID.
 * @returns {string} The article URL.
 */
export function buildPubMedUrl(pmid) {
  return `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
}

// The five predefined XML entities.
const NAMED_XML_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
const MAX_CODE_POINT = 0x10ffff;

/**
 * Decode the predefined XML entities and numeric character references.
 * Decoding is done in a single pass, so the output of one replacement is never
 * re-scanned (`&amp;lt;` yields `&lt;`, not `<`). Numeric references outside
 * the Unicode range are left untouched instead of throwing.
 * @param {unknown} value - Text to decode; nullish is treated as `''`.
 * @returns {string} The decoded text.
 */
export function decodeXmlEntities(value) {
  return String(value ?? '').replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (match, body) => {
    if (body[0] !== '#') {
      return NAMED_XML_ENTITIES[body];
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = isHex ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
    if (!Number.isInteger(codePoint) || codePoint > MAX_CODE_POINT) {
      return match;
    }
    return String.fromCodePoint(codePoint);
  });
}

/**
 * Decode XML entities, collapse whitespace runs to one space, and trim.
 * @param {unknown} value - Text to clean.
 * @returns {string} The cleaned text.
 */
export function cleanText(value) {
  return decodeXmlEntities(value).replace(/\s+/g, ' ').trim();
}

/**
 * Clean a value and shorten it to at most `maxLength` characters.
 * An ellipsis (`...`) is appended when there is room for it; for limits of
 * three characters or fewer the text is cut without an ellipsis.
 * @param {unknown} value - Text to shorten.
 * @param {number} maxLength - Maximum length of the result.
 * @returns {string} The cleaned, possibly truncated text.
 */
export function truncateText(value, maxLength) {
  const text = cleanText(value);
  if (!text || text.length <= maxLength) {
    return text;
  }
  if (maxLength <= 3) {
    return text.slice(0, Math.max(0, maxLength));
  }
  return `${text.slice(0, maxLength - 3)}...`;
}

/** Escape a string for literal use inside a RegExp source. */
function escapeRegExp(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Build a pattern matching `<tag ...>inner</tag>` and capturing `inner`. */
function elementPattern(tag, flags) {
  const name = escapeRegExp(tag);
  return new RegExp(`<${name}\\b[^>]*>([\\s\\S]*?)<\\/${name}>`, flags);
}

/** Replace nested markup with spaces and clean the remaining text. */
function stripMarkup(fragment) {
  return cleanText(fragment.replace(/<[^>]+>/g, ' '));
}

/**
 * Return the cleaned text content of the first `<tag>` element.
 * @param {unknown} xml - XML text to search.
 * @param {string} tag - Element name (matched case-insensitively).
 * @returns {string} The text content, or `''` when the element is absent.
 */
export function extractFirst(xml, tag) {
  const match = String(xml ?? '').match(elementPattern(tag, 'i'));
  return match ? stripMarkup(match[1]) : '';
}

/**
 * Return the cleaned text content of every `<tag>` element, in document order.
 * @param {unknown} xml - XML text to search.
 * @param {string} tag - Element name (matched case-insensitively).
 * @returns {string[]} One entry per matching element.
 */
export function extractAll(xml, tag) {
  const matches = String(xml ?? '').matchAll(elementPattern(tag, 'gi'));
  return Array.from(matches, match => stripMarkup(match[1]));
}

/**
 * Return the decoded value of an attribute on the first `<tag>` that carries it.
 * Both double- and single-quoted attribute values are recognised.
 * @param {unknown} xml - XML text to search.
 * @param {string} tag - Element name.
 * @param {string} attr - Attribute name.
 * @returns {string} The attribute value, or `''` when not found.
 */
export function extractAttribute(xml, tag, attr) {
  const pattern = new RegExp(`<${escapeRegExp(tag)}\\b[^>]*\\b${escapeRegExp(attr)}=(?:"([^"]*)"|'([^']*)')`, 'i');
  const match = String(xml ?? '').match(pattern);
  return match ? decodeXmlEntities(match[1] ?? match[2]) : '';
}

/**
 * Format an esummary author list as a short, comma-separated string.
 * @param {Array<object>} authorList - Entries with `name`, `collectivename`, or `lastname`/`initials`.
 * @param {number} [maxAuthors=3] - Names shown before `et al.` is appended.
 * @returns {string} The formatted names, or `''` when the list is empty or not an array.
 */
export function extractAuthors(authorList, maxAuthors = 3) {
  if (!Array.isArray(authorList) || authorList.length === 0) {
    return '';
  }
  const names = authorList
    .map(author => author?.name || author?.collectivename || [author?.lastname, author?.initials].filter(Boolean).join(' '))
    .filter(Boolean);
  const shown = names.slice(0, maxAuthors);
  if (names.length > maxAuthors) {
    shown.push('et al.');
  }
  return shown.join(', ');
}

/**
 * Pick the DOI out of an esummary `articleids` array.
 * @param {Array<{idtype?: string, value?: string}>} articleIds - Article identifiers.
 * @returns {string} The DOI, or `''` when none is listed.
 */
export function extractDoi(articleIds) {
  if (!Array.isArray(articleIds)) {
    return '';
  }
  const doi = articleIds.find(id => String(id?.idtype ?? '').toLowerCase() === 'doi');
  return String(doi?.value ?? '').trim();
}

// Publication types in display priority order (most specific first).
const ARTICLE_TYPE_PRIORITY = ['Systematic Review', 'Meta-Analysis', 'Review', 'Randomized Controlled Trial', 'Clinical Trial', 'Case Reports', 'Journal Article'];

/**
 * Choose the single most informative publication type from a list.
 * @param {Array<string|{value?: string}>} types - Publication types as strings or `{ value }` objects.
 * @returns {string} The highest-priority known type, else the first listed type, else `'Journal Article'`.
 */
export function articleTypeFromList(types) {
  const values = Array.isArray(types)
    ? types
      .map(type => (typeof type === 'string' ? type : type?.value))
      // Non-string values cannot be compared case-insensitively; ignore them.
      .filter(type => typeof type === 'string' && type)
    : [];
  for (const wanted of ARTICLE_TYPE_PRIORITY) {
    const found = values.find(type => type.toLowerCase() === wanted.toLowerCase());
    if (found) {
      return found;
    }
  }
  return values[0] || 'Journal Article';
}

/**
 * Convert one esummary article record into a listing row.
 * @param {object} article - esummary record.
 * @param {number} rank - 1-based position in the listing.
 * @param {string|number} [pmid=article.uid] - PMID to report for the row.
 * @returns {object} Row with rank, pmid, title, authors, journal, year, article_type, doi and url.
 */
export function summaryToRow(article, rank, pmid = article?.uid) {
  const id = String(pmid ?? article?.uid ?? '').trim();
  return {
    rank,
    pmid: id,
    title: truncateText(String(article?.title ?? '').replace(/\.$/, ''), 120),
    authors: extractAuthors(article?.authors, 3),
    journal: truncateText(article?.fulljournalname || article?.source || '', 60),
    // pubdate looks like "2023 Oct 5"; the first token is the year.
    year: String(article?.pubdate ?? '').split(' ')[0] || '',
    article_type: articleTypeFromList(article?.pubtype),
    doi: extractDoi(article?.articleids),
    url: buildPubMedUrl(id),
  };
}

/**
 * Map requested PMIDs to rows, refusing to return a partial result.
 * @param {string[]} pmids - PMIDs in the order they should be listed.
 * @param {any} result - Parsed esummary response (`{ result: { [pmid]: record } }`).
 * @param {string} commandLabel - Prefix for error messages.
 * @returns {object[]} One row per PMID, ranked by position.
 * @throws {CommandExecutionError} When the payload is unreadable or any PMID has no record.
 */
export function ensureCompleteSummaryRows(pmids, result, commandLabel) {
  if (!result || typeof result !== 'object' || !result.result || typeof result.result !== 'object') {
    throw new CommandExecutionError(`${commandLabel} returned an unreadable summary payload`);
  }
  return pmids.map((pmid, index) => {
    const article = result.result[pmid];
    if (!article) {
      throw new CommandExecutionError(`${commandLabel} omitted summaries for one or more PMIDs`, 'Refusing to return a partial result set.');
    }
    return summaryToRow(article, index + 1, pmid);
  });
}

/**
 * Compose a PubMed search term from a free-text query and optional filters.
 * @param {string} query - Free-text query (required).
 * @param {object} [filters={}] - Optional `author`, `journal`, `yearFrom`, `yearTo`, `articleType`,
 *   `hasAbstract`, `hasFullText`, `humanOnly`, `englishOnly`.
 * @returns {string} Terms joined with ` AND `.
 * @throws {ArgumentError} When the query is empty or `yearFrom` is later than `yearTo`.
 */
export function buildSearchQuery(query, filters = {}) {
  const terms = [requireText(query, 'query')];
  if (filters.author) {
    terms.push(`${requireText(filters.author, 'author')}[Author]`);
  }
  if (filters.journal) {
    terms.push(`${requireText(filters.journal, 'journal')}[Journal]`);
  }
  if (filters.yearFrom || filters.yearTo) {
    // An open-ended range is closed with 1800 or the current year.
    const from = filters.yearFrom || 1800;
    const to = filters.yearTo || new Date().getFullYear();
    if (from > to) {
      throw new ArgumentError('pubmed year-from must be <= year-to');
    }
    terms.push(`${from}:${to}[PDAT]`);
  }
  if (filters.articleType) {
    terms.push(`${requireText(filters.articleType, 'article-type')}[PT]`);
  }
  if (filters.hasAbstract) {
    terms.push('hasabstract[text]');
  }
  if (filters.hasFullText) {
    terms.push('free full text[sb]');
  }
  if (filters.humanOnly) {
    terms.push('humans[mesh]');
  }
  if (filters.englishOnly) {
    terms.push('english[lang]');
  }
  return terms.join(' AND ');
}

/**
 * Extract article metadata from an efetch XML document.
 * @param {unknown} xml - efetch response body for a single article.
 * @param {string} pmid - PMID the document was requested for.
 * @returns {object} Parsed fields: pmid, title, abstract, authors (array), journal, year, date,
 *   doi, pmc, article_type, language, mesh_terms, keywords, url.
 * @throws {CommandExecutionError} When the payload is empty or carries an error element.
 */
export function parseArticleXml(xml, pmid) {
  const text = String(xml ?? '');
  // NOTE(review): this guard (condition and message) is reconstructed; confirm
  // it against the committed file before merging.
  if (!text || /<ERROR\b/i.test(text)) {
    throw new CommandExecutionError('pubmed article returned an unreadable XML payload');
  }
  const articleBlock = text.match(/<Article\b[^>]*>([\s\S]*?)<\/Article>/i)?.[1] || text;
  const journalBlock = articleBlock.match(/<Journal\b[^>]*>([\s\S]*?)<\/Journal>/i)?.[1] || '';
  const journalIssue = journalBlock.match(/<JournalIssue\b[^>]*>([\s\S]*?)<\/JournalIssue>/i)?.[1] || '';
  const pubDate = journalIssue.match(/<PubDate\b[^>]*>([\s\S]*?)<\/PubDate>/i)?.[1] || '';
  const authors = Array.from(text.matchAll(/<Author\b[^>]*>([\s\S]*?)<\/Author>/gi), match => {
    const block = match[1];
    return extractFirst(block, 'CollectiveName')
      || [extractFirst(block, 'LastName'), extractFirst(block, 'ForeName') || extractFirst(block, 'Initials')].filter(Boolean).join(' ');
  }).filter(Boolean);
  const pubTypes = extractAll(articleBlock, 'PublicationType');
  // Cited references carry their own <ArticleId> entries; drop them so an
  // article without a DOI/PMC ID does not inherit one from its reference list.
  const ownRecord = text.replace(/<ReferenceList\b[\s\S]*<\/ReferenceList>/i, '');
  const doi = ownRecord.match(/<ArticleId\b[^>]*IdType="doi"[^>]*>([\s\S]*?)<\/ArticleId>/i)?.[1] || '';
  const pmc = ownRecord.match(/<ArticleId\b[^>]*IdType="pmc"[^>]*>([\s\S]*?)<\/ArticleId>/i)?.[1] || '';
  return {
    pmid,
    title: extractFirst(articleBlock, 'ArticleTitle'),
    abstract: extractAll(articleBlock, 'AbstractText').join(' '),
    authors,
    journal: extractFirst(journalBlock, 'Title') || extractFirst(journalBlock, 'ISOAbbreviation'),
    year: extractFirst(pubDate, 'Year') || extractFirst(text, 'MedlineDate').slice(0, 4),
    date: [extractFirst(pubDate, 'Year'), extractFirst(pubDate, 'Month'), extractFirst(pubDate, 'Day')].filter(Boolean).join(' '),
    doi: cleanText(doi),
    pmc: cleanText(pmc),
    article_type: articleTypeFromList(pubTypes),
    language: extractFirst(articleBlock, 'Language'),
    mesh_terms: extractAll(text, 'DescriptorName').slice(0, 10).join(', '),
    keywords: extractAll(text, 'Keyword').slice(0, 10).join(', '),
    url: buildPubMedUrl(pmid),
  };
}

/**
 * Fetch esummary records for a list of PMIDs and convert them to rows.
 * @param {string[]} pmids - PMIDs to summarise, in listing order.
 * @param {string} commandLabel - Prefix for error messages.
 * @returns {Promise<object[]>} One row per PMID; `[]` for an empty list (no request is made).
 * @throws {CommandExecutionError} On request failure or an incomplete summary payload.
 */
export async function fetchSummaryRows(pmids, commandLabel) {
  if (pmids.length === 0) {
    return [];
  }
  const result = await eutilsFetch('esummary', { id: pmids.join(',') }, { label: commandLabel });
  return ensureCompleteSummaryRows(pmids, result, commandLabel);
}
cited-by or reference relationships | +| `opencli pubmed related` | Find related PubMed articles | + +## Usage Examples + +```bash +# Search articles +opencli pubmed search "machine learning cancer" --year-from 2023 --has-abstract --limit 10 + +# Search by author +opencli pubmed author "Smith J" --position first --affiliation Harvard + +# Read one article by PMID +opencli pubmed article 37780221 --full-abstract + +# Citation relationships +opencli pubmed citations 37780221 --direction citedby --limit 20 +opencli pubmed citations 37780221 --direction references --limit 20 + +# Related articles with scores +opencli pubmed related 37780221 --score +``` + +## Output + +Listing commands return `pmid`, `title`, `authors`, `journal`, `year`, `article_type`, `doi`, and `url` where available. The `pmid` column is the stable identifier for `opencli pubmed article <pmid>`. + +`article` returns field/value rows for title, authors, journal, year/date, DOI/PMC ID, MeSH terms, keywords, abstract, and PubMed URL. + +## Prerequisites + +- No browser required. Commands use the NCBI E-utilities public API. +- Optional: set `NCBI_API_KEY` for the higher NCBI rate limit. +- Optional: set `NCBI_EMAIL` so NCBI can identify your tool usage. + +```bash +export NCBI_API_KEY=YOUR_API_KEY +export NCBI_EMAIL=you@example.com +``` + +## Failure Semantics + +- Invalid `pmid`, `limit`, year, `sort`, `position`, or `direction` values fail before network access with `ArgumentError`. +- HTTP errors, fetch failures, invalid JSON, E-utilities error envelopes, and partial summary payloads fail with `CommandExecutionError`. +- Valid no-result searches and missing relationships fail with `EmptyResultError`. diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 3a57fd715..30bdb0876 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -95,6 +95,7 @@ Run `opencli list` for the live registry.
| **[yahoo-finance](./browser/yahoo-finance.md)** | `quote` | 🌐 Public | | **[arxiv](./browser/arxiv.md)** | `search` `paper` | 🌐 Public | | **[dblp](./browser/dblp.md)** | `search` `paper` | 🌐 Public | +| **[pubmed](./browser/pubmed.md)** | `search` `article` `author` `citations` `related` | 🌐 Public | | **[openreview](./browser/openreview.md)** | `search` `venue` `paper` `reviews` | 🌐 Public | | **[paperreview](./browser/paperreview.md)** | `submit` `review` `feedback` | 🌐 Public | | **[barchart](./browser/barchart.md)** | `quote` `options` `greeks` `flow` | 🌐 Public |