Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/mcp/local-tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
scrapeMedia,
scrapeListMembers,
scrapeBookmarks,
scrapeArticle,
scrapeNotifications,
scrapeTrending,
scrapeSpaces,
Expand Down Expand Up @@ -630,6 +631,11 @@ export async function x_get_bookmarks({ limit = 100 }) {
return scrapeBookmarks(pg, { limit });
}

/**
 * Tool handler: read an X Article through the shared browser session.
 *
 * @param {{ url: string }} params - `url` is forwarded unchanged to `scrapeArticle`.
 * @returns {Promise<*>} Whatever `scrapeArticle` resolves with.
 */
export async function x_read_article({ url }) {
  const session = await ensureBrowser();
  const { page: articlePage } = session;
  return scrapeArticle(articlePage, url);
}

export async function x_clear_bookmarks() {
const { page: pg } = await ensureBrowser();
await pg.goto('https://x.com/i/bookmarks', { waitUntil: 'networkidle2' });
Expand Down Expand Up @@ -1369,6 +1375,7 @@ export const toolMap = {
x_reply,
x_bookmark,
x_get_bookmarks,
x_read_article,
x_clear_bookmarks,
x_auto_like,
// Discovery
Expand Down
11 changes: 11 additions & 0 deletions src/mcp/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,17 @@ const TOOLS = [
required: ['title', 'body'],
},
},
// Tool descriptor for `x_read_article`: declares a single required string
// input, `url`, which may be either a tweet URL or a direct article URL.
// NOTE(review): presumably dispatched to the handler of the same name
// registered in the local tool map — confirm against the dispatcher.
{
name: 'x_read_article',
description: 'Read the full content of an X Article. Accepts a tweet URL or direct article URL. For quote tweets containing articles, discovers the real article by clicking through.',
inputSchema: {
type: 'object',
properties: {
url: { type: 'string', description: 'Tweet URL (x.com/user/status/ID) or article URL (x.com/user/article/ID)' },
},
required: ['url'],
},
},
// ====== Creator ======
{
name: 'x_creator_analytics',
Expand Down
2 changes: 2 additions & 0 deletions src/scrapers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export const {
scrapeMedia,
scrapeListMembers,
scrapeBookmarks,
scrapeArticle,
scrapeNotifications,
scrapeTrending,
scrapeCommunityMembers,
Expand Down Expand Up @@ -313,6 +314,7 @@ export default {
scrapeMedia,
scrapeListMembers,
scrapeBookmarks,
scrapeArticle,
scrapeNotifications,
scrapeTrending,
scrapeCommunityMembers,
Expand Down
97 changes: 97 additions & 0 deletions src/scrapers/twitter/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,102 @@ export async function scrapeBookmarks(page, options = {}) {
return bookmarks.slice(0, limit);
}

// ============================================================================
// Article Scraper
// ============================================================================

/**
 * Find a link to an X Article in the document currently loaded in `page`.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<string>} Absolute href of the first anchor whose URL ends
 *   in `/article/<digits>`, or `''` when no such anchor exists.
 */
async function findArticleLinkOnPage(page) {
  return page.evaluate(() => {
    const links = [...document.querySelectorAll('a[href*="/article/"]')];
    const match = links.find((a) => /\/article\/\d+$/.test(a.href));
    return match?.href || '';
  });
}

/**
 * Strip the header (author, timestamp, stats) and footer (author bio) from the
 * rendered text of an article read view.
 *
 * Heuristics:
 *  - the body starts at the first line longer than 100 characters among the
 *    first 15 lines (or at line 0 when none is found);
 *  - the body ends at the earliest footer marker (author name, `@handle`, or
 *    the literal `Following`) found among the last 9 lines.
 *
 * @param {string} fullText - Raw `innerText` of the read view
 * @param {string} authorName - Display name, `''` when unknown
 * @param {string} authorHandle - Handle without `@`, `''` when unknown
 * @returns {string} Cleaned, trimmed article text
 */
function stripArticleHeaderAndFooter(fullText, authorName, authorHandle) {
  const lines = String(fullText ?? '').split('\n');

  const headScanLimit = Math.min(lines.length, 15);
  let startIdx = 0;
  for (let i = 0; i < headScanLimit; i++) {
    if (lines[i].length > 100) {
      startIdx = i;
      break;
    }
  }

  // Only non-empty author values may act as markers: an unknown author ('')
  // would otherwise match every blank line and truncate the article body.
  const footerMarkers = new Set(['Following']);
  if (authorName) footerMarkers.add(authorName);
  if (authorHandle) footerMarkers.add(`@${authorHandle}`);

  // Never scan above the body start, so that in short articles the header's
  // own author line cannot be mistaken for the footer.
  const scanFloor = Math.max(startIdx, lines.length - 10);
  let endIdx = lines.length;
  for (let i = lines.length - 1; i > scanFloor; i--) {
    if (footerMarkers.has(lines[i])) endIdx = i;
  }

  return lines.slice(startIdx, endIdx).join('\n').trim();
}

/**
 * Scrape the full content of an X Article.
 *
 * Accepts either a tweet URL (x.com/user/status/ID) or a direct article URL
 * (x.com/user/article/ID). For tweet URLs, discovers the article link by
 * checking for anchor tags or clicking the article-cover-image element.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {string} url - Tweet or article URL
 * @param {object} [options]
 * @param {number} [options.scrollSteps=25] - Number of 800px scroll steps used to trigger lazy loading
 * @param {number} [options.scrollDelayMs=500] - Pause between scroll steps, in milliseconds
 * @param {number} [options.navigationTimeoutMs=30000] - Timeout passed to `page.goto`
 * @returns {Promise<{ title: string, author: string, handle: string, text: string, images: string[], url: string }>}
 * @throws {TypeError} When `url` is not a non-empty string
 * @throws {Error} When the tweet has no article, or the article read view is missing
 */
export async function scrapeArticle(page, url, options = {}) {
  if (typeof url !== 'string' || url.trim() === '') {
    throw new TypeError('scrapeArticle requires a tweet or article URL string');
  }

  const {
    scrollSteps = 25,
    scrollDelayMs = 500,
    navigationTimeoutMs = 30000,
  } = options;
  const navigation = { waitUntil: 'networkidle2', timeout: navigationTimeoutMs };
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  let articleUrl = url;

  // If given a tweet URL, discover the article URL first
  if (url.includes('/status/') && !url.includes('/article/')) {
    await page.goto(url, navigation);
    await randomDelay(3000, 5000);

    // Try finding a direct article link on the page
    articleUrl = await findArticleLinkOnPage(page);

    // Fallback: click article-cover-image (handles quoted-tweet articles)
    if (!articleUrl) {
      const cover = await page.$('[data-testid="article-cover-image"]');
      if (cover) {
        await cover.click();
        await pause(5000);
        articleUrl = await findArticleLinkOnPage(page);
        if (!articleUrl) {
          // The click may have navigated straight into the article reader.
          const hasReadView = await page.evaluate(() =>
            !!document.querySelector('[data-testid="twitterArticleReadView"]'));
          if (hasReadView) articleUrl = page.url();
        }
      }
    }

    if (!articleUrl) throw new Error('No article found on this tweet');
  }

  await page.goto(articleUrl, navigation);
  await randomDelay(3000, 5000);

  // Scroll to load lazy content
  for (let step = 0; step < scrollSteps; step++) {
    await page.evaluate(() => window.scrollBy(0, 800));
    await pause(scrollDelayMs);
  }

  // Collect raw data in the browser; text cleaning happens in Node afterwards.
  const raw = await page.evaluate(() => {
    const readView = document.querySelector('[data-testid="twitterArticleReadView"]');
    if (!readView) return null;

    const title = document.querySelector('[data-testid="twitter-article-title"]')?.textContent?.trim() || '';
    const userNameEl = document.querySelector('[data-testid="User-Name"]');
    const author = userNameEl?.querySelector('span')?.textContent?.trim() || '';
    const handle = userNameEl?.querySelector('a[href^="/"]')?.getAttribute('href')?.replace('/', '') || '';

    // Filter out profile images
    const images = [...readView.querySelectorAll('img')]
      .map((img) => img.src)
      .filter((src) => src.includes('twimg')
        && !src.includes('_normal.')
        && !src.includes('_bigger.')
        && !src.includes('profile_images'));

    return { title, author, handle, fullText: readView.innerText, images, url: location.href };
  });

  if (!raw) throw new Error('Article content not found');

  return {
    title: raw.title,
    author: raw.author,
    handle: raw.handle,
    text: stripArticleHeaderAndFooter(raw.fullText, raw.author, raw.handle),
    images: raw.images,
    url: raw.url,
  };
}

// ============================================================================
// Notifications Scraper
// ============================================================================
Expand Down Expand Up @@ -943,6 +1039,7 @@ export default {
scrapeMedia,
scrapeListMembers,
scrapeBookmarks,
scrapeArticle,
scrapeNotifications,
scrapeTrending,
scrapeCommunityMembers,
Expand Down