-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
33 lines (27 loc) · 1.29 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import puppeteer from 'puppeteer';
/**
 * Opens `url` in a freshly launched browser and extracts one record per
 * `article.post-block` element found on the page.
 *
 * The browser is closed before this function settles, whether the scrape
 * succeeds or any step (opening the page, navigating, extracting) fails.
 *
 * @param {string} url - Address of the listing page to open.
 * @returns {Promise<Array<object>>} Records with the keys `title`, `link`,
 *   `category`, `author`, `date` and `excerpt`. A field is `undefined` when
 *   the element it is read from is absent from the article.
 * @throws Rejects with the error raised by the failing browser/page call.
 */
async function scrapeData(url) {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is released even when navigation or
  // extraction rejects; otherwise a failed scrape would leave it running.
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // The callback is executed by page.evaluate, so every helper it needs
    // is defined inside it rather than captured from this scope.
    return await page.evaluate(() => {
      const trimmedText = (root, selector) =>
        root.querySelector(selector)?.textContent.trim();

      return Array.from(
        document.querySelectorAll('article.post-block'),
        (element) => {
          // Title and link come from the same anchor; look it up once.
          const titleAnchor = element.querySelector('h2.post-block__title a');
          return {
            title: titleAnchor?.textContent.trim(),
            link: titleAnchor?.href,
            category: trimmedText(element, '.article__primary-category__link'),
            author: trimmedText(element, '.river-byline__authors a'),
            date: trimmedText(element, '.river-byline__full-date-time'),
            excerpt: trimmedText(element, '.post-block__content'),
          };
        },
      );
    });
  } finally {
    await browser.close();
  }
}
/**
 * Scrapes the TechCrunch listing page for one category.
 *
 * `category` is interpolated into the URL path as given (no escaping), and
 * the resulting address is handed to `scrapeData`.
 *
 * @param {string} category - Category path segment placed after `/category/`.
 * @returns {Promise<*>} Whatever `scrapeData` resolves with for that page.
 */
async function getByCategory(category) {
  return scrapeData(`https://techcrunch.com/category/${category}/`);
}
/**
 * Scrapes the TechCrunch listing page for one tag.
 *
 * `tag` is interpolated into the URL path as given (no escaping), and the
 * resulting address is handed to `scrapeData`.
 *
 * @param {string} tag - Tag path segment placed after `/tag/`.
 * @returns {Promise<*>} Whatever `scrapeData` resolves with for that page.
 */
async function getByTag(tag) {
  return scrapeData(`https://techcrunch.com/tag/${tag}/`);
}
export { getByCategory, getByTag }