A NodeJS script that scrapes data from X public profiles.
- cp config/zproxy.json.example config/zproxy.json
- Node
- Node Version Manager Tool - nvm
- Node Version: 18.1.0
- Puppeteer
- Node library which provides a high-level API to control Chrome
- Proxy: Residential Zone
- TypeScript
- TypeScript is JavaScript with syntax for types. Doc
- Node.js with TypeScript
➜ x-scraper git:(main) ✗ tree -I 'node_modules|build'
.
├── LICENSE
├── README.md
├── config
│ ├── config.json
│ ├── cookies.json
│ ├── zproxy.json
│ └── zproxy.json.example
├── log
│ └── development.log
├── package-lock.json
├── package.json
├── src
│ ├── environment
│ │ ├── config.ts
│ │ └── zproxy.ts
│ ├── index.ts
│ ├── pages
│ │ ├── identifiers.ts
│ │ ├── index.ts
│ │ └── pageParse.ts
│ └── utils
│ ├── index.ts
│ └── scrapedLogger.ts
├── tsconfig.json
├── types
│ └── index.d.ts
└── yarn.lock
8 directories, 20 files
build: The latest generated JavaScript code.
config: Configuration.
src: The main code of the scraper, written in TypeScript.
types: Type or interface definitions.
npm run start:dev
Starts the application in development using nodemon and ts-node to do cold reloading.
npm run build
Builds the app into the build directory, cleaning that folder first.
npm run start
Starts the app in production by first building the project with npm run build, and then executing the compiled JavaScript at build/index.js.
config/cookies.json
┌─────────┬───────┬───────────────────────────────┐
│ (index) │ tries │ identifier │
├─────────┼───────┼───────────────────────────────┤
│ 0 │ 0 │ { identifier: 'deepseek_ai' } │
└─────────┴───────┴───────────────────────────────┘
>> Queue Size: 1
{ tries: 0, identifier: { identifier: 'deepseek_ai' } }
// Cookies loaded successfully.
// Visiting URL: https://x.com/deepseek_ai
// Scraped Data: {
"rawData": {
"__typename": "User",
"id": "VXNlcjoxNzE0NTgwOTYyNTY5NTg4NzM2",
"rest_id": "1714580962569588736",
"affiliates_highlighted_label": {},
"has_graduated_access": true,
"parody_commentary_fan_label": "None",
"is_blue_verified": true,
"profile_image_shape": "Circle",
"legacy": {
"following": true,
"can_dm": false,
"can_media_tag": true,
"created_at": "Wed Oct 18 09:55:45 +0000 2023",
"default_profile": true,
"default_profile_image": false,
"description": "Unravel the mystery of AGI with curiosity. Answer the essential question with long-termism.",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "deepseek.com",
"expanded_url": "https://www.deepseek.com/",
"url": "https://t.co/Un4k2rqn4o",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 32,
"followers_count": 920342,
"friends_count": 0,
"has_custom_timelines": false,
"is_translator": false,
"listed_count": 2741,
"location": "",
"media_count": 84,
"name": "DeepSeek",
"normal_followers_count": 920342,
"pinned_tweet_ids_str": [
"1884103376868368589"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/1714580962569588736/1698208997",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1717417613775757312/Uk1zNOj4_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "deepseek_ai",
"statuses_count": 131,
"translator_type": "none",
"url": "https://t.co/Un4k2rqn4o",
"verified": false,
"want_retweets": true,
"withheld_in_countries": []
},
"tipjar_settings": {},
"legacy_extended_profile": {},
"is_profile_translatable": false,
"has_hidden_subscriptions_on_profile": false,
"verification_info": {
"is_identity_verified": true,
"reason": {
"description": {
"text": "This account is verified. Learn more",
"entities": [
{
"from_index": 26,
"to_index": 36,
"ref": {
"url": "https://help.twitter.com/managing-your-account/about-twitter-verified-accounts",
"url_type": "ExternalUrl"
}
}
]
},
"verified_since_msec": "1699427323764"
}
},
"highlights_info": {
"can_highlight_tweets": true,
"highlighted_tweets": "32"
},
"user_seed_tweet_count": 0,
"business_account": {},
"creator_subscriptions_count": 0
},
"name": "DeepSeek",
"pictureUrl": "https://pbs.twimg.com/profile_images/1717417613775757312/Uk1zNOj4_200x200.jpg",
"following": 0,
"followers": 920342,
"dateCreated": "2023-10-18T09:55:45.000Z",
"description": "Unravel the mystery of AGI with curiosity. Answer the essential question with long-termism.",
"website": "https://t.co/Un4k2rqn4o",
"joinedDate": "October 2023"
}