forked from chahakshahcs5/nicepage-template-scrapper
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape.js
105 lines (99 loc) · 2.66 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import scrape from "website-scraper";
import archiver from "archiver";
import fs from "fs";
import dotenv from "dotenv";
import { Readable } from "stream";
// Load environment variables via dotenv so that process.env lookups later in
// this file (START_INDEX, END_INDEX, FOLDER_ID) can be supplied from a .env file.
// NOTE(review): ES module imports are hoisted, so ./gdrive.js is evaluated
// before this call runs even though its import is written below it — confirm
// gdrive.js does not read process.env at module load time.
dotenv.config();
import { uploadFile } from "./gdrive.js";
/**
 * Scraper plugin that logs scraper errors and discards 404 responses.
 *
 * It is handed to the scraper through the `plugins` option; the scraper is
 * expected to call `apply` with its action-registration function.
 */
class MyPlugin {
  /**
   * Registers this plugin's handlers.
   *
   * @param {Function} registerAction - Invoked as
   *   `registerAction(actionName, asyncHandler)` once per handler.
   */
  apply(registerAction) {
    // Report scraper errors on stderr; nothing is returned or rethrown.
    const reportError = async ({ error }) => {
      console.error(error);
    };

    // Resolve to null for "404 Not Found" responses (presumably telling the
    // scraper not to store the resource — confirm against the scraper docs);
    // every other status resolves to the response body unchanged.
    const selectBody = async ({ response }) => {
      const isNotFound = response.statusCode === 404;
      return isNotFound ? null : response.body;
    };

    registerAction("error", reportError);
    registerAction("afterResponse", selectBody);
  }
}
/**
 * Downloads one page (and the resources it links to, one level deep) into
 * `./templates/<directory>`.
 *
 * @param {string} url - Absolute URL of the page to scrape.
 * @param {string} directory - Folder name created under `./templates/`.
 * @returns {Promise<*>} Whatever `scrape` resolves with, or `undefined` when
 *   scraping throws (the error is logged rather than propagated).
 */
const scrapePage = async (url, directory) => {
  // Substrings that force a URL to be downloaded, checked before any exclusion.
  // NOTE(review): "capp.nicepge.com" may be a misspelling of
  // "capp.nicepage.com" — confirm; the literal is kept unchanged here.
  const alwaysAllowed = ["capp.nicepge.com", "nicepage.io/nicepage.css"];
  // Substrings that exclude a URL outright.
  const alwaysBlocked = ["https://nicepage", "freepik"];

  // Returns true when the scraper should fetch `candidate`.
  const shouldDownload = (candidate) => {
    if (alwaysAllowed.some((needle) => candidate.includes(needle))) {
      return true;
    }
    if (alwaysBlocked.some((needle) => candidate.includes(needle))) {
      return false;
    }
    // nicepage.io URLs are only kept when they contain "html".
    const isNonHtmlNicepageIo =
      candidate.includes("nicepage.io") && !candidate.includes("html");
    return !isNonHtmlNicepageIo;
  };

  try {
    const options = {
      urls: [url],
      directory: `./templates/${directory}`,
      urlFilter: shouldDownload,
      recursive: true,
      maxRecursiveDepth: 1,
      plugins: [new MyPlugin()],
      request: {
        headers: {
          referer: "https://nicepage.com",
        },
        timeout: {
          request: 10000,
        },
      },
    };
    const result = await scrape(options);
    return result;
  } catch (scrapeError) {
    // Best effort: callers receive undefined when the scrape fails.
    console.log(scrapeError);
  }
};
/**
 * Compresses the contents of a directory into a zip file.
 *
 * The directory's entries are placed at the root of the archive (the second
 * argument to `archive.directory` is `false`), using zlib level 9.
 *
 * @param {string} sourceDir - Directory whose contents are archived.
 * @param {string} outPath - Path of the zip file to create.
 * @returns {Promise<void>} Resolves once the output file stream has closed.
 *   Rejects if the archiver reports an error, if finalizing fails, or if the
 *   output file cannot be opened or written.
 */
async function zipDirectory(sourceDir, outPath) {
  const archive = archiver("zip", { zlib: { level: 9 } });
  const stream = fs.createWriteStream(outPath);

  return new Promise((resolve, reject) => {
    // Without a listener, a failure to open/write outPath is raised as an
    // uncaught 'error' event instead of reaching the caller.
    stream.on("error", reject);
    stream.on("close", () => resolve());

    archive
      .directory(sourceDir, false)
      .on("error", (err) => {
        // Release the output file handle; the later 'close' event is harmless
        // because the promise is already rejected.
        stream.destroy();
        reject(err);
      })
      .pipe(stream);

    // finalize() may return a promise that rejects; route that to the caller
    // rather than leaving an unhandled rejection.
    Promise.resolve(archive.finalize()).catch(reject);
  });
}
/**
 * Scrapes, zips and uploads the sites `website<i>.nicepage.io` for every
 * integer `i` in the half-open range [START_INDEX, END_INDEX).
 *
 * Reads from the environment:
 *   - START_INDEX: first site number (inclusive).
 *   - END_INDEX: last site number (exclusive).
 *   - FOLDER_ID: passed through to `uploadFile` as the destination folder.
 *
 * For each site whose first scrape result is marked `saved`, the scraped
 * directory is zipped, the directory is deleted, the zip is uploaded and then
 * the zip is deleted. Any error stops the run and is logged, not rethrown.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  try {
    // Environment values are strings; comparing them directly would order
    // them lexicographically ("9" < "11" is false), so parse them first.
    const startIndex = Number.parseInt(process.env.START_INDEX, 10);
    const endIndex = Number.parseInt(process.env.END_INDEX, 10);
    if (Number.isNaN(startIndex) || Number.isNaN(endIndex)) {
      throw new Error("START_INDEX and END_INDEX must be set to integers");
    }

    for (let i = startIndex; i < endIndex; i++) {
      const name = `website${i}`;
      const siteDir = `./templates/${name}/`;
      const zipPath = `./templates/${name}.zip`;

      const data = await scrapePage(`https://${name}.nicepage.io/`, name);
      if (data?.[0]?.saved) {
        await zipDirectory(siteDir, zipPath);
        // The raw scrape output is no longer needed once it is archived.
        fs.rmSync(siteDir, {
          recursive: true,
          force: true,
        });
        const buffer = fs.readFileSync(zipPath);
        await uploadFile(
          `${name}.zip`,
          process.env.FOLDER_ID,
          Readable.from(buffer),
          "application/zip"
        );
        fs.rmSync(zipPath);
      }
      // Progress marker: index of the site that was just processed.
      console.log(i);
    }
  } catch (error) {
    console.log(error);
  }
};
// Expose the batch runner as this module's default export; it is not invoked
// anywhere in the visible part of this file, so the importer is expected to call it.
export default main;