-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScraper.js
106 lines (86 loc) · 3.21 KB
/
Scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/**
 * Spreadsheet open trigger: installs the custom scraper menu.
 *
 * Every menu item is preceded by a separator and is bound to the name of
 * the function that runs when the item is clicked.
 */
function onOpen() {
  // [caption shown in the menu, name of the handler function]
  const entries = [
    ['Run Scraper! ☢️', 'letsCheck'],
    ['Get All Sites! 🌐', 'getAllSites'],
    ['Get All Plugins! ⚠️', 'getAllPlugins'],
  ];
  const emptyMenu = SpreadsheetApp.getUi().createMenu('⭐ Scraper Menu');

  entries
    .reduce(
      (menu, [caption, handlerName]) => menu.addSeparator().addItem(caption, handlerName),
      emptyMenu
    )
    .addToUi();
}
/**
 * Cheerio XML/HTML parser/scraper (cheeriogs, the Google Apps Script port)
 * Project: https://github.com/tani/cheeriogs?tab=readme-ov-file
 * Library script ID: 1ReeQ6WO8kKNxoaA_O0XEQ589cIrRvEBA9qcWpNqdOP17i47u6N9M5Xh0
 */
/**
 * Globals
 *
 * Gathers the per-run state the scraper needs from the active sheet:
 * shows an alert naming the active sheet, reads that sheet through
 * mkHelpers.readRange and converts the rows with mkHelpers.getCleanData.
 *
 * @returns {Array<{cleanData: *, singleSheetObj: *}>} A one-element array
 *   whose only entry holds `cleanData` (the output of
 *   mkHelpers.getCleanData) and `singleSheetObj` (the active sheet, fetched
 *   by name, used as the target of mkHelpers.setSingleValue).
 */
function createGlobals() {
  const sheetObj = SpreadsheetApp.getActiveSpreadsheet();
  const activeSheetName = sheetObj.getActiveSheet().getName();
  SpreadsheetApp.getUi().alert('Active sheet is ' + activeSheetName);

  const sheetData = mkHelpers.readRange(sheetObj, activeSheetName); // for cleanData
  const cleanData = mkHelpers.getCleanData(sheetData); // object array
  const singleSheetObj = sheetObj.getSheetByName(activeSheetName); // for setSingleValue

  // Log before returning: this call used to sit after the `return` and
  // therefore never ran.
  Logger.log("Global options and variables created");

  // Output objects wrapped in a single array; callers read index 0.
  return [{ cleanData, singleSheetObj }];
}
/**
 * Menu handler: checks every enabled row of the active sheet for changes.
 *
 * For each enabled row the page at `data.link` is fetched with getContent_,
 * loaded into Cheerio, and the element matched by `data.selector` is read:
 *   - type 'attr': the attribute named by `data.value`
 *   - type 'text': the trimmed text content
 * Any other type leaves the scraped value as an empty string.
 * The scraped value is compared with `data.title` through compareValues_ and
 * the row is updated: column 5 = new title (only when changed),
 * column 6 = check time, column 7 = check status.
 *
 * getContent_ is used as a two-element result: a truthy first element means
 * success and the second element is the page HTML; otherwise the second
 * element is written to the status column and the row is skipped.
 *
 * @returns {Array<{oldvalue: *, newvalue: *, changed: boolean, lastcheck: *}>}
 *   One entry per row that was successfully fetched and compared. Disabled
 *   rows and rows whose fetch failed produce no entry.
 */
function letsCheck() {
  Logger.log("Getting globals");
  const { cleanData, singleSheetObj } = createGlobals()[0];
  Logger.log("Scraping started");
  const results = [];

  cleanData.forEach(function (data, index) { // Main Loop
    if (!data.enabled) {
      Logger.log("Row is not enabled!");
      return;
    }

    const checkTime = mkHelpers.whatTimeIsNow();
    const fetched = getContent_(data.link); // [ok, html-or-status]

    if (!fetched[0]) {
      // Fetch failed: record when we tried and the reported status, then move on.
      mkHelpers.setSingleValue(singleSheetObj, index, 6, checkTime); // check time
      mkHelpers.setSingleValue(singleSheetObj, index, 7, fetched[1]); // check status
      return;
    }

    const content = Cheerio.load(fetched[1]); // Loaded in Cheerio
    const searchKey = data.selector; // selector to find

    // Extract the value according to the configured type.
    let rawValue = "";
    if (data.type === 'attr') {
      rawValue = content(searchKey).attr(data.value);
    } else if (data.type === 'text') {
      rawValue = content(searchKey).text().trim();
    }

    const changed = !compareValues_(data.title, rawValue);
    if (changed) {
      Logger.log(`Value changed, update sheet - old value: ${data.title} - new value: ${rawValue}`);
    } else {
      Logger.log("Nothing is changed since last check");
    }

    results.push({
      oldvalue: data.title,
      newvalue: rawValue,
      changed,
      lastcheck: checkTime,
    });

    if (changed) {
      mkHelpers.setSingleValue(singleSheetObj, index, 5, rawValue); // new title
    }
    mkHelpers.setSingleValue(singleSheetObj, index, 6, checkTime); // check time
    mkHelpers.setSingleValue(singleSheetObj, index, 7, changed ? "Updated" : "Not Updated"); // check status
  }); // END FOREACH

  // Previously collected and then dropped; returned so callers can inspect the run.
  return results;
} // END