Skip to content

Commit 57bc35e

Browse files
committed Jun 11, 2021
upload
1 parent 7a50393 commit 57bc35e

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed
 

‎index.js

+104
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,104 @@
1+
/**
 * Collects the names of all entities that carry a Wikipedia link.
 *
 * @param {Object} response - Parsed entity-analysis response. Only its
 *     `entities` array is read; each entry is expected to have a `name` and
 *     an optional `metadata.wikipedia_url`.
 * @returns {string[]} Lower-cased names of the entities whose metadata has a
 *     truthy `wikipedia_url`, in response order. Empty when the response has
 *     no `entities` array.
 */
function getWikipediaEntitiesAsArray (response) {
  // An error payload has no `entities` array; treat it as "no entities"
  // instead of crashing on `undefined.forEach`.
  const entities = (response && Array.isArray(response.entities)) ? response.entities : [];

  return entities
    .filter(entity => Boolean(
      entity &&
      entity.metadata &&
      entity.metadata.wikipedia_url &&
      typeof entity.name === "string"
    ))
    .map(entity => entity.name.toLowerCase());
}
12+
13+
/**
 * Sends a text to the Google Cloud Natural Language `analyzeEntities`
 * endpoint and returns the Wikipedia-linked entities found in it.
 *
 * @param {string} line - Plain text to analyze.
 * @param {string} [language="de-de"] - Language code sent with the document.
 * @returns {string[]} Sorted, lower-cased entity names as produced by
 *     `getWikipediaEntitiesAsArray`.
 * @throws {Error} When the API answers with a non-200 status code.
 */
function retrieveEntities (line, language = "de-de") {
  const apiKey = "YOUR API KEY HERE";
  const apiEndpoint = 'https://language.googleapis.com/v1beta2/documents:analyzeEntities?key=' + apiKey;

  const nlData = {
    "document": {
      "language": language,
      "type": "PLAIN_TEXT",
      "content": line
    },
    "encodingType": "UTF8"
  };
  const nlOptions = {
    "method": "post",
    "contentType": "application/json",
    "payload": JSON.stringify(nlData),
    // Keep fetch() from throwing on HTTP errors so the status can be
    // inspected and reported together with the response body below.
    "muteHttpExceptions": true
  };

  const response = UrlFetchApp.fetch(apiEndpoint, nlOptions);
  const statusCode = response.getResponseCode();
  const body = response.getContentText();

  if (statusCode !== 200) {
    throw new Error("analyzeEntities request failed with HTTP " + statusCode + ": " + body);
  }

  return getWikipediaEntitiesAsArray(JSON.parse(body)).sort();
}
34+
35+
/**
 * Fetches a page and returns its title and meta description as one string.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {string} "<title>. <description>" when both are present, only the
 *     one that is present otherwise, and "" when the page has neither.
 */
function scrapeTitleDescription(url) {
  const html = UrlFetchApp.fetch(url).getContentText();
  const $ = Cheerio.load(html);

  const title = $('title').first().text() || "";
  // attr() yields undefined when the page has no description meta tag;
  // without the fallback the literal text "undefined" ends up in the result.
  const desc = $('meta[name=description]').attr('content') || "";

  return [title, desc].filter(part => part !== "").join(". ");
}
42+
43+
/**
 * Pauses the script for the given number of seconds, calling
 * SpreadsheetApp.flush() once before and once after the pause.
 *
 * @param {number} sec - Length of the pause in seconds (fractions allowed).
 */
function mySleep(sec) {
  const pauseMillis = sec * 1000; // Utilities.sleep() takes milliseconds

  SpreadsheetApp.flush();
  Utilities.sleep(pauseMillis);
  SpreadsheetApp.flush();
}
48+
49+
/**
 * Entry point: enriches the "Discover" sheet and rebuilds "NLP Results".
 *
 * For every non-blank cell in column A of "Discover" the value is treated as
 * a URL: its title + description is scraped into column E, the entities found
 * in that text are written pipe-separated into column F, and one row per
 * entity (URL, entity, clicks share, impressions share) is appended to
 * "NLP Results". Columns B and C of "Discover" are read as clicks and
 * impressions and are split evenly across the entities of the URL.
 *
 * Failures for a single row are logged and do not stop the run.
 *
 * @throws {Error} When one of the two sheets does not exist.
 */
function start () {
  const ss = SpreadsheetApp.getActiveSpreadsheet();
  const sheetNLPResult = ss.getSheetByName("NLP Results");
  const sheetDiscover = ss.getSheetByName("Discover");

  // Check both sheets before touching anything, so a missing "Discover"
  // sheet does not leave "NLP Results" wiped.
  if (!sheetNLPResult || !sheetDiscover) {
    throw new Error('The active spreadsheet needs the sheets "NLP Results" and "Discover"');
  }

  // Clean NLP Results sheet and write its header row
  // NOTE(review): the "CTR" column gets a header but no values are ever
  // written to it — confirm whether it should be filled.
  sheetNLPResult.clear();
  sheetNLPResult.getRange(1, 1, 1, 5).setValues([["URL", "Entity", "Clicks", "Impressions", "CTR"]]);

  // Discover sheet
  const valuesDiscover = sheetDiscover.getRange("A:D").getValues();
  sheetDiscover.getRange(1, 5, 1, 2).setValues([["Title + Desc", "Entities"]]);

  // Row 1 holds the header, so results start in row 2. Tracking the row
  // here avoids re-reading the whole results sheet for every URL.
  let nextResultRow = 2;

  // Loop Discover rows to get URL, clicks and impressions
  for (let k = 0; k < valuesDiscover.length; k++) {
    const url = valuesDiscover[k][0];

    // "A:D" spans the full sheet height; skip blank rows instead of
    // sleeping and issuing a fetch that can only fail.
    if (url === "" || url === null || url === undefined) {
      continue;
    }

    try {
      mySleep(0.2);
      Logger.log(url); // Log URL

      const titleAndDesc = scrapeTitleDescription(url); // Get title + desc from crawl
      sheetDiscover.getRange(k + 1, 5).setValue(titleAndDesc);
      if (titleAndDesc === "") {
        continue;
      }

      const entityResultList = retrieveEntities(titleAndDesc).sort(); // Get entities
      if (entityResultList.length === 0) {
        continue;
      }

      // Split the URL's clicks and impressions evenly across its entities
      const clicks = valuesDiscover[k][1] / entityResultList.length;
      const impressions = valuesDiscover[k][2] / entityResultList.length;

      // Add pipe separated entities in Discover sheet
      sheetDiscover.getRange(k + 1, 6).setValue(entityResultList.join(" | "));

      // NLP Results sheet with single entities, written as one batch
      const resultRows = entityResultList.map(entity => [url, entity, clicks, impressions]);
      sheetNLPResult.getRange(nextResultRow, 1, resultRows.length, 4).setValues(resultRows);
      nextResultRow += resultRows.length;
    } catch (e) {
      // Best effort: one bad row (e.g. the header cell or an unreachable
      // URL) must not abort the remaining rows.
      Logger.log(e);
    }
  }
}

0 commit comments

Comments
 (0)
Please sign in to comment.