|
/**
 * Extracts the lower-cased names of all entities that carry a Wikipedia URL.
 *
 * @param {Object} response - Parsed analyzeEntities response. Only
 *     `response.entities[*].name` and `.metadata.wikipedia_url` are read.
 * @returns {string[]} Lower-cased entity names, in response order. Empty when
 *     the response is missing or has no `entities` array (for example an
 *     error payload).
 */
function getWikipediaEntitiesAsArray(response) {
  // An error payload has no `entities` array; treat that as "no entities"
  // instead of crashing on `undefined.forEach`.
  const entities =
    response && Array.isArray(response.entities) ? response.entities : [];

  return entities
    .filter((entity) => entity.metadata && entity.metadata.wikipedia_url)
    .map((entity) => entity.name.toLowerCase());
}
| 12 | + |
/**
 * Sends a piece of text to the Cloud Natural Language `analyzeEntities`
 * endpoint (v1beta2) and returns the Wikipedia-backed entity names.
 *
 * The document is submitted as plain text with language "de-de".
 *
 * @param {string} line - Text to analyze.
 * @returns {string[]} Lower-cased entity names, sorted with the default
 *     string ordering.
 * @throws {Error} When the API answers with a non-200 status code.
 */
function retrieveEntities(line) {
  const apiKey = "YOUR API KEY HERE";
  const apiEndpoint = `https://language.googleapis.com/v1beta2/documents:analyzeEntities?key=${apiKey}`;

  const nlData = {
    document: {
      language: "de-de",
      type: "PLAIN_TEXT",
      content: line,
    },
    encodingType: "UTF8",
  };
  const nlOptions = {
    method: "post",
    contentType: "application/json",
    payload: JSON.stringify(nlData),
    // HTTP errors are returned instead of thrown, so the status is checked below.
    muteHttpExceptions: true,
  };

  const response = UrlFetchApp.fetch(apiEndpoint, nlOptions);
  const body = response.getContentText();
  const status = response.getResponseCode();
  if (status !== 200) {
    // Fail with the real cause rather than a TypeError on a missing
    // `entities` field further down.
    throw new Error(`analyzeEntities request failed with HTTP ${status}: ${body}`);
  }

  const entityResultList = getWikipediaEntitiesAsArray(JSON.parse(body));
  return entityResultList.sort();
}
| 34 | + |
/**
 * Fetches a page and returns its <title> text and meta description joined
 * as "title. description".
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {string} "title. description" when both are present, only the
 *     part that exists when one is missing, and "" when neither is found.
 */
function scrapeTitleDescription(url) {
  const html = UrlFetchApp.fetch(url).getContentText();
  const $ = Cheerio.load(html);

  const title = $('title').first().text();
  // `attr` yields undefined when the page has no meta description.
  const desc = $('meta[name=description]').attr('content');

  // Drop missing or empty parts so the result never contains the literal text
  // "undefined" and is truly empty when the page offers nothing usable.
  return [title, desc]
    .filter((part) => typeof part === "string" && part !== "")
    .join(". ");
}
| 42 | + |
/**
 * Pauses the script for the given number of seconds, calling
 * SpreadsheetApp.flush() immediately before and after the pause.
 *
 * @param {number} sec - Pause length in seconds (fractions allowed).
 */
function mySleep(sec) {
  // Utilities.sleep takes milliseconds.
  const pauseMillis = sec * 1000;

  SpreadsheetApp.flush();
  Utilities.sleep(pauseMillis);
  SpreadsheetApp.flush();
}
| 48 | + |
/**
 * Entry point: reads URLs with their clicks and impressions from the
 * "Discover" sheet, scrapes each page's title and description, extracts
 * entities from that text, and writes one row per (URL, entity) pair to the
 * "NLP Results" sheet.
 *
 * Clicks and impressions of a URL are split evenly across its entities.
 * The "CTR" header is written to the results sheet, but this function does
 * not fill in any CTR values.
 *
 * Failures for a single row (fetch, scraping, API) are logged and the loop
 * moves on to the next row.
 *
 * @throws {Error} When either expected sheet is missing.
 */
function start() {
  const ss = SpreadsheetApp.getActiveSpreadsheet();

  // Resolve both sheets first so a missing sheet cannot leave the results
  // sheet cleared with nothing to refill it.
  const sheetNLPResult = ss.getSheetByName("NLP Results");
  const sheetDiscover = ss.getSheetByName("Discover");
  if (!sheetNLPResult || !sheetDiscover) {
    throw new Error('Expected sheets "NLP Results" and "Discover" in the active spreadsheet');
  }

  // Reset the results sheet and write its header row in one call.
  sheetNLPResult.clear();
  sheetNLPResult
    .getRange(1, 1, 1, 5)
    .setValues([["URL", "Entity", "Clicks", "Impressions", "CTR"]]);

  // Discover sheet: columns A-D are read, E and F are filled by this script.
  const valuesDiscover = sheetDiscover.getRange("A:D").getValues();
  sheetDiscover.getRange(1, 5).setValue("Title + Desc");
  sheetDiscover.getRange(1, 6).setValue("Entities");

  for (let k = 0; k < valuesDiscover.length; k++) {
    const url = valuesDiscover[k][0];

    // "A:D" spans every row of the sheet, so skip blank rows up front
    // instead of sleeping and issuing a fetch that is bound to fail.
    if (url === "" || url == null) {
      continue;
    }

    try {
      mySleep(0.2);
      Logger.log(url);

      const titleAndDesc = scrapeTitleDescription(url);
      sheetDiscover.getRange(k + 1, 5).setValue(titleAndDesc);
      if (!titleAndDesc) {
        continue;
      }

      const entityResultList = retrieveEntities(titleAndDesc);
      if (entityResultList.length === 0) {
        continue;
      }

      // Split the URL's traffic evenly across its entities.
      const clicks = valuesDiscover[k][1] / entityResultList.length;
      const impressions = valuesDiscover[k][2] / entityResultList.length;

      entityResultList.sort();
      sheetDiscover.getRange(k + 1, 6).setValue(entityResultList.join(" | "));

      // Append one row per entity below the current last row, in one write.
      const firstFreeRow = sheetNLPResult.getLastRow() + 1;
      const rows = entityResultList.map((entity) => [url, entity, clicks, impressions]);
      sheetNLPResult.getRange(firstFreeRow, 1, rows.length, 4).setValues(rows);
    } catch (e) {
      // Best effort: one bad URL must not abort the whole run.
      Logger.log(e);
    }
  }
}
0 commit comments