@@ -171,96 +171,182 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => {
171
171
request . message ( "Completed successfully." ) ;
172
172
} ) ;
173
173
174
- // A background job to populate the analytics_* fields in the books table.
174
+ // A background job to populate the analytics_* fields in our books table
175
+ // from api.bloomlibrary.org/stats. Data comes from our postgresql analytics database populated from Segment.
175
176
//
176
177
// This is scheduled on Azure under bloom-library-maintenance-{prod|dev}-daily.
177
178
// You can also run it manually via REST:
178
179
// curl -X POST -H "X-Parse-Application-Id: <app ID>" -H "X-Parse-Master-Key: <master key>" -d "{}" https://bloom-parse-server-develop.azurewebsites.net/parse/jobs/updateBookAnalytics
179
180
Parse . Cloud . job ( "updateBookAnalytics" , async ( request ) => {
180
181
request . log . info ( "updateBookAnalytics - Starting." ) ;
181
182
182
// api.bloomlibrary.org/stats looks up analytics based on a parse server query.
// The api needs the appropriate parse server url and application ID so it can
// call back to the right parse server instance to get the list of books we
// want data about from the postgresql database.
//
// Returns an object { url, appId } taken from the SERVER_URL and APP_ID
// environment variables. The url is returned as-is; no trailing slash is added.
//
// When testing locally, you need to explicitly set which environment you want
// to collect analytics data for. Override the return value with something like
//   return {
//     url: "https://dev-server.bloomlibrary.org/parse",
//     appId: "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR",
//   };
function getCurrentInstanceInfoForApiQuery() {
    return {
        url: process.env.SERVER_URL,
        appId: process.env.APP_ID,
    };
}
198
// Convert a raw analytics value (typically a string or number from the stats api)
// into a number suitable for storing in a Number column.
//
// value:     the raw value; any falsy value (undefined, null, "", 0) yields 0.
// isDecimal: when true, parse as a floating point number; otherwise parse as
//            a base-10 integer (any fractional part is truncated).
//
// Returns the parsed number, or 0 if the value cannot be parsed into a
// finite number.
function getNumberOrZero(value, isDecimal = false) {
    if (!value) return 0;

    const number = isDecimal
        ? Number.parseFloat(value)
        : Number.parseInt(value, 10);

    // Number.isFinite rejects NaN and also +/-Infinity, which parseFloat
    // happily returns for the string "Infinity".
    return Number.isFinite(number) ? number : 0;
}
209
// Key/value pairs of books-table column names to analytics results metadata.
//   apiResultName: the property name of the corresponding value in each
//                  per-book stats record returned by the api.
//   isDecimal:     (optional) true if the value should be parsed as a
//                  floating point number rather than an integer.
const analyticsColumnsMap = {
    analytics_startedCount: {
        apiResultName: "started",
    },
    analytics_finishedCount: {
        apiResultName: "finished",
    },
    analytics_shellDownloads: {
        apiResultName: "shelldownloads",
    },
    analytics_pdfDownloads: {
        apiResultName: "pdfdownloads",
    },
    analytics_epubDownloads: {
        apiResultName: "epubdownloads",
    },
    analytics_bloompubDownloads: {
        apiResultName: "bloompubdownloads",
    },
    analytics_questionsInBookCount: {
        apiResultName: "numquestionsinbook",
    },
    analytics_quizzesTakenCount: {
        apiResultName: "numquizzestaken",
    },
    analytics_meanQuestionsCorrectPct: {
        apiResultName: "meanpctquestionscorrect",
        isDecimal: true,
    },
    analytics_medianQuestionsCorrectPct: {
        apiResultName: "medianpctquestionscorrect",
        isDecimal: true,
    },
};
203
244
204
245
try {
205
246
const bloomApiUrl = "https://api.bloomlibrary.org/v1" ;
206
247
// "http://127.0.0.1:7071/v1"; // testing with a locally-run api
207
248
208
- //Query the api for per-books stats for all books
249
+ // Query the api for per-books stats for all books.
250
+ // What is going on behind the scenes is actually somewhat convoluted.
251
+ // We give the api the query to run to get the parse books.
252
+ // It sends that list of books to the postgresql database to get the analytics data
253
+ // and returns it to us. It would be more efficient to ask the postgresql database
254
+ // ourselves, but the api endpoint already exists, and I didn't want to provide
255
+ // postgres connection information to the parse server.
209
256
const axios = require ( "axios" ) ;
210
- const results = await axios . post (
257
+ const analyticsResults = await axios . post (
211
258
`${ bloomApiUrl } /stats/reading/per-book` ,
212
259
{
213
260
filter : {
214
261
parseDBQuery : {
215
- url : `${ getConnectionInfo ( ) . url } classes/books` ,
262
+ url : `${
263
+ getCurrentInstanceInfoForApiQuery ( ) . url
264
+ } /classes/books`,
216
265
method : "GET" ,
217
266
options : {
218
- headers : getConnectionInfo ( ) . headers ,
267
+ headers : {
268
+ "X-Parse-Application-Id" : `${
269
+ getCurrentInstanceInfoForApiQuery ( ) . appId
270
+ } `,
271
+ } ,
219
272
params : {
220
- limit : 1000000 ,
273
+ limit : 1000000 , // Default is 100. We want all of them.
221
274
keys : "objectId,bookInstanceId" ,
222
275
} ,
223
276
} ,
224
277
} ,
225
278
} ,
226
279
}
227
280
) ;
281
+ const analyticsSourceData = analyticsResults . data . stats ;
282
+
283
+ // Make a map of bookInstanceId to analytics data for efficiency
284
+ const bookInstanceIdToAnalyticsMap = { } ;
285
+ analyticsSourceData . forEach ( ( bookAnalytics ) => {
286
+ bookInstanceIdToAnalyticsMap [ bookAnalytics . bookinstanceid ] =
287
+ bookAnalytics ;
288
+ } ) ;
228
289
229
- //Loop through all books, updating analytics
290
+ // Get all the books in our parse database.
291
+ // If the analytics values need to be updated, push it into
292
+ // a new array of books to update.
293
+ const booksToUpdate = [ ] ;
230
294
const bookQuery = new Parse . Query ( "books" ) ;
231
295
bookQuery . limit ( 1000000 ) ; // Default is 100. We want all of them.
232
- bookQuery . select ( "bookInstanceId" ) ;
233
- const books = await bookQuery . find ( ) ;
234
- books . forEach ( ( book ) => {
235
- const { bookInstanceId } = book . attributes ;
236
- const bookStats = results . data . stats . find (
237
- ( bookStat ) => bookStat . bookinstanceid === bookInstanceId
238
- ) ;
239
- book . set (
240
- "analytics_finishedCount" ,
241
- getNumberOrZero ( bookStats ?. finished )
242
- ) ;
243
- book . set (
244
- "analytics_shellDownloads" ,
245
- getNumberOrZero ( bookStats ?. shelldownloads )
246
- ) ;
247
- book . set ( "updateSource" , "updateBookAnalytics" ) ;
296
+ bookQuery . select ( "bookInstanceId" , ...Object . keys ( analyticsColumnsMap ) ) ;
297
+
298
+ const allBooks = await bookQuery . find ( ) ;
299
+ allBooks . forEach ( ( book ) => {
300
+ const bookAnalytics =
301
+ bookInstanceIdToAnalyticsMap [ book . get ( "bookInstanceId" ) ] ;
302
+
303
+ let bookNeedsUpdate = false ;
304
+ Object . keys ( analyticsColumnsMap ) . forEach ( ( columnName ) => {
305
+ const newValue = getNumberOrZero (
306
+ bookAnalytics ?. [
307
+ analyticsColumnsMap [ columnName ] . apiResultName
308
+ ] ,
309
+ analyticsColumnsMap [ columnName ] . isDecimal || false
310
+ ) ;
311
+
312
+ if ( book . get ( columnName ) !== newValue ) {
313
+ book . set ( columnName , newValue ) ;
314
+ bookNeedsUpdate = true ;
315
+ }
316
+ } ) ;
317
+ if ( bookNeedsUpdate ) {
318
+ // Important to set updateSource for proper processing in beforeSave (see details there).
319
+ book . set ( "updateSource" , "updateBookAnalytics" ) ;
320
+
321
+ booksToUpdate . push ( book ) ;
322
+ }
248
323
} ) ;
249
324
250
- //Save all books
251
- const successfulUpdates = await Parse . Object . saveAll ( books , {
325
+ request . log . info ( "booksToUpdate" , booksToUpdate ) ;
326
+
327
+ //Save any books with updated analytics.
328
+ const successfulUpdates = await Parse . Object . saveAll ( booksToUpdate , {
252
329
useMasterKey : true ,
253
330
} ) ;
254
331
request . log . info (
255
332
`updateBookAnalytics - Updated analytics for ${ successfulUpdates . length } books.`
256
333
) ;
257
334
} catch ( error ) {
258
335
if ( error . code === Parse . Error . AGGREGATE_ERROR ) {
259
- error . errors . forEach ( ( iError ) => {
336
+ const maxErrors = 20 ; // Don't blow up the log.
337
+ for ( let i = 0 ; i < error . errors . length && i < maxErrors ; i ++ ) {
338
+ const iError = error . errors [ i ] ;
260
339
request . log . error (
261
340
`Couldn't process ${ iError . object . id } due to ${ iError . message } `
262
341
) ;
263
- } ) ;
342
+ }
343
+ if ( error . errors . length > maxErrors ) {
344
+ request . log . error (
345
+ `${
346
+ error . errors . length - maxErrors
347
+ } more errors were suppressed.`
348
+ ) ;
349
+ }
264
350
request . log . error (
265
351
"updateBookAnalytics - Terminated unsuccessfully."
266
352
) ;
@@ -735,8 +821,16 @@ Parse.Cloud.define("setupTables", async () => {
735
821
{ name : "bloomPUBVersion" , type : "Number" } ,
736
822
737
823
// analytics_* fields are populated by the updateBookAnalytics job.
824
+ { name : "analytics_startCount" , type : "Number" } ,
738
825
{ name : "analytics_finishedCount" , type : "Number" } ,
739
826
{ name : "analytics_shellDownloads" , type : "Number" } ,
827
+ { name : "analytics_pdfDownloads" , type : "Number" } ,
828
+ { name : "analytics_epubDownloads" , type : "Number" } ,
829
+ { name : "analytics_bloompubDownloads" , type : "Number" } ,
830
+ { name : "analytics_questionsInBookCount" , type : "Number" } ,
831
+ { name : "analytics_quizzesTakenCount" , type : "Number" } ,
832
+ { name : "analytics_meanQuestionsCorrectPct" , type : "Number" } ,
833
+ { name : "analytics_medianQuestionsCorrectPct" , type : "Number" } ,
740
834
] ,
741
835
} ,
742
836
{
0 commit comments