Skip to content

Commit 985148f

Browse files
committed
Add other columns and post-review changes (BL-13994)
1 parent ff77a43 commit 985148f

File tree

1 file changed

+133
-39
lines changed

1 file changed

+133
-39
lines changed

cloud/main.js

Lines changed: 133 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -171,96 +171,182 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => {
171171
request.message("Completed successfully.");
172172
});
173173

174-
// A background job to populate the analytics_* fields in the books table.
174+
// A background job to populate the analytics_* fields in our books table
175+
// from api.bloomlibrary.org/stats. Data comes from our postgresql analytics database populated from Segment.
175176
//
176177
// This is scheduled on Azure under bloom-library-maintenance-{prod|dev}-daily.
177178
// You can also run it manually via REST:
178179
// curl -X POST -H "X-Parse-Application-Id: <app ID>" -H "X-Parse-Master-Key: <master key>" -d "{}" https://bloom-parse-server-develop.azurewebsites.net/parse/jobs/updateBookAnalytics
179180
Parse.Cloud.job("updateBookAnalytics", async (request) => {
180181
request.log.info("updateBookAnalytics - Starting.");
181182

182-
function getConnectionInfo() {
183+
// api.bloomlibrary.org/stats looks up analytics based on a parse server query.
184+
// The api needs the appropriate parse server url and key so it can call back to the right parse server
185+
// instance to get the list of books we want data about from the postgresql database.
186+
function getCurrentInstanceInfoForApiQuery() {
183187
return {
184-
url: process.env.SERVER_URL + "/",
185-
headers: {
186-
"X-Parse-Application-Id": process.env.APP_ID,
187-
},
188+
url: process.env.SERVER_URL,
189+
appId: process.env.APP_ID,
188190
};
189-
// When testing locally, you'll need to override using something like
191+
// But when testing locally, you need to explicitly set which environment you want
192+
// to collect analytics data for. You'll need to override using something like
190193
// return {
191-
// url: "https://dev-server.bloomlibrary.org/parse/",
192-
// headers: {
193-
// "X-Parse-Application-Id":
194-
// "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR",
195-
// },
194+
// url: "https://dev-server.bloomlibrary.org/parse",
195+
// appId: "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR",
196196
// };
197197
}
198-
function getNumberOrZero(value) {
198+
/**
 * Coerce a raw analytics value into a finite number, falling back to 0.
 *
 * @param {*} value - Raw value to convert; any falsy value
 *   (undefined, null, "", 0, NaN) yields 0.
 * @param {boolean} [isDecimal=false] - When true the value is parsed as a
 *   float; otherwise it is parsed as a base-10 integer (fraction truncated).
 * @returns {number} The parsed number, or 0 when the value is missing,
 *   unparsable, or not finite.
 */
function getNumberOrZero(value, isDecimal = false) {
    if (!value) return 0;

    const number = isDecimal
        ? Number.parseFloat(value)
        : Number.parseInt(value, 10);

    // Number.isFinite rejects NaN *and* +/-Infinity. parseFloat("Infinity")
    // is not NaN, and a non-finite number cannot be represented in JSON,
    // so treat it like any other unusable value.
    return Number.isFinite(number) ? number : 0;
}
209+
// key/value pairs of column names to analytics results metadata
210+
const analyticsColumnsMap = {
211+
analytics_startedCount: {
212+
apiResultName: "started",
213+
},
214+
analytics_finishedCount: {
215+
apiResultName: "finished",
216+
},
217+
analytics_shellDownloads: {
218+
apiResultName: "shelldownloads",
219+
},
220+
analytics_pdfDownloads: {
221+
apiResultName: "pdfdownloads",
222+
},
223+
analytics_epubDownloads: {
224+
apiResultName: "epubdownloads",
225+
},
226+
analytics_bloompubDownloads: {
227+
apiResultName: "bloompubdownloads",
228+
},
229+
analytics_questionsInBookCount: {
230+
apiResultName: "numquestionsinbook",
231+
},
232+
analytics_quizzesTakenCount: {
233+
apiResultName: "numquizzestaken",
234+
},
235+
analytics_meanQuestionsCorrectPct: {
236+
apiResultName: "meanpctquestionscorrect",
237+
isDecimal: true,
238+
},
239+
analytics_medianQuestionsCorrectPct: {
240+
apiResultName: "medianpctquestionscorrect",
241+
isDecimal: true,
242+
},
243+
};
203244

204245
try {
205246
const bloomApiUrl = "https://api.bloomlibrary.org/v1";
206247
// "http://127.0.0.1:7071/v1"; // testing with a locally-run api
207248

208-
//Query the api for per-books stats for all books
249+
// Query the api for per-book stats for all books.
250+
// What is going on behind the scenes is actually somewhat convoluted.
251+
// We give the api the query to run to get the parse books.
252+
// It sends that list of books to the postgresql database to get the analytics data
253+
// and returns it to us. It would be more efficient to ask the postgresql database
254+
// ourselves, but the api endpoint already exists, and I didn't want to provide
255+
// postgres connection information to the parse server.
209256
const axios = require("axios");
210-
const results = await axios.post(
257+
const analyticsResults = await axios.post(
211258
`${bloomApiUrl}/stats/reading/per-book`,
212259
{
213260
filter: {
214261
parseDBQuery: {
215-
url: `${getConnectionInfo().url}classes/books`,
262+
url: `${
263+
getCurrentInstanceInfoForApiQuery().url
264+
}/classes/books`,
216265
method: "GET",
217266
options: {
218-
headers: getConnectionInfo().headers,
267+
headers: {
268+
"X-Parse-Application-Id": `${
269+
getCurrentInstanceInfoForApiQuery().appId
270+
}`,
271+
},
219272
params: {
220-
limit: 1000000,
273+
limit: 1000000, // Default is 100. We want all of them.
221274
keys: "objectId,bookInstanceId",
222275
},
223276
},
224277
},
225278
},
226279
}
227280
);
281+
const analyticsSourceData = analyticsResults.data.stats;
282+
283+
// Make a map of bookInstanceId to analytics data for efficiency
284+
const bookInstanceIdToAnalyticsMap = {};
285+
analyticsSourceData.forEach((bookAnalytics) => {
286+
bookInstanceIdToAnalyticsMap[bookAnalytics.bookinstanceid] =
287+
bookAnalytics;
288+
});
228289

229-
//Loop through all books, updating analytics
290+
// Get all the books in our parse database.
291+
// If a book's analytics values need to be updated, push that book into
292+
// a new array of books to update.
293+
const booksToUpdate = [];
230294
const bookQuery = new Parse.Query("books");
231295
bookQuery.limit(1000000); // Default is 100. We want all of them.
232-
bookQuery.select("bookInstanceId");
233-
const books = await bookQuery.find();
234-
books.forEach((book) => {
235-
const { bookInstanceId } = book.attributes;
236-
const bookStats = results.data.stats.find(
237-
(bookStat) => bookStat.bookinstanceid === bookInstanceId
238-
);
239-
book.set(
240-
"analytics_finishedCount",
241-
getNumberOrZero(bookStats?.finished)
242-
);
243-
book.set(
244-
"analytics_shellDownloads",
245-
getNumberOrZero(bookStats?.shelldownloads)
246-
);
247-
book.set("updateSource", "updateBookAnalytics");
296+
bookQuery.select("bookInstanceId", ...Object.keys(analyticsColumnsMap));
297+
298+
const allBooks = await bookQuery.find();
299+
allBooks.forEach((book) => {
300+
const bookAnalytics =
301+
bookInstanceIdToAnalyticsMap[book.get("bookInstanceId")];
302+
303+
let bookNeedsUpdate = false;
304+
Object.keys(analyticsColumnsMap).forEach((columnName) => {
305+
const newValue = getNumberOrZero(
306+
bookAnalytics?.[
307+
analyticsColumnsMap[columnName].apiResultName
308+
],
309+
analyticsColumnsMap[columnName].isDecimal || false
310+
);
311+
312+
if (book.get(columnName) !== newValue) {
313+
book.set(columnName, newValue);
314+
bookNeedsUpdate = true;
315+
}
316+
});
317+
if (bookNeedsUpdate) {
318+
// Important to set updateSource for proper processing in beforeSave (see details there).
319+
book.set("updateSource", "updateBookAnalytics");
320+
321+
booksToUpdate.push(book);
322+
}
248323
});
249324

250-
//Save all books
251-
const successfulUpdates = await Parse.Object.saveAll(books, {
325+
request.log.info("booksToUpdate", booksToUpdate);
326+
327+
//Save any books with updated analytics.
328+
const successfulUpdates = await Parse.Object.saveAll(booksToUpdate, {
252329
useMasterKey: true,
253330
});
254331
request.log.info(
255332
`updateBookAnalytics - Updated analytics for ${successfulUpdates.length} books.`
256333
);
257334
} catch (error) {
258335
if (error.code === Parse.Error.AGGREGATE_ERROR) {
259-
error.errors.forEach((iError) => {
336+
const maxErrors = 20; // Don't blow up the log.
337+
for (let i = 0; i < error.errors.length && i < maxErrors; i++) {
338+
const iError = error.errors[i];
260339
request.log.error(
261340
`Couldn't process ${iError.object.id} due to ${iError.message}`
262341
);
263-
});
342+
}
343+
if (error.errors.length > maxErrors) {
344+
request.log.error(
345+
`${
346+
error.errors.length - maxErrors
347+
} more errors were suppressed.`
348+
);
349+
}
264350
request.log.error(
265351
"updateBookAnalytics - Terminated unsuccessfully."
266352
);
@@ -735,8 +821,16 @@ Parse.Cloud.define("setupTables", async () => {
735821
{ name: "bloomPUBVersion", type: "Number" },
736822

737823
// analytics_* fields are populated by the updateBookAnalytics job.
824+
{ name: "analytics_startCount", type: "Number" },
738825
{ name: "analytics_finishedCount", type: "Number" },
739826
{ name: "analytics_shellDownloads", type: "Number" },
827+
{ name: "analytics_pdfDownloads", type: "Number" },
828+
{ name: "analytics_epubDownloads", type: "Number" },
829+
{ name: "analytics_bloompubDownloads", type: "Number" },
830+
{ name: "analytics_questionsInBookCount", type: "Number" },
831+
{ name: "analytics_quizzesTakenCount", type: "Number" },
832+
{ name: "analytics_meanQuestionsCorrectPct", type: "Number" },
833+
{ name: "analytics_medianQuestionsCorrectPct", type: "Number" },
740834
],
741835
},
742836
{

0 commit comments

Comments
 (0)