From 86089bb5d6aa9179be2b2e365d342015a4a9487d Mon Sep 17 00:00:00 2001
From: Laura Eck
Date: Wed, 28 Jun 2023 09:49:57 +0200
Subject: [PATCH] Revert "Mitigate document loading issues caused by Google
 Drive API #export method's unreliable size limit"

This reverts commit 5da076207d5a72d8b727e77fb21e2cf3f90c91b0.
---
 package-lock.json           |  4 +--
 package.json                |  1 -
 server/auth.js              | 15 +---------
 server/docs.js              | 59 ++++++++-----------------------------
 server/list.js              |  2 +-
 server/routes/categories.js |  5 ++--
 test/unit/docs.test.js      | 14 ++++-----
 7 files changed, 26 insertions(+), 74 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 9d155fbc..4b2c4b8c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,7 +10,6 @@
       "license": "Apache-2.0",
       "dependencies": {
         "@google-cloud/datastore": "^5.1.0",
-        "axios": "^1.4.0",
         "cache-manager": "^3.3.0",
         "chai": "^4.2.0",
         "cheerio": "^1.0.0-rc.3",
@@ -12685,8 +12684,7 @@
       "dev": true
     },
     "axios": {
-      "version": "1.7.7",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
+      "version": "https://registry.npmjs.org/axios/-/axios-1.7.7.tgz",
       "integrity": "sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==",
       "requires": {
         "follow-redirects": "^1.15.6",
diff --git a/package.json b/package.json
index c8dc9df9..abf1a8cc 100644
--- a/package.json
+++ b/package.json
@@ -5,7 +5,6 @@
   "main": "server/index.js",
   "dependencies": {
     "@google-cloud/datastore": "^5.1.0",
-    "axios": "^1.4.0",
     "cache-manager": "^3.3.0",
     "chai": "^4.2.0",
     "cheerio": "^1.0.0-rc.3",
diff --git a/server/auth.js b/server/auth.js
index 0469c77f..3b808740 100644
--- a/server/auth.js
+++ b/server/auth.js
@@ -19,25 +19,12 @@ if (!process.env.GOOGLE_APPLICATION_CREDENTIALS && process.env.NODE_ENV === 'dev
   process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(__dirname, '.auth.json')
 }
 
-// returns the authClient that can be used for making other requests
+// only public method, returns the authClient that can be used for making other requests
 exports.getAuth = async () => {
   if (authClient && process.env.NODE_ENV !== 'test') return authClient
   return setAuthClient()
 }
 
-// This is only used when data is fetched manually via a download link vs using the Google Drive library
-exports.getAccessToken = async () => {
-  const accessToken = await exports.getAuth()
-    .then((client) => {
-      return client.getAccessToken()
-    })
-    .catch((err) => {
-      console.error('Error getting access token:', err)
-    })
-
-  return accessToken.token
-}
-
 // configures the auth client if we don't already have one
 async function setAuthClient() {
   return inflight('auth', async () => {
diff --git a/server/docs.js b/server/docs.js
index baf9a084..a7fb85a9 100644
--- a/server/docs.js
+++ b/server/docs.js
@@ -1,6 +1,5 @@
 'use strict'
 
-const axios = require('axios')
 const {google} = require('googleapis')
 const cheerio = require('cheerio')
 const slugify = require('limax')
@@ -9,7 +8,7 @@ const xlsx = require('xlsx')
 const cache = require('./cache')
 const formatter = require('./formatter')
 const log = require('./logger')
-const {getAuth, getAccessToken} = require('./auth')
+const {getAuth} = require('./auth')
 
 const supportedTypes = new Set(['document', 'spreadsheet', 'text/html'])
 
@@ -30,7 +29,7 @@ exports.slugify = (text = '') => {
   return slugify(text)
 }
 
-exports.fetchDoc = async (id, resourceType, exportLinks, req) => {
+exports.fetchDoc = async (id, resourceType, req) => {
   const data = await cache.get(id)
   if (data && data.content) {
     log.info(`CACHE HIT ${req.path}`)
@@ -38,8 +37,10 @@ exports.fetchDoc = async (id, resourceType, exportLinks, req) => {
   }
 
   const auth = await getAuth()
-  const driveDoc = await fetch({id, resourceType, exportLinks, req}, auth)
+
+  const driveDoc = await fetch({id, resourceType, req}, auth)
   const originalRevision = driveDoc[1]
+
   const {html, byline, createdBy, sections} = formatter.getProcessedDocAttributes(driveDoc, req.path)
   const payload = {html, byline, createdBy, sections}
 
@@ -52,7 +53,7 @@ exports.fetchDoc = async (id, resourceType, exportLinks, req) => {
   return payload
 }
 
-async function fetchHTMLForId(id, resourceType, exportLinks, req, drive) {
+async function fetchHTMLForId(id, resourceType, req, drive) {
   if (!supportedTypes.has(resourceType)) {
     return `Library does not support viewing ${resourceType}s yet.`
   }
@@ -65,45 +66,12 @@ async function fetchHTMLForId(id, resourceType, exportLinks, req, drive) {
     return fetchHTML(drive, id)
   }
 
-  try {
-    const {data} = await drive.files.export({
-      fileId: id,
-      // text/html exports are not suupported for slideshows
-      mimeType: resourceType === 'presentation' ? 'text/plain' : 'text/html'
-    })
-
-    return data
-  } catch (e) {
-    const errorResponse = e.response.data.error
-    // If the Google Drive API returns 403, we fall back to using the export link directly
-    if (errorResponse.code === 403 && errorResponse.message === "This file is too large to be exported.") {
-      console.log("falling back to using the export link...")
-      const manuallyFetchedData = await fetchManually(resourceType, exportLinks)
-      return manuallyFetchedData
-    } else {
-      throw e
-    }
-  }
-}
-async function fetchManually(resourceType, exportLinks) {
-  const accessToken = await getAccessToken()
-  const exportLink = exportLinks['text/html']
-  const headers = {Authorization: `Bearer ${accessToken}`}
-
-  const fetchedData = await axios({
-    url: exportLink,
-    method: 'GET',
-    responseType: resourceType === 'presentation' ? 'text/plain' : 'text/html',
-    headers: headers
+  const {data} = await drive.files.export({
+    fileId: id,
+    // text/html exports are not suupported for slideshows
+    mimeType: resourceType === 'presentation' ? 'text/plain' : 'text/html'
   })
-    .then((response) => {
-      const fileContents = response.data
-      return fileContents
-    })
-    .catch((err) => {
-      console.error('Error downloading file:', err)
-    })
-  return fetchedData
+  return data
 }
 
 async function fetchOriginalRevisions(id, resourceType, req, drive) {
@@ -121,13 +89,12 @@ async function fetchOriginalRevisions(id, resourceType, req, drive) {
   })
 }
 
-async function fetch({id, resourceType, exportLinks, req}, authClient) {
+async function fetch({id, resourceType, req}, authClient) {
   const drive = google.drive({version: 'v3', auth: authClient})
   const documentData = await Promise.all([
-    fetchHTMLForId(id, resourceType, exportLinks, req, drive),
+    fetchHTMLForId(id, resourceType, req, drive),
     fetchOriginalRevisions(id, resourceType, req, drive)
  ])
-
   return documentData
 }
 
diff --git a/server/list.js b/server/list.js
index 63d1e721..0a056709 100644
--- a/server/list.js
+++ b/server/list.js
@@ -107,7 +107,7 @@ async function updateTree() {
 }
 
 function getOptions(driveType, id) {
-  const fields = 'nextPageToken,files(id,name,mimeType,parents,webViewLink,createdTime,modifiedTime,lastModifyingUser,exportLinks)'
+  const fields = 'nextPageToken,files(id,name,mimeType,parents,webViewLink,createdTime,modifiedTime,lastModifyingUser)'
 
   if (driveType === 'folder') {
     return {
diff --git a/server/routes/categories.js b/server/routes/categories.js
index 88a48f38..c8b09356 100644
--- a/server/routes/categories.js
+++ b/server/routes/categories.js
@@ -21,8 +21,9 @@ async function handleCategory(req, res) {
 
   if (!meta || !data) return 'next'
 
-  const {resourceType, tags, id, exportLinks} = meta
+  const {resourceType, tags, id} = meta
   const {breadcrumb, duplicates} = data
+
   const layout = categories.has(root) ? root : 'default'
   const template = `categories/${layout}`
 
@@ -67,7 +68,7 @@ async function handleCategory(req, res) {
   res.locals.docId = data.id // we need this for history later
 
   // for docs, fetch the html and then combine with the base data
-  const {html, byline, createdBy, sections} = await fetchDoc(id, resourceType, exportLinks, req)
+  const {html, byline, createdBy, sections} = await fetchDoc(id, resourceType, req)
 
   const renderData = Object.assign({}, baseRenderData, {
     content: html,
diff --git a/test/unit/docs.test.js b/test/unit/docs.test.js
index 5be96753..4568cc8d 100644
--- a/test/unit/docs.test.js
+++ b/test/unit/docs.test.js
@@ -9,12 +9,12 @@ const PAYLOAD_KEYS = ['html', 'byline', 'createdBy', 'sections']
 describe('Docs', () => {
   describe('Fetching Docs', () => {
     it('should fetch document data with expected structure', async () => {
-      const doc = await fetchDoc('id-doc', 'document', {}, {})
+      const doc = await fetchDoc('id-doc', 'document', {})
       expect(doc).to.include.keys('html', 'byline', 'createdBy', 'sections')
     })
 
     it('should parse sections correctly', async () => {
-      const doc = await fetchDoc('mulitsection', 'document', {}, {})
+      const doc = await fetchDoc('mulitsection', 'document', {})
       expect(doc).to.include.keys('html', 'sections')
       const {sections} = doc
       expect(sections.length).equals(2)
@@ -24,12 +24,12 @@ describe('Docs', () => {
 
   describe('Fetching Sheets', () => {
     it('should fetch sheet data with expected structure', async () => {
-      const sheet = await fetchDoc('id-sheet', 'spreadsheet', {}, {})
+      const sheet = await fetchDoc('id-sheet', 'spreadsheet', {})
       expect(sheet).to.include.keys(PAYLOAD_KEYS)
     })
 
     it('should successully parse the sheet to a html table', async () => {
-      const {html} = await fetchDoc('id-sheet', 'spreadsheet', {}, {})
+      const {html} = await fetchDoc('id-sheet', 'spreadsheet', {})
       expect(html).includes('<table>')
       expect(html).includes('</table>')
     })
@@ -37,18 +37,18 @@ describe('Docs', () => {
 
   describe('Fetching html', () => {
     it('should fetch html data with expected structure', async () => {
-      const sheet = await fetchDoc('id-html', 'text/html', {}, {})
+      const sheet = await fetchDoc('id-html', 'text/html', {})
       expect(sheet).to.include.keys(PAYLOAD_KEYS)
     })
 
     it('should not modify html', async () => {
-      const {html} = await fetchDoc('id-html', 'text/html', {}, {})
+      const {html} = await fetchDoc('id-html', 'text/html', {})
       expect(html).equals('<h1>This is a raw HTML document</h1>')
     })
   })
 
   it('should identify bad resource types', async () => {
-    const {html} = await fetchDoc('id-html', 'badtype', {}, {})
+    const {html} = await fetchDoc('id-html', 'badtype', {})
     expect(html).equals('Library does not support viewing badtypes yet.')
   })
 })