Skip to content

Commit

Permalink
Revert "Mitigate document loading issues caused by Google Drive API #…
Browse files Browse the repository at this point in the history
…export method's unreliable size limit"

This reverts commit 5da0762.
  • Loading branch information
Morred authored and oamaok committed Nov 7, 2024
1 parent edd72fa commit 86089bb
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 74 deletions.
4 changes: 1 addition & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"main": "server/index.js",
"dependencies": {
"@google-cloud/datastore": "^5.1.0",
"axios": "^1.4.0",
"cache-manager": "^3.3.0",
"chai": "^4.2.0",
"cheerio": "^1.0.0-rc.3",
Expand Down
15 changes: 1 addition & 14 deletions server/auth.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,12 @@ if (!process.env.GOOGLE_APPLICATION_CREDENTIALS && process.env.NODE_ENV === 'dev
process.env.GOOGLE_APPLICATION_CREDENTIALS = path.join(__dirname, '.auth.json')
}

// returns the authClient that can be used for making other requests
// only public method, returns the authClient that can be used for making other requests
exports.getAuth = async () => {
if (authClient && process.env.NODE_ENV !== 'test') return authClient
return setAuthClient()
}

// This is only used when data is fetched manually via a download link vs using the Google Drive library
exports.getAccessToken = async () => {
const accessToken = await exports.getAuth()
.then((client) => {
return client.getAccessToken()
})
.catch((err) => {
console.error('Error getting access token:', err)
})

return accessToken.token
}

// configures the auth client if we don't already have one
async function setAuthClient() {
return inflight('auth', async () => {
Expand Down
59 changes: 13 additions & 46 deletions server/docs.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
'use strict'

const axios = require('axios')
const {google} = require('googleapis')
const cheerio = require('cheerio')
const slugify = require('limax')
Expand All @@ -9,7 +8,7 @@ const xlsx = require('xlsx')
const cache = require('./cache')
const formatter = require('./formatter')
const log = require('./logger')
const {getAuth, getAccessToken} = require('./auth')
const {getAuth} = require('./auth')

const supportedTypes = new Set(['document', 'spreadsheet', 'text/html'])

Expand All @@ -30,16 +29,18 @@ exports.slugify = (text = '') => {
return slugify(text)
}

exports.fetchDoc = async (id, resourceType, exportLinks, req) => {
exports.fetchDoc = async (id, resourceType, req) => {
const data = await cache.get(id)
if (data && data.content) {
log.info(`CACHE HIT ${req.path}`)
return data.content
}

const auth = await getAuth()
const driveDoc = await fetch({id, resourceType, exportLinks, req}, auth)

const driveDoc = await fetch({id, resourceType, req}, auth)
const originalRevision = driveDoc[1]

const {html, byline, createdBy, sections} = formatter.getProcessedDocAttributes(driveDoc, req.path)
const payload = {html, byline, createdBy, sections}

Expand All @@ -52,7 +53,7 @@ exports.fetchDoc = async (id, resourceType, exportLinks, req) => {
return payload
}

async function fetchHTMLForId(id, resourceType, exportLinks, req, drive) {
async function fetchHTMLForId(id, resourceType, req, drive) {
if (!supportedTypes.has(resourceType)) {
return `Library does not support viewing ${resourceType}s yet.`
}
Expand All @@ -65,45 +66,12 @@ async function fetchHTMLForId(id, resourceType, exportLinks, req, drive) {
return fetchHTML(drive, id)
}

try {
const {data} = await drive.files.export({
fileId: id,
// text/html exports are not supported for slideshows
mimeType: resourceType === 'presentation' ? 'text/plain' : 'text/html'
})

return data
} catch (e) {
const errorResponse = e.response.data.error
// If the Google Drive API returns 403, we fall back to using the export link directly
if (errorResponse.code === 403 && errorResponse.message === "This file is too large to be exported.") {
console.log("falling back to using the export link...")
const manuallyFetchedData = await fetchManually(resourceType, exportLinks)
return manuallyFetchedData
} else {
throw e
}
}
}
async function fetchManually(resourceType, exportLinks) {
const accessToken = await getAccessToken()
const exportLink = exportLinks['text/html']
const headers = {Authorization: `Bearer ${accessToken}`}

const fetchedData = await axios({
url: exportLink,
method: 'GET',
responseType: resourceType === 'presentation' ? 'text/plain' : 'text/html',
headers: headers
const {data} = await drive.files.export({
fileId: id,
// text/html exports are not supported for slideshows
mimeType: resourceType === 'presentation' ? 'text/plain' : 'text/html'
})
.then((response) => {
const fileContents = response.data
return fileContents
})
.catch((err) => {
console.error('Error downloading file:', err)
})
return fetchedData
return data
}

async function fetchOriginalRevisions(id, resourceType, req, drive) {
Expand All @@ -121,13 +89,12 @@ async function fetchOriginalRevisions(id, resourceType, req, drive) {
})
}

async function fetch({id, resourceType, exportLinks, req}, authClient) {
async function fetch({id, resourceType, req}, authClient) {
const drive = google.drive({version: 'v3', auth: authClient})
const documentData = await Promise.all([
fetchHTMLForId(id, resourceType, exportLinks, req, drive),
fetchHTMLForId(id, resourceType, req, drive),
fetchOriginalRevisions(id, resourceType, req, drive)
])

return documentData
}

Expand Down
2 changes: 1 addition & 1 deletion server/list.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ async function updateTree() {
}

function getOptions(driveType, id) {
const fields = 'nextPageToken,files(id,name,mimeType,parents,webViewLink,createdTime,modifiedTime,lastModifyingUser,exportLinks)'
const fields = 'nextPageToken,files(id,name,mimeType,parents,webViewLink,createdTime,modifiedTime,lastModifyingUser)'

if (driveType === 'folder') {
return {
Expand Down
5 changes: 3 additions & 2 deletions server/routes/categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ async function handleCategory(req, res) {

if (!meta || !data) return 'next'

const {resourceType, tags, id, exportLinks} = meta
const {resourceType, tags, id} = meta
const {breadcrumb, duplicates} = data

const layout = categories.has(root) ? root : 'default'
const template = `categories/${layout}`

Expand Down Expand Up @@ -67,7 +68,7 @@ async function handleCategory(req, res) {

res.locals.docId = data.id // we need this for history later
// for docs, fetch the html and then combine with the base data
const {html, byline, createdBy, sections} = await fetchDoc(id, resourceType, exportLinks, req)
const {html, byline, createdBy, sections} = await fetchDoc(id, resourceType, req)

const renderData = Object.assign({}, baseRenderData, {
content: html,
Expand Down
14 changes: 7 additions & 7 deletions test/unit/docs.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ const PAYLOAD_KEYS = ['html', 'byline', 'createdBy', 'sections']
describe('Docs', () => {
describe('Fetching Docs', () => {
it('should fetch document data with expected structure', async () => {
const doc = await fetchDoc('id-doc', 'document', {}, {})
const doc = await fetchDoc('id-doc', 'document', {})
expect(doc).to.include.keys('html', 'byline', 'createdBy', 'sections')
})

it('should parse sections correctly', async () => {
const doc = await fetchDoc('mulitsection', 'document', {}, {})
const doc = await fetchDoc('mulitsection', 'document', {})
expect(doc).to.include.keys('html', 'sections')
const {sections} = doc
expect(sections.length).equals(2)
Expand All @@ -24,31 +24,31 @@ describe('Docs', () => {

describe('Fetching Sheets', () => {
it('should fetch sheet data with expected structure', async () => {
const sheet = await fetchDoc('id-sheet', 'spreadsheet', {}, {})
const sheet = await fetchDoc('id-sheet', 'spreadsheet', {})
expect(sheet).to.include.keys(PAYLOAD_KEYS)
})

it('should successully parse the sheet to a html table', async () => {
const {html} = await fetchDoc('id-sheet', 'spreadsheet', {}, {})
const {html} = await fetchDoc('id-sheet', 'spreadsheet', {})
expect(html).includes('<table>')
expect(html).includes('</table>')
})
})

describe('Fetching html', () => {
it('should fetch html data with expected structure', async () => {
const sheet = await fetchDoc('id-html', 'text/html', {}, {})
const sheet = await fetchDoc('id-html', 'text/html', {})
expect(sheet).to.include.keys(PAYLOAD_KEYS)
})

it('should not modify html', async () => {
const {html} = await fetchDoc('id-html', 'text/html', {}, {})
const {html} = await fetchDoc('id-html', 'text/html', {})
expect(html).equals('<h1>This is a raw HTML document</h1>')
})
})

it('should identify bad resource types', async () => {
const {html} = await fetchDoc('id-html', 'badtype', {}, {})
const {html} = await fetchDoc('id-html', 'badtype', {})
expect(html).equals('Library does not support viewing badtypes yet.')
})
})

0 comments on commit 86089bb

Please sign in to comment.