Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update fetch file to centralize default headers #620

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

const axios = require('axios')

const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler ([email protected])' })

axios.defaults.headers = defaultHeaders

function buildRequestOptions(request) {
let responseType = 'text'
if (request.json) {
Expand Down Expand Up @@ -45,4 +49,4 @@ function withDefaults(opts) {
return request => callFetch(request, axiosInstance)
}

module.exports = { callFetch, withDefaults }
module.exports = { callFetch, withDefaults, defaultHeaders }
4 changes: 1 addition & 3 deletions providers/fetch/cratesioFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ class CratesioFetch extends AbstractFetch {
try {
registryData = await request({
url: `https://crates.io/api/v1/crates/${spec.name}`,
json: true,
headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' }
json: true
})
} catch (exception) {
if (exception.statusCode !== 404) throw exception
Expand All @@ -72,7 +71,6 @@ class CratesioFetch extends AbstractFetch {
url: `https://crates.io${version.dl_path}`,
encoding: null,
headers: {
'User-Agent': 'clearlydefined.io crawler ([email protected])',
Accept: 'text/html'
}
})
Expand Down
8 changes: 3 additions & 5 deletions providers/fetch/mavenBasedFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: MIT

const AbstractFetch = require('./abstractFetch')
const { withDefaults } = require('../../lib/fetch')
const { callFetch, defaultHeaders } = require('../../lib/fetch')
const nodeRequest = require('request')
const { clone, get } = require('lodash')
const { promisify } = require('util')
Expand All @@ -23,14 +23,12 @@ const extensionMap = {
jar: '.jar'
}

const defaultHeaders = { headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' } }

class MavenBasedFetch extends AbstractFetch {
constructor(providerMap, options) {
super(options)
this._providerMap = { ...providerMap }
this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders)
this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get
this._handleRequestPromise = options.requestPromise || callFetch
this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get
}

canHandle(request) {
Expand Down
5 changes: 2 additions & 3 deletions providers/fetch/packagistFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const nodeRequest = require('request')
const { promisify } = require('util')
const readdir = promisify(fs.readdir)
const FetchResult = require('../../lib/fetchResult')
const { defaultHeaders } = require('../../lib/fetch')

const providerMap = {
packagist: 'https://repo.packagist.org/'
Expand Down Expand Up @@ -62,9 +63,7 @@ class PackagistFetch extends AbstractFetch {
return new Promise((resolve, reject) => {
const options = {
url: distUrl,
headers: {
'User-Agent': 'clearlydefined.io crawler ([email protected])'
}
headers: defaultHeaders
}
nodeRequest
.get(options, (error, response) => {
Expand Down
46 changes: 42 additions & 4 deletions test/unit/lib/fetchTests.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
const { fail } = require('assert')
const { callFetch, withDefaults } = require('../../../lib/fetch')
const { callFetch, withDefaults, defaultHeaders } = require('../../../lib/fetch')
const { expect } = require('chai')
const fs = require('fs')
const mockttp = require('mockttp')

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}
describe('CallFetch', () => {
describe('with mock server', () => {
const mockServer = mockttp.getLocal()
Expand All @@ -23,6 +28,37 @@ describe('CallFetch', () => {
expect(response).to.be.deep.equal(JSON.parse(expected))
})

it('checks if the default header user-agent and other header is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name/1.0.0/download'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true,
encoding: null,
headers: {
Accept: 'text/html'
}
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
expect(requests[0].headers).to.include({ accept: 'text/html' })
})

it('checks if the default header user-agent is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})

it('checks if the full response is fetched', async () => {
const path = '/registry.npmjs.com/redis/0.1.0'
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')
Expand Down Expand Up @@ -87,17 +123,17 @@ describe('CallFetch', () => {
const url = mockServer.urlFor(path)
const endpointMock = await mockServer.forGet(path).thenReply(200)

const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler ([email protected])' } }
const defaultOptions = { headers: defaultHeaders }
const requestWithDefaults = withDefaults(defaultOptions)
await requestWithDefaults({ url })
await requestWithDefaults({ url })

const requests = await endpointMock.getSeenRequests()
expect(requests.length).to.equal(2)
expect(requests[0].url).to.equal(url)
expect(requests[0].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[0].headers)
expect(requests[1].url).to.equal(url)
expect(requests[1].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[1].headers)
})

it('checks if the response is text with uri option in GET request', async () => {
Expand Down Expand Up @@ -129,6 +165,8 @@ describe('CallFetch', () => {
const json = await requests[0].body.getJson()
expect(json).to.deep.equal({ test: 'test' })
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' })
//Check for the default header value
checkDefaultHeaders(requests[0].headers)
})

describe('test simple', () => {
Expand Down
31 changes: 24 additions & 7 deletions test/unit/providers/fetch/mavenBasedFetchTests.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@ const { expect } = require('chai')
const MavenBasedFetch = require('../../../../providers/fetch/mavenBasedFetch')
const mockttp = require('mockttp')
const sinon = require('sinon')
const { defaultHeaders } = require('../../../../lib/fetch')
const Request = require('../../../../ghcrawler').request

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}

describe('MavenBasedFetch', () => {
describe('find contained file stat', () => {
it('file contained in root dir', async () => {
Expand All @@ -20,24 +27,34 @@ describe('MavenBasedFetch', () => {
})
})

describe('Integration test for component not found', function () {
describe('Integration test', function () {
const path = '/remotecontent?filepath='
const mockServer = mockttp.getLocal()
beforeEach(async () => await mockServer.start())
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const handler = new MavenBasedFetch(
let endpointMock
let handler
beforeEach(async () => {
await mockServer.start()
handler = new MavenBasedFetch(
{
mavencentral: mockServer.urlFor(path)
},
{ logger: { log: sinon.stub() } }
)
await mockServer.forAnyRequest().thenReply(404)
endpointMock = await mockServer.forAnyRequest().thenReply(404)
})
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const request = await handler.handle(
new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.')
)
expect(request.processControl).to.be.equal('skip')
})

it('should check for default header in any request', async () => {
await handler.handle(new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16'))
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})
})
})