Skip to content

Commit 59e652b

Browse files
authored
Merge pull request #620 from yashkohli88/yk/add-default-headers-centrally
Update fetch file to centralize default headers
2 parents: a1d12ac + 178b4a3 · commit 59e652b

File tree

6 files changed

+77
-23
lines changed

6 files changed

+77
-23
lines changed

lib/fetch.js

+5-1
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33

44
const axios = require('axios')
55

6+
const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler ([email protected])' })
7+
8+
axios.defaults.headers = defaultHeaders
9+
610
function buildRequestOptions(request) {
711
let responseType = 'text'
812
if (request.json) {
@@ -45,4 +49,4 @@ function withDefaults(opts) {
4549
return request => callFetch(request, axiosInstance)
4650
}
4751

48-
module.exports = { callFetch, withDefaults }
52+
module.exports = { callFetch, withDefaults, defaultHeaders }

providers/fetch/cratesioFetch.js

+1-3
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,7 @@ class CratesioFetch extends AbstractFetch {
5151
try {
5252
registryData = await request({
5353
url: `https://crates.io/api/v1/crates/${spec.name}`,
54-
json: true,
55-
headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' }
54+
json: true
5655
})
5756
} catch (exception) {
5857
if (exception.statusCode !== 404) throw exception
@@ -72,7 +71,6 @@ class CratesioFetch extends AbstractFetch {
7271
url: `https://crates.io${version.dl_path}`,
7372
encoding: null,
7473
headers: {
75-
'User-Agent': 'clearlydefined.io crawler ([email protected])',
7674
Accept: 'text/html'
7775
}
7876
})

providers/fetch/mavenBasedFetch.js

+3-5
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: MIT
33

44
const AbstractFetch = require('./abstractFetch')
5-
const { withDefaults } = require('../../lib/fetch')
5+
const { callFetch, defaultHeaders } = require('../../lib/fetch')
66
const nodeRequest = require('request')
77
const { clone, get } = require('lodash')
88
const { promisify } = require('util')
@@ -23,14 +23,12 @@ const extensionMap = {
2323
jar: '.jar'
2424
}
2525

26-
const defaultHeaders = { headers: { 'User-Agent': 'clearlydefined.io crawler ([email protected])' } }
27-
2826
class MavenBasedFetch extends AbstractFetch {
2927
constructor(providerMap, options) {
3028
super(options)
3129
this._providerMap = { ...providerMap }
32-
this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders)
33-
this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get
30+
this._handleRequestPromise = options.requestPromise || callFetch
31+
this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get
3432
}
3533

3634
canHandle(request) {

providers/fetch/packagistFetch.js

+2-3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ const nodeRequest = require('request')
99
const { promisify } = require('util')
1010
const readdir = promisify(fs.readdir)
1111
const FetchResult = require('../../lib/fetchResult')
12+
const { defaultHeaders } = require('../../lib/fetch')
1213

1314
const providerMap = {
1415
packagist: 'https://repo.packagist.org/'
@@ -62,9 +63,7 @@ class PackagistFetch extends AbstractFetch {
6263
return new Promise((resolve, reject) => {
6364
const options = {
6465
url: distUrl,
65-
headers: {
66-
'User-Agent': 'clearlydefined.io crawler ([email protected])'
67-
}
66+
headers: defaultHeaders
6867
}
6968
nodeRequest
7069
.get(options, (error, response) => {

test/unit/lib/fetchTests.js

+42-4
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,14 @@
11
const { fail } = require('assert')
2-
const { callFetch, withDefaults } = require('../../../lib/fetch')
2+
const { callFetch, withDefaults, defaultHeaders } = require('../../../lib/fetch')
33
const { expect } = require('chai')
44
const fs = require('fs')
55
const mockttp = require('mockttp')
66

7+
function checkDefaultHeaders(headers) {
8+
for (const [key, value] of Object.entries(defaultHeaders)) {
9+
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
10+
}
11+
}
712
describe('CallFetch', () => {
813
describe('with mock server', () => {
914
const mockServer = mockttp.getLocal()
@@ -23,6 +28,37 @@ describe('CallFetch', () => {
2328
expect(response).to.be.deep.equal(JSON.parse(expected))
2429
})
2530

31+
it('checks if the default header user-agent and other header is present in crate components', async () => {
32+
const path = '/crates.io/api/v1/crates/name/1.0.0/download'
33+
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')
34+
35+
await callFetch({
36+
url: mockServer.urlFor(path),
37+
method: 'GET',
38+
json: true,
39+
encoding: null,
40+
headers: {
41+
Accept: 'text/html'
42+
}
43+
})
44+
const requests = await endpointMock.getSeenRequests()
45+
checkDefaultHeaders(requests[0].headers)
46+
expect(requests[0].headers).to.include({ accept: 'text/html' })
47+
})
48+
49+
it('checks if the default header user-agent is present in crate components', async () => {
50+
const path = '/crates.io/api/v1/crates/name'
51+
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')
52+
53+
await callFetch({
54+
url: mockServer.urlFor(path),
55+
method: 'GET',
56+
json: true
57+
})
58+
const requests = await endpointMock.getSeenRequests()
59+
checkDefaultHeaders(requests[0].headers)
60+
})
61+
2662
it('checks if the full response is fetched', async () => {
2763
const path = '/registry.npmjs.com/redis/0.1.0'
2864
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')
@@ -87,17 +123,17 @@ describe('CallFetch', () => {
87123
const url = mockServer.urlFor(path)
88124
const endpointMock = await mockServer.forGet(path).thenReply(200)
89125

90-
const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler ([email protected])' } }
126+
const defaultOptions = { headers: defaultHeaders }
91127
const requestWithDefaults = withDefaults(defaultOptions)
92128
await requestWithDefaults({ url })
93129
await requestWithDefaults({ url })
94130

95131
const requests = await endpointMock.getSeenRequests()
96132
expect(requests.length).to.equal(2)
97133
expect(requests[0].url).to.equal(url)
98-
expect(requests[0].headers).to.include(defaultOptions.headers)
134+
checkDefaultHeaders(requests[0].headers)
99135
expect(requests[1].url).to.equal(url)
100-
expect(requests[1].headers).to.include(defaultOptions.headers)
136+
checkDefaultHeaders(requests[1].headers)
101137
})
102138

103139
it('checks if the response is text with uri option in GET request', async () => {
@@ -129,6 +165,8 @@ describe('CallFetch', () => {
129165
const json = await requests[0].body.getJson()
130166
expect(json).to.deep.equal({ test: 'test' })
131167
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' })
168+
//Check for the default header value
169+
checkDefaultHeaders(requests[0].headers)
132170
})
133171

134172
describe('test simple', () => {

test/unit/providers/fetch/mavenBasedFetchTests.js

+24-7
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,15 @@ const { expect } = require('chai')
22
const MavenBasedFetch = require('../../../../providers/fetch/mavenBasedFetch')
33
const mockttp = require('mockttp')
44
const sinon = require('sinon')
5+
const { defaultHeaders } = require('../../../../lib/fetch')
56
const Request = require('../../../../ghcrawler').request
67

8+
function checkDefaultHeaders(headers) {
9+
for (const [key, value] of Object.entries(defaultHeaders)) {
10+
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
11+
}
12+
}
13+
714
describe('MavenBasedFetch', () => {
815
describe('find contained file stat', () => {
916
it('file contained in root dir', async () => {
@@ -20,24 +27,34 @@ describe('MavenBasedFetch', () => {
2027
})
2128
})
2229

23-
describe('Integration test for component not found', function () {
30+
describe('Integration test', function () {
2431
const path = '/remotecontent?filepath='
2532
const mockServer = mockttp.getLocal()
26-
beforeEach(async () => await mockServer.start())
27-
afterEach(async () => await mockServer.stop())
28-
29-
it('should handle maven components not found', async () => {
30-
const handler = new MavenBasedFetch(
33+
let endpointMock
34+
let handler
35+
beforeEach(async () => {
36+
await mockServer.start()
37+
handler = new MavenBasedFetch(
3138
{
3239
mavencentral: mockServer.urlFor(path)
3340
},
3441
{ logger: { log: sinon.stub() } }
3542
)
36-
await mockServer.forAnyRequest().thenReply(404)
43+
endpointMock = await mockServer.forAnyRequest().thenReply(404)
44+
})
45+
afterEach(async () => await mockServer.stop())
46+
47+
it('should handle maven components not found', async () => {
3748
const request = await handler.handle(
3849
new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.')
3950
)
4051
expect(request.processControl).to.be.equal('skip')
4152
})
53+
54+
it('should check for default header in any request', async () => {
55+
await handler.handle(new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16'))
56+
const requests = await endpointMock.getSeenRequests()
57+
checkDefaultHeaders(requests[0].headers)
58+
})
4259
})
4360
})

0 commit comments

Comments
 (0)