From 4a2eba65a78c2ec77af0088b1faca563c9e90247 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Fri, 13 Dec 2024 16:25:42 +0100 Subject: [PATCH 01/19] Upgrade Azure Storage SDK to a modern version --- config/cdConfig.js | 4 +- ghcrawler/providers/queuing/storageQueue.js | 137 ++-- .../providers/queuing/storageQueueManager.js | 25 +- .../providers/storage/azureBlobFactory.js | 43 +- .../providers/storage/storageDocStore.js | 164 ++--- package-lock.json | 586 ++++++++++++++---- package.json | 6 +- providers/store/azureQueueStore.js | 33 +- 8 files changed, 649 insertions(+), 349 deletions(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index 05b5a524..ced4c304 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -5,7 +5,8 @@ const config = require('painless-config') const cd_azblob = { connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'), - container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME') + container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'), + account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'), } const githubToken = config.get('CRAWLER_GITHUB_TOKEN') @@ -111,6 +112,7 @@ module.exports = { }, azqueue: { connectionString: cd_azblob.connection, + account: cd_azblob.account, queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests' }, 'cd(azblob)': cd_azblob, diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index c52e1257..5d6822c0 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -1,52 +1,46 @@ // Copyright (c) Microsoft Corporation and others. Made available under the MIT license. 
// SPDX-License-Identifier: MIT +const { QueueServiceClient } = require('@azure/storage-queue') const qlimit = require('qlimit') const { cloneDeep } = require('lodash') class StorageQueue { + /** + * @param {QueueServiceClient} client + * @param {string} name + * @param {string} queueName + * @param {object} formatter + * @param {object} options + */ constructor(client, name, queueName, formatter, options) { - this.client = client this.name = name this.queueName = queueName this.messageFormatter = formatter this.options = options this.logger = options.logger + this.queueClient = client.getQueueClient(this.queueName) } async subscribe() { - return new Promise((resolve, reject) => { - this.client.createQueueIfNotExists(this.queueName, error => { - if (error) { - return reject(error) - } - this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`) - resolve() - }) - }) + await this.queueClient.createIfNotExists() + this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`) } async unsubscribe() { - return + // No specific unsubscribe logic for Azure Queue Storage } async push(requests, option) { requests = Array.isArray(requests) ? 
requests : [requests] return Promise.all( requests.map( - qlimit(this.options.parallelPush || 1)(request => { + qlimit(this.options.parallelPush || 1)(async request => { const body = JSON.stringify(request) - return new Promise((resolve, reject) => { - this.client.createMessage(this.queueName, body, option, (error, queueMessageResult) => { - if (error) { - return reject(error) - } - this._log('Queued', request) - resolve(this._buildMessageReceipt(queueMessageResult, request)) - }) - }) - }) - ) + const queueMessageResult = await this.queueClient.sendMessage(body) + this._log('Queued', request) + return this._buildMessageReceipt(queueMessageResult, request) + })) ) } @@ -56,47 +50,32 @@ class StorageQueue { } async pop() { - const msgOptions = { numOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 } - return new Promise((resolve, reject) => { - this.client.getMessages(this.queueName, msgOptions, (error, result) => { - if (error) { - return reject(error) - } - const message = result[0] - if (!message) { - this.logger.verbose('No messages to receive') - return resolve(null) - } - if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) { - this.logger.verbose('maxDequeueCount exceeded') - this.client.deleteMessage(this.queueName, message.messageId, message.popReceipt, error => { - if (error) return reject(error) - resolve(null) - }) - } else { - message.body = JSON.parse(message.messageText) - const request = this.messageFormatter(message) - request._message = message - this._log('Popped', message.body) - resolve(request) - } - }) - }) + const msgOptions = { numberOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 } + const response = await this.queueClient.receiveMessages(msgOptions) + const message = response.receivedMessageItems[0] + if (!message) { + this.logger.verbose('No messages to receive') + return null + } + if (this.options.maxDequeueCount && message.dequeueCount > 
this.options.maxDequeueCount) { + this.logger.verbose('maxDequeueCount exceeded') + await this.queueClient.deleteMessage(message.messageId, message.popReceipt) + return null + } else { + message.body = JSON.parse(message.messageText) + const request = this.messageFormatter(message) + request._message = message + this._log('Popped', message.body) + return request + } } async done(request) { if (!request || !request._message) { return } - return new Promise((resolve, reject) => { - this.client.deleteMessage(this.queueName, request._message.messageId, request._message.popReceipt, error => { - if (error) { - return reject(error) - } - this._log('ACKed', request._message.body) - resolve() - }) - }) + await this.queueClient.deleteMessage(request._message.messageId, request._message.popReceipt) + this._log('ACKed', request._message.body) } async defer(request) { @@ -110,47 +89,21 @@ class StorageQueue { await this.updateVisibilityTimeout(request) } - updateVisibilityTimeout(request, visibilityTimeout = 0) { - return new Promise((resolve, reject) => { - // visibilityTimeout is updated to 0 to unlock/unlease the message - this.client.updateMessage( - this.queueName, - request._message.messageId, - request._message.popReceipt, - visibilityTimeout, - (error, result) => { - if (error) { - return reject(error) - } - this._log('NAKed', request._message.body) - resolve(this._buildMessageReceipt(result, request._message.body)) - } - ) + async updateVisibilityTimeout(request, visibilityTimeout = 0) { + await this.queueClient.updateMessage(request._message.messageId, request._message.popReceipt, { + visibilityTimeout }) + this._log('NAKed', request._message.body) } async flush() { - return new Promise((resolve, reject) => { - this.client.deleteQueue(this.queueName, error => { - if (error) return reject(error) - this.client.createQueueIfNotExists(this.queueName, error => { - if (error) return reject(error) - resolve() - }) - }) - }) + await this.queueClient.clearMessages() + 
this.logger.info(`Flushed all messages from ${this.queueName}`) } async getInfo() { - return new Promise(resolve => { - this.client.getQueueMetadata(this.queueName, (result, error) => { - if (error) { - this.logger.error(error) - resolve(null) - } - resolve({ count: result[0].approximateMessageCount }) - }) - }) + const properties = await this.queueClient.getProperties() + return { count: properties.approximateMessagesCount } } getName() { diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js index 2f23a7c9..048b9bf2 100644 --- a/ghcrawler/providers/queuing/storageQueueManager.js +++ b/ghcrawler/providers/queuing/storageQueueManager.js @@ -2,14 +2,31 @@ // SPDX-License-Identifier: MIT const AttenuatedQueue = require('./attenuatedQueue') -const AzureStorage = require('azure-storage') +const { QueueServiceClient } = require('@azure/storage-queue') const Request = require('../../lib/request') const StorageQueue = require('./storageQueue') +const { DefaultAzureCredential } = require('@azure/identity') class StorageQueueManager { - constructor(connectionString) { - const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter() - this.client = AzureStorage.createQueueService(connectionString).withFilter(retryOperations) + constructor(connectionString, options) { + const pipelineOptions = { + retryOptions: { + maxTries: 3, + retryDelayInMs: 1000, + maxRetryDelayInMs: 120 * 1000, + tryTimeoutInMs: 30000, + retryPolicyType: StorageRetryPolicyType.EXPONENTIAL + } + } + if (connectionString) { + this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) + } else { + this.client = new QueueServiceClient( + `https://${options.account}.queue.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) + } } createQueueClient(name, formatter, options) { diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js index 
2d2d5eb5..60cfb121 100644 --- a/ghcrawler/providers/storage/azureBlobFactory.js +++ b/ghcrawler/providers/storage/azureBlobFactory.js @@ -1,15 +1,44 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // SPDX-License-Identifier: MIT -const AzureStorage = require('azure-storage') +// @ts-check +const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob') const AzureStorageDocStore = require('./storageDocStore') +const { DefaultAzureCredential } = require ('@azure/identity'); +/** + * @param {object} options + * @param {string} options.account + * @param {string} options.connection + * @param {string} options.container + * @param {object} options.logger + */ module.exports = options => { options.logger.info('creating azure storage store') - const { account, key, connection, container } = options - const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter() - const blobService = connection - ? AzureStorage.createBlobService(connection).withFilter(retryOperations) - : AzureStorage.createBlobService(account, key).withFilter(retryOperations) - return new AzureStorageDocStore(blobService, container, options) + const { account, connection, container } = options + + const pipelineOptions = { + retryOptions: { + maxTries: 3, + retryDelayInMs: 1000, + maxRetryDelayInMs: 120 * 1000, + tryTimeoutInMs: 30000, + retryPolicyType: StorageRetryPolicyType.EXPONENTIAL + } + } + + let blobServiceClient + if (connection) { + options.logger.info('using connection string') + blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions) + } else if (account) { + options.logger.info('using default credentials') + blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, new DefaultAzureCredential(), pipelineOptions) + } else { + throw new Error('either connection or account must be provided') + } + + const containerClient = blobServiceClient.getContainerClient(container) + + return new 
AzureStorageDocStore(containerClient, options) } diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index 8131dfa5..a9af732f 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -1,32 +1,29 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -const azure = require('azure-storage') +const { BlobServiceClient } = require('@azure/storage-blob') +const { ContainerClient } = require('@azure/storage-blob') const memoryCache = require('memory-cache') const { Readable } = require('stream') const URL = require('url') class AzureStorageDocStore { - constructor(blobService, name, options) { - this.service = blobService - this.name = name + /** + * @param {ContainerClient} containerClient + * @param {object} options + */ + constructor(containerClient, options) { + this.containerClient = containerClient this.options = options this._getBlobNameFromKey = this.options.blobKey === 'url' ? 
this._getBlobNameFromUrl : this._getBlobNameFromUrn } async connect() { - return this._createContainer(this.name) + await this._createContainer(this.containerClient) } - async _createContainer(name) { - return new Promise((resolve, reject) => { - this.service.createContainerIfNotExists(name, error => { - if (error) { - return reject(error) - } - resolve(this.service) - }) - }) + async _createContainer(containerClient) { + await containerClient.createIfNotExists() } async upsert(document) { @@ -43,85 +40,46 @@ class AzureStorageDocStore { if (document._metadata.extra) { blobMetadata.extra = JSON.stringify(document._metadata.extra) } - const options = { metadata: blobMetadata, contentSettings: { contentType: 'application/json' } } + const options = { metadata: blobMetadata, blobHTTPHeaders: { blobContentType: 'application/json' } } const dataStream = new Readable() dataStream.push(JSON.stringify(document)) dataStream.push(null) - return new Promise((resolve, reject) => { - dataStream - .pipe(this.service.createWriteStreamToBlockBlob(this.name, blobName, options)) - .on('error', error => { - return reject(error) - }) - .on('finish', () => { - resolve(blobName) - }) - }) + const blockBlobClient = this.containerClient.getBlockBlobClient(blobName) + await blockBlobClient.uploadStream(dataStream, 8 << 20, 5, options) } - // TODO: Consistency on whether key is a URL or URN async get(type, key) { const blobName = this._getBlobNameFromKey(type, key) - return new Promise((resolve, reject) => { - this.service.getBlobToText(this.name, blobName, (error, text) => { - if (error) { - return reject(error) - } - const result = JSON.parse(text) - resolve(result) - }) - }) + const blockBlobClient = this.containerClient.getBlockBlobClient(blobName) + const downloadBlockBlobResponse = await blockBlobClient.download(0) + const downloaded = await this._streamToString(downloadBlockBlobResponse.readableStreamBody) + return JSON.parse(downloaded) } - // TODO: Consistency on whether key is a 
URL or URN async etag(type, key) { const blobName = this._getBlobNameFromKey(type, key) - return new Promise(resolve => { - this.service.getBlobMetadata(this.name, blobName, (error, blob) => { - resolve(error ? null : blob.metadata.etag) - }) - }) + const blockBlobClient = this.containerClient.getBlockBlobClient(blobName) + const properties = await blockBlobClient.getProperties() + return properties.etag } // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly async list(type) { this._ensureDeadletter(type) let entries = [] - let continuationToken = null - do { - const result = await new Promise((resolve, reject) => { - this.service.listBlobsSegmented( - this.name, - continuationToken, - { - include: azure.BlobUtilities.BlobListingDetails.METADATA, - location: azure.StorageUtilities.LocationMode.PRIMARY_THEN_SECONDARY - }, - (error, response) => { - if (error) { - continuationToken = null - reject(error) - } - return resolve(response) - } - ) + for await (const blob of this.containerClient.listBlobsFlat({ includeMetadata: true })) { + const blobMetadata = blob.metadata + entries.push({ + version: blobMetadata.version, + etag: blobMetadata.etag, + type: blobMetadata.type, + url: blobMetadata.url, + urn: blobMetadata.urn, + fetchedAt: blobMetadata.fetchedat, + processedAt: blobMetadata.processedat, + extra: blobMetadata.extra ? JSON.parse(blobMetadata.extra) : undefined }) - entries = entries.concat( - result.entries.map(entry => { - const blobMetadata = entry.metadata - return { - version: blobMetadata.version, - etag: blobMetadata.etag, - type: blobMetadata.type, - url: blobMetadata.url, - urn: blobMetadata.urn, - fetchedAt: blobMetadata.fetchedat, - processedAt: blobMetadata.processedat, - extra: blobMetadata.extra ? 
JSON.parse(blobMetadata.extra) : undefined - } - }) - ) - } while (continuationToken && entries.length < 10000) + } return entries } @@ -129,47 +87,8 @@ class AzureStorageDocStore { async delete(type, key) { this._ensureDeadletter(type) const blobName = this._getBlobNameFromKey(type, key) - return new Promise((resolve, reject) => { - this.service.deleteBlob(this.name, blobName, error => { - if (error) { - return reject(error) - } - resolve(true) - }) - }) - } - - // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly - async count(type, force = false) { - this._ensureDeadletter(type) - const key = `${this.name}:count:${type || ''}` - if (!force) { - const cachedCount = memoryCache.get(key) - if (cachedCount) { - return cachedCount - } - } - let entryCount = 0 - let continuationToken = null - do { - const result = await new Promise((resolve, reject) => { - this.service.listBlobsSegmented( - this.name, - continuationToken, - { location: azure.StorageUtilities.LocationMode.PRIMARY_THEN_SECONDARY }, - (error, response) => { - if (error) { - continuationToken = null - reject(error) - } - return resolve(response) - } - ) - }) - entryCount += result.entries.length - } while (continuationToken) - memoryCache.put(key, entryCount, 60000) - return entryCount + const blockBlobClient = this.containerClient.getBlockBlobClient(blobName) + await blockBlobClient.delete() } async close() { @@ -216,6 +135,19 @@ class AzureStorageDocStore { } return `${this._getBlobPathFromUrn(type, urn)}.json` } + + async _streamToString(readableStream) { + return new Promise((resolve, reject) => { + const chunks = [] + readableStream.on('data', (data) => { + chunks.push(data.toString()) + }) + readableStream.on('end', () => { + resolve(chunks.join('')) + }) + readableStream.on('error', reject) + }) + } } module.exports = AzureStorageDocStore diff --git a/package-lock.json b/package-lock.json index 59e09015..2048fe70 100644 --- 
a/package-lock.json +++ b/package-lock.json @@ -10,13 +10,16 @@ "hasInstallScript": true, "license": "MIT", "dependencies": { + "@azure/identity": "^4.5.0", + "@azure/storage-blob": "^12.26.0", + "@azure/storage-queue": "^12.25.0", "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9", "@microsoft/refreshing-config": "^0.1.3", + "@types/node": "^22.10.1", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "axios": "^1.7.4", "axios-retry": "^3.2.5", - "azure-storage": "^2.10.3", "body-parser": "^1.19.0", "debug": "^4.3.4", "decompress": "^4.2.1", @@ -52,6 +55,7 @@ "spdx-correct": "^3.2.0", "throat": "^5.0.0", "tmp": "0.1.0", + "typescript": "^5.7.2", "unbzip2-stream": "^1.3.3", "winston": "^2.3.0", "winston-azure-application-insights": "^1.5.0", @@ -81,6 +85,288 @@ "node": ">=0.10.0" } }, + "node_modules/@azure/abort-controller": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz", + "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-auth": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.9.0.tgz", + "integrity": "sha512-FPwHpZywuyasDSLMqJ6fhbOK3TqUdviZNF8OqRGA4W5Ewib2lEEZ+pBsYcBa88B2NGO/SEnYPGhyBqNlE8ilSw==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-util": "^1.11.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-client": { + "version": "1.9.2", + "resolved": "https://registry.npmjs.org/@azure/core-client/-/core-client-1.9.2.tgz", + "integrity": "sha512-kRdry/rav3fUKHl/aDLd/pDLcB+4pOFwPPTVEExuMyaI5r+JBbMWqRbCY1pn5BniDaU3lRxO9eaQ1AmSMehl/w==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.4.0", + "@azure/core-rest-pipeline": "^1.9.1", + 
"@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.6.1", + "@azure/logger": "^1.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-http-compat": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-2.1.2.tgz", + "integrity": "sha512-5MnV1yqzZwgNLLjlizsU3QqOeQChkIXw781Fwh1xdAqJR5AA32IUaq6xv1BICJvfbHoa+JYcaij2HFkhLbNTJQ==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-client": "^1.3.0", + "@azure/core-rest-pipeline": "^1.3.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-lro": { + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/@azure/core-lro/-/core-lro-2.7.2.tgz", + "integrity": "sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-util": "^1.2.0", + "@azure/logger": "^1.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-paging": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/@azure/core-paging/-/core-paging-1.6.2.tgz", + "integrity": "sha512-YKWi9YuCU04B55h25cnOYZHxXYtEvQEbKST5vqRga7hWY9ydd3FZHdeQF8pyh+acWZvppw13M/LMGx0LABUVMA==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.18.1.tgz", + "integrity": "sha512-/wS73UEDrxroUEVywEm7J0p2c+IIiVxyfigCGfsKvCxxCET4V/Hef2aURqltrXMRjNmdmt5IuOgIpl8f6xdO5A==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.8.0", + "@azure/core-tracing": "^1.0.1", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.0", + "tslib": "^2.6.2" + }, + "engines": { + 
"node": ">=18.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline/node_modules/agent-base": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", + "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", + "dependencies": { + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/@azure/core-rest-pipeline/node_modules/https-proxy-agent": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz", + "integrity": "sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==", + "dependencies": { + "agent-base": "^7.0.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/@azure/core-tracing": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.2.0.tgz", + "integrity": "sha512-UKTiEJPkWcESPYJz3X5uKRYyOcJD+4nYph+KpfdPRnQJVrZfk0KJgdnaAWKfhsBBtAf/D58Az4AvCJEmWgIBAg==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-util": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/@azure/core-util/-/core-util-1.11.0.tgz", + "integrity": "sha512-DxOSLua+NdpWoSqULhjDyAZTXFdP/LKkqtYuxxz1SCN289zk3OG8UOpnCQAz/tygyACBtWp/BoO72ptK7msY8g==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-xml": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/@azure/core-xml/-/core-xml-1.4.4.tgz", + "integrity": "sha512-J4FYAqakGXcbfeZjwjMzjNcpcH4E+JtEBv+xcV1yL0Ydn/6wbQfeFKTCHh9wttAi0lmajHw7yBbHPRG+YHckZQ==", + "dependencies": { + "fast-xml-parser": "^4.4.1", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/identity": { + "version": "4.5.0", + "resolved": 
"https://registry.npmjs.org/@azure/identity/-/identity-4.5.0.tgz", + "integrity": "sha512-EknvVmtBuSIic47xkOqyNabAme0RYTw52BTMz8eBgU1ysTyMrD1uOoM+JdS0J/4Yfp98IBT3osqq3BfwSaNaGQ==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.9.0", + "@azure/core-client": "^1.9.2", + "@azure/core-rest-pipeline": "^1.17.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "@azure/msal-browser": "^3.26.1", + "@azure/msal-node": "^2.15.0", + "events": "^3.0.0", + "jws": "^4.0.0", + "open": "^8.0.0", + "stoppable": "^1.1.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/identity/node_modules/open": { + "version": "8.4.2", + "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", + "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==", + "dependencies": { + "define-lazy-prop": "^2.0.0", + "is-docker": "^2.1.1", + "is-wsl": "^2.2.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@azure/logger": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.1.4.tgz", + "integrity": "sha512-4IXXzcCdLdlXuCG+8UKEwLA1T1NHqUfanhXYHiQTn+6sfWCZXduqbtXDGceg3Ce5QxTGo7EqmbV6Bi+aqKuClQ==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/msal-browser": { + "version": "3.28.0", + "resolved": "https://registry.npmjs.org/@azure/msal-browser/-/msal-browser-3.28.0.tgz", + "integrity": "sha512-1c1qUF6vB52mWlyoMem4xR1gdwiQWYEQB2uhDkbAL4wVJr8WmAcXybc1Qs33y19N4BdPI8/DHI7rPE8L5jMtWw==", + "dependencies": { + "@azure/msal-common": "14.16.0" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-common": { + "version": "14.16.0", + "resolved": 
"https://registry.npmjs.org/@azure/msal-common/-/msal-common-14.16.0.tgz", + "integrity": "sha512-1KOZj9IpcDSwpNiQNjt0jDYZpQvNZay7QAEi/5DLubay40iGYtLzya/jbjRPLyOTZhEKyL1MzPuw2HqBCjceYA==", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-node": { + "version": "2.16.2", + "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-2.16.2.tgz", + "integrity": "sha512-An7l1hEr0w1HMMh1LU+rtDtqL7/jw74ORlc9Wnh06v7TU/xpG39/Zdr1ZJu3QpjUfKJ+E0/OXMW8DRSWTlh7qQ==", + "dependencies": { + "@azure/msal-common": "14.16.0", + "jsonwebtoken": "^9.0.0", + "uuid": "^8.3.0" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/@azure/storage-blob": { + "version": "12.26.0", + "resolved": "https://registry.npmjs.org/@azure/storage-blob/-/storage-blob-12.26.0.tgz", + "integrity": "sha512-SriLPKezypIsiZ+TtlFfE46uuBIap2HeaQVS78e1P7rz5OSbq0rsd52WE1mC5f7vAeLiXqv7I7oRhL3WFZEw3Q==", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.4.0", + "@azure/core-client": "^1.6.2", + "@azure/core-http-compat": "^2.0.0", + "@azure/core-lro": "^2.2.0", + "@azure/core-paging": "^1.1.1", + "@azure/core-rest-pipeline": "^1.10.1", + "@azure/core-tracing": "^1.1.2", + "@azure/core-util": "^1.6.1", + "@azure/core-xml": "^1.4.3", + "@azure/logger": "^1.0.0", + "events": "^3.0.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/storage-queue": { + "version": "12.25.0", + "resolved": "https://registry.npmjs.org/@azure/storage-queue/-/storage-queue-12.25.0.tgz", + "integrity": "sha512-uoobHFbH/o7wIul/sCm32X2YFq6zb1XpNdpKIms9I60mwG3BBaOpEs5pgQV5a5ONG5WMSHlo8E1dNFB5ZZIa1g==", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.4.0", + "@azure/core-client": "^1.6.2", + "@azure/core-http-compat": "^2.0.0", + "@azure/core-paging": "^1.1.1", + "@azure/core-rest-pipeline": "^1.10.1", + "@azure/core-tracing": "^1.1.2", + "@azure/core-util": "^1.6.1", + "@azure/core-xml": 
"^1.4.3", + "@azure/logger": "^1.0.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@babel/code-frame": { "version": "7.12.11", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", @@ -905,12 +1191,11 @@ } }, "node_modules/@types/node": { - "version": "20.14.10", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.10.tgz", - "integrity": "sha512-MdiXf+nDuMvY0gJKxyfZ7/6UFsETO7mGKF54MVD/ekJS6HdFtpZFBgrh6Pseu64XTb2MLyFPlbW6hj8HYRQNOQ==", - "dev": true, + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", "dependencies": { - "undici-types": "~5.26.4" + "undici-types": "~6.20.0" } }, "node_modules/@types/ws": { @@ -1282,46 +1567,6 @@ "node": ">= 6" } }, - "node_modules/azure-storage": { - "version": "2.10.7", - "resolved": "https://registry.npmjs.org/azure-storage/-/azure-storage-2.10.7.tgz", - "integrity": "sha512-4oeFGtn3Ziw/fGs/zkoIpKKtygnCVIcZwzJ7UQzKTxhkGQqVCByOFbYqMGYR3L+wOsunX9lNfD0jc51SQuKSSA==", - "deprecated": "Please note: newer packages @azure/storage-blob, @azure/storage-queue and @azure/storage-file are available as of November 2019 and @azure/data-tables is available as of June 2021. While the legacy azure-storage package will continue to receive critical bug fixes, we strongly encourage you to upgrade. 
Migration guide can be found: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/storage/MigrationGuide.md", - "dependencies": { - "browserify-mime": "^1.2.9", - "extend": "^3.0.2", - "json-edm-parser": "~0.1.2", - "json-schema": "~0.4.0", - "md5.js": "^1.3.4", - "readable-stream": "^2.0.0", - "request": "^2.86.0", - "underscore": "^1.12.1", - "uuid": "^3.0.0", - "validator": "^13.7.0", - "xml2js": "~0.2.8", - "xmlbuilder": "^9.0.7" - }, - "engines": { - "node": ">= 0.8.26" - } - }, - "node_modules/azure-storage/node_modules/uuid": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz", - "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==", - "deprecated": "Please upgrade to version 7 or higher. Older versions may use Math.random() in certain circumstances, which is known to be problematic. See https://v8.dev/blog/math-random for details.", - "bin": { - "uuid": "bin/uuid" - } - }, - "node_modules/azure-storage/node_modules/xml2js": { - "version": "0.2.8", - "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.2.8.tgz", - "integrity": "sha512-ZHZBIAO55GHCn2jBYByVPHvHS+o3j8/a/qmpEe6kxO3cTnTCWC3Htq9RYJ5G4XMwMMClD2QkXA9SNdPadLyn3Q==", - "dependencies": { - "sax": "0.5.x" - } - }, "node_modules/backo2": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/backo2/-/backo2-1.0.2.tgz", @@ -1499,11 +1744,6 @@ "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", "dev": true }, - "node_modules/browserify-mime": { - "version": "1.2.9", - "resolved": "https://registry.npmjs.org/browserify-mime/-/browserify-mime-1.2.9.tgz", - "integrity": "sha1-rrGvKN5sDXpqLOQK22j/GEIq8x8=" - }, "node_modules/buffer": { "version": "5.4.0", "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.4.0.tgz", @@ -1535,6 +1775,11 @@ "node": "*" } }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==" + }, "node_modules/buffer-fill": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/buffer-fill/-/buffer-fill-1.0.0.tgz", @@ -2364,6 +2609,14 @@ "abstract-leveldown": "~2.6.0" } }, + "node_modules/define-lazy-prop": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", + "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==", + "engines": { + "node": ">=8" + } + }, "node_modules/degenerator": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", @@ -2559,6 +2812,14 @@ "safer-buffer": "^2.1.0" } }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/ee-first": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", @@ -3186,6 +3447,14 @@ "integrity": "sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==", "dev": true }, + "node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, "node_modules/express": { "version": "4.19.2", "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", @@ -3405,6 +3674,27 @@ "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", "dev": true }, + "node_modules/fast-xml-parser": 
{ + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.0.tgz", + "integrity": "sha512-/PlTQCI96+fZMAOLMZK4CWG1ItCbfZ/0jx7UIJFChPNrx7tcEgerUgWbeieCM9MfHInUDyK8DWYZ+YrywDJuTg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + }, + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + ], + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, "node_modules/fastq": { "version": "1.16.0", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.16.0.tgz", @@ -4166,18 +4456,6 @@ "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk=" }, - "node_modules/hash-base": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/hash-base/-/hash-base-3.0.4.tgz", - "integrity": "sha1-X8hoaEfs1zSZQDMZprCj8/auSRg=", - "dependencies": { - "inherits": "^2.0.1", - "safe-buffer": "^5.0.1" - }, - "engines": { - "node": ">=4" - } - }, "node_modules/hasha": { "version": "5.2.2", "resolved": "https://registry.npmjs.org/hasha/-/hasha-5.2.2.tgz", @@ -4272,7 +4550,6 @@ "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dev": true, "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" @@ -4285,7 +4562,6 @@ "version": "7.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", - "dev": true, "dependencies": { "debug": "^4.3.4" }, @@ -4938,14 +5214,6 @@ "node": ">=4" } }, - "node_modules/json-edm-parser": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/json-edm-parser/-/json-edm-parser-0.1.2.tgz", - "integrity": 
"sha1-HmCw/vG8CvZ7wNFG393lSGzWFbQ=", - "dependencies": { - "jsonparse": "~1.2.0" - } - }, "node_modules/json-schema": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", @@ -4990,13 +5258,45 @@ "graceful-fs": "^4.1.6" } }, - "node_modules/jsonparse": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/jsonparse/-/jsonparse-1.2.0.tgz", - "integrity": "sha1-XAxWhRBxYOcv50ib3eoLRMK8Z70=", - "engines": [ - "node >= 0.2.0" - ] + "node_modules/jsonwebtoken": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", + "integrity": "sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==", + "dependencies": { + "jws": "^3.2.2", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jsonwebtoken/node_modules/jwa": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", + "integrity": "sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==", + "dependencies": { + "buffer-equal-constant-time": "1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jsonwebtoken/node_modules/jws": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/jws/-/jws-3.2.2.tgz", + "integrity": "sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==", + "dependencies": { + "jwa": "^1.4.1", + "safe-buffer": "^5.0.1" + } }, "node_modules/jsprim": { "version": "1.4.2", @@ -5018,6 +5318,25 @@ "integrity": "sha512-g3UB796vUFIY90VIv/WX3L2c8CS2MdWUww3CNrYmqza1Fg0DURc2K/O4YrnklBdQarSJ/y8JnJYDGc+1iumQjg==", "dev": true }, + 
"node_modules/jwa": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz", + "integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==", + "dependencies": { + "buffer-equal-constant-time": "1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, "node_modules/klaw-sync": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/klaw-sync/-/klaw-sync-6.0.0.tgz", @@ -5122,12 +5441,47 @@ "integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk=", "dev": true }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==" + }, + "node_modules/lodash.isplainobject": { + "version": "4.0.6", + 
"resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==" + }, "node_modules/log-symbols": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.0.0.tgz", @@ -5262,15 +5616,6 @@ "resolved": "https://registry.npmjs.org/map-async/-/map-async-0.1.1.tgz", "integrity": "sha1-yJfARJ+Fhkx0taPxlu20IVZDF0U=" }, - "node_modules/md5.js": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.4.tgz", - "integrity": "sha1-6b296UogpawYsENA/Fdk1bCdkB0=", - "dependencies": { - "hash-base": "^3.0.0", - "inherits": "^2.0.1" - } - }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -7183,11 +7528,6 @@ "deprecated": "This package has been deprecated in favour of @sinonjs/samsam", "dev": true }, - "node_modules/sax": { - "version": "0.5.8", - "resolved": "https://registry.npmjs.org/sax/-/sax-0.5.8.tgz", - "integrity": "sha1-1HLbIo6zMcJQaw6MFVJK25OdEsE=" - }, "node_modules/seek-bzip": { "version": "1.0.5", "resolved": 
"https://registry.npmjs.org/seek-bzip/-/seek-bzip-1.0.5.tgz", @@ -7613,6 +7953,15 @@ "node": ">= 0.8" } }, + "node_modules/stoppable": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/stoppable/-/stoppable-1.1.0.tgz", + "integrity": "sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==", + "engines": { + "node": ">=4", + "npm": ">=6" + } + }, "node_modules/stream-shift": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.3.tgz", @@ -7672,6 +8021,11 @@ "node": ">=0.10.0" } }, + "node_modules/strnum": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==" + }, "node_modules/supports-color": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", @@ -7852,8 +8206,7 @@ "node_modules/tslib": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz", - "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==", - "dev": true + "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==" }, "node_modules/tunnel-agent": { "version": "0.6.0", @@ -7925,6 +8278,18 @@ "is-typedarray": "^1.0.0" } }, + "node_modules/typescript": { + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", + "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, "node_modules/unbzip2-stream": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.3.3.tgz", @@ -7934,16 +8299,10 @@ "through": "^2.3.8" } }, - 
"node_modules/underscore": { - "version": "1.13.6", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.6.tgz", - "integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==" - }, "node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "dev": true + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==" }, "node_modules/universalify": { "version": "2.0.1", @@ -7992,19 +8351,10 @@ "version": "8.3.2", "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", - "dev": true, "bin": { "uuid": "dist/bin/uuid" } }, - "node_modules/validator": { - "version": "13.11.0", - "resolved": "https://registry.npmjs.org/validator/-/validator-13.11.0.tgz", - "integrity": "sha512-Ii+sehpSfZy+At5nPdnyMhx78fEoPDkR2XW/zimHEL3MyGJQOCQ7WeP20jPYRz7ZCpcKLB21NxuXHF3bxjStBQ==", - "engines": { - "node": ">= 0.10" - } - }, "node_modules/value-or-promise": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/value-or-promise/-/value-or-promise-1.0.11.tgz", @@ -8284,14 +8634,6 @@ "node": ">=4.0" } }, - "node_modules/xmlbuilder": { - "version": "9.0.7", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz", - "integrity": "sha1-Ey7mPS7FVlxVfiD0wi35rKaGsQ0=", - "engines": { - "node": ">=4.0" - } - }, "node_modules/xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", diff --git a/package.json b/package.json index 2fe6b1cd..d8fe1fe5 100644 --- a/package.json +++ b/package.json @@ -34,13 
+34,16 @@ "url": "https://github.com/clearlydefined/crawler" }, "dependencies": { + "@azure/identity": "^4.5.0", + "@azure/storage-blob": "^12.26.0", + "@azure/storage-queue": "^12.25.0", "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9", "@microsoft/refreshing-config": "^0.1.3", + "@types/node": "^22.10.1", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "axios": "^1.7.4", "axios-retry": "^3.2.5", - "azure-storage": "^2.10.3", "body-parser": "^1.19.0", "debug": "^4.3.4", "decompress": "^4.2.1", @@ -76,6 +79,7 @@ "spdx-correct": "^3.2.0", "throat": "^5.0.0", "tmp": "0.1.0", + "typescript": "^5.7.2", "unbzip2-stream": "^1.3.3", "winston": "^2.3.0", "winston-azure-application-insights": "^1.5.0", diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js index b1e0e461..2002c3ca 100644 --- a/providers/store/azureQueueStore.js +++ b/providers/store/azureQueueStore.js @@ -1,25 +1,46 @@ // Copyright (c) Microsoft Corporation and others. Licensed under the MIT license. 
// SPDX-License-Identifier: MIT -const azure = require('azure-storage') +const { DefaultAzureCredential } = require('@azure/identity') +const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue') const { promisify } = require('util') class AzureStorageQueue { constructor(options) { this.options = options + this.queueName = options.queueName this.logger = options.logger + + const { connectionString, account } = options + + const pipelineOptions = { + retryOptions: { + maxTries: 3, + retryDelayInMs: 1000, + maxRetryDelayInMs: 120 * 1000, + tryTimeoutInMs: 30000, + retryPolicyType: StorageRetryPolicyType.FIXED + } + } + if (connectionString) { + this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) + } else { + this.client = new QueueServiceClient( + `https://${account}.queue.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) + } } async connect() { - this.queueService = azure - .createQueueService(this.options.connectionString) - .withFilter(new azure.LinearRetryPolicyFilter()) - await promisify(this.queueService.createQueueIfNotExists).bind(this.queueService)(this.options.queueName) + this.queueService = this.client.getQueueClient(this.queueName) + this.queueService.createIfNotExists() } async upsert(document) { const message = Buffer.from(JSON.stringify({ _metadata: document._metadata })).toString('base64') - await promisify(this.queueService.createMessage).bind(this.queueService)(this.options.queueName, message) + return await this.queueService.sendMessage(message) } get() { From 0d00eaf8b30c399e05d6fde995ae81daaeb37143 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Tue, 17 Dec 2024 15:50:03 +0100 Subject: [PATCH 02/19] Add back AzureStorageDocStore.count method --- .../providers/storage/storageDocStore.js | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/ghcrawler/providers/storage/storageDocStore.js 
b/ghcrawler/providers/storage/storageDocStore.js index a9af732f..e79cecfd 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -91,6 +91,32 @@ class AzureStorageDocStore { await blockBlobClient.delete() } + // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly + async count(type, force = false) { + this._ensureDeadletter(type) + const key = `${this.name}:count:${type || ''}` + + if (!force) { + const cachedCount = memoryCache.get(key) + if (cachedCount) { + return cachedCount + } + } + + let entryCount = 0 + const properties = await this.containerClient.getProperties() + properties.blobCount + try { + for await (const blob of this.containerClient.listBlobsFlat()) { + entryCount++ + } + memoryCache.put(key, entryCount, 60000) + return entryCount + } catch (error) { + throw error + } + } + async close() { return } @@ -139,7 +165,7 @@ class AzureStorageDocStore { async _streamToString(readableStream) { return new Promise((resolve, reject) => { const chunks = [] - readableStream.on('data', (data) => { + readableStream.on('data', data => { chunks.push(data.toString()) }) readableStream.on('end', () => { From 13bfce996f2ab6f0606c05c03575395092976c4e Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Tue, 17 Dec 2024 16:03:12 +0100 Subject: [PATCH 03/19] Tweak async error handling, apply prettier --- config/cdConfig.js | 2 +- ghcrawler/providers/queuing/storageQueue.js | 18 ++++++++++++++---- .../providers/storage/azureBlobFactory.js | 8 ++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index ced4c304..5a3592a9 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -6,7 +6,7 @@ const config = require('painless-config') const cd_azblob = { connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'), container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'), - 
account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'), + account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME') } const githubToken = config.get('CRAWLER_GITHUB_TOKEN') diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index 5d6822c0..c77251de 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -40,7 +40,8 @@ class StorageQueue { const queueMessageResult = await this.queueClient.sendMessage(body) this._log('Queued', request) return this._buildMessageReceipt(queueMessageResult, request) - })) + }) + ) ) } @@ -59,7 +60,11 @@ class StorageQueue { } if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) { this.logger.verbose('maxDequeueCount exceeded') - await this.queueClient.deleteMessage(message.messageId, message.popReceipt) + try { + await this.queueClient.deleteMessage(message.messageId, message.popReceipt) + } catch (error) { + // Ignore error + } return null } else { message.body = JSON.parse(message.messageText) @@ -102,8 +107,13 @@ class StorageQueue { } async getInfo() { - const properties = await this.queueClient.getProperties() - return { count: properties.approximateMessagesCount } + try { + const properties = await this.queueClient.getProperties() + return { count: properties.approximateMessagesCount } + } catch (error) { + this.logger.error(error) + return null + } } getName() { diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js index 60cfb121..5009b68d 100644 --- a/ghcrawler/providers/storage/azureBlobFactory.js +++ b/ghcrawler/providers/storage/azureBlobFactory.js @@ -4,7 +4,7 @@ // @ts-check const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob') const AzureStorageDocStore = require('./storageDocStore') -const { DefaultAzureCredential } = require ('@azure/identity'); +const { DefaultAzureCredential } = 
require('@azure/identity') /** * @param {object} options @@ -33,7 +33,11 @@ module.exports = options => { blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions) } else if (account) { options.logger.info('using default credentials') - blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, new DefaultAzureCredential(), pipelineOptions) + blobServiceClient = new BlobServiceClient( + `https://${account}.blob.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) } else { throw new Error('either connection or account must be provided') } From 7003187c9c4a12ff1444a7fbe94371bff62609b2 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Tue, 17 Dec 2024 17:21:32 +0100 Subject: [PATCH 04/19] Fix code style issues --- ghcrawler/providers/queuing/storageQueue.js | 3 ++- .../providers/queuing/storageQueueManager.js | 2 +- ghcrawler/providers/storage/storageDocStore.js | 16 +++++----------- providers/store/azureQueueStore.js | 1 - 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index c77251de..2c8bd0f4 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation and others. Made available under the MIT license. // SPDX-License-Identifier: MIT +// eslint-disable-next-line no-unused-vars const { QueueServiceClient } = require('@azure/storage-queue') const qlimit = require('qlimit') const { cloneDeep } = require('lodash') @@ -31,7 +32,7 @@ class StorageQueue { // No specific unsubscribe logic for Azure Queue Storage } - async push(requests, option) { + async push(requests, _option) { requests = Array.isArray(requests) ? 
requests : [requests] return Promise.all( requests.map( diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js index 048b9bf2..ae1a4581 100644 --- a/ghcrawler/providers/queuing/storageQueueManager.js +++ b/ghcrawler/providers/queuing/storageQueueManager.js @@ -2,7 +2,7 @@ // SPDX-License-Identifier: MIT const AttenuatedQueue = require('./attenuatedQueue') -const { QueueServiceClient } = require('@azure/storage-queue') +const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue') const Request = require('../../lib/request') const StorageQueue = require('./storageQueue') const { DefaultAzureCredential } = require('@azure/identity') diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index e79cecfd..0e052365 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-const { BlobServiceClient } = require('@azure/storage-blob') +// eslint-disable-next-line no-unused-vars const { ContainerClient } = require('@azure/storage-blob') const memoryCache = require('memory-cache') const { Readable } = require('stream') @@ -95,26 +95,20 @@ class AzureStorageDocStore { async count(type, force = false) { this._ensureDeadletter(type) const key = `${this.name}:count:${type || ''}` - if (!force) { const cachedCount = memoryCache.get(key) if (cachedCount) { return cachedCount } } - let entryCount = 0 const properties = await this.containerClient.getProperties() properties.blobCount - try { - for await (const blob of this.containerClient.listBlobsFlat()) { - entryCount++ - } - memoryCache.put(key, entryCount, 60000) - return entryCount - } catch (error) { - throw error + for await (const _blob of this.containerClient.listBlobsFlat()) { + entryCount++ } + memoryCache.put(key, entryCount, 60000) + return entryCount } async close() { diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js index 2002c3ca..51cee901 100644 --- a/providers/store/azureQueueStore.js +++ b/providers/store/azureQueueStore.js @@ -3,7 +3,6 @@ const { DefaultAzureCredential } = require('@azure/identity') const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue') -const { promisify } = require('util') class AzureStorageQueue { constructor(options) { From 305b88251ecb30f1dd2a5fbff631c66e502d1077 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Tue, 17 Dec 2024 17:47:40 +0100 Subject: [PATCH 05/19] Fix code style issues --- ghcrawler/providers/queuing/storageQueue.js | 2 +- ghcrawler/providers/storage/storageDocStore.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index 2c8bd0f4..75256988 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ 
b/ghcrawler/providers/queuing/storageQueue.js @@ -32,7 +32,7 @@ class StorageQueue { // No specific unsubscribe logic for Azure Queue Storage } - async push(requests, _option) { + async push(requests) { requests = Array.isArray(requests) ? requests : [requests] return Promise.all( requests.map( diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index 0e052365..a4967a92 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -104,7 +104,7 @@ class AzureStorageDocStore { let entryCount = 0 const properties = await this.containerClient.getProperties() properties.blobCount - for await (const _blob of this.containerClient.listBlobsFlat()) { + for await (const {} of this.containerClient.listBlobsFlat()) { entryCount++ } memoryCache.put(key, entryCount, 60000) From a6a56ecdbc71da05663d0e494ffe910717385a01 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Tue, 17 Dec 2024 17:51:01 +0100 Subject: [PATCH 06/19] Fix code style issues --- ghcrawler/providers/storage/storageDocStore.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index a4967a92..9f8bc2a8 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -104,7 +104,8 @@ class AzureStorageDocStore { let entryCount = 0 const properties = await this.containerClient.getProperties() properties.blobCount - for await (const {} of this.containerClient.listBlobsFlat()) { + // eslint-disable-next-line no-unused-vars + for await (const _ of this.containerClient.listBlobsFlat()) { entryCount++ } memoryCache.put(key, entryCount, 60000) From e12ba6786f5954732ab71d4dfd12c1f139ed40d6 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Wed, 18 Dec 2024 17:38:39 +0100 Subject: [PATCH 07/19] 
Add support for separate service principal credentials for blobs and queues --- config/cdConfig.js | 6 +++-- .../providers/queuing/storageQueueManager.js | 16 ++++++++----- .../providers/storage/azureBlobFactory.js | 23 +++++++++++-------- providers/store/azureQueueStore.js | 17 ++++++++------ 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index 5a3592a9..f56d03a6 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -6,7 +6,8 @@ const config = require('painless-config') const cd_azblob = { connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'), container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'), - account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME') + account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'), + spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH') } const githubToken = config.get('CRAWLER_GITHUB_TOKEN') @@ -113,7 +114,8 @@ module.exports = { azqueue: { connectionString: cd_azblob.connection, account: cd_azblob.account, - queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests' + queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests', + spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH') }, 'cd(azblob)': cd_azblob, 'cd(file)': cd_file diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js index ae1a4581..7974c0e5 100644 --- a/ghcrawler/providers/queuing/storageQueueManager.js +++ b/ghcrawler/providers/queuing/storageQueueManager.js @@ -5,7 +5,7 @@ const AttenuatedQueue = require('./attenuatedQueue') const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue') const Request = require('../../lib/request') const StorageQueue = require('./storageQueue') -const { DefaultAzureCredential } = require('@azure/identity') +const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity') class StorageQueueManager { constructor(connectionString, options) { @@ -21,11 +21,15 @@ 
class StorageQueueManager { if (connectionString) { this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) } else { - this.client = new QueueServiceClient( - `https://${options.account}.queue.core.windows.net`, - new DefaultAzureCredential(), - pipelineOptions - ) + const { account, spnAuth } = options + let credential + if (spnAuth) { + const authParsed = JSON.parse(spnAuth) + credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) + } else { + credential = new DefaultAzureCredential() + } + this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions) } } diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js index 5009b68d..b08af8b7 100644 --- a/ghcrawler/providers/storage/azureBlobFactory.js +++ b/ghcrawler/providers/storage/azureBlobFactory.js @@ -4,7 +4,7 @@ // @ts-check const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob') const AzureStorageDocStore = require('./storageDocStore') -const { DefaultAzureCredential } = require('@azure/identity') +const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity') /** * @param {object} options @@ -12,10 +12,11 @@ const { DefaultAzureCredential } = require('@azure/identity') * @param {string} options.connection * @param {string} options.container * @param {object} options.logger + * @param {object} options.spnAuth */ module.exports = options => { options.logger.info('creating azure storage store') - const { account, connection, container } = options + const { account, connection, container, spnAuth } = options const pipelineOptions = { retryOptions: { @@ -31,15 +32,17 @@ module.exports = options => { if (connection) { options.logger.info('using connection string') blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions) - } else if (account) { - 
options.logger.info('using default credentials') - blobServiceClient = new BlobServiceClient( - `https://${account}.blob.core.windows.net`, - new DefaultAzureCredential(), - pipelineOptions - ) } else { - throw new Error('either connection or account must be provided') + let credential + if (spnAuth) { + const authParsed = JSON.parse(spnAuth) + credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) + options.logger.info('using service principal credentials') + } else { + credential = new DefaultAzureCredential() + options.logger.info('using default credentials') + } + blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, credential, pipelineOptions) } const containerClient = blobServiceClient.getContainerClient(container) diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js index 51cee901..25c020b2 100644 --- a/providers/store/azureQueueStore.js +++ b/providers/store/azureQueueStore.js @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation and others. Licensed under the MIT license. 
// SPDX-License-Identifier: MIT -const { DefaultAzureCredential } = require('@azure/identity') +const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity') const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue') class AzureStorageQueue { @@ -10,7 +10,7 @@ class AzureStorageQueue { this.queueName = options.queueName this.logger = options.logger - const { connectionString, account } = options + const { connectionString, account, spnAuth } = options const pipelineOptions = { retryOptions: { @@ -24,11 +24,14 @@ class AzureStorageQueue { if (connectionString) { this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) } else { - this.client = new QueueServiceClient( - `https://${account}.queue.core.windows.net`, - new DefaultAzureCredential(), - pipelineOptions - ) + let credential + if (spnAuth) { + const authParsed = JSON.parse(spnAuth) + credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) + } else { + credential = new DefaultAzureCredential() + } + this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions) } } From 73c02a31e257461ea9030944932505462da56a51 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Thu, 19 Dec 2024 18:01:18 +0100 Subject: [PATCH 08/19] Add missing config values --- config/cdConfig.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index f56d03a6..f07a1f3c 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -139,7 +139,9 @@ module.exports = { maxDequeueCount: 5, attenuation: { ttl: 3000 - } + }, + spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'), + account: cd_azblob.account }, appVersion: config.get('APP_VERSION'), buildsha: config.get('BUILD_SHA') From bebef96518f7ec60c3fdab20ef1d3d5e0b1c0384 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev 
<romaniakovlev@github.com> Date: Tue, 14 Jan 2025 16:44:50 +0100 Subject: [PATCH 09/19] Add more logging around queue message parsing --- ghcrawler/providers/queuing/storageQueue.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index 75256988..02f43d69 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -68,7 +68,15 @@ class StorageQueue { } return null } else { - message.body = JSON.parse(message.messageText) + try { + message.body = JSON.parse(message.messageText) + } catch (error) { + this.logger.error(`Failed to parse message ${message.messageId}:`) + this.logger.error(`Raw message: ${message.messageText}`) + this.logger.error(`Parse error: ${error.message}`) + await this.queueClient.deleteMessage(message.messageId, message.popReceipt) + return null + } const request = this.messageFormatter(message) request._message = message this._log('Popped', message.body) From dad4e0d9f769f82602b771c5b9ad83b39bae39e7 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Wed, 15 Jan 2025 15:46:02 +0100 Subject: [PATCH 10/19] Decode Azure queue message before parsing --- ghcrawler/providers/queuing/storageQueue.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index 02f43d69..b93e60d9 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -69,7 +69,13 @@ class StorageQueue { return null } else { try { - message.body = JSON.parse(message.messageText) + const decodedText = message.messageText + .replace(/"/g, '"') + .replace(/&/g, '&') + .replace(/'/g, "'") + .replace(/</g, '<') + .replace(/>/g, '>') + message.body = JSON.parse(decodedText) } catch (error) { this.logger.error(`Failed to parse message 
${message.messageId}:`) this.logger.error(`Raw message: ${message.messageText}`) From b038b724a38a9f6debb1896c700f8eb2ab90e9c7 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Fri, 17 Jan 2025 17:51:56 +0100 Subject: [PATCH 11/19] Fix the parameter passing in storage queue updateMessage call --- ghcrawler/providers/queuing/storageQueue.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index b93e60d9..c408d45d 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -110,9 +110,12 @@ class StorageQueue { } async updateVisibilityTimeout(request, visibilityTimeout = 0) { - await this.queueClient.updateMessage(request._message.messageId, request._message.popReceipt, { + await this.queueClient.updateMessage( + request._message.messageId, + request._message.popReceipt, + undefined, visibilityTimeout - }) + ) this._log('NAKed', request._message.body) } From b437f9c59becfd77239cf8f85d73e5e1fac40afc Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Wed, 5 Feb 2025 13:15:48 +0100 Subject: [PATCH 12/19] Fix code review comments --- ghcrawler/providers/queuing/storageQueue.js | 6 ++++-- ghcrawler/providers/storage/storageDocStore.js | 10 +++++----- package-lock.json | 11 +++++++---- package.json | 6 +++--- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index c408d45d..fd31d27e 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -64,7 +64,8 @@ class StorageQueue { try { await this.queueClient.deleteMessage(message.messageId, message.popReceipt) } catch (error) { - // Ignore error + this.logger.error(`Failed to delete message ${message.messageId} in storageQueue, error: ${error.message}`) + 
throw error } return null } else { @@ -110,13 +111,14 @@ class StorageQueue { } async updateVisibilityTimeout(request, visibilityTimeout = 0) { - await this.queueClient.updateMessage( + const response = await this.queueClient.updateMessage( request._message.messageId, request._message.popReceipt, undefined, visibilityTimeout ) this._log('NAKed', request._message.body) + return this._buildMessageReceipt(response, request) } async flush() { diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index 9f8bc2a8..17c56128 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -46,6 +46,7 @@ class AzureStorageDocStore { dataStream.push(null) const blockBlobClient = this.containerClient.getBlockBlobClient(blobName) await blockBlobClient.uploadStream(dataStream, 8 << 20, 5, options) + return blobName } async get(type, key) { @@ -102,11 +103,10 @@ class AzureStorageDocStore { } } let entryCount = 0 - const properties = await this.containerClient.getProperties() - properties.blobCount - // eslint-disable-next-line no-unused-vars - for await (const _ of this.containerClient.listBlobsFlat()) { - entryCount++ + for await (const page of this.containerClient.listBlobsFlat().byPage({ maxPageSize: 1000 })) { + if (page.segment.blobItems) { + entryCount += page.segment.blobItems.length() + } } memoryCache.put(key, entryCount, 60000) return entryCount diff --git a/package-lock.json b/package-lock.json index 2048fe70..1e7d385c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,7 +15,6 @@ "@azure/storage-queue": "^12.25.0", "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9", "@microsoft/refreshing-config": "^0.1.3", - "@types/node": "^22.10.1", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "axios": "^1.7.4", @@ -55,13 +54,13 @@ "spdx-correct": "^3.2.0", "throat": "^5.0.0", "tmp": "0.1.0", - "typescript": "^5.7.2", "unbzip2-stream": "^1.3.3", 
"winston": "^2.3.0", "winston-azure-application-insights": "^1.5.0", "xml2js": "^0.5.0" }, "devDependencies": { + "@types/node": "^22.10.1", "chai": "^4.2.0", "chai-as-promised": "^7.1.1", "chai-spies": "^1.0.0", @@ -73,7 +72,8 @@ "prettier": "3.2.4", "proxyquire": "^2.1.3", "request": "^2.88.2", - "sinon": "^5.0.0" + "sinon": "^5.0.0", + "typescript": "^5.7.2" } }, "node_modules/@aashutoshrathi/word-wrap": { @@ -1194,6 +1194,7 @@ "version": "22.10.1", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, "dependencies": { "undici-types": "~6.20.0" } @@ -8282,6 +8283,7 @@ "version": "5.7.2", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", + "dev": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -8302,7 +8304,8 @@ "node_modules/undici-types": { "version": "6.20.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", - "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==" + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true }, "node_modules/universalify": { "version": "2.0.1", diff --git a/package.json b/package.json index d8fe1fe5..e9e81518 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,6 @@ "@azure/storage-queue": "^12.25.0", "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9", "@microsoft/refreshing-config": "^0.1.3", - "@types/node": "^22.10.1", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "axios": "^1.7.4", @@ -79,7 +78,6 @@ "spdx-correct": "^3.2.0", "throat": "^5.0.0", "tmp": "0.1.0", - "typescript": "^5.7.2", "unbzip2-stream": "^1.3.3", "winston": "^2.3.0", 
"winston-azure-application-insights": "^1.5.0", @@ -97,6 +95,8 @@ "prettier": "3.2.4", "proxyquire": "^2.1.3", "request": "^2.88.2", - "sinon": "^5.0.0" + "sinon": "^5.0.0", + "@types/node": "^22.10.1", + "typescript": "^5.7.2" } } From 6766b84223adc4eb1d452c38cc792492218d9156 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Thu, 6 Feb 2025 13:11:26 +0100 Subject: [PATCH 13/19] Ensure messageId is included into message receipt --- ghcrawler/providers/queuing/storageQueue.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index fd31d27e..eb364cf3 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -118,7 +118,7 @@ class StorageQueue { visibilityTimeout ) this._log('NAKed', request._message.body) - return this._buildMessageReceipt(response, request) + return this._buildMessageReceipt({ messageId: request._message.messageId, ...response }, request) } async flush() { From 4c41da01f40d49144866d646a5c6c67d364cecab Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Thu, 6 Feb 2025 14:43:25 +0100 Subject: [PATCH 14/19] Add safe XML+HTML codecs to storage queue --- ghcrawler/providers/queuing/storageQueue.js | 39 +++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js index eb364cf3..a2ae9f9f 100644 --- a/ghcrawler/providers/queuing/storageQueue.js +++ b/ghcrawler/providers/queuing/storageQueue.js @@ -38,7 +38,8 @@ class StorageQueue { requests.map( qlimit(this.options.parallelPush || 1)(async request => { const body = JSON.stringify(request) - const queueMessageResult = await this.queueClient.sendMessage(body) + const encoded = this._encodeXMLSafe(body) + const queueMessageResult = await this.queueClient.sendMessage(encoded) 
this._log('Queued', request) return this._buildMessageReceipt(queueMessageResult, request) }) @@ -70,12 +71,7 @@ class StorageQueue { return null } else { try { - const decodedText = message.messageText - .replace(/"/g, '"') - .replace(/&/g, '&') - .replace(/'/g, "'") - .replace(/</g, '<') - .replace(/>/g, '>') + const decodedText = this._decodeXMLSafe(message.messageText) message.body = JSON.parse(decodedText) } catch (error) { this.logger.error(`Failed to parse message ${message.messageId}:`) @@ -147,6 +143,35 @@ class StorageQueue { isMessageNotFound(error) { return error?.code === 'MessageNotFound' } + + _encodeXMLSafe(text) { + if (typeof text !== 'string') return text + + return ( + text + // Handle & first to prevent double-encoding + .replace(/&/g, '&amp;') + .replace(/"/g, '&quot;') + .replace(/'/g, '&apos;') + .replace(/</g, '&lt;') + .replace(/>/g, '&gt;') + ) + } + + _decodeXMLSafe(text) { + if (typeof text !== 'string') return text + + return ( + text + // Handle both XML and HTML encodings for quotes and apostrophes + .replace(/&apos;|&#39;|&#x27;/g, "'") + .replace(/&quot;|&#34;|&#x22;/g, '"') + // Handle basic XML entities + .replace(/&lt;|&#60;|&#x3[Cc];/g, '<') + .replace(/&gt;|&#62;|&#x3[Ee];/g, '>') + .replace(/&amp;|&#38;|&#x26;/g, '&') // Must be after other & entities + ) + } } module.exports = StorageQueue From a7e714cac6f844f5f1067a71c8a27995a9553ee2 Mon Sep 17 00:00:00 2001 From: Lewis Jones <ljones140@gmail.com> Date: Tue, 11 Feb 2025 11:48:04 +0000 Subject: [PATCH 15/19] Queues can be configured separately with SPN from harvest azblob So we have the ability to have a harvest connection with a connection string and queues with azure SPN --- config/cdConfig.js | 5 ++-- .../providers/queuing/storageQueueManager.js | 29 ++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index f07a1f3c..3adafb11 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -140,8 +140,9 @@ module.exports = { attenuation: { ttl: 3000 }, - spnAuth:
config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'), - account: cd_azblob.account + spnAuth: config.get('CRAWLER_QUEUE_AZURE_SPN_AUTH') || cd_azblob.spnAuth, + account: config.get('CRAWLER_QUEUE_AZURE_ACCOUNT_NAME') || cd_azblob.account, + isSpnAuth: config.get('CRAWLER_QUEUE_AZURE_IS_SPN_AUTH') || false }, appVersion: config.get('APP_VERSION'), buildsha: config.get('BUILD_SHA') diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js index 7974c0e5..9d142019 100644 --- a/ghcrawler/providers/queuing/storageQueueManager.js +++ b/ghcrawler/providers/queuing/storageQueueManager.js @@ -18,19 +18,28 @@ class StorageQueueManager { retryPolicyType: StorageRetryPolicyType.EXPONENTIAL } } + + const { account, spnAuth, isSpnAuth } = options + if (isSpnAuth) { + const authParsed = JSON.parse(spnAuth) + this.client = new QueueServiceClient( + `https://${account}.queue.core.windows.net`, + new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret), + pipelineOptions + ) + return + } + if (connectionString) { this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) - } else { - const { account, spnAuth } = options - let credential - if (spnAuth) { - const authParsed = JSON.parse(spnAuth) - credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) - } else { - credential = new DefaultAzureCredential() - } - this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions) + return } + + this.client = new QueueServiceClient( + `https://${account}.queue.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) } createQueueClient(name, formatter, options) { From 5d6eca2f12977cf00476f74f9fc506e18fbd41e9 Mon Sep 17 00:00:00 2001 From: Lewis Jones <ljones140@gmail.com> Date: Thu, 13 Feb 2025 11:27:12 +0000 Subject: [PATCH 16/19] name is not set anymore, 
options.container is the new place for this --- ghcrawler/providers/storage/storageDocStore.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js index 17c56128..a531269c 100644 --- a/ghcrawler/providers/storage/storageDocStore.js +++ b/ghcrawler/providers/storage/storageDocStore.js @@ -95,7 +95,7 @@ class AzureStorageDocStore { // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly async count(type, force = false) { this._ensureDeadletter(type) - const key = `${this.name}:count:${type || ''}` + const key = `${this.options.container}:count:${type || ''}` if (!force) { const cachedCount = memoryCache.get(key) if (cachedCount) { From edc8bb24da2bc951d3049163f680ef115a066826 Mon Sep 17 00:00:00 2001 From: Lewis Jones <ljones140@gmail.com> Date: Thu, 13 Feb 2025 17:59:50 +0000 Subject: [PATCH 17/19] Fix integer here was breaking dead letter queue writing Needs to be a string --- ghcrawler/lib/crawler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcrawler/lib/crawler.js b/ghcrawler/lib/crawler.js index 69553802..79d7fc94 100644 --- a/ghcrawler/lib/crawler.js +++ b/ghcrawler/lib/crawler.js @@ -638,7 +638,7 @@ class Crawler { metadata.errorMessage = request._error.message metadata.errorStack = request._error.stack } - metadata.version = 1 + metadata.version = "1" metadata.meta = request.meta metadata.type = 'deadletter' metadata.url = request.url.replace('//', '//deadletter.') From 4b67c4a522a6149e54237c8522923de01082fcbf Mon Sep 17 00:00:00 2001 From: Lewis Jones <ljones140@gmail.com> Date: Thu, 13 Feb 2025 18:04:19 +0000 Subject: [PATCH 18/19] single quotes --- ghcrawler/lib/crawler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcrawler/lib/crawler.js b/ghcrawler/lib/crawler.js index 79d7fc94..f494493c 100644 --- a/ghcrawler/lib/crawler.js +++ 
b/ghcrawler/lib/crawler.js @@ -638,7 +638,7 @@ class Crawler { metadata.errorMessage = request._error.message metadata.errorStack = request._error.stack } - metadata.version = "1" + metadata.version = '1' metadata.meta = request.meta metadata.type = 'deadletter' metadata.url = request.url.replace('//', '//deadletter.') From 19aa5cdec2e7ec4775cb63d110fbd08a95646773 Mon Sep 17 00:00:00 2001 From: Roman Iakovlev <romaniakovlev@github.com> Date: Mon, 17 Feb 2025 17:45:09 +0100 Subject: [PATCH 19/19] Modify ordered auth selection in azureBlobFactory and azureQueueStore This is to make it consistent with the storageQueueManager. --- config/cdConfig.js | 6 ++-- .../providers/queuing/storageQueueManager.js | 3 ++ .../providers/storage/azureBlobFactory.js | 33 +++++++++++-------- providers/store/azureQueueStore.js | 32 ++++++++++++------ 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/config/cdConfig.js b/config/cdConfig.js index 3adafb11..c1a6a1be 100644 --- a/config/cdConfig.js +++ b/config/cdConfig.js @@ -7,7 +7,8 @@ const cd_azblob = { connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'), container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'), account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'), - spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH') + spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH'), + isSpnAuth: config.get('CRAWLER_AZBLOB_IS_SPN_AUTH') || false } const githubToken = config.get('CRAWLER_GITHUB_TOKEN') @@ -115,7 +116,8 @@ module.exports = { connectionString: cd_azblob.connection, account: cd_azblob.account, queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests', - spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH') + spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'), + isSpnAuth: config.get('CRAWLER_HARVESTS_QUEUE_IS_SPN_AUTH') || false }, 'cd(azblob)': cd_azblob, 'cd(file)': cd_file diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js index 
9d142019..2e662978 100644 --- a/ghcrawler/providers/queuing/storageQueueManager.js +++ b/ghcrawler/providers/queuing/storageQueueManager.js @@ -21,6 +21,7 @@ class StorageQueueManager { const { account, spnAuth, isSpnAuth } = options if (isSpnAuth) { + options.logger.info('using service principal credentials in storageQueueManager') const authParsed = JSON.parse(spnAuth) this.client = new QueueServiceClient( `https://${account}.queue.core.windows.net`, @@ -31,10 +32,12 @@ class StorageQueueManager { } if (connectionString) { + options.logger.info('using connection string in storageQueueManager') this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) return } + options.logger.info('using default credentials in storageQueueManager') this.client = new QueueServiceClient( `https://${account}.queue.core.windows.net`, new DefaultAzureCredential(), diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js index b08af8b7..5416ed5e 100644 --- a/ghcrawler/providers/storage/azureBlobFactory.js +++ b/ghcrawler/providers/storage/azureBlobFactory.js @@ -13,10 +13,11 @@ const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/ident * @param {string} options.container * @param {object} options.logger * @param {object} options.spnAuth + * @param {object} options.isSpnAuth */ module.exports = options => { options.logger.info('creating azure storage store') - const { account, connection, container, spnAuth } = options + const { account, connection, container, spnAuth, isSpnAuth } = options const pipelineOptions = { retryOptions: { @@ -27,22 +28,28 @@ module.exports = options => { retryPolicyType: StorageRetryPolicyType.EXPONENTIAL } } - let blobServiceClient - if (connection) { - options.logger.info('using connection string') - blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions) + + if (isSpnAuth) { + options.logger.info('using service principal 
credentials in azureBlobFactory') + const authParsed = JSON.parse(spnAuth) + blobServiceClient = new BlobServiceClient( + `https://${account}.queue.core.windows.net`, + new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret), + pipelineOptions + ) } else { - let credential - if (spnAuth) { - const authParsed = JSON.parse(spnAuth) - credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) - options.logger.info('using service principal credentials') + if (connection) { + options.logger.info('using connection string in azureBlobFactory') + blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions) } else { - credential = new DefaultAzureCredential() - options.logger.info('using default credentials') + options.logger.info('using default credentials in azureBlobFactory') + blobServiceClient = new BlobServiceClient( + `https://${account}.queue.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) } - blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, credential, pipelineOptions) } const containerClient = blobServiceClient.getContainerClient(container) diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js index 25c020b2..9ad21bb3 100644 --- a/providers/store/azureQueueStore.js +++ b/providers/store/azureQueueStore.js @@ -10,7 +10,7 @@ class AzureStorageQueue { this.queueName = options.queueName this.logger = options.logger - const { connectionString, account, spnAuth } = options + const { connectionString, account, spnAuth, isSpnAuth } = options const pipelineOptions = { retryOptions: { @@ -21,18 +21,30 @@ class AzureStorageQueue { retryPolicyType: StorageRetryPolicyType.FIXED } } + + if (isSpnAuth) { + options.logger.info('using service principal credentials in azureQueueStore') + const authParsed = JSON.parse(spnAuth) + this.client = new QueueServiceClient( + 
`https://${account}.queue.core.windows.net`, + new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret), + pipelineOptions + ) + return + } + if (connectionString) { + options.logger.info('using connection string in azureQueueStore') this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions) - } else { - let credential - if (spnAuth) { - const authParsed = JSON.parse(spnAuth) - credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret) - } else { - credential = new DefaultAzureCredential() - } - this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions) + return } + + options.logger.info('using default credentials in azureQueueStore') + this.client = new QueueServiceClient( + `https://${account}.queue.core.windows.net`, + new DefaultAzureCredential(), + pipelineOptions + ) } async connect() {