From 4a2eba65a78c2ec77af0088b1faca563c9e90247 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Fri, 13 Dec 2024 16:25:42 +0100
Subject: [PATCH 01/19] Upgrade Azure Storage SDK to a modern version

---
 config/cdConfig.js                            |   4 +-
 ghcrawler/providers/queuing/storageQueue.js   | 137 ++--
 .../providers/queuing/storageQueueManager.js  |  25 +-
 .../providers/storage/azureBlobFactory.js     |  43 +-
 .../providers/storage/storageDocStore.js      | 164 ++---
 package-lock.json                             | 586 ++++++++++++++----
 package.json                                  |   6 +-
 providers/store/azureQueueStore.js            |  33 +-
 8 files changed, 649 insertions(+), 349 deletions(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index 05b5a524..ced4c304 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -5,7 +5,8 @@ const config = require('painless-config')
 
 const cd_azblob = {
   connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
-  container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME')
+  container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
+  account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
 }
 
 const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
@@ -111,6 +112,7 @@ module.exports = {
     },
     azqueue: {
       connectionString: cd_azblob.connection,
+      account: cd_azblob.account,
       queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests'
     },
     'cd(azblob)': cd_azblob,
diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index c52e1257..5d6822c0 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -1,52 +1,46 @@
 // Copyright (c) Microsoft Corporation and others. Made available under the MIT license.
 // SPDX-License-Identifier: MIT
 
+const { QueueServiceClient } = require('@azure/storage-queue')
 const qlimit = require('qlimit')
 const { cloneDeep } = require('lodash')
 
 class StorageQueue {
+  /**
+   * @param {QueueServiceClient} client
+   * @param {string} name
+   * @param {string} queueName
+   * @param {object} formatter
+   * @param {object} options
+   */
   constructor(client, name, queueName, formatter, options) {
-    this.client = client
     this.name = name
     this.queueName = queueName
     this.messageFormatter = formatter
     this.options = options
     this.logger = options.logger
+    this.queueClient = client.getQueueClient(this.queueName)
   }
 
   async subscribe() {
-    return new Promise((resolve, reject) => {
-      this.client.createQueueIfNotExists(this.queueName, error => {
-        if (error) {
-          return reject(error)
-        }
-        this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`)
-        resolve()
-      })
-    })
+    await this.queueClient.createIfNotExists()
+    this.logger.info(`Subscribed to ${this.queueName} using Queue Storage`)
   }
 
   async unsubscribe() {
-    return
+    // No specific unsubscribe logic for Azure Queue Storage
   }
 
   async push(requests, option) {
     requests = Array.isArray(requests) ? requests : [requests]
     return Promise.all(
       requests.map(
-        qlimit(this.options.parallelPush || 1)(request => {
+        qlimit(this.options.parallelPush || 1)(async request => {
           const body = JSON.stringify(request)
-          return new Promise((resolve, reject) => {
-            this.client.createMessage(this.queueName, body, option, (error, queueMessageResult) => {
-              if (error) {
-                return reject(error)
-              }
-              this._log('Queued', request)
-              resolve(this._buildMessageReceipt(queueMessageResult, request))
-            })
-          })
-        })
-      )
+          const queueMessageResult = await this.queueClient.sendMessage(body, option)
+          this._log('Queued', request)
+          return this._buildMessageReceipt(queueMessageResult, request)
+        }))
     )
   }
 
@@ -56,47 +50,32 @@ class StorageQueue {
   }
 
   async pop() {
-    const msgOptions = { numOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 }
-    return new Promise((resolve, reject) => {
-      this.client.getMessages(this.queueName, msgOptions, (error, result) => {
-        if (error) {
-          return reject(error)
-        }
-        const message = result[0]
-        if (!message) {
-          this.logger.verbose('No messages to receive')
-          return resolve(null)
-        }
-        if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) {
-          this.logger.verbose('maxDequeueCount exceeded')
-          this.client.deleteMessage(this.queueName, message.messageId, message.popReceipt, error => {
-            if (error) return reject(error)
-            resolve(null)
-          })
-        } else {
-          message.body = JSON.parse(message.messageText)
-          const request = this.messageFormatter(message)
-          request._message = message
-          this._log('Popped', message.body)
-          resolve(request)
-        }
-      })
-    })
+    const msgOptions = { numberOfMessages: 1, visibilityTimeout: this.options.visibilityTimeout || 60 * 60 }
+    const response = await this.queueClient.receiveMessages(msgOptions)
+    const message = response.receivedMessageItems[0]
+    if (!message) {
+      this.logger.verbose('No messages to receive')
+      return null
+    }
+    if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) {
+      this.logger.verbose('maxDequeueCount exceeded')
+      await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
+      return null
+    } else {
+      message.body = JSON.parse(message.messageText)
+      const request = this.messageFormatter(message)
+      request._message = message
+      this._log('Popped', message.body)
+      return request
+    }
   }
 
   async done(request) {
     if (!request || !request._message) {
       return
     }
-    return new Promise((resolve, reject) => {
-      this.client.deleteMessage(this.queueName, request._message.messageId, request._message.popReceipt, error => {
-        if (error) {
-          return reject(error)
-        }
-        this._log('ACKed', request._message.body)
-        resolve()
-      })
-    })
+    await this.queueClient.deleteMessage(request._message.messageId, request._message.popReceipt)
+    this._log('ACKed', request._message.body)
   }
 
   async defer(request) {
@@ -110,47 +89,21 @@ class StorageQueue {
     await this.updateVisibilityTimeout(request)
   }
 
-  updateVisibilityTimeout(request, visibilityTimeout = 0) {
-    return new Promise((resolve, reject) => {
-      // visibilityTimeout is updated to 0 to unlock/unlease the message
-      this.client.updateMessage(
-        this.queueName,
-        request._message.messageId,
-        request._message.popReceipt,
-        visibilityTimeout,
-        (error, result) => {
-          if (error) {
-            return reject(error)
-          }
-          this._log('NAKed', request._message.body)
-          resolve(this._buildMessageReceipt(result, request._message.body))
-        }
-      )
+  async updateVisibilityTimeout(request, visibilityTimeout = 0) {
+    const { messageId, popReceipt } = request._message
+    // 3rd argument is the message text: undefined keeps the content, only the visibility timeout changes
+    await this.queueClient.updateMessage(messageId, popReceipt, undefined, visibilityTimeout)
-    })
+    this._log('NAKed', request._message.body)
   }
 
   async flush() {
-    return new Promise((resolve, reject) => {
-      this.client.deleteQueue(this.queueName, error => {
-        if (error) return reject(error)
-        this.client.createQueueIfNotExists(this.queueName, error => {
-          if (error) return reject(error)
-          resolve()
-        })
-      })
-    })
+    await this.queueClient.clearMessages()
+    this.logger.info(`Flushed all messages from ${this.queueName}`)
   }
 
   async getInfo() {
-    return new Promise(resolve => {
-      this.client.getQueueMetadata(this.queueName, (result, error) => {
-        if (error) {
-          this.logger.error(error)
-          resolve(null)
-        }
-        resolve({ count: result[0].approximateMessageCount })
-      })
-    })
+    const properties = await this.queueClient.getProperties()
+    return { count: properties.approximateMessagesCount }
   }
 
   getName() {
diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js
index 2f23a7c9..048b9bf2 100644
--- a/ghcrawler/providers/queuing/storageQueueManager.js
+++ b/ghcrawler/providers/queuing/storageQueueManager.js
@@ -2,14 +2,31 @@
 // SPDX-License-Identifier: MIT
 
 const AttenuatedQueue = require('./attenuatedQueue')
-const AzureStorage = require('azure-storage')
+const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
 const Request = require('../../lib/request')
 const StorageQueue = require('./storageQueue')
+const { DefaultAzureCredential } = require('@azure/identity')
 
 class StorageQueueManager {
-  constructor(connectionString) {
-    const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter()
-    this.client = AzureStorage.createQueueService(connectionString).withFilter(retryOperations)
+  constructor(connectionString, options) {
+    const pipelineOptions = {
+      retryOptions: {
+        maxTries: 3,
+        retryDelayInMs: 1000,
+        maxRetryDelayInMs: 120 * 1000,
+        tryTimeoutInMs: 30000,
+        retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
+      }
+    }
+    if (connectionString) {
+      this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
+    } else {
+      this.client = new QueueServiceClient(
+        `https://${options.account}.queue.core.windows.net`,
+        new DefaultAzureCredential(),
+        pipelineOptions
+      )
+    }
   }
 
   createQueueClient(name, formatter, options) {
diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js
index 2d2d5eb5..60cfb121 100644
--- a/ghcrawler/providers/storage/azureBlobFactory.js
+++ b/ghcrawler/providers/storage/azureBlobFactory.js
@@ -1,15 +1,44 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // SPDX-License-Identifier: MIT
 
-const AzureStorage = require('azure-storage')
+// @ts-check
+const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob')
 const AzureStorageDocStore = require('./storageDocStore')
+const { DefaultAzureCredential } = require ('@azure/identity');
 
+/**
+ * @param {object} options
+ * @param {string} options.account
+ * @param {string} options.connection
+ * @param {string} options.container
+ * @param {object} options.logger
+ */
 module.exports = options => {
   options.logger.info('creating azure storage store')
-  const { account, key, connection, container } = options
-  const retryOperations = new AzureStorage.ExponentialRetryPolicyFilter()
-  const blobService = connection
-    ? AzureStorage.createBlobService(connection).withFilter(retryOperations)
-    : AzureStorage.createBlobService(account, key).withFilter(retryOperations)
-  return new AzureStorageDocStore(blobService, container, options)
+  const { account, connection, container } = options
+
+  const pipelineOptions = {
+    retryOptions: {
+      maxTries: 3,
+      retryDelayInMs: 1000,
+      maxRetryDelayInMs: 120 * 1000,
+      tryTimeoutInMs: 30000,
+      retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
+    }
+  }
+
+  let blobServiceClient
+  if (connection) {
+    options.logger.info('using connection string')
+    blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
+  } else if (account) {
+    options.logger.info('using default credentials')
+    blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, new DefaultAzureCredential(), pipelineOptions)
+  } else {
+    throw new Error('either connection or account must be provided')
+  }
+
+  const containerClient = blobServiceClient.getContainerClient(container)
+
+  return new AzureStorageDocStore(containerClient, options)
 }
diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index 8131dfa5..a9af732f 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -1,32 +1,29 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-const azure = require('azure-storage')
+const { BlobServiceClient } = require('@azure/storage-blob')
+const { ContainerClient } = require('@azure/storage-blob')
 const memoryCache = require('memory-cache')
 const { Readable } = require('stream')
 const URL = require('url')
 
 class AzureStorageDocStore {
-  constructor(blobService, name, options) {
-    this.service = blobService
-    this.name = name
+  /**
+   * @param {ContainerClient} containerClient
+   * @param {object} options
+   */
+  constructor(containerClient, options) {
+    this.containerClient = containerClient
     this.options = options
     this._getBlobNameFromKey = this.options.blobKey === 'url' ? this._getBlobNameFromUrl : this._getBlobNameFromUrn
   }
 
   async connect() {
-    return this._createContainer(this.name)
+    await this._createContainer(this.containerClient)
   }
 
-  async _createContainer(name) {
-    return new Promise((resolve, reject) => {
-      this.service.createContainerIfNotExists(name, error => {
-        if (error) {
-          return reject(error)
-        }
-        resolve(this.service)
-      })
-    })
+  async _createContainer(containerClient) {
+    await containerClient.createIfNotExists()
   }
 
   async upsert(document) {
@@ -43,85 +40,46 @@ class AzureStorageDocStore {
     if (document._metadata.extra) {
       blobMetadata.extra = JSON.stringify(document._metadata.extra)
     }
-    const options = { metadata: blobMetadata, contentSettings: { contentType: 'application/json' } }
+    const options = { metadata: blobMetadata, blobHTTPHeaders: { blobContentType: 'application/json' } }
     const dataStream = new Readable()
     dataStream.push(JSON.stringify(document))
     dataStream.push(null)
-    return new Promise((resolve, reject) => {
-      dataStream
-        .pipe(this.service.createWriteStreamToBlockBlob(this.name, blobName, options))
-        .on('error', error => {
-          return reject(error)
-        })
-        .on('finish', () => {
-          resolve(blobName)
-        })
-    })
+    await this.containerClient.getBlockBlobClient(blobName).uploadStream(dataStream, 8 << 20, 5, options)
+    return blobName
   }
 
-  // TODO: Consistency on whether key is a URL or URN
   async get(type, key) {
     const blobName = this._getBlobNameFromKey(type, key)
-    return new Promise((resolve, reject) => {
-      this.service.getBlobToText(this.name, blobName, (error, text) => {
-        if (error) {
-          return reject(error)
-        }
-        const result = JSON.parse(text)
-        resolve(result)
-      })
-    })
+    const blockBlobClient = this.containerClient.getBlockBlobClient(blobName)
+    const downloadBlockBlobResponse = await blockBlobClient.download(0)
+    const downloaded = await this._streamToString(downloadBlockBlobResponse.readableStreamBody)
+    return JSON.parse(downloaded)
   }
 
-  // TODO: Consistency on whether key is a URL or URN
   async etag(type, key) {
     const blobName = this._getBlobNameFromKey(type, key)
-    return new Promise(resolve => {
-      this.service.getBlobMetadata(this.name, blobName, (error, blob) => {
-        resolve(error ? null : blob.metadata.etag)
-      })
-    })
+    // Yields the etag kept in the blob's user metadata (not the blob's HTTP ETag), or null when the lookup fails
+    const blobClient = this.containerClient.getBlockBlobClient(blobName)
+    return blobClient.getProperties().then(properties => (properties.metadata || {}).etag, () => null)
   }
 
   // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly
   async list(type) {
     this._ensureDeadletter(type)
     let entries = []
-    let continuationToken = null
-    do {
-      const result = await new Promise((resolve, reject) => {
-        this.service.listBlobsSegmented(
-          this.name,
-          continuationToken,
-          {
-            include: azure.BlobUtilities.BlobListingDetails.METADATA,
-            location: azure.StorageUtilities.LocationMode.PRIMARY_THEN_SECONDARY
-          },
-          (error, response) => {
-            if (error) {
-              continuationToken = null
-              reject(error)
-            }
-            return resolve(response)
-          }
-        )
+    for await (const blob of this.containerClient.listBlobsFlat({ includeMetadata: true })) {
+      const blobMetadata = blob.metadata
+      entries.push({
+        version: blobMetadata.version,
+        etag: blobMetadata.etag,
+        type: blobMetadata.type,
+        url: blobMetadata.url,
+        urn: blobMetadata.urn,
+        fetchedAt: blobMetadata.fetchedat,
+        processedAt: blobMetadata.processedat,
+        extra: blobMetadata.extra ? JSON.parse(blobMetadata.extra) : undefined
       })
-      entries = entries.concat(
-        result.entries.map(entry => {
-          const blobMetadata = entry.metadata
-          return {
-            version: blobMetadata.version,
-            etag: blobMetadata.etag,
-            type: blobMetadata.type,
-            url: blobMetadata.url,
-            urn: blobMetadata.urn,
-            fetchedAt: blobMetadata.fetchedat,
-            processedAt: blobMetadata.processedat,
-            extra: blobMetadata.extra ? JSON.parse(blobMetadata.extra) : undefined
-          }
-        })
-      )
-    } while (continuationToken && entries.length < 10000)
+    }
     return entries
   }
 
@@ -129,47 +87,8 @@ class AzureStorageDocStore {
   async delete(type, key) {
     this._ensureDeadletter(type)
     const blobName = this._getBlobNameFromKey(type, key)
-    return new Promise((resolve, reject) => {
-      this.service.deleteBlob(this.name, blobName, error => {
-        if (error) {
-          return reject(error)
-        }
-        resolve(true)
-      })
-    })
-  }
-
-  // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly
-  async count(type, force = false) {
-    this._ensureDeadletter(type)
-    const key = `${this.name}:count:${type || ''}`
-    if (!force) {
-      const cachedCount = memoryCache.get(key)
-      if (cachedCount) {
-        return cachedCount
-      }
-    }
-    let entryCount = 0
-    let continuationToken = null
-    do {
-      const result = await new Promise((resolve, reject) => {
-        this.service.listBlobsSegmented(
-          this.name,
-          continuationToken,
-          { location: azure.StorageUtilities.LocationMode.PRIMARY_THEN_SECONDARY },
-          (error, response) => {
-            if (error) {
-              continuationToken = null
-              reject(error)
-            }
-            return resolve(response)
-          }
-        )
-      })
-      entryCount += result.entries.length
-    } while (continuationToken)
-    memoryCache.put(key, entryCount, 60000)
-    return entryCount
+    const blockBlobClient = this.containerClient.getBlockBlobClient(blobName)
+    await blockBlobClient.delete()
   }
 
   async close() {
@@ -216,6 +135,19 @@ class AzureStorageDocStore {
     }
     return `${this._getBlobPathFromUrn(type, urn)}.json`
   }
+
+  // Reads the whole stream into a single UTF-8 string. Bytes are concatenated before decoding
+  // so a multi-byte character split across two chunks is not corrupted.
+  async _streamToString(readableStream) {
+    return new Promise((resolve, reject) => {
+      const chunks = []
+      readableStream.on('data', data => {
+        chunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data))
+      })
+      readableStream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')))
+      readableStream.on('error', reject)
+    })
+  }
 }
 
 module.exports = AzureStorageDocStore
diff --git a/package-lock.json b/package-lock.json
index 59e09015..2048fe70 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,13 +10,16 @@
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {
+        "@azure/identity": "^4.5.0",
+        "@azure/storage-blob": "^12.26.0",
+        "@azure/storage-queue": "^12.25.0",
         "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9",
         "@microsoft/refreshing-config": "^0.1.3",
+        "@types/node": "^22.10.1",
         "applicationinsights": "^1.5.0",
         "ar-async": "^0.1.4",
         "axios": "^1.7.4",
         "axios-retry": "^3.2.5",
-        "azure-storage": "^2.10.3",
         "body-parser": "^1.19.0",
         "debug": "^4.3.4",
         "decompress": "^4.2.1",
@@ -52,6 +55,7 @@
         "spdx-correct": "^3.2.0",
         "throat": "^5.0.0",
         "tmp": "0.1.0",
+        "typescript": "^5.7.2",
         "unbzip2-stream": "^1.3.3",
         "winston": "^2.3.0",
         "winston-azure-application-insights": "^1.5.0",
@@ -81,6 +85,288 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/@azure/abort-controller": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-2.1.2.tgz",
+      "integrity": "sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==",
+      "dependencies": {
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-auth": {
+      "version": "1.9.0",
+      "resolved": "https://registry.npmjs.org/@azure/core-auth/-/core-auth-1.9.0.tgz",
+      "integrity": "sha512-FPwHpZywuyasDSLMqJ6fhbOK3TqUdviZNF8OqRGA4W5Ewib2lEEZ+pBsYcBa88B2NGO/SEnYPGhyBqNlE8ilSw==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-util": "^1.11.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-client": {
+      "version": "1.9.2",
+      "resolved": "https://registry.npmjs.org/@azure/core-client/-/core-client-1.9.2.tgz",
+      "integrity": "sha512-kRdry/rav3fUKHl/aDLd/pDLcB+4pOFwPPTVEExuMyaI5r+JBbMWqRbCY1pn5BniDaU3lRxO9eaQ1AmSMehl/w==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-auth": "^1.4.0",
+        "@azure/core-rest-pipeline": "^1.9.1",
+        "@azure/core-tracing": "^1.0.0",
+        "@azure/core-util": "^1.6.1",
+        "@azure/logger": "^1.0.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-http-compat": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/@azure/core-http-compat/-/core-http-compat-2.1.2.tgz",
+      "integrity": "sha512-5MnV1yqzZwgNLLjlizsU3QqOeQChkIXw781Fwh1xdAqJR5AA32IUaq6xv1BICJvfbHoa+JYcaij2HFkhLbNTJQ==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-client": "^1.3.0",
+        "@azure/core-rest-pipeline": "^1.3.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-lro": {
+      "version": "2.7.2",
+      "resolved": "https://registry.npmjs.org/@azure/core-lro/-/core-lro-2.7.2.tgz",
+      "integrity": "sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-util": "^1.2.0",
+        "@azure/logger": "^1.0.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-paging": {
+      "version": "1.6.2",
+      "resolved": "https://registry.npmjs.org/@azure/core-paging/-/core-paging-1.6.2.tgz",
+      "integrity": "sha512-YKWi9YuCU04B55h25cnOYZHxXYtEvQEbKST5vqRga7hWY9ydd3FZHdeQF8pyh+acWZvppw13M/LMGx0LABUVMA==",
+      "dependencies": {
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-rest-pipeline": {
+      "version": "1.18.1",
+      "resolved": "https://registry.npmjs.org/@azure/core-rest-pipeline/-/core-rest-pipeline-1.18.1.tgz",
+      "integrity": "sha512-/wS73UEDrxroUEVywEm7J0p2c+IIiVxyfigCGfsKvCxxCET4V/Hef2aURqltrXMRjNmdmt5IuOgIpl8f6xdO5A==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-auth": "^1.8.0",
+        "@azure/core-tracing": "^1.0.1",
+        "@azure/core-util": "^1.11.0",
+        "@azure/logger": "^1.0.0",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-rest-pipeline/node_modules/agent-base": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
+      "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
+      "dependencies": {
+        "debug": "^4.3.4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/@azure/core-rest-pipeline/node_modules/https-proxy-agent": {
+      "version": "7.0.5",
+      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz",
+      "integrity": "sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==",
+      "dependencies": {
+        "agent-base": "^7.0.2",
+        "debug": "4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/@azure/core-tracing": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/@azure/core-tracing/-/core-tracing-1.2.0.tgz",
+      "integrity": "sha512-UKTiEJPkWcESPYJz3X5uKRYyOcJD+4nYph+KpfdPRnQJVrZfk0KJgdnaAWKfhsBBtAf/D58Az4AvCJEmWgIBAg==",
+      "dependencies": {
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-util": {
+      "version": "1.11.0",
+      "resolved": "https://registry.npmjs.org/@azure/core-util/-/core-util-1.11.0.tgz",
+      "integrity": "sha512-DxOSLua+NdpWoSqULhjDyAZTXFdP/LKkqtYuxxz1SCN289zk3OG8UOpnCQAz/tygyACBtWp/BoO72ptK7msY8g==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/core-xml": {
+      "version": "1.4.4",
+      "resolved": "https://registry.npmjs.org/@azure/core-xml/-/core-xml-1.4.4.tgz",
+      "integrity": "sha512-J4FYAqakGXcbfeZjwjMzjNcpcH4E+JtEBv+xcV1yL0Ydn/6wbQfeFKTCHh9wttAi0lmajHw7yBbHPRG+YHckZQ==",
+      "dependencies": {
+        "fast-xml-parser": "^4.4.1",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/identity": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/@azure/identity/-/identity-4.5.0.tgz",
+      "integrity": "sha512-EknvVmtBuSIic47xkOqyNabAme0RYTw52BTMz8eBgU1ysTyMrD1uOoM+JdS0J/4Yfp98IBT3osqq3BfwSaNaGQ==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.0.0",
+        "@azure/core-auth": "^1.9.0",
+        "@azure/core-client": "^1.9.2",
+        "@azure/core-rest-pipeline": "^1.17.0",
+        "@azure/core-tracing": "^1.0.0",
+        "@azure/core-util": "^1.11.0",
+        "@azure/logger": "^1.0.0",
+        "@azure/msal-browser": "^3.26.1",
+        "@azure/msal-node": "^2.15.0",
+        "events": "^3.0.0",
+        "jws": "^4.0.0",
+        "open": "^8.0.0",
+        "stoppable": "^1.1.0",
+        "tslib": "^2.2.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/identity/node_modules/open": {
+      "version": "8.4.2",
+      "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
+      "integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==",
+      "dependencies": {
+        "define-lazy-prop": "^2.0.0",
+        "is-docker": "^2.1.1",
+        "is-wsl": "^2.2.0"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/@azure/logger": {
+      "version": "1.1.4",
+      "resolved": "https://registry.npmjs.org/@azure/logger/-/logger-1.1.4.tgz",
+      "integrity": "sha512-4IXXzcCdLdlXuCG+8UKEwLA1T1NHqUfanhXYHiQTn+6sfWCZXduqbtXDGceg3Ce5QxTGo7EqmbV6Bi+aqKuClQ==",
+      "dependencies": {
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/msal-browser": {
+      "version": "3.28.0",
+      "resolved": "https://registry.npmjs.org/@azure/msal-browser/-/msal-browser-3.28.0.tgz",
+      "integrity": "sha512-1c1qUF6vB52mWlyoMem4xR1gdwiQWYEQB2uhDkbAL4wVJr8WmAcXybc1Qs33y19N4BdPI8/DHI7rPE8L5jMtWw==",
+      "dependencies": {
+        "@azure/msal-common": "14.16.0"
+      },
+      "engines": {
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/@azure/msal-common": {
+      "version": "14.16.0",
+      "resolved": "https://registry.npmjs.org/@azure/msal-common/-/msal-common-14.16.0.tgz",
+      "integrity": "sha512-1KOZj9IpcDSwpNiQNjt0jDYZpQvNZay7QAEi/5DLubay40iGYtLzya/jbjRPLyOTZhEKyL1MzPuw2HqBCjceYA==",
+      "engines": {
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/@azure/msal-node": {
+      "version": "2.16.2",
+      "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-2.16.2.tgz",
+      "integrity": "sha512-An7l1hEr0w1HMMh1LU+rtDtqL7/jw74ORlc9Wnh06v7TU/xpG39/Zdr1ZJu3QpjUfKJ+E0/OXMW8DRSWTlh7qQ==",
+      "dependencies": {
+        "@azure/msal-common": "14.16.0",
+        "jsonwebtoken": "^9.0.0",
+        "uuid": "^8.3.0"
+      },
+      "engines": {
+        "node": ">=16"
+      }
+    },
+    "node_modules/@azure/storage-blob": {
+      "version": "12.26.0",
+      "resolved": "https://registry.npmjs.org/@azure/storage-blob/-/storage-blob-12.26.0.tgz",
+      "integrity": "sha512-SriLPKezypIsiZ+TtlFfE46uuBIap2HeaQVS78e1P7rz5OSbq0rsd52WE1mC5f7vAeLiXqv7I7oRhL3WFZEw3Q==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.1.2",
+        "@azure/core-auth": "^1.4.0",
+        "@azure/core-client": "^1.6.2",
+        "@azure/core-http-compat": "^2.0.0",
+        "@azure/core-lro": "^2.2.0",
+        "@azure/core-paging": "^1.1.1",
+        "@azure/core-rest-pipeline": "^1.10.1",
+        "@azure/core-tracing": "^1.1.2",
+        "@azure/core-util": "^1.6.1",
+        "@azure/core-xml": "^1.4.3",
+        "@azure/logger": "^1.0.0",
+        "events": "^3.0.0",
+        "tslib": "^2.2.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@azure/storage-queue": {
+      "version": "12.25.0",
+      "resolved": "https://registry.npmjs.org/@azure/storage-queue/-/storage-queue-12.25.0.tgz",
+      "integrity": "sha512-uoobHFbH/o7wIul/sCm32X2YFq6zb1XpNdpKIms9I60mwG3BBaOpEs5pgQV5a5ONG5WMSHlo8E1dNFB5ZZIa1g==",
+      "dependencies": {
+        "@azure/abort-controller": "^2.1.2",
+        "@azure/core-auth": "^1.4.0",
+        "@azure/core-client": "^1.6.2",
+        "@azure/core-http-compat": "^2.0.0",
+        "@azure/core-paging": "^1.1.1",
+        "@azure/core-rest-pipeline": "^1.10.1",
+        "@azure/core-tracing": "^1.1.2",
+        "@azure/core-util": "^1.6.1",
+        "@azure/core-xml": "^1.4.3",
+        "@azure/logger": "^1.0.0",
+        "tslib": "^2.2.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/@babel/code-frame": {
       "version": "7.12.11",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz",
@@ -905,12 +1191,11 @@
       }
     },
     "node_modules/@types/node": {
-      "version": "20.14.10",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.10.tgz",
-      "integrity": "sha512-MdiXf+nDuMvY0gJKxyfZ7/6UFsETO7mGKF54MVD/ekJS6HdFtpZFBgrh6Pseu64XTb2MLyFPlbW6hj8HYRQNOQ==",
-      "dev": true,
+      "version": "22.10.1",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz",
+      "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==",
       "dependencies": {
-        "undici-types": "~5.26.4"
+        "undici-types": "~6.20.0"
       }
     },
     "node_modules/@types/ws": {
@@ -1282,46 +1567,6 @@
         "node": ">= 6"
       }
     },
-    "node_modules/azure-storage": {
-      "version": "2.10.7",
-      "resolved": "https://registry.npmjs.org/azure-storage/-/azure-storage-2.10.7.tgz",
-      "integrity": "sha512-4oeFGtn3Ziw/fGs/zkoIpKKtygnCVIcZwzJ7UQzKTxhkGQqVCByOFbYqMGYR3L+wOsunX9lNfD0jc51SQuKSSA==",
-      "deprecated": "Please note: newer packages @azure/storage-blob, @azure/storage-queue and @azure/storage-file are available as of November 2019 and @azure/data-tables is available as of June 2021. While the legacy azure-storage package will continue to receive critical bug fixes, we strongly encourage you to upgrade. Migration guide can be found: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/storage/MigrationGuide.md",
-      "dependencies": {
-        "browserify-mime": "^1.2.9",
-        "extend": "^3.0.2",
-        "json-edm-parser": "~0.1.2",
-        "json-schema": "~0.4.0",
-        "md5.js": "^1.3.4",
-        "readable-stream": "^2.0.0",
-        "request": "^2.86.0",
-        "underscore": "^1.12.1",
-        "uuid": "^3.0.0",
-        "validator": "^13.7.0",
-        "xml2js": "~0.2.8",
-        "xmlbuilder": "^9.0.7"
-      },
-      "engines": {
-        "node": ">= 0.8.26"
-      }
-    },
-    "node_modules/azure-storage/node_modules/uuid": {
-      "version": "3.3.2",
-      "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz",
-      "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==",
-      "deprecated": "Please upgrade  to version 7 or higher.  Older versions may use Math.random() in certain circumstances, which is known to be problematic.  See https://v8.dev/blog/math-random for details.",
-      "bin": {
-        "uuid": "bin/uuid"
-      }
-    },
-    "node_modules/azure-storage/node_modules/xml2js": {
-      "version": "0.2.8",
-      "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.2.8.tgz",
-      "integrity": "sha512-ZHZBIAO55GHCn2jBYByVPHvHS+o3j8/a/qmpEe6kxO3cTnTCWC3Htq9RYJ5G4XMwMMClD2QkXA9SNdPadLyn3Q==",
-      "dependencies": {
-        "sax": "0.5.x"
-      }
-    },
     "node_modules/backo2": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/backo2/-/backo2-1.0.2.tgz",
@@ -1499,11 +1744,6 @@
       "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==",
       "dev": true
     },
-    "node_modules/browserify-mime": {
-      "version": "1.2.9",
-      "resolved": "https://registry.npmjs.org/browserify-mime/-/browserify-mime-1.2.9.tgz",
-      "integrity": "sha1-rrGvKN5sDXpqLOQK22j/GEIq8x8="
-    },
     "node_modules/buffer": {
       "version": "5.4.0",
       "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.4.0.tgz",
@@ -1535,6 +1775,11 @@
         "node": "*"
       }
     },
+    "node_modules/buffer-equal-constant-time": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz",
+      "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="
+    },
     "node_modules/buffer-fill": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/buffer-fill/-/buffer-fill-1.0.0.tgz",
@@ -2364,6 +2609,14 @@
         "abstract-leveldown": "~2.6.0"
       }
     },
+    "node_modules/define-lazy-prop": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz",
+      "integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/degenerator": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
@@ -2559,6 +2812,14 @@
         "safer-buffer": "^2.1.0"
       }
     },
+    "node_modules/ecdsa-sig-formatter": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz",
+      "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==",
+      "dependencies": {
+        "safe-buffer": "^5.0.1"
+      }
+    },
     "node_modules/ee-first": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@@ -3186,6 +3447,14 @@
       "integrity": "sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==",
       "dev": true
     },
+    "node_modules/events": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz",
+      "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==",
+      "engines": {
+        "node": ">=0.8.x"
+      }
+    },
     "node_modules/express": {
       "version": "4.19.2",
       "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
@@ -3405,6 +3674,27 @@
       "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=",
       "dev": true
     },
+    "node_modules/fast-xml-parser": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.0.tgz",
+      "integrity": "sha512-/PlTQCI96+fZMAOLMZK4CWG1ItCbfZ/0jx7UIJFChPNrx7tcEgerUgWbeieCM9MfHInUDyK8DWYZ+YrywDJuTg==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/NaturalIntelligence"
+        },
+        {
+          "type": "paypal",
+          "url": "https://paypal.me/naturalintelligence"
+        }
+      ],
+      "dependencies": {
+        "strnum": "^1.0.5"
+      },
+      "bin": {
+        "fxparser": "src/cli/cli.js"
+      }
+    },
     "node_modules/fastq": {
       "version": "1.16.0",
       "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.16.0.tgz",
@@ -4166,18 +4456,6 @@
       "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz",
       "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk="
     },
-    "node_modules/hash-base": {
-      "version": "3.0.4",
-      "resolved": "https://registry.npmjs.org/hash-base/-/hash-base-3.0.4.tgz",
-      "integrity": "sha1-X8hoaEfs1zSZQDMZprCj8/auSRg=",
-      "dependencies": {
-        "inherits": "^2.0.1",
-        "safe-buffer": "^5.0.1"
-      },
-      "engines": {
-        "node": ">=4"
-      }
-    },
     "node_modules/hasha": {
       "version": "5.2.2",
       "resolved": "https://registry.npmjs.org/hasha/-/hasha-5.2.2.tgz",
@@ -4272,7 +4550,6 @@
       "version": "7.0.2",
       "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
       "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
-      "dev": true,
       "dependencies": {
         "agent-base": "^7.1.0",
         "debug": "^4.3.4"
@@ -4285,7 +4562,6 @@
       "version": "7.1.1",
       "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
       "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
-      "dev": true,
       "dependencies": {
         "debug": "^4.3.4"
       },
@@ -4938,14 +5214,6 @@
         "node": ">=4"
       }
     },
-    "node_modules/json-edm-parser": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/json-edm-parser/-/json-edm-parser-0.1.2.tgz",
-      "integrity": "sha1-HmCw/vG8CvZ7wNFG393lSGzWFbQ=",
-      "dependencies": {
-        "jsonparse": "~1.2.0"
-      }
-    },
     "node_modules/json-schema": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
@@ -4990,13 +5258,45 @@
         "graceful-fs": "^4.1.6"
       }
     },
-    "node_modules/jsonparse": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/jsonparse/-/jsonparse-1.2.0.tgz",
-      "integrity": "sha1-XAxWhRBxYOcv50ib3eoLRMK8Z70=",
-      "engines": [
-        "node >= 0.2.0"
-      ]
+    "node_modules/jsonwebtoken": {
+      "version": "9.0.2",
+      "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
+      "integrity": "sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==",
+      "dependencies": {
+        "jws": "^3.2.2",
+        "lodash.includes": "^4.3.0",
+        "lodash.isboolean": "^3.0.3",
+        "lodash.isinteger": "^4.0.4",
+        "lodash.isnumber": "^3.0.3",
+        "lodash.isplainobject": "^4.0.6",
+        "lodash.isstring": "^4.0.1",
+        "lodash.once": "^4.0.0",
+        "ms": "^2.1.1",
+        "semver": "^7.5.4"
+      },
+      "engines": {
+        "node": ">=12",
+        "npm": ">=6"
+      }
+    },
+    "node_modules/jsonwebtoken/node_modules/jwa": {
+      "version": "1.4.1",
+      "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz",
+      "integrity": "sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==",
+      "dependencies": {
+        "buffer-equal-constant-time": "1.0.1",
+        "ecdsa-sig-formatter": "1.0.11",
+        "safe-buffer": "^5.0.1"
+      }
+    },
+    "node_modules/jsonwebtoken/node_modules/jws": {
+      "version": "3.2.2",
+      "resolved": "https://registry.npmjs.org/jws/-/jws-3.2.2.tgz",
+      "integrity": "sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==",
+      "dependencies": {
+        "jwa": "^1.4.1",
+        "safe-buffer": "^5.0.1"
+      }
     },
     "node_modules/jsprim": {
       "version": "1.4.2",
@@ -5018,6 +5318,25 @@
       "integrity": "sha512-g3UB796vUFIY90VIv/WX3L2c8CS2MdWUww3CNrYmqza1Fg0DURc2K/O4YrnklBdQarSJ/y8JnJYDGc+1iumQjg==",
       "dev": true
     },
+    "node_modules/jwa": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz",
+      "integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==",
+      "dependencies": {
+        "buffer-equal-constant-time": "1.0.1",
+        "ecdsa-sig-formatter": "1.0.11",
+        "safe-buffer": "^5.0.1"
+      }
+    },
+    "node_modules/jws": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz",
+      "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==",
+      "dependencies": {
+        "jwa": "^2.0.0",
+        "safe-buffer": "^5.0.1"
+      }
+    },
     "node_modules/klaw-sync": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/klaw-sync/-/klaw-sync-6.0.0.tgz",
@@ -5122,12 +5441,47 @@
       "integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk=",
       "dev": true
     },
+    "node_modules/lodash.includes": {
+      "version": "4.3.0",
+      "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
+      "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w=="
+    },
+    "node_modules/lodash.isboolean": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
+      "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg=="
+    },
+    "node_modules/lodash.isinteger": {
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
+      "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA=="
+    },
+    "node_modules/lodash.isnumber": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz",
+      "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw=="
+    },
+    "node_modules/lodash.isplainobject": {
+      "version": "4.0.6",
+      "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz",
+      "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA=="
+    },
+    "node_modules/lodash.isstring": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
+      "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw=="
+    },
     "node_modules/lodash.merge": {
       "version": "4.6.2",
       "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
       "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
       "dev": true
     },
+    "node_modules/lodash.once": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
+      "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg=="
+    },
     "node_modules/log-symbols": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.0.0.tgz",
@@ -5262,15 +5616,6 @@
       "resolved": "https://registry.npmjs.org/map-async/-/map-async-0.1.1.tgz",
       "integrity": "sha1-yJfARJ+Fhkx0taPxlu20IVZDF0U="
     },
-    "node_modules/md5.js": {
-      "version": "1.3.4",
-      "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.4.tgz",
-      "integrity": "sha1-6b296UogpawYsENA/Fdk1bCdkB0=",
-      "dependencies": {
-        "hash-base": "^3.0.0",
-        "inherits": "^2.0.1"
-      }
-    },
     "node_modules/media-typer": {
       "version": "0.3.0",
       "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
@@ -7183,11 +7528,6 @@
       "deprecated": "This package has been deprecated in favour of @sinonjs/samsam",
       "dev": true
     },
-    "node_modules/sax": {
-      "version": "0.5.8",
-      "resolved": "https://registry.npmjs.org/sax/-/sax-0.5.8.tgz",
-      "integrity": "sha1-1HLbIo6zMcJQaw6MFVJK25OdEsE="
-    },
     "node_modules/seek-bzip": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/seek-bzip/-/seek-bzip-1.0.5.tgz",
@@ -7613,6 +7953,15 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/stoppable": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/stoppable/-/stoppable-1.1.0.tgz",
+      "integrity": "sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==",
+      "engines": {
+        "node": ">=4",
+        "npm": ">=6"
+      }
+    },
     "node_modules/stream-shift": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.3.tgz",
@@ -7672,6 +8021,11 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/strnum": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz",
+      "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA=="
+    },
     "node_modules/supports-color": {
       "version": "5.5.0",
       "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
@@ -7852,8 +8206,7 @@
     "node_modules/tslib": {
       "version": "2.6.3",
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz",
-      "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==",
-      "dev": true
+      "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ=="
     },
     "node_modules/tunnel-agent": {
       "version": "0.6.0",
@@ -7925,6 +8278,18 @@
         "is-typedarray": "^1.0.0"
       }
     },
+    "node_modules/typescript": {
+      "version": "5.7.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
+      "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
+      "bin": {
+        "tsc": "bin/tsc",
+        "tsserver": "bin/tsserver"
+      },
+      "engines": {
+        "node": ">=14.17"
+      }
+    },
     "node_modules/unbzip2-stream": {
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.3.3.tgz",
@@ -7934,16 +8299,10 @@
         "through": "^2.3.8"
       }
     },
-    "node_modules/underscore": {
-      "version": "1.13.6",
-      "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.6.tgz",
-      "integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A=="
-    },
     "node_modules/undici-types": {
-      "version": "5.26.5",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
-      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
-      "dev": true
+      "version": "6.20.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
+      "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="
     },
     "node_modules/universalify": {
       "version": "2.0.1",
@@ -7992,19 +8351,10 @@
       "version": "8.3.2",
       "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
       "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==",
-      "dev": true,
       "bin": {
         "uuid": "dist/bin/uuid"
       }
     },
-    "node_modules/validator": {
-      "version": "13.11.0",
-      "resolved": "https://registry.npmjs.org/validator/-/validator-13.11.0.tgz",
-      "integrity": "sha512-Ii+sehpSfZy+At5nPdnyMhx78fEoPDkR2XW/zimHEL3MyGJQOCQ7WeP20jPYRz7ZCpcKLB21NxuXHF3bxjStBQ==",
-      "engines": {
-        "node": ">= 0.10"
-      }
-    },
     "node_modules/value-or-promise": {
       "version": "1.0.11",
       "resolved": "https://registry.npmjs.org/value-or-promise/-/value-or-promise-1.0.11.tgz",
@@ -8284,14 +8634,6 @@
         "node": ">=4.0"
       }
     },
-    "node_modules/xmlbuilder": {
-      "version": "9.0.7",
-      "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz",
-      "integrity": "sha1-Ey7mPS7FVlxVfiD0wi35rKaGsQ0=",
-      "engines": {
-        "node": ">=4.0"
-      }
-    },
     "node_modules/xtend": {
       "version": "4.0.1",
       "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz",
diff --git a/package.json b/package.json
index 2fe6b1cd..d8fe1fe5 100644
--- a/package.json
+++ b/package.json
@@ -34,13 +34,16 @@
     "url": "https://github.com/clearlydefined/crawler"
   },
   "dependencies": {
+    "@azure/identity": "^4.5.0",
+    "@azure/storage-blob": "^12.26.0",
+    "@azure/storage-queue": "^12.25.0",
     "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9",
     "@microsoft/refreshing-config": "^0.1.3",
+    "@types/node": "^22.10.1",
     "applicationinsights": "^1.5.0",
     "ar-async": "^0.1.4",
     "axios": "^1.7.4",
     "axios-retry": "^3.2.5",
-    "azure-storage": "^2.10.3",
     "body-parser": "^1.19.0",
     "debug": "^4.3.4",
     "decompress": "^4.2.1",
@@ -76,6 +79,7 @@
     "spdx-correct": "^3.2.0",
     "throat": "^5.0.0",
     "tmp": "0.1.0",
+    "typescript": "^5.7.2",
     "unbzip2-stream": "^1.3.3",
     "winston": "^2.3.0",
     "winston-azure-application-insights": "^1.5.0",
diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js
index b1e0e461..2002c3ca 100644
--- a/providers/store/azureQueueStore.js
+++ b/providers/store/azureQueueStore.js
@@ -1,25 +1,46 @@
 // Copyright (c) Microsoft Corporation and others. Licensed under the MIT license.
 // SPDX-License-Identifier: MIT
 
-const azure = require('azure-storage')
+const { DefaultAzureCredential } = require('@azure/identity')
+const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
 const { promisify } = require('util')
 
 class AzureStorageQueue {
   constructor(options) {
     this.options = options
+    this.queueName = options.queueName
     this.logger = options.logger
+
+    const { connectionString, account } = options
+
+    const pipelineOptions = {
+      retryOptions: {
+        maxTries: 3,
+        retryDelayInMs: 1000,
+        maxRetryDelayInMs: 120 * 1000,
+        tryTimeoutInMs: 30000,
+        retryPolicyType: StorageRetryPolicyType.FIXED
+      }
+    }
+    if (connectionString) {
+      this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
+    } else {
+      this.client = new QueueServiceClient(
+        `https://${account}.queue.core.windows.net`,
+        new DefaultAzureCredential(),
+        pipelineOptions
+      )
+    }
   }
 
   async connect() {
-    this.queueService = azure
-      .createQueueService(this.options.connectionString)
-      .withFilter(new azure.LinearRetryPolicyFilter())
-    await promisify(this.queueService.createQueueIfNotExists).bind(this.queueService)(this.options.queueName)
+    this.queueService = this.client.getQueueClient(this.queueName)
+    await this.queueService.createIfNotExists() // wait so upsert() never races queue creation
   }
 
   async upsert(document) {
     const message = Buffer.from(JSON.stringify({ _metadata: document._metadata })).toString('base64')
-    await promisify(this.queueService.createMessage).bind(this.queueService)(this.options.queueName, message)
+    return await this.queueService.sendMessage(message)
   }
 
   get() {

From 0d00eaf8b30c399e05d6fde995ae81daaeb37143 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 17 Dec 2024 15:50:03 +0100
Subject: [PATCH 02/19] Add back AzureStorageDocStore.count method

---
 .../providers/storage/storageDocStore.js      | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index a9af732f..e79cecfd 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -91,6 +91,32 @@ class AzureStorageDocStore {
     await blockBlobClient.delete()
   }
 
+  // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly
+  async count(type, force = false) {
+    this._ensureDeadletter(type)
+    const key = `${this.name}:count:${type || ''}`
+
+    if (!force) {
+      const cachedCount = memoryCache.get(key)
+      if (cachedCount) {
+        return cachedCount
+      }
+    }
+
+    let entryCount = 0
+    const properties = await this.containerClient.getProperties()
+    properties.blobCount
+    try {
+      for await (const blob of this.containerClient.listBlobsFlat()) {
+        entryCount++
+      }
+      memoryCache.put(key, entryCount, 60000)
+      return entryCount
+    } catch (error) {
+      throw error
+    }
+  }
+
   async close() {
     return
   }
@@ -139,7 +165,7 @@ class AzureStorageDocStore {
   async _streamToString(readableStream) {
     return new Promise((resolve, reject) => {
       const chunks = []
-      readableStream.on('data', (data) => {
+      readableStream.on('data', data => {
         chunks.push(data.toString())
       })
       readableStream.on('end', () => {

From 13bfce996f2ab6f0606c05c03575395092976c4e Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 17 Dec 2024 16:03:12 +0100
Subject: [PATCH 03/19] Tweak async error handling, apply prettier

---
 config/cdConfig.js                             |  2 +-
 ghcrawler/providers/queuing/storageQueue.js    | 18 ++++++++++++++----
 .../providers/storage/azureBlobFactory.js      |  8 ++++++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index ced4c304..5a3592a9 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -6,7 +6,7 @@ const config = require('painless-config')
 const cd_azblob = {
   connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
   container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
-  account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
+  account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME')
 }
 
 const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index 5d6822c0..c77251de 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -40,7 +40,8 @@ class StorageQueue {
           const queueMessageResult = await this.queueClient.sendMessage(body)
           this._log('Queued', request)
           return this._buildMessageReceipt(queueMessageResult, request)
-        }))
+        })
+      )
     )
   }
 
@@ -59,7 +60,11 @@ class StorageQueue {
     }
     if (this.options.maxDequeueCount && message.dequeueCount > this.options.maxDequeueCount) {
       this.logger.verbose('maxDequeueCount exceeded')
-      await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
+      try {
+        await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
+      } catch (error) {
+        // Ignore error
+      }
       return null
     } else {
       message.body = JSON.parse(message.messageText)
@@ -102,8 +107,13 @@ class StorageQueue {
   }
 
   async getInfo() {
-    const properties = await this.queueClient.getProperties()
-    return { count: properties.approximateMessagesCount }
+    try {
+      const properties = await this.queueClient.getProperties()
+      return { count: properties.approximateMessagesCount }
+    } catch (error) {
+      this.logger.error(error)
+      return null
+    }
   }
 
   getName() {
diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js
index 60cfb121..5009b68d 100644
--- a/ghcrawler/providers/storage/azureBlobFactory.js
+++ b/ghcrawler/providers/storage/azureBlobFactory.js
@@ -4,7 +4,7 @@
 // @ts-check
 const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob')
 const AzureStorageDocStore = require('./storageDocStore')
-const { DefaultAzureCredential } = require ('@azure/identity');
+const { DefaultAzureCredential } = require('@azure/identity')
 
 /**
  * @param {object} options
@@ -33,7 +33,11 @@ module.exports = options => {
     blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
   } else if (account) {
     options.logger.info('using default credentials')
-    blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, new DefaultAzureCredential(), pipelineOptions)
+    blobServiceClient = new BlobServiceClient(
+      `https://${account}.blob.core.windows.net`,
+      new DefaultAzureCredential(),
+      pipelineOptions
+    )
   } else {
     throw new Error('either connection or account must be provided')
   }

From 7003187c9c4a12ff1444a7fbe94371bff62609b2 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 17 Dec 2024 17:21:32 +0100
Subject: [PATCH 04/19] Fix code style issues

---
 ghcrawler/providers/queuing/storageQueue.js      |  3 ++-
 .../providers/queuing/storageQueueManager.js     |  2 +-
 ghcrawler/providers/storage/storageDocStore.js   | 16 +++++-----------
 providers/store/azureQueueStore.js               |  1 -
 4 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index c77251de..2c8bd0f4 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation and others. Made available under the MIT license.
 // SPDX-License-Identifier: MIT
 
+// eslint-disable-next-line no-unused-vars
 const { QueueServiceClient } = require('@azure/storage-queue')
 const qlimit = require('qlimit')
 const { cloneDeep } = require('lodash')
@@ -31,7 +32,7 @@ class StorageQueue {
     // No specific unsubscribe logic for Azure Queue Storage
   }
 
-  async push(requests, option) {
+  async push(requests, _option) {
     requests = Array.isArray(requests) ? requests : [requests]
     return Promise.all(
       requests.map(
diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js
index 048b9bf2..ae1a4581 100644
--- a/ghcrawler/providers/queuing/storageQueueManager.js
+++ b/ghcrawler/providers/queuing/storageQueueManager.js
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: MIT
 
 const AttenuatedQueue = require('./attenuatedQueue')
-const { QueueServiceClient } = require('@azure/storage-queue')
+const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
 const Request = require('../../lib/request')
 const StorageQueue = require('./storageQueue')
 const { DefaultAzureCredential } = require('@azure/identity')
diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index e79cecfd..0e052365 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-const { BlobServiceClient } = require('@azure/storage-blob')
+// eslint-disable-next-line no-unused-vars
 const { ContainerClient } = require('@azure/storage-blob')
 const memoryCache = require('memory-cache')
 const { Readable } = require('stream')
@@ -95,26 +95,20 @@ class AzureStorageDocStore {
   async count(type, force = false) {
     this._ensureDeadletter(type)
     const key = `${this.name}:count:${type || ''}`
-
     if (!force) {
       const cachedCount = memoryCache.get(key)
       if (cachedCount) {
         return cachedCount
       }
     }
-
     let entryCount = 0
     const properties = await this.containerClient.getProperties()
     properties.blobCount
-    try {
-      for await (const blob of this.containerClient.listBlobsFlat()) {
-        entryCount++
-      }
-      memoryCache.put(key, entryCount, 60000)
-      return entryCount
-    } catch (error) {
-      throw error
+    for await (const _blob of this.containerClient.listBlobsFlat()) {
+      entryCount++
     }
+    memoryCache.put(key, entryCount, 60000)
+    return entryCount
   }
 
   async close() {
diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js
index 2002c3ca..51cee901 100644
--- a/providers/store/azureQueueStore.js
+++ b/providers/store/azureQueueStore.js
@@ -3,7 +3,6 @@
 
 const { DefaultAzureCredential } = require('@azure/identity')
 const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
-const { promisify } = require('util')
 
 class AzureStorageQueue {
   constructor(options) {

From 305b88251ecb30f1dd2a5fbff631c66e502d1077 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 17 Dec 2024 17:47:40 +0100
Subject: [PATCH 05/19] Fix code style issues

---
 ghcrawler/providers/queuing/storageQueue.js    | 2 +-
 ghcrawler/providers/storage/storageDocStore.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index 2c8bd0f4..75256988 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -32,7 +32,7 @@ class StorageQueue {
     // No specific unsubscribe logic for Azure Queue Storage
   }
 
-  async push(requests, _option) {
+  async push(requests) {
     requests = Array.isArray(requests) ? requests : [requests]
     return Promise.all(
       requests.map(
diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index 0e052365..a4967a92 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -104,7 +104,7 @@ class AzureStorageDocStore {
     let entryCount = 0
     const properties = await this.containerClient.getProperties()
     properties.blobCount
-    for await (const _blob of this.containerClient.listBlobsFlat()) {
+    for await (const {} of this.containerClient.listBlobsFlat()) {
       entryCount++
     }
     memoryCache.put(key, entryCount, 60000)

From a6a56ecdbc71da05663d0e494ffe910717385a01 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 17 Dec 2024 17:51:01 +0100
Subject: [PATCH 06/19] Fix code style issues

---
 ghcrawler/providers/storage/storageDocStore.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index a4967a92..9f8bc2a8 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -104,7 +104,8 @@ class AzureStorageDocStore {
     let entryCount = 0
     const properties = await this.containerClient.getProperties()
     properties.blobCount
-    for await (const {} of this.containerClient.listBlobsFlat()) {
+    // eslint-disable-next-line no-unused-vars
+    for await (const _ of this.containerClient.listBlobsFlat()) {
       entryCount++
     }
     memoryCache.put(key, entryCount, 60000)

From e12ba6786f5954732ab71d4dfd12c1f139ed40d6 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Wed, 18 Dec 2024 17:38:39 +0100
Subject: [PATCH 07/19] Add support for separate service principal credentials
 for blobs and queues

---
 config/cdConfig.js                            |  6 +++--
 .../providers/queuing/storageQueueManager.js  | 16 ++++++++-----
 .../providers/storage/azureBlobFactory.js     | 23 +++++++++++--------
 providers/store/azureQueueStore.js            | 17 ++++++++------
 4 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index 5a3592a9..f56d03a6 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -6,7 +6,8 @@ const config = require('painless-config')
 const cd_azblob = {
   connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
   container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
-  account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME')
+  account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
+  spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH')
 }
 
 const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
@@ -113,7 +114,8 @@ module.exports = {
     azqueue: {
       connectionString: cd_azblob.connection,
       account: cd_azblob.account,
-      queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests'
+      queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests',
+      spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH')
     },
     'cd(azblob)': cd_azblob,
     'cd(file)': cd_file
diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js
index ae1a4581..7974c0e5 100644
--- a/ghcrawler/providers/queuing/storageQueueManager.js
+++ b/ghcrawler/providers/queuing/storageQueueManager.js
@@ -5,7 +5,7 @@ const AttenuatedQueue = require('./attenuatedQueue')
 const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
 const Request = require('../../lib/request')
 const StorageQueue = require('./storageQueue')
-const { DefaultAzureCredential } = require('@azure/identity')
+const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity')
 
 class StorageQueueManager {
   constructor(connectionString, options) {
@@ -21,11 +21,15 @@ class StorageQueueManager {
     if (connectionString) {
       this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
     } else {
-      this.client = new QueueServiceClient(
-        `https://${options.account}.queue.core.windows.net`,
-        new DefaultAzureCredential(),
-        pipelineOptions
-      )
+      const { account, spnAuth } = options
+      let credential
+      if (spnAuth) {
+        const authParsed = JSON.parse(spnAuth)
+        credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
+      } else {
+        credential = new DefaultAzureCredential()
+      }
+      this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions)
     }
   }
 
diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js
index 5009b68d..b08af8b7 100644
--- a/ghcrawler/providers/storage/azureBlobFactory.js
+++ b/ghcrawler/providers/storage/azureBlobFactory.js
@@ -4,7 +4,7 @@
 // @ts-check
 const { BlobServiceClient, StorageRetryPolicyType } = require('@azure/storage-blob')
 const AzureStorageDocStore = require('./storageDocStore')
-const { DefaultAzureCredential } = require('@azure/identity')
+const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity')
 
 /**
  * @param {object} options
@@ -12,10 +12,11 @@ const { DefaultAzureCredential } = require('@azure/identity')
  * @param {string} options.connection
  * @param {string} options.container
  * @param {object} options.logger
+ * @param {object} options.spnAuth
  */
 module.exports = options => {
   options.logger.info('creating azure storage store')
-  const { account, connection, container } = options
+  const { account, connection, container, spnAuth } = options
 
   const pipelineOptions = {
     retryOptions: {
@@ -31,15 +32,17 @@ module.exports = options => {
   if (connection) {
     options.logger.info('using connection string')
     blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
-  } else if (account) {
-    options.logger.info('using default credentials')
-    blobServiceClient = new BlobServiceClient(
-      `https://${account}.blob.core.windows.net`,
-      new DefaultAzureCredential(),
-      pipelineOptions
-    )
   } else {
-    throw new Error('either connection or account must be provided')
+    let credential
+    if (spnAuth) {
+      const authParsed = JSON.parse(spnAuth)
+      credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
+      options.logger.info('using service principal credentials')
+    } else {
+      credential = new DefaultAzureCredential()
+      options.logger.info('using default credentials')
+    }
+    blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, credential, pipelineOptions)
   }
 
   const containerClient = blobServiceClient.getContainerClient(container)
diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js
index 51cee901..25c020b2 100644
--- a/providers/store/azureQueueStore.js
+++ b/providers/store/azureQueueStore.js
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation and others. Licensed under the MIT license.
 // SPDX-License-Identifier: MIT
 
-const { DefaultAzureCredential } = require('@azure/identity')
+const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/identity')
 const { QueueServiceClient, StorageRetryPolicyType } = require('@azure/storage-queue')
 
 class AzureStorageQueue {
@@ -10,7 +10,7 @@ class AzureStorageQueue {
     this.queueName = options.queueName
     this.logger = options.logger
 
-    const { connectionString, account } = options
+    const { connectionString, account, spnAuth } = options
 
     const pipelineOptions = {
       retryOptions: {
@@ -24,11 +24,14 @@ class AzureStorageQueue {
     if (connectionString) {
       this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
     } else {
-      this.client = new QueueServiceClient(
-        `https://${account}.queue.core.windows.net`,
-        new DefaultAzureCredential(),
-        pipelineOptions
-      )
+      let credential
+      if (spnAuth) {
+        const authParsed = JSON.parse(spnAuth)
+        credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
+      } else {
+        credential = new DefaultAzureCredential()
+      }
+      this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions)
     }
   }
 

From 73c02a31e257461ea9030944932505462da56a51 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Thu, 19 Dec 2024 18:01:18 +0100
Subject: [PATCH 08/19] Add missing config values

---
 config/cdConfig.js | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index f56d03a6..f07a1f3c 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -139,7 +139,9 @@ module.exports = {
       maxDequeueCount: 5,
       attenuation: {
         ttl: 3000
-      }
+      },
+      spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'),
+      account: cd_azblob.account
     },
     appVersion: config.get('APP_VERSION'),
     buildsha: config.get('BUILD_SHA')

From bebef96518f7ec60c3fdab20ef1d3d5e0b1c0384 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Tue, 14 Jan 2025 16:44:50 +0100
Subject: [PATCH 09/19] Add more logging around queue message parsing

---
 ghcrawler/providers/queuing/storageQueue.js | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index 75256988..02f43d69 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -68,7 +68,15 @@ class StorageQueue {
       }
       return null
     } else {
-      message.body = JSON.parse(message.messageText)
+      try {
+        message.body = JSON.parse(message.messageText)
+      } catch (error) {
+        this.logger.error(`Failed to parse message ${message.messageId}:`)
+        this.logger.error(`Raw message: ${message.messageText}`)
+        this.logger.error(`Parse error: ${error.message}`)
+        await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
+        return null
+      }
       const request = this.messageFormatter(message)
       request._message = message
       this._log('Popped', message.body)

From dad4e0d9f769f82602b771c5b9ad83b39bae39e7 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Wed, 15 Jan 2025 15:46:02 +0100
Subject: [PATCH 10/19] Decode Azure queue message before parsing

---
 ghcrawler/providers/queuing/storageQueue.js | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index 02f43d69..b93e60d9 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -69,7 +69,13 @@ class StorageQueue {
       return null
     } else {
       try {
-        message.body = JSON.parse(message.messageText)
+        const decodedText = message.messageText
+          .replace(/&quot;/g, '"')
+          .replace(/&amp;/g, '&')
+          .replace(/&#39;/g, "'")
+          .replace(/&lt;/g, '<')
+          .replace(/&gt;/g, '>')
+        message.body = JSON.parse(decodedText)
       } catch (error) {
         this.logger.error(`Failed to parse message ${message.messageId}:`)
         this.logger.error(`Raw message: ${message.messageText}`)

From b038b724a38a9f6debb1896c700f8eb2ab90e9c7 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Fri, 17 Jan 2025 17:51:56 +0100
Subject: [PATCH 11/19] Fix the parameter passing in storage queue
 updateMessage call

---
 ghcrawler/providers/queuing/storageQueue.js | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index b93e60d9..c408d45d 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -110,9 +110,12 @@ class StorageQueue {
   }
 
   async updateVisibilityTimeout(request, visibilityTimeout = 0) {
-    await this.queueClient.updateMessage(request._message.messageId, request._message.popReceipt, {
+    await this.queueClient.updateMessage(
+      request._message.messageId,
+      request._message.popReceipt,
+      undefined,
       visibilityTimeout
-    })
+    )
     this._log('NAKed', request._message.body)
   }
 

From b437f9c59becfd77239cf8f85d73e5e1fac40afc Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Wed, 5 Feb 2025 13:15:48 +0100
Subject: [PATCH 12/19] Fix code review comments

---
 ghcrawler/providers/queuing/storageQueue.js    |  6 ++++--
 ghcrawler/providers/storage/storageDocStore.js | 10 +++++-----
 package-lock.json                              | 11 +++++++----
 package.json                                   |  6 +++---
 4 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index c408d45d..fd31d27e 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -64,7 +64,8 @@ class StorageQueue {
       try {
         await this.queueClient.deleteMessage(message.messageId, message.popReceipt)
       } catch (error) {
-        // Ignore error
+        this.logger.error(`Failed to delete message ${message.messageId} in storageQueue, error: ${error.message}`)
+        throw error
       }
       return null
     } else {
@@ -110,13 +111,14 @@ class StorageQueue {
   }
 
   async updateVisibilityTimeout(request, visibilityTimeout = 0) {
-    await this.queueClient.updateMessage(
+    const response = await this.queueClient.updateMessage(
       request._message.messageId,
       request._message.popReceipt,
       undefined,
       visibilityTimeout
     )
     this._log('NAKed', request._message.body)
+    return this._buildMessageReceipt(response, request)
   }
 
   async flush() {
diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index 9f8bc2a8..17c56128 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -46,6 +46,7 @@ class AzureStorageDocStore {
     dataStream.push(null)
     const blockBlobClient = this.containerClient.getBlockBlobClient(blobName)
     await blockBlobClient.uploadStream(dataStream, 8 << 20, 5, options)
+    return blobName
   }
 
   async get(type, key) {
@@ -102,11 +103,10 @@ class AzureStorageDocStore {
       }
     }
     let entryCount = 0
-    const properties = await this.containerClient.getProperties()
-    properties.blobCount
-    // eslint-disable-next-line no-unused-vars
-    for await (const _ of this.containerClient.listBlobsFlat()) {
-      entryCount++
+    for await (const page of this.containerClient.listBlobsFlat().byPage({ maxPageSize: 1000 })) {
+      if (page.segment.blobItems) {
+        entryCount += page.segment.blobItems.length // `length` is a property; add each page's item count
+      }
     }
     memoryCache.put(key, entryCount, 60000)
     return entryCount
diff --git a/package-lock.json b/package-lock.json
index 2048fe70..1e7d385c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -15,7 +15,6 @@
         "@azure/storage-queue": "^12.25.0",
         "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9",
         "@microsoft/refreshing-config": "^0.1.3",
-        "@types/node": "^22.10.1",
         "applicationinsights": "^1.5.0",
         "ar-async": "^0.1.4",
         "axios": "^1.7.4",
@@ -55,13 +54,13 @@
         "spdx-correct": "^3.2.0",
         "throat": "^5.0.0",
         "tmp": "0.1.0",
-        "typescript": "^5.7.2",
         "unbzip2-stream": "^1.3.3",
         "winston": "^2.3.0",
         "winston-azure-application-insights": "^1.5.0",
         "xml2js": "^0.5.0"
       },
       "devDependencies": {
+        "@types/node": "^22.10.1",
         "chai": "^4.2.0",
         "chai-as-promised": "^7.1.1",
         "chai-spies": "^1.0.0",
@@ -73,7 +72,8 @@
         "prettier": "3.2.4",
         "proxyquire": "^2.1.3",
         "request": "^2.88.2",
-        "sinon": "^5.0.0"
+        "sinon": "^5.0.0",
+        "typescript": "^5.7.2"
       }
     },
     "node_modules/@aashutoshrathi/word-wrap": {
@@ -1194,6 +1194,7 @@
       "version": "22.10.1",
       "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz",
       "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==",
+      "dev": true,
       "dependencies": {
         "undici-types": "~6.20.0"
       }
@@ -8282,6 +8283,7 @@
       "version": "5.7.2",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
       "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
+      "dev": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -8302,7 +8304,8 @@
     "node_modules/undici-types": {
       "version": "6.20.0",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
-      "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="
+      "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==",
+      "dev": true
     },
     "node_modules/universalify": {
       "version": "2.0.1",
diff --git a/package.json b/package.json
index d8fe1fe5..e9e81518 100644
--- a/package.json
+++ b/package.json
@@ -39,7 +39,6 @@
     "@azure/storage-queue": "^12.25.0",
     "@clearlydefined/spdx": "github:clearlydefined/spdx#v0.1.9",
     "@microsoft/refreshing-config": "^0.1.3",
-    "@types/node": "^22.10.1",
     "applicationinsights": "^1.5.0",
     "ar-async": "^0.1.4",
     "axios": "^1.7.4",
@@ -79,7 +78,6 @@
     "spdx-correct": "^3.2.0",
     "throat": "^5.0.0",
     "tmp": "0.1.0",
-    "typescript": "^5.7.2",
     "unbzip2-stream": "^1.3.3",
     "winston": "^2.3.0",
     "winston-azure-application-insights": "^1.5.0",
@@ -97,6 +95,8 @@
     "prettier": "3.2.4",
     "proxyquire": "^2.1.3",
     "request": "^2.88.2",
-    "sinon": "^5.0.0"
+    "sinon": "^5.0.0",
+    "@types/node": "^22.10.1",
+    "typescript": "^5.7.2"
   }
 }

From 6766b84223adc4eb1d452c38cc792492218d9156 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Thu, 6 Feb 2025 13:11:26 +0100
Subject: [PATCH 13/19] Ensure messageId is included into message receipt

---
 ghcrawler/providers/queuing/storageQueue.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index fd31d27e..eb364cf3 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -118,7 +118,7 @@ class StorageQueue {
       visibilityTimeout
     )
     this._log('NAKed', request._message.body)
-    return this._buildMessageReceipt(response, request)
+    return this._buildMessageReceipt({ messageId: request._message.messageId, ...response }, request)
   }
 
   async flush() {

From 4c41da01f40d49144866d646a5c6c67d364cecab Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Thu, 6 Feb 2025 14:43:25 +0100
Subject: [PATCH 14/19] Add safe XML+HTML codecs to storage queue

---
 ghcrawler/providers/queuing/storageQueue.js | 39 +++++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/ghcrawler/providers/queuing/storageQueue.js b/ghcrawler/providers/queuing/storageQueue.js
index eb364cf3..a2ae9f9f 100644
--- a/ghcrawler/providers/queuing/storageQueue.js
+++ b/ghcrawler/providers/queuing/storageQueue.js
@@ -38,7 +38,8 @@ class StorageQueue {
       requests.map(
         qlimit(this.options.parallelPush || 1)(async request => {
           const body = JSON.stringify(request)
-          const queueMessageResult = await this.queueClient.sendMessage(body)
+          const encoded = this._encodeXMLSafe(body)
+          const queueMessageResult = await this.queueClient.sendMessage(encoded)
           this._log('Queued', request)
           return this._buildMessageReceipt(queueMessageResult, request)
         })
@@ -70,12 +71,7 @@ class StorageQueue {
       return null
     } else {
       try {
-        const decodedText = message.messageText
-          .replace(/&quot;/g, '"')
-          .replace(/&amp;/g, '&')
-          .replace(/&#39;/g, "'")
-          .replace(/&lt;/g, '<')
-          .replace(/&gt;/g, '>')
+        const decodedText = this._decodeXMLSafe(message.messageText)
         message.body = JSON.parse(decodedText)
       } catch (error) {
         this.logger.error(`Failed to parse message ${message.messageId}:`)
@@ -147,6 +143,35 @@ class StorageQueue {
   isMessageNotFound(error) {
     return error?.code === 'MessageNotFound'
   }
+
+  _encodeXMLSafe(text) {
+    if (typeof text !== 'string') return text
+
+    return (
+      text
+        // Handle & first to prevent double-encoding
+        .replace(/&/g, '&amp;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&apos;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+    )
+  }
+
+  _decodeXMLSafe(text) {
+    if (typeof text !== 'string') return text
+
+    return (
+      text
+        // Handle both XML and HTML encodings for quotes and apostrophes
+        .replace(/&apos;|&#39;|&#x27;/g, "'")
+        .replace(/&quot;|&#34;|&#x22;/g, '"')
+        // Handle basic XML entities
+        .replace(/&lt;|&#60;|&#x3[Cc];/g, '<')
+        .replace(/&gt;|&#62;|&#x3[Ee];/g, '>')
+        .replace(/&amp;|&#38;|&#x26;/g, '&') // Must be after other & entities
+    )
+  }
 }
 
 module.exports = StorageQueue

From a7e714cac6f844f5f1067a71c8a27995a9553ee2 Mon Sep 17 00:00:00 2001
From: Lewis Jones <ljones140@gmail.com>
Date: Tue, 11 Feb 2025 11:48:04 +0000
Subject: [PATCH 15/19] Queues can be configured separately with SPN from harvest
 azblob

So we have the ability to have a harvest connection with a connection string and queues with an Azure SPN.
---
 config/cdConfig.js                            |  5 ++--
 .../providers/queuing/storageQueueManager.js  | 29 ++++++++++++-------
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index f07a1f3c..3adafb11 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -140,8 +140,9 @@ module.exports = {
       attenuation: {
         ttl: 3000
       },
-      spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'),
-      account: cd_azblob.account
+      spnAuth: config.get('CRAWLER_QUEUE_AZURE_SPN_AUTH') || cd_azblob.spnAuth,
+      account: config.get('CRAWLER_QUEUE_AZURE_ACCOUNT_NAME') || cd_azblob.account,
+      isSpnAuth: config.get('CRAWLER_QUEUE_AZURE_IS_SPN_AUTH') || false
     },
     appVersion: config.get('APP_VERSION'),
     buildsha: config.get('BUILD_SHA')
diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js
index 7974c0e5..9d142019 100644
--- a/ghcrawler/providers/queuing/storageQueueManager.js
+++ b/ghcrawler/providers/queuing/storageQueueManager.js
@@ -18,19 +18,28 @@ class StorageQueueManager {
         retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
       }
     }
+
+    const { account, spnAuth, isSpnAuth } = options
+    if (isSpnAuth) {
+      const authParsed = JSON.parse(spnAuth)
+      this.client = new QueueServiceClient(
+        `https://${account}.queue.core.windows.net`,
+        new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret),
+        pipelineOptions
+      )
+      return
+    }
+
     if (connectionString) {
       this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
-    } else {
-      const { account, spnAuth } = options
-      let credential
-      if (spnAuth) {
-        const authParsed = JSON.parse(spnAuth)
-        credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
-      } else {
-        credential = new DefaultAzureCredential()
-      }
-      this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions)
+      return
     }
+
+    this.client = new QueueServiceClient(
+      `https://${account}.queue.core.windows.net`,
+      new DefaultAzureCredential(),
+      pipelineOptions
+    )
   }
 
   createQueueClient(name, formatter, options) {

From 5d6eca2f12977cf00476f74f9fc506e18fbd41e9 Mon Sep 17 00:00:00 2001
From: Lewis Jones <ljones140@gmail.com>
Date: Thu, 13 Feb 2025 11:27:12 +0000
Subject: [PATCH 16/19] name is not set anymore, options.container is the new
 place for this

---
 ghcrawler/providers/storage/storageDocStore.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ghcrawler/providers/storage/storageDocStore.js b/ghcrawler/providers/storage/storageDocStore.js
index 17c56128..a531269c 100644
--- a/ghcrawler/providers/storage/storageDocStore.js
+++ b/ghcrawler/providers/storage/storageDocStore.js
@@ -95,7 +95,7 @@ class AzureStorageDocStore {
   // This API can only be used for the 'deadletter' store because we cannot look up documents by type performantly
   async count(type, force = false) {
     this._ensureDeadletter(type)
-    const key = `${this.name}:count:${type || ''}`
+    const key = `${this.options.container}:count:${type || ''}`
     if (!force) {
       const cachedCount = memoryCache.get(key)
       if (cachedCount) {

From edc8bb24da2bc951d3049163f680ef115a066826 Mon Sep 17 00:00:00 2001
From: Lewis Jones <ljones140@gmail.com>
Date: Thu, 13 Feb 2025 17:59:50 +0000
Subject: [PATCH 17/19] Fix integer version that was breaking dead letter queue writing

Needs to be a string
---
 ghcrawler/lib/crawler.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ghcrawler/lib/crawler.js b/ghcrawler/lib/crawler.js
index 69553802..79d7fc94 100644
--- a/ghcrawler/lib/crawler.js
+++ b/ghcrawler/lib/crawler.js
@@ -638,7 +638,7 @@ class Crawler {
       metadata.errorMessage = request._error.message
       metadata.errorStack = request._error.stack
     }
-    metadata.version = 1
+    metadata.version = "1"
     metadata.meta = request.meta
     metadata.type = 'deadletter'
     metadata.url = request.url.replace('//', '//deadletter.')

From 4b67c4a522a6149e54237c8522923de01082fcbf Mon Sep 17 00:00:00 2001
From: Lewis Jones <ljones140@gmail.com>
Date: Thu, 13 Feb 2025 18:04:19 +0000
Subject: [PATCH 18/19] single quotes

---
 ghcrawler/lib/crawler.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ghcrawler/lib/crawler.js b/ghcrawler/lib/crawler.js
index 79d7fc94..f494493c 100644
--- a/ghcrawler/lib/crawler.js
+++ b/ghcrawler/lib/crawler.js
@@ -638,7 +638,7 @@ class Crawler {
       metadata.errorMessage = request._error.message
       metadata.errorStack = request._error.stack
     }
-    metadata.version = "1"
+    metadata.version = '1'
     metadata.meta = request.meta
     metadata.type = 'deadletter'
     metadata.url = request.url.replace('//', '//deadletter.')

From 19aa5cdec2e7ec4775cb63d110fbd08a95646773 Mon Sep 17 00:00:00 2001
From: Roman Iakovlev <romaniakovlev@github.com>
Date: Mon, 17 Feb 2025 17:45:09 +0100
Subject: [PATCH 19/19] Modify ordered auth selection in azureBlobFactory and
 azureQueueStore

This is to make it consistent with the storageQueueManager.
---
 config/cdConfig.js                            |  6 ++--
 .../providers/queuing/storageQueueManager.js  |  3 ++
 .../providers/storage/azureBlobFactory.js     | 33 +++++++++++--------
 providers/store/azureQueueStore.js            | 32 ++++++++++++------
 4 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/config/cdConfig.js b/config/cdConfig.js
index 3adafb11..c1a6a1be 100644
--- a/config/cdConfig.js
+++ b/config/cdConfig.js
@@ -7,7 +7,8 @@ const cd_azblob = {
   connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
   container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
   account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
-  spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH')
+  spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH'),
+  isSpnAuth: config.get('CRAWLER_AZBLOB_IS_SPN_AUTH') || false
 }
 
 const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
@@ -115,7 +116,8 @@ module.exports = {
       connectionString: cd_azblob.connection,
       account: cd_azblob.account,
       queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests',
-      spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH')
+      spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'),
+      isSpnAuth: config.get('CRAWLER_HARVESTS_QUEUE_IS_SPN_AUTH') || false
     },
     'cd(azblob)': cd_azblob,
     'cd(file)': cd_file
diff --git a/ghcrawler/providers/queuing/storageQueueManager.js b/ghcrawler/providers/queuing/storageQueueManager.js
index 9d142019..2e662978 100644
--- a/ghcrawler/providers/queuing/storageQueueManager.js
+++ b/ghcrawler/providers/queuing/storageQueueManager.js
@@ -21,6 +21,7 @@ class StorageQueueManager {
 
     const { account, spnAuth, isSpnAuth } = options
     if (isSpnAuth) {
+      options.logger.info('using service principal credentials in storageQueueManager')
       const authParsed = JSON.parse(spnAuth)
       this.client = new QueueServiceClient(
         `https://${account}.queue.core.windows.net`,
@@ -31,10 +32,12 @@ class StorageQueueManager {
     }
 
     if (connectionString) {
+      options.logger.info('using connection string in storageQueueManager')
       this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
       return
     }
 
+    options.logger.info('using default credentials in storageQueueManager')
     this.client = new QueueServiceClient(
       `https://${account}.queue.core.windows.net`,
       new DefaultAzureCredential(),
diff --git a/ghcrawler/providers/storage/azureBlobFactory.js b/ghcrawler/providers/storage/azureBlobFactory.js
index b08af8b7..5416ed5e 100644
--- a/ghcrawler/providers/storage/azureBlobFactory.js
+++ b/ghcrawler/providers/storage/azureBlobFactory.js
@@ -13,10 +13,11 @@ const { DefaultAzureCredential, ClientSecretCredential } = require('@azure/ident
  * @param {string} options.container
  * @param {object} options.logger
  * @param {object} options.spnAuth
+ * @param {boolean|string} options.isSpnAuth - when truthy, authenticate with the service principal described by options.spnAuth
  */
 module.exports = options => {
   options.logger.info('creating azure storage store')
-  const { account, connection, container, spnAuth } = options
+  const { account, connection, container, spnAuth, isSpnAuth } = options
 
   const pipelineOptions = {
     retryOptions: {
@@ -27,22 +28,28 @@ module.exports = options => {
       retryPolicyType: StorageRetryPolicyType.EXPONENTIAL
     }
   }
-
   let blobServiceClient
-  if (connection) {
-    options.logger.info('using connection string')
-    blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
+
+  if (isSpnAuth) {
+    options.logger.info('using service principal credentials in azureBlobFactory')
+    const authParsed = JSON.parse(spnAuth)
+    blobServiceClient = new BlobServiceClient(
+      `https://${account}.blob.core.windows.net`,
+      new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret),
+      pipelineOptions
+    )
   } else {
-    let credential
-    if (spnAuth) {
-      const authParsed = JSON.parse(spnAuth)
-      credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
-      options.logger.info('using service principal credentials')
+    if (connection) {
+      options.logger.info('using connection string in azureBlobFactory')
+      blobServiceClient = BlobServiceClient.fromConnectionString(connection, pipelineOptions)
     } else {
-      credential = new DefaultAzureCredential()
-      options.logger.info('using default credentials')
+      options.logger.info('using default credentials in azureBlobFactory')
+      blobServiceClient = new BlobServiceClient(
+        `https://${account}.blob.core.windows.net`,
+        new DefaultAzureCredential(),
+        pipelineOptions
+      )
     }
-    blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, credential, pipelineOptions)
   }
 
   const containerClient = blobServiceClient.getContainerClient(container)
diff --git a/providers/store/azureQueueStore.js b/providers/store/azureQueueStore.js
index 25c020b2..9ad21bb3 100644
--- a/providers/store/azureQueueStore.js
+++ b/providers/store/azureQueueStore.js
@@ -10,7 +10,7 @@ class AzureStorageQueue {
     this.queueName = options.queueName
     this.logger = options.logger
 
-    const { connectionString, account, spnAuth } = options
+    const { connectionString, account, spnAuth, isSpnAuth } = options
 
     const pipelineOptions = {
       retryOptions: {
@@ -21,18 +21,30 @@ class AzureStorageQueue {
         retryPolicyType: StorageRetryPolicyType.FIXED
       }
     }
+
+    if (isSpnAuth) {
+      options.logger.info('using service principal credentials in azureQueueStore')
+      const authParsed = JSON.parse(spnAuth)
+      this.client = new QueueServiceClient(
+        `https://${account}.queue.core.windows.net`,
+        new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret),
+        pipelineOptions
+      )
+      return
+    }
+
     if (connectionString) {
+      options.logger.info('using connection string in azureQueueStore')
       this.client = QueueServiceClient.fromConnectionString(connectionString, pipelineOptions)
-    } else {
-      let credential
-      if (spnAuth) {
-        const authParsed = JSON.parse(spnAuth)
-        credential = new ClientSecretCredential(authParsed.tenantId, authParsed.clientId, authParsed.clientSecret)
-      } else {
-        credential = new DefaultAzureCredential()
-      }
-      this.client = new QueueServiceClient(`https://${account}.queue.core.windows.net`, credential, pipelineOptions)
+      return
     }
+
+    options.logger.info('using default credentials in azureQueueStore')
+    this.client = new QueueServiceClient(
+      `https://${account}.queue.core.windows.net`,
+      new DefaultAzureCredential(),
+      pipelineOptions
+    )
   }
 
   async connect() {