
Commit 22c9902

Merge pull request #86 from penwern/dev/checksum-generation-fix
passing multipart configuration down from worker manager to worker
2 parents: e0d97e0 + feedb70

2 files changed: +61 -5

src/js/core/WorkerManager.js

11 additions, 1 deletion

@@ -68,7 +68,17 @@ class CurateWorkerManager {
     if (this.taskQueue.length > 0) {
       const task = this.taskQueue.shift();
       this.currentTasks.set(workerId, task);
-      worker.postMessage({ file: task.file, msg: "begin hash" });
+
+      // Get PydioApi values and pass them to worker
+      const multipartThreshold = PydioApi.getMultipartThreshold();
+      const multipartPartSize = PydioApi.getMultipartPartSize();
+
+      worker.postMessage({
+        file: task.file,
+        msg: "begin hash",
+        multipartThreshold,
+        multipartPartSize
+      });
     } else if (this.currentTasks.size === 0) {
       // No more tasks in queue and no running tasks - cleanup workers
       this.cleanupWorkers();

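Why this change was needed: the worker previously looked up the multipart configuration itself via PydioApi (see the removed line in the hashWorker diff below), but PydioApi belongs to the main thread and is not defined inside a worker's global scope; the old worker code also reused getMultipartPartSize() as the threshold. The manager now reads both values on the main thread and ships them with the task. Below is a minimal sketch of the consumer side of this message contract; the worker URL and byte values are illustrative assumptions, while the { status, hash } result shape is taken from the hashWorker diff that follows.

// Illustrative sketch only — the worker URL and config values here are
// assumptions, not part of this PR. The { status, hash } result shape
// matches the postMessage call in hashWorker.worker.js below.
const worker = new Worker("hashWorker.worker.js");

worker.onmessage = (event) => {
  if (event.data.status === "complete") {
    console.log("checksum:", event.data.hash);
  }
};

// Mirrors the call added in this commit: the multipart configuration now
// travels with the task instead of being looked up inside the worker.
worker.postMessage({
  file: someFile,                        // a File from an input or drop event
  msg: "begin hash",
  multipartThreshold: 100 * 1024 * 1024, // example: 100 MB threshold
  multipartPartSize: 50 * 1024 * 1024,   // example: 50 MB parts
});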
src/js/workers/hashWorker.worker.js

50 additions, 4 deletions

@@ -1,4 +1,4 @@
-import SparkMD5 from "spark-md5";
+importScripts("https://cdnjs.cloudflare.com/ajax/libs/spark-md5/3.0.2/spark-md5.min.js")
 
 // Function to calculate the checksum for multipart files
 const calculateMultipartChecksum = (file, partSize) =>
@@ -53,19 +53,65 @@ const calculateMultipartChecksum = (file, partSize) =>
     loadNext();
   });
 
+
+const incrementalMD5 = file => new Promise((resolve, reject) => {
+  var loaded = 0;
+  var startTime = performance.now();
+  var tSize = file.size;
+  const fileReader = new FileReader();
+  const spark = new SparkMD5.ArrayBuffer();
+  const chunkSize = 2097152; // Read in chunks of 2MB
+  const chunks = Math.ceil(file.size / chunkSize);
+  let currentChunk = 0;
+
+  fileReader.onload = event => {
+    spark.append(event.target.result); // Append array buffer
+    ++currentChunk;
+    if (currentChunk < chunks) {
+      loadNext();
+    } else {
+      resolve(spark.end()); // Compute hash
+    }
+  };
+
+  fileReader.addEventListener("progress", event => {
+    loaded += event.loaded;
+    let pE = Math.round((loaded / tSize) * 100);
+    let rS = pE + "%";
+    // console.log(rS)
+  });
+
+  fileReader.addEventListener("loadend", event => {
+    if (event.total > 0) {
+      var endTime = performance.now();
+      // console.log(`Took ${endTime - startTime} milliseconds`)
+    }
+  });
+
+  fileReader.onerror = () => reject(fileReader.error);
+
+  const loadNext = () => {
+    const start = currentChunk * chunkSize;
+    const end = start + chunkSize >= file.size ? file.size : start + chunkSize;
+    fileReader.readAsArrayBuffer(File.prototype.slice.call(file, start, end));
+  };
+
+  loadNext();
+});
+
 // Main worker handler
 self.onmessage = async function (event) {
   if (event.data.file && event.data.msg == "begin hash") {
-    console.log("ello chum!");
     const file = event.data.file;
-    const multipartThreshold = PydioApi.getMultipartPartSize(); // Get the current multipart chunk size
+    const multipartThreshold = event.data.multipartThreshold;
+    const multipartPartSize = event.data.multipartPartSize;
 
     if (file.size > multipartThreshold) {
       // Only run multipart checksum logic for files above the threshold
       try {
         const finalChecksum = await calculateMultipartChecksum(
           file,
-          multipartThreshold
+          multipartPartSize
         );
         postMessage({ status: "complete", hash: finalChecksum });
       } catch (error) {

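A caveat for readers: the body of calculateMultipartChecksum is elided from this diff (only its closing loadNext()/}); lines appear at the top of the second hunk), so the exact checksum scheme is not visible here. Given the (file, partSize) signature, a plausible scheme is the S3-style multipart ETag: MD5 each part, then MD5 the concatenated raw part digests and append the part count. The sketch below is a hypothetical reconstruction under that assumption, not the repository's actual implementation.

// Hypothetical reconstruction, S3-ETag style; assumes SparkMD5 is available
// via the importScripts call above. The repo's real implementation may differ.
const multipartChecksumSketch = (file, partSize) =>
  new Promise((resolve, reject) => {
    const totalParts = Math.ceil(file.size / partSize);
    const digestOfDigests = new SparkMD5(); // string/binary-mode hasher
    const reader = new FileReader();
    let currentPart = 0;

    const loadNext = () => {
      const start = currentPart * partSize;
      const end = Math.min(start + partSize, file.size);
      reader.readAsArrayBuffer(file.slice(start, end));
    };

    reader.onload = (event) => {
      // Raw (binary string) MD5 of this part, folded into the outer hash
      const partDigest = SparkMD5.ArrayBuffer.hash(event.target.result, true);
      digestOfDigests.appendBinary(partDigest);
      currentPart += 1;
      if (currentPart < totalParts) {
        loadNext();
      } else {
        resolve(digestOfDigests.end() + "-" + totalParts); // e.g. "9b2cf5...-3"
      }
    };

    reader.onerror = () => reject(reader.error);
    loadNext();
  });

Whatever the exact scheme, the substantive fix is visible in the hunk above: the part loop is now driven by multipartPartSize rather than multipartThreshold, so parts are cut at the configured part size instead of at the threshold value.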