From b8abb82702b1a1e397153494592971854f104474 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sat, 18 Jan 2025 09:31:31 -0800 Subject: [PATCH 1/3] fix: Add manifest corruption prevention with configurable delays This commit adds several improvements to prevent manifest corruption: 1. File Size Stability Check: Monitors file size stability before reading, ensures manifest isn't being written to during read 2. Configurable Delays: Added preRebuildDelay setting (default: 0ms), Added manifestRebuildDebounce setting (default: 1000ms), Users can increase delays if experiencing issues 3. Telemetry Events: Track manifest read errors and recovery, Monitor rebuild timings and success rates, Collect data on delay setting effectiveness Closes #[issue-number] --- package.json | 14 ++++ src/manifest/dbtProject.ts | 52 +++++++++++++- src/manifest/parsers/index.ts | 128 ++++++++++++++++++++++++++++++---- 3 files changed, 180 insertions(+), 14 deletions(-) diff --git a/package.json b/package.json index b2a3e9ce6..e31f63ebb 100644 --- a/package.json +++ b/package.json @@ -263,6 +263,20 @@ } } } + }, + "dbt.preRebuildDelay": { + "type": "number", + "default": 0, + "description": "Delay in milliseconds before rebuilding manifest after file changes. Increase this value (e.g. to 2000) if you experience manifest corruption issues.", + "minimum": 0, + "maximum": 10000 + }, + "dbt.manifestRebuildDebounce": { + "type": "number", + "default": 1000, + "description": "Debounce time in milliseconds for manifest rebuilds. Increase this value if you have many rapid file changes causing frequent rebuilds.", + "minimum": 500, + "maximum": 10000 } } } diff --git a/src/manifest/dbtProject.ts b/src/manifest/dbtProject.ts index f85440d5b..2b10fa534 100644 --- a/src/manifest/dbtProject.ts +++ b/src/manifest/dbtProject.ts @@ -360,8 +360,56 @@ export class DBTProject implements Disposable { this.projectRoot }`, ); - await this.rebuildManifest(); - }, this.dbtProjectIntegration.getDebounceForRebuildManifest()), + + // Get configurable delays from settings with reasonable defaults + const preRebuildDelay = workspace + .getConfiguration("dbt") + .get("preRebuildDelay", 0); // Default to 0ms for fast systems + + const defaultDebounceTime = workspace + .getConfiguration("dbt") + .get("manifestRebuildDebounce", 1000); // Default to 1s + + // Send telemetry about the current delay settings + this.telemetry.sendTelemetryEvent("manifestRebuildSettings", { + preRebuildDelay: preRebuildDelay.toString(), + debounceTime: ( + this.dbtProjectIntegration.getDebounceForRebuildManifest() || + defaultDebounceTime + ).toString(), + projectName: DBTProject.hashProjectRoot(this.projectRoot.fsPath), + }); + + const startTime = Date.now(); + + // Only add delay if configured (for systems experiencing issues) + if (preRebuildDelay > 0) { + await new Promise((resolve) => + setTimeout(resolve, preRebuildDelay), + ); + } + + try { + await this.rebuildManifest(); + + // Track successful rebuilds and their timing + this.telemetry.sendTelemetryEvent("manifestRebuildSuccess", { + duration: (Date.now() - startTime).toString(), + preRebuildDelay: preRebuildDelay.toString(), + projectName: DBTProject.hashProjectRoot(this.projectRoot.fsPath), + }); + } catch (error) { + // Track failed rebuilds + this.telemetry.sendTelemetryEvent("manifestRebuildError", { + duration: (Date.now() - startTime).toString(), + preRebuildDelay: preRebuildDelay.toString(), + projectName: DBTProject.hashProjectRoot(this.projectRoot.fsPath), + errorType: error instanceof Error ? error.name : "unknown", + errorMessage: error instanceof Error ? error.message : "unknown", + }); + throw error; // Re-throw to maintain original error handling + } + }, this.dbtProjectIntegration.getDebounceForRebuildManifest() || defaultDebounceTime), ), ); diff --git a/src/manifest/parsers/index.ts b/src/manifest/parsers/index.ts index 5c176f36b..d9ccca19d 100755 --- a/src/manifest/parsers/index.ts +++ b/src/manifest/parsers/index.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync } from "fs"; +import { existsSync, readFileSync, statSync } from "fs"; import { provide } from "inversify-binding-decorators"; import * as path from "path"; import { Uri } from "vscode"; @@ -49,7 +49,7 @@ export class ManifestParser { return; } const projectRoot = project.projectRoot; - const manifest = this.readAndParseManifest(projectRoot, targetPath); + const manifest = await this.readAndParseManifest(projectRoot, targetPath); if (manifest === undefined) { const event: ManifestCacheChangedEvent = { added: [ @@ -187,7 +187,7 @@ export class ManifestParser { return event; } - private readAndParseManifest(projectRoot: Uri, targetPath: string) { + private async readAndParseManifest(projectRoot: Uri, targetPath: string) { const pathParts = [targetPath]; if (!path.isAbsolute(targetPath)) { pathParts.unshift(projectRoot.fsPath); @@ -198,15 +198,119 @@ export class ManifestParser { `Reading manifest at ${manifestLocation} for project at ${projectRoot}`, ); - try { - const manifestFile = readFileSync(manifestLocation, "utf8"); - return JSON.parse(manifestFile); - } catch (error) { - this.terminal.error( - "ManifestParser", - `Could not read manifest file at ${manifestLocation}, ignoring error`, - error, - ); + // MANIFEST CORRUPTION PREVENTION + // The manifest file can become corrupted when it's read while dbt is still writing to it. + // This happens because dbt writes the manifest file in a single operation, but the file + // can be quite large (often >1MB). During this write operation, if we try to read the file, + // we may get a partially written manifest that is invalid JSON or missing critical data. + + // To prevent this, we implement three safety mechanisms: + // 1. File Size Stability Check: We check if the file size remains stable for a period, + // indicating dbt has finished writing + // 2. Retry Logic: If reading fails, we retry multiple times with delays + // 3. Basic Validation: We verify the manifest has required fields before accepting it + + const maxRetries = 3; + const stabilityDelay = 1000; // 1 second between file size checks + const maxStabilityChecks = 3; // Number of times file size must remain stable + + let lastAttemptError: any; + let lastFileSize: number = 0; + let stabilityCheckAttempts = 0; + + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + // FILE SIZE STABILITY CHECK + // We check the file size multiple times to ensure it's not actively being written to. + // If the size changes between checks, we reset our counter and wait longer. + let lastSize = -1; + let stableChecks = 0; + stabilityCheckAttempts = 0; + + while (stableChecks < maxStabilityChecks) { + if (!existsSync(manifestLocation)) { + throw new Error("Manifest file does not exist"); + } + const stats = statSync(manifestLocation); + const currentSize = stats.size; + lastFileSize = currentSize; + + if (currentSize === lastSize) { + stableChecks++; + } else { + stableChecks = 0; + lastSize = currentSize; + } + + stabilityCheckAttempts++; + + if (stableChecks === maxStabilityChecks) { + break; + } + await new Promise((resolve) => setTimeout(resolve, stabilityDelay)); + } + + const manifestFile = readFileSync(manifestLocation, "utf8"); + const parsed = JSON.parse(manifestFile); + + // MANIFEST VALIDATION + // The manifest must have these core sections to be considered valid. + // If any are missing, the manifest is likely corrupted or incomplete. + if (!parsed.nodes || !parsed.sources || !parsed.macros) { + throw new Error("Manifest file appears to be incomplete"); + } + + // If we successfully read and parsed the manifest after retries, log it + if (attempt > 0) { + this.telemetry.sendTelemetryEvent("manifestReadRetrySuccess", { + retryAttempt: attempt.toString(), + fileSize: lastFileSize.toString(), + stabilityChecks: stabilityCheckAttempts.toString(), + }); + } + + return parsed; + } catch (error) { + lastAttemptError = error; + this.terminal.error( + "ManifestParser", + `Attempt ${attempt + 1}/${maxRetries} failed to read manifest file at ${manifestLocation}`, + error, + ); + + // Send telemetry for each failed attempt + this.telemetry.sendTelemetryEvent("manifestReadError", { + attempt: attempt.toString(), + fileSize: lastFileSize.toString(), + errorType: error instanceof Error ? error.name : "unknown", + errorMessage: error instanceof Error ? error.message : "unknown", + stabilityChecks: stabilityCheckAttempts.toString(), + }); + + if (attempt === maxRetries - 1) { + // After all retries fail, send a final telemetry event + this.telemetry.sendTelemetryEvent("manifestReadFinalFailure", { + totalAttempts: maxRetries.toString(), + finalFileSize: lastFileSize.toString(), + finalErrorType: + lastAttemptError instanceof Error + ? lastAttemptError.name + : "unknown", + finalErrorMessage: + lastAttemptError instanceof Error + ? lastAttemptError.message + : "unknown", + totalStabilityChecks: stabilityCheckAttempts.toString(), + }); + + // Return undefined to trigger empty cache event + // This allows the extension to continue functioning with reduced capabilities + // rather than breaking completely + return undefined; + } + // Wait before retry to allow potential write operations to complete + await new Promise((resolve) => setTimeout(resolve, 2000)); + } } } } From eac7b40fd3665599a332daabd6f25962e448665f Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sat, 18 Jan 2025 09:35:46 -0800 Subject: [PATCH 2/3] cut down on waits before parsing --- src/manifest/parsers/index.ts | 65 +++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/src/manifest/parsers/index.ts b/src/manifest/parsers/index.ts index d9ccca19d..631a1c105 100755 --- a/src/manifest/parsers/index.ts +++ b/src/manifest/parsers/index.ts @@ -204,11 +204,10 @@ export class ManifestParser { // can be quite large (often >1MB). During this write operation, if we try to read the file, // we may get a partially written manifest that is invalid JSON or missing critical data. - // To prevent this, we implement three safety mechanisms: - // 1. File Size Stability Check: We check if the file size remains stable for a period, - // indicating dbt has finished writing - // 2. Retry Logic: If reading fails, we retry multiple times with delays - // 3. Basic Validation: We verify the manifest has required fields before accepting it + // To prevent this, we implement a progressive retry strategy: + // 1. First Attempt: Try immediate read for best performance + // 2. If that fails, use file size stability checks to ensure write is complete + // 3. Basic validation to verify manifest completeness const maxRetries = 3; const stabilityDelay = 1000; // 1 second between file size checks @@ -218,11 +217,38 @@ export class ManifestParser { let lastFileSize: number = 0; let stabilityCheckAttempts = 0; + // First attempt: Try immediate read for best performance + try { + if (!existsSync(manifestLocation)) { + throw new Error("Manifest file does not exist"); + } + const stats = statSync(manifestLocation); + lastFileSize = stats.size; + + const manifestFile = readFileSync(manifestLocation, "utf8"); + const parsed = JSON.parse(manifestFile); + + if (!parsed.nodes || !parsed.sources || !parsed.macros) { + throw new Error("Manifest file appears to be incomplete"); + } + + // Fast path succeeded - no telemetry needed for successful first attempt + return parsed; + } catch (error) { + // First attempt failed, log it and move to retry with stability checks + this.telemetry.sendTelemetryEvent("manifestReadFirstAttemptError", { + fileSize: lastFileSize.toString(), + errorType: error instanceof Error ? error.name : "unknown", + errorMessage: error instanceof Error ? error.message : "unknown", + }); + lastAttemptError = error; + } + + // If we get here, first attempt failed - try again with stability checks for (let attempt = 0; attempt < maxRetries; attempt++) { try { // FILE SIZE STABILITY CHECK - // We check the file size multiple times to ensure it's not actively being written to. - // If the size changes between checks, we reset our counter and wait longer. + // Only done on retry attempts after first attempt fails let lastSize = -1; let stableChecks = 0; stabilityCheckAttempts = 0; @@ -253,21 +279,16 @@ export class ManifestParser { const manifestFile = readFileSync(manifestLocation, "utf8"); const parsed = JSON.parse(manifestFile); - // MANIFEST VALIDATION - // The manifest must have these core sections to be considered valid. - // If any are missing, the manifest is likely corrupted or incomplete. if (!parsed.nodes || !parsed.sources || !parsed.macros) { throw new Error("Manifest file appears to be incomplete"); } - // If we successfully read and parsed the manifest after retries, log it - if (attempt > 0) { - this.telemetry.sendTelemetryEvent("manifestReadRetrySuccess", { - retryAttempt: attempt.toString(), - fileSize: lastFileSize.toString(), - stabilityChecks: stabilityCheckAttempts.toString(), - }); - } + // Log successful retry with stability checks + this.telemetry.sendTelemetryEvent("manifestReadRetrySuccess", { + retryAttempt: attempt.toString(), + fileSize: lastFileSize.toString(), + stabilityChecks: stabilityCheckAttempts.toString(), + }); return parsed; } catch (error) { @@ -278,7 +299,6 @@ export class ManifestParser { error, ); - // Send telemetry for each failed attempt this.telemetry.sendTelemetryEvent("manifestReadError", { attempt: attempt.toString(), fileSize: lastFileSize.toString(), @@ -288,9 +308,8 @@ export class ManifestParser { }); if (attempt === maxRetries - 1) { - // After all retries fail, send a final telemetry event this.telemetry.sendTelemetryEvent("manifestReadFinalFailure", { - totalAttempts: maxRetries.toString(), + totalAttempts: (maxRetries + 1).toString(), // +1 for first attempt finalFileSize: lastFileSize.toString(), finalErrorType: lastAttemptError instanceof Error @@ -303,12 +322,8 @@ export class ManifestParser { totalStabilityChecks: stabilityCheckAttempts.toString(), }); - // Return undefined to trigger empty cache event - // This allows the extension to continue functioning with reduced capabilities - // rather than breaking completely return undefined; } - // Wait before retry to allow potential write operations to complete await new Promise((resolve) => setTimeout(resolve, 2000)); } } From 048e285f9f14b1331301dc37397641ef3df27cff Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Mon, 20 Jan 2025 11:54:09 -0800 Subject: [PATCH 3/3] pushing local changes --- package.json | 14 ++++++++++++++ src/manifest/parsers/index.ts | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index e31f63ebb..5052bd4a3 100644 --- a/package.json +++ b/package.json @@ -277,6 +277,20 @@ "description": "Debounce time in milliseconds for manifest rebuilds. Increase this value if you have many rapid file changes causing frequent rebuilds.", "minimum": 500, "maximum": 10000 + }, + "dbt.manifestStabilityChecks": { + "type": "number", + "default": 3, + "minimum": 1, + "maximum": 10, + "description": "Number of consecutive checks where file size must remain stable before reading manifest (only used when initial read fails)" + }, + "dbt.manifestStabilityDelay": { + "type": "number", + "default": 1000, + "minimum": 100, + "maximum": 5000, + "description": "Delay in milliseconds between manifest stability checks (only used when initial read fails)" } } } diff --git a/src/manifest/parsers/index.ts b/src/manifest/parsers/index.ts index 631a1c105..68ddbf67c 100755 --- a/src/manifest/parsers/index.ts +++ b/src/manifest/parsers/index.ts @@ -14,6 +14,7 @@ import { TestParser } from "./testParser"; import { TelemetryService } from "../../telemetry"; import { ExposureParser } from "./exposureParser"; import { MetricParser } from "./metricParser"; +import { workspace } from "vscode"; @provide(ManifestParser) export class ManifestParser { @@ -210,8 +211,12 @@ export class ManifestParser { // 3. Basic validation to verify manifest completeness const maxRetries = 3; - const stabilityDelay = 1000; // 1 second between file size checks - const maxStabilityChecks = 3; // Number of times file size must remain stable + const maxStabilityChecks = workspace + .getConfiguration("dbt") + .get("manifestStabilityChecks", 3); // Default to 3 checks + const stabilityDelay = workspace + .getConfiguration("dbt") + .get("manifestStabilityDelay", 1000); // Default to 1000ms let lastAttemptError: any; let lastFileSize: number = 0;