Skip to content

Commit

Permalink
Add more exit codes to detect interruption reason (#764)
Browse files Browse the repository at this point in the history
Fix #584

- Replace interrupted with interruptReason
- Distinct exit codes for different interrupt reasons: SizeLimit (14), TimeLimit (15), FailedLimit (12), and DiskUtilization (16)
are now used when an interrupt happens for one of these reasons, in addition to the existing reasons BrowserCrashed (10),
SignalInterrupted (11), and SignalInterruptedForce (13)
- Doc fix to cli args

---------
Co-authored-by: Ilya Kreymer <[email protected]>
Co-authored-by: Ilya Kreymer <[email protected]>
  • Loading branch information
benoit74 authored Feb 10, 2025
1 parent 846f035 commit fc56c2c
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 52 deletions.
2 changes: 1 addition & 1 deletion docs/docs/user-guide/cli-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ Options:
--maxPageRetries, --retries If set, number of times to retry a p
age that failed to load before page
is considered to have failed
[number] [default: 1]
[number] [default: 2]
--failOnFailedSeed If set, crawler will fail with exit
code 1 if any seed fails. When combi
ned with --failOnInvalidStatus, will
Expand Down
70 changes: 45 additions & 25 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import {
PAGE_OP_TIMEOUT_SECS,
SITEMAP_INITIAL_FETCH_TIMEOUT_SECS,
ExitCodes,
InterruptReason,
} from "./util/constants.js";

import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
Expand Down Expand Up @@ -168,8 +169,7 @@ export class Crawler {

skipTextDocs = 0;

interrupted = false;
browserCrashed = false;
interruptReason: InterruptReason | null = null;
finalExit = false;
uploadAndDeleteLocal = false;
done = false;
Expand Down Expand Up @@ -307,7 +307,7 @@ export class Crawler {

this.healthChecker = null;

this.interrupted = false;
this.interruptReason = null;
this.finalExit = false;
this.uploadAndDeleteLocal = false;

Expand Down Expand Up @@ -596,11 +596,28 @@ export class Crawler {
} else if (stopped) {
status = "done";
logger.info("Crawl gracefully stopped on request");
} else if (this.interrupted) {
} else if (this.interruptReason) {
status = "interrupted";
exitCode = this.browserCrashed
? ExitCodes.BrowserCrashed
: ExitCodes.InterruptedGraceful;
switch (this.interruptReason) {
case InterruptReason.SizeLimit:
exitCode = ExitCodes.SizeLimit;
break;
case InterruptReason.BrowserCrashed:
exitCode = ExitCodes.BrowserCrashed;
break;
case InterruptReason.SignalInterrupted:
exitCode = ExitCodes.SignalInterrupted;
break;
case InterruptReason.DiskUtilization:
exitCode = ExitCodes.DiskUtilization;
break;
case InterruptReason.FailedLimit:
exitCode = ExitCodes.FailedLimit;
break;
case InterruptReason.TimeLimit:
exitCode = ExitCodes.TimeLimit;
break;
}
}
}
} catch (e) {
Expand Down Expand Up @@ -1378,7 +1395,7 @@ self.__bx_behaviors.selectMainBehavior();
}

async checkLimits() {
let interrupt = false;
let interrupt: InterruptReason | null = null;

const size = await this.updateCurrSize();

Expand All @@ -1387,7 +1404,7 @@ self.__bx_behaviors.selectMainBehavior();
logger.info(
`Size threshold reached ${size} >= ${this.params.sizeLimit}, stopping`,
);
interrupt = true;
interrupt = InterruptReason.SizeLimit;
}
}

Expand All @@ -1397,7 +1414,7 @@ self.__bx_behaviors.selectMainBehavior();
logger.info(
`Time threshold reached ${elapsed} > ${this.params.timeLimit}, stopping`,
);
interrupt = true;
interrupt = InterruptReason.TimeLimit;
}
}

Expand All @@ -1409,7 +1426,7 @@ self.__bx_behaviors.selectMainBehavior();
size,
);
if (diskUtil.stop === true) {
interrupt = true;
interrupt = InterruptReason.DiskUtilization;
}
}

Expand All @@ -1419,18 +1436,21 @@ self.__bx_behaviors.selectMainBehavior();
if (numFailed >= failedLimit) {
logger.fatal(
`Failed threshold reached ${numFailed} >= ${failedLimit}, failing crawl`,
{},
"general",
ExitCodes.FailedLimit,
);
}
}

if (interrupt) {
this.uploadAndDeleteLocal = true;
this.gracefulFinishOnInterrupt();
this.gracefulFinishOnInterrupt(interrupt);
}
}

gracefulFinishOnInterrupt() {
this.interrupted = true;
gracefulFinishOnInterrupt(interruptReason: InterruptReason) {
this.interruptReason = interruptReason;
logger.info("Crawler interrupted, gracefully finishing current pages");
if (!this.params.waitOnDone && !this.params.restartsOnError) {
this.finalExit = true;
Expand All @@ -1457,23 +1477,25 @@ self.__bx_behaviors.selectMainBehavior();
async serializeAndExit() {
await this.serializeConfig();

if (this.interrupted) {
await this.browser.close();
if (this.interruptReason) {
await closeWorkers(0);
await this.browser.close();
await this.closeFiles();

if (!this.done) {
await this.setStatusAndExit(
ExitCodes.InterruptedImmediate,
ExitCodes.SignalInterruptedForce,
"interrupted",
);
return;
}
}

await this.setStatusAndExit(ExitCodes.Success, "done");
}

async isCrawlRunning() {
if (this.interrupted) {
if (this.interruptReason) {
return false;
}

Expand All @@ -1495,6 +1517,7 @@ self.__bx_behaviors.selectMainBehavior();
this.healthChecker = new HealthChecker(
this.params.healthCheckPort,
this.params.workers,
this.browser,
async () => {
await this.updateCurrSize();
},
Expand Down Expand Up @@ -1726,7 +1749,7 @@ self.__bx_behaviors.selectMainBehavior();
if (
this.params.generateWACZ &&
!this.params.dryRun &&
(!this.interrupted || this.finalExit || this.uploadAndDeleteLocal)
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
) {
const uploaded = await this.generateWACZ();

Expand All @@ -1742,7 +1765,7 @@ self.__bx_behaviors.selectMainBehavior();
}
}

if (this.params.waitOnDone && (!this.interrupted || this.finalExit)) {
if (this.params.waitOnDone && (!this.interruptReason || this.finalExit)) {
this.done = true;
logger.info("All done, waiting for signal...");
await this.crawlState.setStatus("done");
Expand All @@ -1753,11 +1776,8 @@ self.__bx_behaviors.selectMainBehavior();
}

markBrowserCrashed() {
this.interrupted = true;
this.browserCrashed = true;
if (this.healthChecker) {
this.healthChecker.browserCrashed = true;
}
this.interruptReason = InterruptReason.BrowserCrashed;
this.browser.crashed = true;
}

async closeLog(): Promise<void> {
Expand Down
8 changes: 4 additions & 4 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { setExitOnRedisError } from "./util/redis.js";
import { Crawler } from "./crawler.js";
import { ReplayCrawler } from "./replaycrawler.js";
import fs from "node:fs";
import { ExitCodes } from "./util/constants.js";
import { ExitCodes, InterruptReason } from "./util/constants.js";

let crawler: Crawler | null = null;

Expand All @@ -29,9 +29,9 @@ async function handleTerminate(signame: string) {
try {
await crawler.checkCanceled();

if (!crawler.interrupted) {
logger.info("SIGNAL: gracefully finishing current pages...");
crawler.gracefulFinishOnInterrupt();
if (!crawler.interruptReason) {
logger.info("SIGNAL: interrupt request received...");
crawler.gracefulFinishOnInterrupt(InterruptReason.SignalInterrupted);
} else if (forceTerm || Date.now() - lastSigInt > 200) {
logger.info("SIGNAL: stopping crawl now...");
await crawler.serializeAndExit();
Expand Down
29 changes: 26 additions & 3 deletions src/util/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ import path from "path";
import { formatErr, LogContext, logger } from "./logger.js";
import { initStorage } from "./storage.js";

import { DISPLAY, type ServiceWorkerOpt } from "./constants.js";
import {
DISPLAY,
PAGE_OP_TIMEOUT_SECS,
type ServiceWorkerOpt,
} from "./constants.js";

import puppeteer, {
Frame,
Expand All @@ -20,6 +24,7 @@ import puppeteer, {
} from "puppeteer-core";
import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
import { Recorder } from "./recorder.js";
import { timedRun } from "./timing.js";

type BtrixChromeOpts = {
proxy?: string;
Expand All @@ -35,6 +40,7 @@ type LaunchOpts = {
// TODO: Fix this the next time the file is edited.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
emulateDevice?: Record<string, any>;

ondisconnect?: ((err: unknown) => NonNullable<unknown>) | null;

swOpt?: ServiceWorkerOpt;
Expand All @@ -61,6 +67,8 @@ export class Browser {

swOpt?: ServiceWorkerOpt = "disabled";

crashed = false;

constructor() {
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
}
Expand Down Expand Up @@ -364,9 +372,24 @@ export class Browser {
}

async close() {
if (this.browser) {
if (!this.browser) {
return;
}

if (!this.crashed) {
this.browser.removeAllListeners("disconnected");
await this.browser.close();
try {
await timedRun(
this.browser.close(),
PAGE_OP_TIMEOUT_SECS,
"Closing Browser Timed Out",
{},
"browser",
true,
);
} catch (e) {
// ignore
}
this.browser = null;
}
}
Expand Down
17 changes: 15 additions & 2 deletions src/util/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,21 @@ export enum ExitCodes {
Failed = 9,
OutOfSpace = 3,
BrowserCrashed = 10,
InterruptedGraceful = 11,
InterruptedImmediate = 13,
SignalInterrupted = 11,
FailedLimit = 12,
SignalInterruptedForce = 13,
SizeLimit = 14,
TimeLimit = 15,
DiskUtilization = 16,
Fatal = 17,
ProxyError = 21,
}

// Why a crawl was interrupted before normal completion. Stored on
// Crawler.interruptReason (null while the crawl is healthy) and mapped
// to a distinct process exit code in ExitCodes so a supervisor can
// distinguish interruption causes:
//   SizeLimit -> 14, TimeLimit -> 15, FailedLimit -> 12,
//   DiskUtilization -> 16, BrowserCrashed -> 10, SignalInterrupted -> 11.
// NOTE(review): the numeric values here look internal-only (exit codes
// come from ExitCodes, not from these) — confirm nothing persists them.
export enum InterruptReason {
  SizeLimit = 1, // --sizeLimit threshold reached (checkLimits)
  TimeLimit = 2, // --timeLimit elapsed time exceeded (checkLimits)
  FailedLimit = 3, // too many failed pages (--failOnFailedLimit)
  DiskUtilization = 4, // disk utilization check requested a stop
  BrowserCrashed = 5, // browser became unusable (markBrowserCrashed)
  SignalInterrupted = 6, // SIGINT/SIGTERM received (handleTerminate)
}
7 changes: 5 additions & 2 deletions src/util/healthcheck.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import http from "http";
import url from "url";
import { logger } from "./logger.js";
import { Browser } from "./browser.js";

// ===========================================================================
export class HealthChecker {
port: number;
errorThreshold: number;
healthServer: http.Server;
browserCrashed = false;
browser: Browser;

updater: (() => Promise<void>) | null;

Expand All @@ -16,9 +17,11 @@ export class HealthChecker {
constructor(
port: number,
errorThreshold: number,
browser: Browser,
updater: (() => Promise<void>) | null = null,
) {
this.port = port;
this.browser = browser;
this.errorThreshold = errorThreshold;

this.healthServer = http.createServer((...args) =>
Expand All @@ -34,7 +37,7 @@ export class HealthChecker {
const pathname = req.url ? url.parse(req.url).pathname : "";
switch (pathname) {
case "/healthz":
if (this.errorCount < this.errorThreshold && !this.browserCrashed) {
if (this.errorCount < this.errorThreshold && !this.browser.crashed) {
logger.debug(
`health check ok, num errors ${this.errorCount} < ${this.errorThreshold}`,
{},
Expand Down
2 changes: 1 addition & 1 deletion src/util/recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ export class Recorder {
while (
numPending &&
!this.pageFinished &&
!this.crawler.interrupted &&
!this.crawler.interruptReason &&
!this.crawler.postCrawling
) {
pending = [];
Expand Down
15 changes: 2 additions & 13 deletions src/util/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import { rxEscape } from "./seeds.js";
import { CDPSession, Page } from "puppeteer-core";
import { PageState, WorkerId } from "./state.js";
import { Crawler } from "../crawler.js";
import { PAGE_OP_TIMEOUT_SECS } from "./constants.js";

const MAX_REUSE = 5;

Expand Down Expand Up @@ -233,8 +232,7 @@ export class PageWorker {
}

if (retry >= MAX_REUSE) {
this.crawler.browserCrashed = true;
this.crawler.interrupted = true;
this.crawler.markBrowserCrashed();
throw new Error("Unable to load new page, browser needs restart");
}

Expand Down Expand Up @@ -433,16 +431,7 @@ export async function runWorkers(

await closeWorkers();

if (!crawler.browserCrashed) {
await timedRun(
crawler.browser.close(),
PAGE_OP_TIMEOUT_SECS,
"Closing Browser Timed Out",
{},
"worker",
true,
);
}
await crawler.browser.close();
}

// ===========================================================================
Expand Down
Loading

0 comments on commit fc56c2c

Please sign in to comment.