Skip to content

Commit

Permalink
detect invalid custom behaviors on load: (#450)
Browse files Browse the repository at this point in the history
- on first page, attempt to evaluate the behavior class to ensure it
compiles
- if fails to compile, log exception with fatal and exit
- update behavior gathering code to keep track of behavior filename
- tests: add test for invalid behavior which causes crawl to exit with
fatal exit code (17)
  • Loading branch information
ikreymer authored Dec 13, 2023
1 parent 3323262 commit 703835a
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 8 deletions.
21 changes: 19 additions & 2 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ export class Crawler {
done = false;

customBehaviors = "";
behaviorsChecked = false;
behaviorLastLine?: string;

browser: Browser;
Expand Down Expand Up @@ -620,6 +621,10 @@ self.__bx_behaviors.init(${this.params.behaviorOpts}, false);
${this.customBehaviors}
self.__bx_behaviors.selectMainBehavior();
`;
if (!this.behaviorsChecked && this.customBehaviors) {
await this.checkBehaviorScripts(cdp);
this.behaviorsChecked = true;
}

await this.browser.addInitScript(page, initScript);
}
Expand All @@ -628,13 +633,25 @@ self.__bx_behaviors.selectMainBehavior();
loadCustomBehaviors(filename: string) {
let str = "";

for (const source of collectAllFileSources(filename, ".js")) {
str += `self.__bx_behaviors.load(${source});\n`;
for (const { contents } of collectAllFileSources(filename, ".js")) {
str += `self.__bx_behaviors.load(${contents});\n`;
}

return str;
}

async checkBehaviorScripts(cdp: CDPSession) {
const filename = this.params.customBehaviors;

if (!filename) {
return;
}

for (const { path, contents } of collectAllFileSources(filename, ".js")) {
await this.browser.checkScript(cdp, path, contents);
}
}

async getFavicon(page: Page, logDetails: LogDetails): Promise<string> {
try {
const resp = await fetch("http://127.0.0.1:9221/json");
Expand Down
13 changes: 13 additions & 0 deletions src/util/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,19 @@ export class Browser {
return page.evaluateOnNewDocument(script);
}

async checkScript(cdp: CDPSession, filename: string, script: string) {
const { exceptionDetails } = await cdp.send("Runtime.evaluate", {
expression: script,
});
if (exceptionDetails) {
logger.fatal(
"Custom behavior load error, aborting",
{ filename, ...exceptionDetails },
"behavior",
);
}
}

async _init(
launchOpts: PuppeteerLaunchOptions,
// eslint-disable-next-line @typescript-eslint/ban-types
Expand Down
20 changes: 14 additions & 6 deletions src/util/file_reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export function collectAllFileSources(
fileOrDir: string,
ext?: string,
depth = 0,
): string[] {
): { path: string; contents: string }[] {
const resolvedPath = path.resolve(fileOrDir);

if (depth >= MAX_DEPTH) {
Expand All @@ -21,15 +21,23 @@ export function collectAllFileSources(

if (stat.isFile() && (ext === null || path.extname(resolvedPath) === ext)) {
const contents = fs.readFileSync(resolvedPath);
return [`/* src: ${resolvedPath} */\n\n${contents}`];
return [
{
path: resolvedPath,
contents: `/* src: ${resolvedPath} */\n\n${contents}`,
},
];
}

if (stat.isDirectory()) {
const files = fs.readdirSync(resolvedPath);
return files.reduce((acc: string[], next: string) => {
const nextPath = path.join(fileOrDir, next);
return [...acc, ...collectAllFileSources(nextPath, ext, depth + 1)];
}, []);
return files.reduce(
(acc: { path: string; contents: string }[], next: string) => {
const nextPath = path.join(fileOrDir, next);
return [...acc, ...collectAllFileSources(nextPath, ext, depth + 1)];
},
[],
);
}

if (depth === 0) {
Expand Down
15 changes: 15 additions & 0 deletions tests/custom-behavior.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,18 @@ test("test custom behaviors", async () => {
) > 0,
).toBe(true);
});

test("test invalid behavior exit", async () => {
let status = 0;

try {
child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
);
} catch (e) {
status = e.status;
}

// logger fatal exit code
expect(status).toBe(17);
});
20 changes: 20 additions & 0 deletions tests/invalid-behaviors/invalid-export.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
export class TestBehavior {
static init() {
return {
state: {},
};
}

static get id() {
return "TestBehavior";
}

static isMatch() {
return window.location.origin === "https://example.com";
}

async *run(ctx) {
ctx.log("In Test Behavior!");
yield ctx.Lib.getState(ctx, "test-stat");
}
}

0 comments on commit 703835a

Please sign in to comment.