Skip to content

Commit

Permalink
fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
stavares843 authored Jan 17, 2025
1 parent 10eea6a commit 9800feb
Showing 1 changed file with 12 additions and 59 deletions.
71 changes: 12 additions & 59 deletions tests/adblockrules.test.js
Original file line number Diff line number Diff line change
@@ -1,30 +1,15 @@
import child_process from "child_process";
import fs from "fs";
import yaml from "js-yaml";
import path from "path";

// Define an interface for the config object
interface CrawlConfig {
url: string;
blockAds?: boolean;
pageExtraDelay?: number;
generateCDX?: boolean;
depth?: number;
collection?: string;
}

// Function to run the crawl process
function runCrawl(name: string, config: CrawlConfig, commandExtra = "") {
// Ensure required config properties
function runCrawl(name, config, commandExtra = "") {
config.generateCDX = true;
config.depth = 0;
config.collection = name;

// Convert config to YAML
const configYaml = yaml.dump(config);

try {
// Execute the Docker command with the YAML config
const output = child_process.execSync(
`docker run -i -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --config stdin ${commandExtra}`,
{ input: configYaml, stdin: "inherit", encoding: "utf8" },
Expand All @@ -33,58 +18,26 @@ function runCrawl(name: string, config: CrawlConfig, commandExtra = "") {
console.log("Crawl completed successfully:", output);
} catch (error) {
console.error("Error during crawl process:", error);
throw error; // Rethrow the error if needed for higher-level handling
}
}

// Function to check if the CDX file contains a specific value
function doesCDXContain(coll: string, value: string): boolean {
const filePath = path.join("test-crawls", "collections", coll, "indexes", "index.cdxj");

try {
const data = fs.readFileSync(filePath, "utf8");
return data.includes(value);
} catch (error) {
console.error(`Error reading CDX file at ${filePath}:`, error);
return false; // Return false if the file can't be read
throw error;
}
}

// Helper function for tests to reduce duplication
function testCrawl(config: CrawlConfig, collectionName: string, expectedValue: string, shouldContain: boolean) {
runCrawl(collectionName, config);
const contains = doesCDXContain(collectionName, expectedValue);
expect(contains).toBe(shouldContain);
/**
 * Reports whether the CDXJ index of a crawl collection contains `value`.
 *
 * The index is read from
 * `test-crawls/collections/<coll>/indexes/index.cdxj`, resolved against the
 * current working directory. The read is not wrapped in a try/catch, so any
 * error raised by `fs.readFileSync` (for example a missing index) propagates
 * to the caller.
 *
 * @param coll  Name of the collection whose index is inspected.
 * @param value Text to look for in the index.
 * @returns `true` when the index contains `value`, otherwise `false`.
 */
function doesCDXContain(coll, value) {
  const indexPath = `test-crawls/collections/${coll}/indexes/index.cdxj`;
  // No encoding is passed, so this is a Buffer; Buffer#includes accepts a
  // string needle, which keeps the lookup working on the raw bytes.
  const indexContents = fs.readFileSync(indexPath);
  return indexContents.includes(value);
}

// Test cases
test("Test crawl with ad block for specific URL", () => {
const config: CrawlConfig = {
test("test crawl with ad block for specific URL", () => {
const config = {
url: "https://www.mozilla.org/en-US/firefox/",
blockAds: true,
};

testCrawl(
config,
"adblock-block",
"www.googletagmanager.com",
false, // Expect "www.googletagmanager.com" to NOT be included
);
});

// Test disabled for Brave -- should always be blocked, but we are seeing inconsistent CI behavior
/*
test("Test crawl without ad block for specific URL", () => {
const config: CrawlConfig = {
url: "https://www.mozilla.org/en-US/firefox/",
pageExtraDelay: 10,
};
runCrawl("adblock-block", config);

testCrawl(
config,
"adblock-no-block",
"www.googletagmanager.com",
true, // Expect "www.googletagmanager.com" to be included
expect(doesCDXContain("adblock-block", "www.googletagmanager.com")).toBe(
false,
);
});
*/

0 comments on commit 9800feb

Please sign in to comment.