From 988c0f58e624f4382580ba4b075fad6be2b5dd5f Mon Sep 17 00:00:00 2001 From: alexburykin Date: Wed, 24 Jul 2024 13:23:50 +0100 Subject: [PATCH] code review --- .env.example | 17 + .github/workflows/review.yml | 30 + .gitignore | 3 + README copy.md | 107 + diffy-screenshots.js | 108 + docker-compose.yml | 7 + docker/Dockerfile | 32 + examples/README.md | 9 + examples/example_puppeteer.js | 18 + examples/example_s3.js | 14 + examples/example_sqs_receive.js | 13 + examples/example_sqs_send.js | 24 + index.js | 174 + lib/api.js | 205 ++ lib/chromiumBrowser.js | 56 + lib/executor.js | 94 + lib/func.js | 598 ++++ lib/funcPerform.js | 771 +++++ lib/jobs.js | 289 ++ lib/logger.js | 21 + lib/sqsSender.js | 206 ++ lib/thumbnail.js | 81 + lib/uploadS3.js | 76 + package-lock.json | 5759 +++++++++++++++++++++++++++++++ package.json | 43 + sonar-project.properties | 1 + test_jobs/screenshot1.json | 21 + 27 files changed, 8777 insertions(+) create mode 100644 .env.example create mode 100644 .github/workflows/review.yml create mode 100644 .gitignore create mode 100644 README copy.md create mode 100644 diffy-screenshots.js create mode 100644 docker-compose.yml create mode 100644 docker/Dockerfile create mode 100644 examples/README.md create mode 100644 examples/example_puppeteer.js create mode 100644 examples/example_s3.js create mode 100644 examples/example_sqs_receive.js create mode 100644 examples/example_sqs_send.js create mode 100644 index.js create mode 100644 lib/api.js create mode 100644 lib/chromiumBrowser.js create mode 100644 lib/executor.js create mode 100644 lib/func.js create mode 100644 lib/funcPerform.js create mode 100644 lib/jobs.js create mode 100644 lib/logger.js create mode 100644 lib/sqsSender.js create mode 100644 lib/thumbnail.js create mode 100644 lib/uploadS3.js create mode 100644 package-lock.json create mode 100755 package.json create mode 100644 sonar-project.properties create mode 100644 test_jobs/screenshot1.json diff --git a/.env.example 
b/.env.example new file mode 100644 index 0000000..c9ab332 --- /dev/null +++ b/.env.example @@ -0,0 +1,17 @@ +NODE_ENV=dev +DEBUG=true + +# Variables for local worker. +API_KEY=XXX +PROJECT_ID=XXX + +# Diffy production variables for AWS infrastructure. These are not needed for local worker. +JOB_QUEUE_NAME= +RESULTS_QUEUE_NAME= +APP_AWS_REGION= +AWS_ACCOUNT_ID= +MAX_ATTEMPTS= +S3_ACCESS_KEY_ID= +SE_ACCESS_KEY_SECRET= +S3_BUCKET= +PROXY= diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml new file mode 100644 index 0000000..0f20f5c --- /dev/null +++ b/.github/workflows/review.yml @@ -0,0 +1,30 @@ +name: Review + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] + +jobs: + build: + name: Build + runs-on: ubuntu-latest + permissions: read-all + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + - uses: sonarsource/sonarqube-scan-action@master + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }} + # If you wish to fail your job when the Quality Gate is red, uncomment the + # following lines. This would typically be used to fail a deployment. + # We do not recommend to use this in a pull request. Prefer using pull request + # decoration instead. + - uses: sonarsource/sonarqube-quality-gate-action@master + timeout-minutes: 5 + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba0ca8b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.idea +node_modules diff --git a/README copy.md b/README copy.md new file mode 100644 index 0000000..f6f8763 --- /dev/null +++ b/README copy.md @@ -0,0 +1,107 @@ +This is the code of the screenshot worker that runs on production for Diffy (https://diffy.website). + +By open sourcing it we allow local development integrations (i.e. DDEV, Lando). 
+ +To start container (default platform is needed if you are on M1 processor) + +```shell +docker-compose -f docker-compose.yml up +``` + +Login to container + +```shell +docker-compose -f docker-compose.yml exec node bash +cd /app +``` + +To start an app with a test job +```shell +node index.js --file=test_jobs/screenshot1.json +``` + +List of compatible versions of puppeteer and Chrome +https://pptr.dev/supported-browsers + +To install specific version of Chromium +https://www.chromium.org/getting-involved/download-chromium/ + +Chromium 111 was installed from specific source +```shell +add-apt-repository ppa:saiarcot895/chromium-dev +apt update +apt-get install chromium-browser +chromium-browser --version +``` + +Create a job in SQS. Once created edit it and clear "Access policy" section. + +Additionally installed fonts on production workers: +```shell +apt-get update && apt-get install -y fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst --no-install-recommends +apt-get install ttf-mscorefonts-installer +apt-get install fonts-ubuntu fonts-noto-color-emoji fonts-noto-cjk fonts-ipafont-gothic fonts-wqy-zenhei fonts-kacst fonts-freefont-ttf fonts-liberation fonts-thai-tlwg fonts-indic +apt-get install fonts-lato fonts-open-sans fonts-roboto +apt install fonts-dejavu-core + +fc-cache -f -v +``` + +To check fonts +fc-match system-ui + +### Chrome version validation + +To validate Chrome run screenshot on https://vrt-test.diffy.website + +Project's settings: +```YAML +basic: + name: 'Chrome validation 1' + environments: + production: 'https://vrt-test.diffy.website' + staging: '' + development: '' + breakpoints: + - 1200 + pages: + - / + monitoring: + days: { } + type: '' + schedule_time: '12:30 AM' + schedule_time_zone: Europe/London + compare_with: last +advanced: + mask: '' + remove: '#mask' + isolate: '#remove' + delay: 10 + scroll: true + headers: + - { value: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 
Firefox/46.0', header: User-Agent } + cookies: CUSTOM=cookie; + custom_js: "var div = document.getElementById('custom-javascript');\ndiv.innerHTML += ' Extra content added!';" + custom_css: "#custom-css {\n background-color: red;\n}" + mock_content: + - { type: title, selector: '#timestamp' } + login: + type: '' + click_element: false + click_element_selector: '' + login_url: '' + username: '' + password: '' + username_selector: '' + password_selector: '' + submit_selector: '' + after_login_selector: '' + performance: + workers_production: 30 + workers_nonproduction: 10 + workers_production_delay: 0 + workers_nonproduction_delay: 0 + stabilize: true + +``` + diff --git a/diffy-screenshots.js b/diffy-screenshots.js new file mode 100644 index 0000000..7a83085 --- /dev/null +++ b/diffy-screenshots.js @@ -0,0 +1,108 @@ +// Example to run +// node diffy-screenshots.js --url=https://diffy.website + +const debug = false + +require('dotenv').config(); + +const { Logger } = require('./lib/logger') +const logger = new Logger(debug); + +const { Jobs } = require('./lib/jobs') +const jobs = new Jobs(logger) + +const { Api } = require('./lib/api.js') +let api + +const process = require("process"); +const fs = require("fs"); + +const apiKey = process.env.API_KEY || '' +if (apiKey == '') { + console.error('Add Diffy API key to .env file. API_KEY=XXX'); + return; +} +const projectId = process.env.PROJECT_ID || '' +if (projectId == '') { + console.error('Add Diffy API project ID .env file. PROJECT_ID=XXX'); + return; +} + +const diffyUrl = 'https://app.diffy.website/api' +const diffyWebsiteUrl = 'https://app.diffy.website/#' + +var argv = require('minimist')(process.argv.slice(2)); + + +async function end () { + try { + // Remove tmp files. 
+    // func.cleanTmpDir()
+  } catch (e) {
+    console.error(e.message)
+  }
+  process.exit(1)
+}
+
+process.once('SIGTERM', end)
+process.once('SIGINT', end)
+
+process.on('uncaughtException', async (e) => {
+  console.error('Unhandled exception:', e)
+  await end()
+});
+
+process.on('unhandledRejection', async (reason, p) => {
+  console.error('Unhandled Rejection at: Promise', p, 'reason:', reason)
+  await end()
+});
+
+(async () => {
+  if (argv.url === undefined) {
+    console.error('Provide --url parameter. Example --url="https://diffy.website"');
+    return end() // Nothing can be captured without a URL; end() exits with a failure status.
+  }
+  const screenshotName = argv['screenshot-name'] ? argv['screenshot-name'] : argv.url;
+  try {
+    api = new Api(diffyUrl, apiKey, projectId, logger)
+    await api.login()
+    const project = await api.getProject()
+    const jobsList = jobs.prepareJobs(argv.url, project)
+
+    const execSync = require('node:child_process').execSync;
+    const outputFilepath = '/tmp/screenshot-results.json';
+    const inputFilepath = '/tmp/screenshot-input.json';
+    let uploadItems = [];
+    for (let i = 0; i < jobsList.length; i++) {
+      let jsonJob = JSON.stringify(jobsList[i]);
+      try {
+        fs.writeFileSync(inputFilepath, jsonJob);
+      } catch (err) {
+        console.error(err);
+      }
+      console.log('Staring screenshot ' + (i + 1) + ' of ' + jobsList.length);
+      execSync('node ./index.js --local=true --output-filepath=\'' + outputFilepath + '\' --file=\'' + inputFilepath + '\'', {stdio: 'inherit'});
+      console.log('Completed screenshot ' + (i + 1) + ' of ' + jobsList.length);
+      const resultsContent = fs.readFileSync(outputFilepath, 'utf8');
+      console.log(resultsContent);
+      let result = JSON.parse(resultsContent);
+      uploadItems.push({
+        status: true,
+        breakpoint: jobsList[i].params.breakpoint,
+        uri: jobsList[i].params.uri,
+        filename: result.screenshot,
+        htmlFilename: result.html,
+        jsConsoleFilename: result.jsConsole
+      });
+    }
+
+    // Send screenshots to Diffy.
+    const screenshotId = await api.uploadScreenshots(screenshotName, uploadItems)
+    console.log('Diffy screenshot url: ', `${diffyWebsiteUrl}/snapshots/${screenshotId}`)
+
+    process.exit(0) // Success: end() is reserved for failures and signals because it always exits with status 1.
+  } catch (e) {
+    console.error('ERROR:', e.message)
+    await end()
+  }
+})()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..20bac8f
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,7 @@
+services:
+  node:
+    build: './docker/'
+    volumes:
+      - "./:/app"
+    command: tail -f /dev/null
+    tty: true
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..e97db17
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,32 @@
+FROM --platform=linux/arm64 ubuntu:22.04
+
+ENV DEBIAN_FRONTEND noninteractive
+RUN apt-get update
+RUN apt-get install -y gconf-service apt-transport-https ca-certificates libssl-dev wget libasound2 libatk1.0-0 libcairo2 libcups2 libfontconfig1 libgdk-pixbuf2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libxss1 fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils curl build-essential tar gzip findutils net-tools dnsutils telnet ngrep tcpdump
+RUN apt-get install software-properties-common -y
+RUN add-apt-repository ppa:saiarcot895/chromium-dev
+
+RUN apt update
+RUN apt-get install -y chromium-browser
+
+ENV NODE_VERSION 22.5.1
+
+RUN ARCH=arm64 \
+  && curl -fsSLO --compressed "https://nodejs.org/dist/v$NODE_VERSION/node-v$NODE_VERSION-linux-$ARCH.tar.xz" \
+  && tar -xJf "node-v$NODE_VERSION-linux-$ARCH.tar.xz" -C /usr/local --strip-components=1 --no-same-owner \
+  && rm "node-v$NODE_VERSION-linux-$ARCH.tar.xz" \
+  && ln -s /usr/local/bin/node /usr/local/bin/nodejs \
+  # smoke tests
+  && node --version \
+  && npm --version
+
+RUN ARCH=arm64 \
+  && npm install -g npm@10.8.2
+
+RUN apt install -y imagemagick
+
+# Install all the fonts.
+RUN apt-get install -y --no-install-recommends fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst ttf-mscorefonts-installer fonts-ubuntu fonts-noto-color-emoji fonts-noto-cjk fonts-ipafont-gothic fonts-wqy-zenhei fonts-kacst fonts-freefont-ttf fonts-liberation fonts-thai-tlwg fonts-indic fonts-lato fonts-open-sans fonts-roboto fonts-dejavu-core
+RUN fc-cache -f -v
+
+#ENTRYPOINT ["/bin/sh", "-c", "bash"]
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..7587586
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,9 @@
+Scripts used to test various subsystems in isolation i.e. puppeteer, s3 uploads, sqs
+
+To run test scripts make sure to copy .env file to the "examples" folder.
+
+SQS tests
+```shell
+node example_sqs_send.js ../test_jobs/screenshot1.json
+node example_sqs_receive.js
+```
diff --git a/examples/example_puppeteer.js b/examples/example_puppeteer.js
new file mode 100644
index 0000000..4f6917a
--- /dev/null
+++ b/examples/example_puppeteer.js
@@ -0,0 +1,18 @@
+const puppeteer = require('puppeteer');
+
+(async () => {
+  const browser = await puppeteer.launch({
+      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      defaultViewport: {width: 800, height: 600},
+      // executablePath: '/app/chromium/linux-1083080/chrome-linux/chrome',
+      headless: 'shell',
+      dumpio: false,
+      ignoreHTTPSErrors: true
+    }
+  );
+  const page = await browser.newPage();
+  await page.goto('https://www.freecodecamp.org/');
+  await page.screenshot({path: 'freecodecamp.png'});
+
+  await browser.close();
+})();
diff --git a/examples/example_s3.js b/examples/example_s3.js
new file mode 100644
index 0000000..edc82fc
--- /dev/null
+++ b/examples/example_s3.js
@@ -0,0 +1,14 @@
+require('dotenv').config();
+const process = require('process');
+
+const uploadS3 = require("../lib/uploadS3");
+const filename = '/app/screenshot-1714780252-73939221.webp';
+
+(async () => {
+  const s3Url = await uploadS3.upload(filename).catch((err) => {
+    throw new Error('Can\'t upload screenshot: ' + err.name + ': ' + ((err && err.hasOwnProperty('message')) ? err.message : err)) // Ternary parenthesized: "+" binds tighter than "?:".
+  })
+
+  console.log(s3Url);
+
+})()
diff --git a/examples/example_sqs_receive.js b/examples/example_sqs_receive.js
new file mode 100644
index 0000000..d59fb27
--- /dev/null
+++ b/examples/example_sqs_receive.js
@@ -0,0 +1,13 @@
+require('dotenv').config();
+const process = require('process');
+
+const { SqsSender, maxAttempts } = require('../lib/sqsSender');
+
+(async () => {
+  const sqsSender = new SqsSender(true, false);
+  let messages = await sqsSender.fetchSQSJob();
+  console.log(messages, 'received');
+
+  await sqsSender.deleteSQSMessage(messages[0]);
+})()
+
diff --git a/examples/example_sqs_send.js b/examples/example_sqs_send.js
new file mode 100644
index 0000000..4e98ad5
--- /dev/null
+++ b/examples/example_sqs_send.js
@@ -0,0 +1,24 @@
+require('dotenv').config();
+const process = require('process');
+
+const { SqsSender, maxAttempts } = require('../lib/sqsSender')
+const fs = require("fs");
+
+if (process.argv[2] === undefined) {
+  console.log('Error. Specify file to json encoded job to post to SQS')
+  process.exit(1); // Non-zero status so callers can detect the usage error.
+}
+let fileContent;
+try {
+  fileContent = fs.readFileSync(process.argv[2], 'utf8');
+} catch (err) {
+  console.error(err);
+  process.exit(1); // Non-zero status so callers can detect the read failure.
+}
+
+(async () => {
+  const sqsSender = new SqsSender(true, false);
+  const result = await sqsSender.sendSQSJob(JSON.parse(fileContent));
+  console.log(result);
+  console.log('Job is sent ' + fileContent);
+})()
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..3e6c237
--- /dev/null
+++ b/index.js
@@ -0,0 +1,174 @@
+// Script has following optional parameters:
+// file -- path to local json file with arguments for creating screenshots
+// local -- whether to store resulting image locally or upload it to AWS (Diffy's production default mode)
+// file-content -- if we pass job file as json as parameter
+// output-filepath -- path to a file to save the results in json format. Used by wrapper.
+
+require('dotenv').config();
+
+const debug = false
+const timeout = 3 * 60 * 1000; // Timeout in milliseconds.
+
+const process = require('process');
+const { performance } = require('perf_hooks')
+const { Executor } = require('./lib/executor')
+const { Logger } = require('./lib/logger')
+const { ChromiumBrowser } = require('./lib/chromiumBrowser')
+const { SqsSender, maxAttempts } = require('./lib/sqsSender')
+
+var argv = require('minimist')(process.argv.slice(2));
+const local = argv.local ? argv.local : false;
+const jobFile = argv.file !== undefined;
+const jobFileContent = argv['file-content'] !== undefined ? argv['file-content'] : false;
+const outputFilepath = argv['output-filepath'] !== undefined ? argv['output-filepath'] : false;
+const isSqs = !jobFile && !jobFileContent;
+
+const sqsSender = new SqsSender(debug, local);
+
+let message;
+
+const fs = require('fs');
+// When manually passed json file to the script. Used for testing.
+if (jobFile) {
+  try {
+    const fileContent = fs.readFileSync(argv.file, 'utf8');
+    // Example of SQS message https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html
+    message = {
+      "Body": fileContent,
+      // Flag to save file locally and exit instead of creating thumbnails and uploading to S3.
+      'local': local
+    };
+  } catch (err) {
+    console.error(err);
+  }
+}
+
+// We also accept job message as JSON encoded string. Used in local worker wrapper.
+if (jobFileContent) {
+  try {
+    // Example of SQS message https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html
+    message = {
+      "Body": jobFileContent,
+      // Flag to save file locally and exit instead of creating thumbnails and uploading to S3.
+      'local': local
+    };
+  } catch (err) {
+    console.error(err);
+  }
+}
+
+async function end () {
+  try {
+    // Remove tmp files.
+    // func.cleanTmpDir()
+  } catch (e) {
+    console.error(e.message)
+  }
+  process.exit(1)
+}
+process.once('SIGTERM', end)
+process.once('SIGINT', end)
+process.on('uncaughtException', (e) => {
+  console.error('Unhandled exception:', e)
+  process.exit(6)
+})
+process.on('unhandledRejection', (reason, p) => {
+  console.error('Unhandled Rejection at: Promise', p, 'reason:', reason)
+})
+
+const logger = new Logger();
+
+(async () => {
+  if (isSqs) {
+    let messages = await sqsSender.fetchSQSJob();
+    message = messages[0];
+  }
+
+  let browser = null
+  let data = null
+  let results = []
+  let handlerTimeExecuteStart = performance.now();
+  const executor = new Executor(debug, local);
+  const chromiumBrowser = new ChromiumBrowser(debug, local)
+
+  // Stop process after a timeout.
+  const shutdownTimeout = setTimeout(async () => {
+    try {
+      const result = await executor.timeout(handlerTimeExecuteStart)
+      executor.shutdown()
+      console.log(result);
+      process.exit(1); // Failure code returned.
+    } catch (e) {
+      console.log(e);
+      process.exit(1); // Failure code returned.
+    }
+  }, timeout);
+
+  try {
+    const proxy = process.env.PROXY;
+    browser = await chromiumBrowser.getBrowser(proxy)
+    results = await run(message, browser, executor, data);
+    // If we use local json file we are debugging.
+    if (debug || jobFile || jobFileContent) {
+      console.log(results);
+    }
+    if (outputFilepath) {
+      fs.writeFile(outputFilepath, JSON.stringify(results[0]), err => {
+        if (err) {
+          console.error(err);
+        }
+      });
+    }
+  } catch (err) {
+    process.exitCode = 1 // Keep a failure status now that the process ends without waiting for the shutdown timer.
+    return console.log(err)
+  } finally {
+    // Runs on success and failure: a still-armed shutdown timer would keep the process alive and fire the timeout path.
+    clearTimeout(shutdownTimeout)
+    await closeBrowser(browser)
+    await chromiumBrowser.closeProxy();
+  }
+
+  if (isSqs) {
+    await sqsSender.deleteSQSMessage(message);
+  }
+})()
+
+
+/**
+ * Close the browser.
+ *
+ * @param browser
+ * @return {Promise}
+ */
+const closeBrowser = async (browser) => {
+  if (browser !== null) {
+    try {
+      await browser.close()
+    } catch (e) {
+      logger.error('Can\'t close Browser', e)
+    }
+  }
+}
+
+/**
+ * Parse events and run executor.
+ *
+ * @param message
+ * @param browser
+ * @param executor
+ * @param data
+ * @return {Promise<[]>}
+ */
+const run = async (message, browser, executor, data) => {
+  let result = null
+  const results = []
+  if (message.hasOwnProperty('Body')) {
+    data = JSON.parse(message.Body);
+    data.params.local = message.local;
+
+    result = await executor.run(browser, data)
+    results.push(result)
+  }
+  return results
+}
diff --git a/lib/api.js b/lib/api.js
new file mode 100644
index 0000000..d223645
--- /dev/null
+++ b/lib/api.js
@@ -0,0 +1,205 @@
+const request = require('request')
+const fs = require('fs-extra')
+
+class Api {
+  constructor (diffyUrl, apiKey, projectId, logger) {
+    this.logger = logger
+    this.diffyUrl = diffyUrl
+    this.apiKey = apiKey
+    this.projectId = projectId
+    this.token = ''
+    this.uploadScreenshotTimeout = (process.env.UPLOAD_SCREENSHOT_TIMEOUT && process.env.UPLOAD_SCREENSHOT_TIMEOUT.length) ?
process.env.UPLOAD_SCREENSHOT_TIMEOUT : 600000 + this.defaultRequestTimeout = (process.env.DEFAULT_REQUEST_TIMEOUT && process.env.DEFAULT_REQUEST_TIMEOUT.length) ? process.env.DEFAULT_REQUEST_TIMEOUT : 30000 + } + + /** + * Api login action. + * + * @returns {Promise<*>} + */ + async login () { + const url = `${this.diffyUrl}/auth/key` + let body + try { + body = await this._makePostRequest(url, { key: this.apiKey }) + this.logger.log(body, 'Login') + if (body && body.hasOwnProperty('token')) { + this.token = body.token + return body.token + } else { + throw new Error('Can\'t login') + } + } catch (e) { + if (!e.hasOwnProperty('message')) { + throw new Error(JSON.stringify(e)) + } else { + throw new Error(e.message) + } + } + } + + /** + * Api get project settings action. + * + * @returns {Promise<{name}|*>} + */ + async getProject () { + const url = `${this.diffyUrl}/projects/${this.projectId}` + let project + try { + project = await this._makeGetRequest(url) + this.logger.log(project, 'Get project') + if (project && project.hasOwnProperty('name')) { + return project + } else { + throw new Error('Can\'t get project') + } + } catch (e) { + if (!e.hasOwnProperty('message')) { + throw new Error(JSON.stringify(e)) + } else { + throw new Error(e.message) + } + } + } + + async uploadScreenshots (snapshotName, results) { + const url = `${this.diffyUrl}/projects/${this.projectId}/create-custom-snapshot?Xlogger_SESSION_START=PHPSTORM` + const formData = { + snapshotName + } + + results.forEach((item, i) => { + formData['urls[' + i + ']'] = item.uri + formData['breakpoints[' + i + ']'] = item.breakpoint + formData['files[' + i + ']'] = fs.createReadStream(item.filename) + formData['htmlFiles[' + i + ']'] = fs.createReadStream(item.htmlFilename) + formData['jsConsoleFiles[' + i + ']'] = fs.createReadStream(item.jsConsoleFilename) + }) + + this.logger.log(`Files: ${results.length}`, 'Sending screensot to Diffy') + + const screenshotId = await this._makePostRequest(url, 
formData, true, true, this.uploadScreenshotTimeout) + this.logger.log(`Saved screenshot id: ${screenshotId}`) + return screenshotId + } + + /** + * Make GET request. + * + * @param url + * @returns {Promise} + * @private + */ + async _makeGetRequest (url, timeout = this.defaultRequestTimeout) { + const options = { + url, + timeout, + headers: { + 'Content-Type': 'application/json' + }, + auth: { + bearer: this.token + } + } + + return new Promise((resolve, reject) => { + request.get(options, function (err, res, body) { + if (err) { + return reject(err) + } + + if (!res || !res.hasOwnProperty('statusCode')) { + return reject(new Error('Can\'t resolve GET request')) + } + + if (res.statusCode !== 200) { + try { + const result = JSON.parse(body) + if (result.hasOwnProperty('message') && result.hasOwnProperty('code')) { + return reject(result) + } + if (result.hasOwnProperty('errors')) { + return reject(result.errors) + } else { + return reject(res.statusMessage) + } + } catch (e) { + return reject(res.statusMessage) + } + } + + try { + const result = JSON.parse(body) + return resolve(result) + } catch (e) { + return reject(body) + } + }) + }) + } + + /** + * Make POST request. + * + * @param url + * @param postBody + * @returns {Promise} + * @private + */ + async _makePostRequest (url, postBody, useAuth = false, multipartFormData = false, timeout = this.defaultRequestTimeout) { + const options = { + url, + timeout, + headers: { + 'Content-Type': (multipartFormData) ? 
'multipart/form-data' : 'application/json' + } + } + + if (multipartFormData) { + options.formData = postBody + } else { + options.body = JSON.stringify(postBody) + } + + if (useAuth) { + options.auth = { + bearer: this.token + } + } + + return new Promise((resolve, reject) => { + request.post(options, function (err, res, body) { + if (err) { + return reject(err) + } + + if (!res || !res.hasOwnProperty('statusCode')) { + return reject(new Error('Can\'t resolve POST request')) + } + + if (res.statusCode !== 200) { + try { + const result = JSON.parse(body) + if (result.hasOwnProperty('message') && result.hasOwnProperty('code')) { + return reject(new Error(result.message)) + } else { + return reject(new Error(body)) + } + } catch (e) { + return reject(new Error(res.statusMessage + ' => ' + e.message)) + } + } + + try { + const result = JSON.parse(body) + return resolve(result) + } catch (e) { + return reject(new Error(body)) + } + }) + }) + } +} + +module.exports = { Api } diff --git a/lib/chromiumBrowser.js b/lib/chromiumBrowser.js new file mode 100644 index 0000000..f4d9e5d --- /dev/null +++ b/lib/chromiumBrowser.js @@ -0,0 +1,56 @@ +// const chromium = require('@sparticuz/chromium') +const proxyChain = require('proxy-chain') +const puppeteer = require('puppeteer-core') + +class ChromiumBrowser { + args = ['--no-sandbox', '--disable-setuid-sandbox', '--disable-web-security'] + browser = null + debug = false + local = false + + // This is where Chromium got installed in the docker box. 
+ localExecutivePath = '/usr/bin/chromium-browser' + anonymizedProxy = null + + constructor (debug = false, local = false) { + this.debug = debug + this.local = local + // this.args = chromium.args + // https://peter.sh/experiments/chromium-command-line-switches/ + // if (this.debug) { + // this.args.push('--full-memory-crash-report') + // } + // this.args.push('--ignore-certificate-errors') + // this.args.push('--force-gpu-mem-available-mb=4096') + // this.args.push('--disable-gpu') + } + + /** + * Get browser. + * @return {Promise} + */ + async getBrowser (proxy) { + if (typeof proxy != 'undefined') { + this.anonymizedProxy = await proxyChain.anonymizeProxy(proxy); + this.args.push(`--proxy-server=${this.anonymizedProxy}`); + } + + return puppeteer.launch({ + args: this.args, + defaultViewport: { width: 800, height: 600}, + executablePath: this.localExecutivePath, + headless: 'shell', + dumpio: this.debug, + ignoreHTTPSErrors: true, + }) + } + + async closeProxy () { + if (this.anonymizedProxy) { + await proxyChain.closeAnonymizedProxy(this.anonymizedProxy, true); + this.anonymizedProxy = null; + } + } +} + +module.exports = { ChromiumBrowser } diff --git a/lib/executor.js b/lib/executor.js new file mode 100644 index 0000000..6e22e75 --- /dev/null +++ b/lib/executor.js @@ -0,0 +1,94 @@ +const { performance } = require('perf_hooks') +const funcPerform = require('./funcPerform.js') +const { SqsSender, maxAttempts } = require('./sqsSender') +const { Logger } = require('./logger') + +class Executor { + + item = null + debug = false + local = false + + constructor (debug = false, local = false) { + this.sqsSender = new SqsSender(debug,local) + this.debug = debug + this.logger = new Logger(this.debug) + this.local = local + } + + /** + * Execute screenshot job. 
+ * + * @param browser + * @param data + * @return {Promise<*|null>} + */ + async run (browser, data) { + let result = null + let timeExecuteStart + + timeExecuteStart = performance.now() + this.item = data + try { + result = await funcPerform.perform(browser, data, data.params) + + if (this.local) { + return result + } + + if (result && result.hasOwnProperty('status') && result.status) { + await this.sqsSender.sendSQSResult(result, timeExecuteStart) + } else { + funcPerform.debugLog('Run error result', data.params, result) + await this.sqsSender.resend(data, timeExecuteStart, result.hasOwnProperty('err') ? result.err : result) + } + return result + } catch (e) { + if (this.local) { + this.logger.error(e) + return result + } + + funcPerform.debugLog('Run error', data.params, e) + + await this.sqsSender.resend(data, timeExecuteStart, e) + return result + } + } + + /** + * Timeout handler. + * + * @param handlerTimeExecuteStart + * @return {Promise<*>} + */ + async timeout (handlerTimeExecuteStart) { + this.logger.log('timeout: ', this.item) + if (this.item && this.item.hasOwnProperty('attempts') && this.item.attempts < maxAttempts) { + this.logger.log('timeout-resend') + await this.sqsSender.resend(this.item, handlerTimeExecuteStart) + throw new Error(`Timeout: resend to sqs. 
Attempts: ${this.item.attempts}`) + } else if (this.item && this.item.hasOwnProperty('params')) { + this.logger.log('timeout-send-result') + const result = await funcPerform.saveTimeoutError(this.item, this.item.params) + await this.sqsSender.sendSQSResult(result, handlerTimeExecuteStart) + return result + } else { + let data = this.item + try { + data = JSON.stringify(data) + } catch (e) { + this.logger.error('timeout-error', e) + } + + throw new Error(`Timeout: Wrong params format: ${data}`) + } + } + + shutdown () { + this.item = null + } + +} + +module.exports = { Executor } diff --git a/lib/func.js b/lib/func.js new file mode 100644 index 0000000..5f9f1df --- /dev/null +++ b/lib/func.js @@ -0,0 +1,598 @@ +const fs = require('fs') +const url = require('url') +const im = require('imagemagick') + +const checkArgs = (obj, field, checkLength = false) => { + let result = (obj.hasOwnProperty('args') && obj.args && obj.args.hasOwnProperty(field)) + if (checkLength) { + return (result && obj.args[field].length) + } else { + return result + } +} + +const updatePageViewport = async (page, job, maxPageHeight = null) => { + let scrollHeight = await page.evaluate(`(async () => { + return document.documentElement.scrollHeight; + })()`) + + if (maxPageHeight && scrollHeight > maxPageHeight) { + scrollHeight = maxPageHeight + } + + await page.setViewport({ width: parseInt(job.breakpoint), height: parseInt(scrollHeight) }) + await page.waitForTimeout(1000) + return scrollHeight +} + +const random = (low, high) => { + return Math.floor(Math.random() * (high - low) + low) +} + +const awaitResponse = async (page) => { + const MAX_WAITING_TIME_ACCESS_URL = 10000 + let responseEventOccurred = false + const responseHandler = () => (responseEventOccurred = true) + + const responseWatcher = new Promise(function (resolve) { + setTimeout(() => { + if (!responseEventOccurred) { + resolve() + } else { + setTimeout(() => resolve(), MAX_WAITING_TIME_ACCESS_URL) + } + 
page.removeListener('response', responseHandler) + }, 500) + }) + + page.on('response', responseHandler) + + return Promise.race([ + responseWatcher, + page.waitForNavigation() + ]) +} + +module.exports = { + checkArgs: (obj, field, checkLength = false) => { + return checkArgs(obj, field, checkLength) + }, + + autoScroll: async (page, job) => { + if (!checkArgs(job, 'scroll_step')) { + return Promise.resolve() + } + + let scrollHeight = 0; + let totalHeight = 0; + + do { + // Need to wait if page is reloaded (see: https://github.com/ygerasimov/diffy-pm/issues/122) + await page.waitForSelector('body') + scrollHeight = await page.evaluate('document.body.scrollHeight'); + + await page.waitForSelector('body') + await page.evaluate('window.scrollBy(0, 100)'); + totalHeight += 100; + + await page.waitForTimeout(100); + } while (totalHeight < scrollHeight) + + try { + await page.evaluate('window.scrollTo(0, 0)') + await page.waitForTimeout(500); + } catch (e) {} + + return Promise.resolve(); + }, + + cutElements: async (page, job) => { + if (!checkArgs(job, 'cut_elements', true)) { + return Promise.resolve() + } + + return page.evaluate((_elements) => { + try { + window.scrollTo(0, 0) + } catch (e) {} + + _elements.forEach((selector) => { + selector = selector.trim(); + + if (selector.length) { + document.querySelectorAll(selector) + .forEach((element) => { + element.remove(); + }); + } + }) + + return true + }, job.args.cut_elements) + }, + + addJsCode: async (page, job) => { + if (!checkArgs(job, 'js_code', true)) { + return Promise.resolve() + } + + try { + await page.evaluate(job.args.js_code) + } catch (e) { + console.error(e) + } + return page.waitForTimeout(2000) + }, + + addCssCode: async (page, job) => { + if (!checkArgs(job, 'css_code', true)) { + return Promise.resolve() + } + + try { + await page.addStyleTag({ content: job.args.css_code }) + } catch (e) { + console.error(e) + } + + return page.waitForTimeout(2000) + }, + + addFixtures: async (page, job) => { + 
if (!checkArgs(job, 'fixtures', true)) { + return Promise.resolve() + } + + await page.evaluate((_fixtures) => { + let selector + let content + let type + let i + let l + let fixturePromises = [] + + function diffyImageFixture (el, selector) { + return new Promise((resolve) => { + try { + const w = el.width || null + const h = el.height || null + const src = el.src || null + + // console.log('[diffyImageFixture] Processing element: ', selector, src, h, w) + + if (src && w && h) { + el.addEventListener('load', () => { + resolve(); + }); + el.addEventListener('error', () => { + resolve(); + }); + + // @TODO add timeout in case image is not loaded + + /** + * @TODO check if we want to depend on picsum.photos service + * idea: copy images for all resolutions to s3 and expose via cloudfront (fast and stable) + */ + + el.src = `https://picsum.photos/id/0/${w}/${h}` + + // console.log('[diffyImageFixture] New src: ', selector, src, el.src) + + if (el.hasAttribute('data-src')) { + el.setAttribute('data-src', el.src) + } + + if (el.hasAttribute('srcset')) { + el.setAttribute('srcset', el.src + ' 1x') + } + } else { + console.log('Can\'t add diffy image fixture', selector, src, h, w) + return resolve() + } + } catch (e) { + console.log('diffyImageFixture', e) + return resolve() + } + }) + } + + function diffyBackgroundImageFixture (el) { + return new Promise((resolve) => { + try { + const elStyle = el.currentStyle || window.getComputedStyle(el, false); + const backgroundImage = elStyle.backgroundImage.slice(4, -1).replace(/"/g, ''); + + // console.log('Processing element: ', selector) + + if (!backgroundImage) { + // No background image + return resolve() + } + + getImageInfo(backgroundImage) + .then((imageInfo) => { + if (imageInfo.width && imageInfo.height) { + const newBackgroundImageSrc = `https://picsum.photos/id/0/${Math.round(imageInfo.width)}/${Math.round(imageInfo.height)}`; + const newBackgroundImage = new Image(); + newBackgroundImage.addEventListener('load', () 
=> { + el.style.backgroundImage = 'url(' + newBackgroundImageSrc + ')'; + + resolve(); + }); + newBackgroundImage.addEventListener('error', () => { + resolve(); + }); + + // @TODO add timeout in case image is not loaded + + newBackgroundImage.src = newBackgroundImageSrc; + } else { + resolve(); + } + }) + .catch(() => { + return resolve() + }) + } catch (e) { + console.log('diffyImageFixture', e) + + return resolve() + } + }) + } + + function getImageInfo (url) { + return new Promise((resolve, reject) => { + const img = new Image(); + img.onload = () => resolve(img); + img.onerror = () => reject(); + img.src = url; + }); + } + + function diffyTextFixture (el) { + return new Promise((resolve) => { + try { + el.innerHTML = content + } catch (e) { + console.log('diffyTextFixture', e) + } + + return resolve() + }) + } + + fixturePromises = [] + _fixtures.forEach(function (fixture) { + selector = (fixture.selector) ? fixture.selector.trim() : '' + type = (fixture.type) ? fixture.type.trim() : '' + content = (fixture.content) ? 
fixture.content.trim() : '' + + if (selector.length) { + const element = document.querySelectorAll(selector) + + if (element) { + const elementKeys = Object.keys(element) + + for (i = 0, l = elementKeys.length; i < l; ++i) { + if (type === 'image') { + fixturePromises.push(diffyImageFixture(element[elementKeys[i]], selector)) + } else if (type === 'background image') { + fixturePromises.push(diffyBackgroundImageFixture(element[elementKeys[i]])) + } else { + fixturePromises.push(diffyTextFixture(element[elementKeys[i]])) + } + } + } + } + }) + + if (fixturePromises.length) { + return Promise.all(fixturePromises) + } else { + return Promise.resolve() + } + + }, job.args.fixtures) + + console.log('Diffy fixtures were added.') + return page.waitForTimeout(5000) + }, + + hideBanners: async (page, job) => { + if (!checkArgs(job, 'elements', true)) { + return Promise.resolve() + } + + return page.evaluate((_elements) => { + function getPosition (el) { + var xPos = 0 + var yPos = 0 + if (!el) { + return + } + var rect = el.getBoundingClientRect() + while (el) { + if (el.tagName === 'BODY') { + // deal with browser quirks with body/window/document and page scroll + var xScroll = el.scrollLeft || document.documentElement.scrollLeft + var yScroll = el.scrollTop || document.documentElement.scrollTop + + xPos += (el.offsetLeft - xScroll + el.clientLeft) + yPos += (el.offsetTop - yScroll + el.clientTop) + } else { + // for all other non-BODY elements + xPos += (el.offsetLeft - el.scrollLeft + el.clientLeft) + yPos += (el.offsetTop - el.scrollTop + el.clientTop) + } + + el = el.offsetParent + } + + return { + left: xPos, + top: yPos, + width: rect.width, + height: rect.height, + } + } + + function vrtPaintOver (element) { + var rectObject = getPosition(element) + if (!rectObject) { + return + } + var div = document.createElement('div') + var body = document.body + div.style.display = 'block' + div.style.left = rectObject.left + 'px' + div.style.top = rectObject.top + 'px' + 
div.style.width = rectObject.width + 'px' + div.style.height = rectObject.height + 'px' + div.style.backgroundColor = 'green' + div.style.position = 'absolute' + div.style.zIndex = '9999' + body.appendChild(div) + for (var i = 0; i < div.childNodes.length; i++) { + var child = div.childNodes[i] + if (child && child.style) { + child.style.zIndex = '-1' + } + } + } + + window.scrollTo(0, 0) + + _elements.forEach(function (selector) { + selector = selector.trim(); + + if (selector.length) { + document.querySelectorAll(selector) + .forEach((element) => { + vrtPaintOver(element); + }); + } + }) + }, job.args.elements) + }, + + updatePageViewport: async (page, job, maxPageHeight = null) => { + return updatePageViewport(page, job, maxPageHeight) + }, + + delayBeforeScreenshot: async (page, job) => { + if (checkArgs(job, 'delay_before_screenshot')) { + return page.waitForTimeout(job.args.delay_before_screenshot * 1000) + } + + return Promise.resolve() + }, + + addCookies: async (job) => { + if (!checkArgs(job, 'cookies')) { + return [] + } + + let items = job.args.cookies.split(';') + let cookies = [] + const urlObj = url.parse(job.url, true) + + items.forEach((item) => { + const [name, ...rest] = item.trim().split('=') // trim: "a=1; b=2" leaves a leading space on every pair after the first + if (name && rest.join('=')) { // re-join so a value that itself contains '=' (e.g. base64 padding) is kept whole + cookies.push({ + name: name, + value: rest.join('='), + domain: urlObj.hostname, // hostname, not host: a cookie domain must not carry the port + path: '/', + expires: Math.round(new Date().getTime() / 1000) + (60 * 60), + httpOnly: false, + secure: false, + session: false + }) + } + }) + + return cookies + }, + + auth: async (page, job) => { + // @TODO !checkArgs(job, 'username') || !checkArgs(job, 'usernameSelector') - doesn't exist for netlify + if (!checkArgs(job, 'url') || !checkArgs(job, 'passwordSelector') || !checkArgs(job, 'submitSelector') || !checkArgs(job, 'password')) { + return Promise.resolve() + } + + let url = job.args.url + + // Clear cookies for url. 
+ let cookies = await page.cookies(url) + await page.deleteCookie(...cookies) + + console.log(`Navigating to ${url}`); + await page.goto(url, { waitUntil: ['domcontentloaded', 'networkidle2'] }) + await awaitResponse(page) + + await updatePageViewport(page, job) + + if (checkArgs(job, 'before_login_css')) { + console.log("Clicking before login element."); + await page.evaluate((job) => { + document.querySelector(job.args.before_login_css).click() + }, job) + await page.waitForTimeout(2000); + } + + if (job.args.usernameSelector) { + await page.waitForSelector(job.args.usernameSelector) + console.log("Typing username."); + await page.type(job.args.usernameSelector, job.args.username, { delay: 10 }); // Increased delay + } + + await page.waitForSelector(job.args.passwordSelector) + console.log("Typing password."); + await page.type(job.args.passwordSelector, job.args.password, { delay: 10 }); // Increased delay + + await page.waitForSelector(job.args.submitSelector) + console.log("Clicking submit button."); + await page.focus(job.args.submitSelector) + await page.click(job.args.submitSelector); + + try { + await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 120000 }) + console.log("Navigation after login successful."); + } catch (error) { + console.log("Navigation after login failed, retrying..."); + console.log(error) + // Retry logic or handle the failure as needed + return Promise.resolve(); + } + + console.log("Authentication process completed."); + return page.cookies() + }, + + removeFile: (filepath) => { + fs.rmSync(filepath, { force: true }) + }, + + random: (min, max) => { + return random(min, max) + }, + + /** + * Get tmp dir for screenshots. + * @returns {string} + */ + // getTmpDir: () => { + // let tmp = (process.env.TMP_PATH && process.env.TMP_PATH.length) ? 
this._rTrim(process.env.TMP_PATH) : '/tmp' + // tmp += '/diffy' + // if (!fs.existsSync(tmp)) fs.mkdirSync(tmp, { recursive: true }) + // return tmp + // }, + // + // Can't use emptyDirSync as it is part of fs-extra that requires node 18 at minimum. We run on 16 still. + // cleanTmpDir: () => { + // const tmp = this.getTmpDir() + // fs.emptyDirSync(tmp) + // }, + + setHeaders: async (page, job) => { + if (!checkArgs(job, 'headers', true)) { + return + } + + let headers = {} + let userAgent = job.args.headers.filter(item => { + return (item.hasOwnProperty('header') && item.header && item.header.toLowerCase() === 'user-agent') + }) + + if (userAgent && userAgent.length) { + let userAgentString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0' + if (userAgent[0].hasOwnProperty('value') && userAgent[0].value.length) { + userAgentString = userAgent[0].value + } + await page.setUserAgent(userAgentString) + } + + job.args.headers.forEach(element => { + if (element && typeof element.header === 'string' && element.header.trim().length) { // skip entries without a usable header name instead of throwing on .trim() + headers[element.header] = element.value + } + }) + + if (Object.keys(headers).length) { + await page.setExtraHTTPHeaders(headers) + } + }, + + cropElement: async (page, job) => { + if (!checkArgs(job, 'crop')) { + return Promise.resolve() + } + + return page.evaluate((_selector) => { + + function getPosition (el) { + if (!el) { + return + } + var xPos = 0 + var yPos = 0 + var rect = el.getBoundingClientRect() + while (el) { + if (el.tagName === 'BODY') { + // deal with browser quirks with body/window/document and page scroll + var xScroll = el.scrollLeft || document.documentElement.scrollLeft + var yScroll = el.scrollTop || document.documentElement.scrollTop + + xPos += (el.offsetLeft - xScroll + el.clientLeft) + yPos += (el.offsetTop - yScroll + el.clientTop) + } else { + // for all other non-BODY elements + xPos += (el.offsetLeft - el.scrollLeft + el.clientLeft) + yPos += (el.offsetTop - el.scrollTop + el.clientTop) + } + + el = el.offsetParent + } 
+ + return { + left: xPos, + top: yPos, + width: rect.width, + height: rect.height, + } + } + + window.scrollTo(0, 0) + + return getPosition(document.querySelector(_selector)) + + }, job.args.crop) + + }, + + getPageHtml: async (page) => { + return page.evaluate(() => { + return document.documentElement.outerHTML + }) + }, + + getImageSize: async (file) => { + return new Promise((resolve, reject) => { + im.identify(file, (err, features) => { + if (err) { + console.log(err) + + reject(err.message) + } else { + resolve({ + height: features.height, + width: features.width + }) + } + }) + }) + } +} diff --git a/lib/funcPerform.js b/lib/funcPerform.js new file mode 100644 index 0000000..954bbfb --- /dev/null +++ b/lib/funcPerform.js @@ -0,0 +1,771 @@ +const request = require('request') // @TODO use node http module +const uploadS3 = require('./uploadS3.js') +const thumbnail = require('./thumbnail.js') +const func = require('./func.js') + +let debug = process.env.DEBUG || false // keep the raw env string: coercing with !! made the 'false' check below unreachable, so DEBUG=false enabled debugging +if (debug === 'false') { + debug = false +} + +const sendResult = (job, jobItem, data) => { + job.status = true + job.item_result = data + if (jobItem && jobItem.hasOwnProperty('additionalType')) { + job.item_result['additionalType'] = jobItem.additionalType + } + return job +} + +const sendError = (job, error, jobItem) => { + job.status = false + job.err = error + job.item_result = [] + if (jobItem && jobItem.hasOwnProperty('additionalType')) { + job.item_result['additionalType'] = jobItem.additionalType + } + return job +} + +const checkUrl = async (url, job) => { + const options = { + method: 'HEAD', + rejectUnauthorized: false, + requestCert: false, + strictSSL: false, + insecureHTTPParser: true, + timeout: 20 * 1000, + pool: { maxSockets: Infinity }, + headers: { 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0' }, + } + + if (func.checkArgs(job, 'headers', true)) { + job.args.headers.forEach(item => { + if (item.hasOwnProperty('header') && 
item.header) { + if (item.header.toLowerCase() === 'user-agent' && item.hasOwnProperty('value') && item.value.length) { + options.headers['user-agent'] = item.value; + } + + if (item.header.toLowerCase() === 'x-vercel-protection-bypass') { + options.headers['x-vercel-protection-bypass'] = item.value; + } + } + }); + } + + if ( + job.hasOwnProperty('basicAuth') && job.basicAuth && + job.basicAuth.hasOwnProperty('user') && job.basicAuth.user && + job.basicAuth.hasOwnProperty('password') && job.basicAuth.password + ) { + options.auth = { + user: job.basicAuth.user, + pass: job.basicAuth.password + } + } + + let lastException; + + try { + await checkURLRequest(url, options, job) + return true + } catch (e) { + console.log('Failed to checkUrl', e); + lastException = e; + } + + try { + options.method = 'GET' + await checkURLRequest(url, options, job) + return true + } catch (e) { + console.log('Failed to checkUrl (GET)', e); + lastException = e; + } + + if (job.args.url) { + try { + await checkURLRequest(job.args.url, options, job) + return true + } catch (e) { + console.log('Failed to checkUrl (get, auth)', e); + lastException = e; + } + } + + throw lastException; +} + +const checkURLRequest = function (url, options, job) { + return new Promise((resolve, reject) => { + if (func.checkArgs(job, 'cookies')) { + let j = request.jar() + let cookie = request.cookie(job.args.cookies) + j.setCookie(cookie, url) + options.jar = j + } else { + options.jar = true + } + + requestLoop(url, options, 2, 1000, (err, res) => { + if (err) { + return reject(err) + } + + if (!res || !res.hasOwnProperty('statusCode')) { + return reject('Can\'t resolve GET request') + } + + try { + if (job.args.auth && job.args.auth.type === 'netlify' && res.statusCode === 401) { + return resolve() + } else if (res.statusCode === 0 || (res.statusCode >= 400 && !([403, 404].indexOf(res.statusCode) !== -1))) { + return reject('Wrong status code => ' + res.statusCode + ': ' + res.statusMessage) + } + + return 
resolve() + } catch (e) { + return reject('checkUrlError: ' + ((e && e.hasOwnProperty('message')) ? e.message : e)) // ternary parenthesised: '+' binds tighter than '?:', which dropped the prefix and turned non-Error values into undefined + } + }) + }) +} + +const requestLoop = function (url, options, attemptsLeft, retryDelay, callback, lastError = null) { + if (attemptsLeft <= 0) { + callback((lastError != null ? lastError : new Error('checkUrlError'))) + } else { + request(url, options, function (error, response) { + const recoverableErrors = ['ESOCKETTIMEDOUT', 'ETIMEDOUT', 'ECONNRESET', 'ECONNREFUSED'] + if (error && recoverableErrors.includes(error.code)) { + setTimeout((function () { + requestLoop(url, options, --attemptsLeft, retryDelay, callback, error) + }), retryDelay) + } else { + callback(error, response) + } + }) + } +} + +const debugLog = (data, jobItem = {}, additional = false) => { + if (debug) { + const jobId = (jobItem && jobItem.hasOwnProperty('id')) ? jobItem.id : 'noJobId' + const projectId = (jobItem && jobItem.hasOwnProperty('project_id')) ? jobItem.project_id : 'noProjectId' + const breakpoint = (jobItem && jobItem.hasOwnProperty('breakpoint')) ? jobItem.breakpoint : 'noBreakpoint' + const url = (jobItem && jobItem.hasOwnProperty('url')) ? jobItem.url : 'noUrl' + const key = `j:${jobId}-p:${projectId}-b:${breakpoint}-u:${url}` + + if (additional) { + console.log(key, data, additional) + } else { + console.log(key, data) + } + } +} + +const saveError = async (job, jobItem, errorText) => { + let filenameKey + let filename + let thumbnailFilepath + let s3UrlThumbnail + let s3Url + let width + + try { + if (errorText && (errorText.includes('SOCKETTIMEOUT') || errorText.includes('SOCKETTIMEDOUT'))) { + errorText = 'Diffy was unable to take the screenshot.\n' + + 'Looks like we have overloaded your server. Please try lowering number of workers for this environment under Project Settings -> Advanced -> Performance' + } + + errorText = 'Error: ' + errorText + width = (jobItem && jobItem.hasOwnProperty('breakpoint')) ? 
jobItem.breakpoint : 1024 + filenameKey = Math.floor(Date.now() / 1000) + '-' + (func.random(0, 999999999)).toString() + + if (width < 16000) { + filename = '/tmp/screenshot-error-' + filenameKey + '.webp' + thumbnailFilepath = filename.replace('.webp', '-thumbnail.webp') + } else { + filename = '/tmp/screenshot-error-' + filenameKey + '.png' + thumbnailFilepath = filename.replace('.png', '-thumbnail.png') + } + + await thumbnail.createErrorImage(filename, errorText, width) + + s3Url = await uploadS3.upload(filename).catch((err) => { + throw new Error('Can\'t upload screenshot: ' + err.name + ': ' + ((err && err.hasOwnProperty('message')) ? err.message : err)) // ternary parenthesised so the context prefix is not swallowed by '?:' + }) + + await thumbnail.generateImageThumbnail(filename, thumbnailFilepath).catch((err) => { + throw new Error('Can\'t generate thumbnail: ' + err.name + ': ' + ((err && err.hasOwnProperty('message')) ? err.message : err)) + }) + + s3UrlThumbnail = await uploadS3.upload(thumbnailFilepath).catch((err) => { + throw new Error('Can\'t upload thumbnail: ' + err.name + ': ' + ((err && err.hasOwnProperty('message')) ? err.message : err)) + }) + + // Not need to remove "htmlFilename" because we use stream and not creating real file. + // Async remove files. + func.removeFile(filename) + func.removeFile(thumbnailFilepath) + + return sendResult(job, jobItem, { + 'full': s3Url, + 'thumbnail': s3UrlThumbnail, + 'html': '', + 'data': 'Error: ' + JSON.stringify(job), + 'log_data': '', + 'error': { + 'message': errorText + } + }) + } catch (err) { + return sendResult(job, jobItem, { + 'full': '', + 'thumbnail': '', + 'html': '', + 'data': 'Error: Can\'t generate error image. ' + errorText + ' => ' + ((err && err.hasOwnProperty('message')) ? 
err.message : err), + 'log_data': '', + }) + } +} + +async function disableGifAnimation (page) { + await page.evaluate(() => { + Array.from(document.images) + .filter((image) => /^(?!data:).*\.gif/i.test(image.src)) + .map((image) => { + const c = document.createElement('canvas'); + const w = c.width = image.width; + const h = c.height = image.height; + + c.getContext('2d').drawImage(image, 0, 0, w, h); + + try { + image.src = c.toDataURL('image/gif'); // if possible, retain all css aspects + } catch(e) { + // cross-domain -- mimic original with all its tag attributes + for (const attribute of Object.entries(image.attributes)) { + c.setAttribute(attribute[1].name, attribute[1].value); + } + + image.parentNode.replaceChild(c, image); + } + }); + }); +} + +module.exports = { + + perform: async (browser, job, jobItem) => { + let is_cut + let is_crop + let filename + let s3Url + let s3HtmlUrl + let thumbnailFilepath + let s3UrlThumbnail + let data = {} + let cookies + let authCookies + let page + let pageHtml + let htmlFilename + let url + let filenameKey + let jsConsole = [] + let s3JsConsoleUrl + let jsConsoleFilename + let consoleMes + let pageHeight + const maxPageHeightIfError = 30000 + + debugLog('Start process:', jobItem, job) + // try { + // debugLog('Start screenshot', jobItem) + // await checkUrl(jobItem.url, jobItem) + // debugLog('checkUrl done: ' + jobItem.url, jobItem) + // } catch (e) { + // debugLog('checkUrl error:', jobItem) + // debugLog(e, jobItem) + // return await saveError(job, jobItem, 'CheckURL ' + ((e && e.hasOwnProperty('message')) ? e.message : e.toString())) + // } + + try { + const maxPageHeight = (job.hasOwnProperty('attempts') && job.attempts > 0) ? 
(maxPageHeightIfError / job.attempts) : maxPageHeightIfError + page = await browser.newPage() + + if (jobItem.args.hasOwnProperty('night_mode') && jobItem.args.night_mode) { + // Emulate dark mode + await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }]); + } + + if (jobItem.args.hasOwnProperty('retina_images') && jobItem.args.retina_images) { + await page.setViewport({ width: parseInt(jobItem.breakpoint), height: 1000, deviceScaleFactor: 2 }) + } + + debugLog('browser.newPage', jobItem) + + // set global timeout and disable CSP + await page.setBypassCSP(true) + await page.setDefaultNavigationTimeout(90 * 1000) + debugLog('setDefaultNavigationTimeout done', jobItem) + page.on('console', msg => { + try { + consoleMes = { + type: msg.type(), + text: msg.text(), + location: msg.location(), + } + } catch (e) { + consoleMes = { + type: e.type(), + text: e.text(), + location: e.location(), + } + } + + jsConsole.push(consoleMes) + }) + + // Remove all browser Cookies. + const client = await page.target().createCDPSession(); + await client.send('Network.clearBrowserCookies'); + await page.waitForTimeout(1000) // wait 1 second. + + await func.setHeaders(page, jobItem) + debugLog('setHeaders done', jobItem) + + if (!jobItem.hasOwnProperty('url') || !jobItem.hasOwnProperty('breakpoint')) { + throw new Error('Cannot find url or breakpoint options') + } + + url = jobItem.url + + if ( + jobItem.hasOwnProperty('basicAuth') && jobItem.basicAuth && + jobItem.basicAuth.hasOwnProperty('user') && jobItem.basicAuth.user && + jobItem.basicAuth.hasOwnProperty('password') && jobItem.basicAuth.password + ) { + await page.authenticate({ username: jobItem.basicAuth.user, password: jobItem.basicAuth.password }) + } + + // Add new cookies. + cookies = await func.addCookies(jobItem) + debugLog('addCookies done', jobItem) + authCookies = await func.auth(page, jobItem).catch((err) => { + data.auth_error = err.name + ': ' + (err && err.hasOwnProperty('message')) ? 
err.message : err + }) + + debugLog('auth done', jobItem) + if (authCookies) { + debugLog(authCookies, jobItem) + cookies = cookies.concat(authCookies) + } + + if (cookies) { + await page.setCookie(...cookies) + } + + if (jobItem.hasOwnProperty('project_id') && jobItem.project_id === 21791) { + // @see https://support.callrail.com/hc/en-us/articles/5711492051085-Preventing-a-number-from-swapping-on-a-website + console.log('Apply calltrkNoswap') + + await page.setRequestInterception(true); + page.on('request', interceptedRequest => { + if (interceptedRequest.url().endsWith('swap_session.json')) { + interceptedRequest.abort(); + } else { + interceptedRequest.continue(); + } + }); + } + + try { + await page.goto(url, { timeout: 60000, waitUntil: ['networkidle2'] }) + } catch (err) { + debugLog('page was not loaded by networkidle2', jobItem) + await page.goto(url, { timeout: 60000, waitUntil: ['domcontentloaded'] }) + } + debugLog('page loaded done', jobItem) + + // Disable animation / transition (exclude diff from animation) + if (jobItem.args.hasOwnProperty('disable_css_animation') && jobItem.args.disable_css_animation) { + debugLog('disable css animation', jobItem) + + await page.addStyleTag({ + content: ` + *, *::after, *::before { + transition-delay: 0s !important; + transition-duration: 0s !important; + animation-delay: -0.0001s !important; + animation-duration: 0s !important; + animation-play-state: paused !important; + caret-color: transparent !important; + color-adjust: exact !important; + } + ` + }).catch((e) => console.error('Failed to addStyleTag: ', e)) + + try { + await disableGifAnimation(page) + } catch (e) { + console.error('Failed to disableGifAnimation: ', e) + } + } + + await page.setViewport({ width: parseInt(jobItem.breakpoint), height: 1000 }) + await page.waitForTimeout(1000) + debugLog('page.goto done', jobItem) + + console.time('waitFontsReady'); + await page.evaluateHandle('document.fonts.ready'); + console.timeEnd('waitFontsReady'); + + // 
@see https://github.com/ygerasimov/diffy-pm/issues/250 (wp-rocket fix) + await page.evaluate(() => { + try { + window.dispatchEvent(new Event('touchstart')); + window.document.dispatchEvent(new Event('touchstart')); + } catch (e) {} + }); + + await func.addCssCode(page, jobItem) + debugLog('addCssCode done', jobItem) + + // #see https://github.com/ygerasimov/diffy-pm/issues/339 + if (jobItem.hasOwnProperty('project_id') && jobItem.project_id === 20882) { + await func.cutElements(page, jobItem) + } + + await func.autoScroll(page, jobItem) + debugLog('autoScroll done', jobItem) + + if (jobItem.args.hasOwnProperty('stabilization') && jobItem.args.stabilization) { + console.log('[HeightStabilization] Starting'); + + await page.evaluate(async () => { + // pause and reset time for all