diff --git a/.travis.yml b/.travis.yml index d087e44..dc11831 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,19 @@ language: python python: 3.5 -sudo: false + env: - - TOXENV=py27 - - TOXENV=py35 + - TOXENV=py27 + - TOXENV=py35 install: - - pip install -U tox twine wheel codecov -script: tox + - pip install -U tox twine wheel codecov; + - sudo apt-get update; + - curl -sL https://deb.nodesource.com/setup_6.x | sudo -E bash -; + - sudo apt-get install -y nodejs; +script: + - make test after_success: - - codecov + - codecov + - make lint cache: - directories: - - $HOME/.cache/pip + directories: + - $HOME/.cache/pip diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..78c9609 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +NODE_DIR = helpers/nodejs + +test: + tox + $(MAKE) -C $(NODE_DIR) test + +lint: + $(MAKE) -C $(NODE_DIR) lint + +.PHONY: test lint diff --git a/helpers/nodejs/.eslintignore b/helpers/nodejs/.eslintignore new file mode 100644 index 0000000..ab1cfb4 --- /dev/null +++ b/helpers/nodejs/.eslintignore @@ -0,0 +1 @@ +test/* diff --git a/helpers/nodejs/.eslintrc.json b/helpers/nodejs/.eslintrc.json new file mode 100644 index 0000000..52715e8 --- /dev/null +++ b/helpers/nodejs/.eslintrc.json @@ -0,0 +1,24 @@ +{ + "env": { + "node": true + }, + "extends": "eslint:recommended", + "rules": { + "indent": [ + "error", + 4 + ], + "linebreak-style": [ + "error", + "unix" + ], + "quotes": [ + "error", + "single" + ], + "semi": [ + "error", + "always" + ] + } +} \ No newline at end of file diff --git a/helpers/nodejs/.gitignore b/helpers/nodejs/.gitignore new file mode 100644 index 0000000..aea5294 --- /dev/null +++ b/helpers/nodejs/.gitignore @@ -0,0 +1,38 @@ +# Logs +logs +*.log +npm-debug.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate 
/**
 * Node.js helper for writing Scrapy Streaming spiders.
 *
 * Outgoing messages are written to the process stdout as one JSON document
 * per line; incoming messages are read from the process stdin, one JSON
 * document per line, and dispatched according to their `type` field.
 */

/**
 * Optional request parameters accepted in the `config` argument of
 * sendRequest and sendFromResponseRequest, as [name, expected typeof] pairs.
 * The order is the order in which they are validated and serialized.
 */
var REQUEST_OPTIONS = [
    ['base64', 'boolean'],
    ['method', 'string'],
    ['meta', 'object'],
    ['body', 'string'],
    ['headers', 'object'],
    ['cookies', 'object'],
    ['encoding', 'string'],
    ['priority', 'number'],
    ['dont_filter', 'boolean']
];

/**
 * Optional fields accepted in the `fromResponseRequest` argument of
 * sendFromResponseRequest, as [name, expected typeof] pairs.
 */
var FROM_RESPONSE_OPTIONS = [
    ['url', 'string'],
    ['method', 'string'],
    ['meta', 'object'],
    ['body', 'string'],
    ['headers', 'object'],
    ['cookies', 'object'],
    ['encoding', 'string'],
    ['priority', 'number'],
    ['dont_filter', 'boolean'],
    ['formname', 'string'],
    ['formxpath', 'string'],
    ['formcss', 'string'],
    ['formnumber', 'number'],
    ['formdata', 'object'],
    ['clickdata', 'object'],
    ['dont_click', 'boolean']
];

/**
 * Validates that a variable is defined, throws an exception if not.
 *
 * @param  {*}      parameter variable to be tested
 * @param  {string} name      verbose name of the variable
 *
 * @return {boolean} true if it is defined. Otherwise, throws an Error.
 */
function _isDefined(parameter, name) {
    if (parameter === undefined) {
        throw new Error('Error: missing ' + name + ' parameter');
    }
    return true;
}

/**
 * Validates the `typeof` of a variable.
 *
 * @param  {*}      variable     variable to be tested
 * @param  {string} expectedType expected result of `typeof variable`
 * @param  {string} name         verbose name of the variable
 *
 * @return {boolean} true if the type is valid. Otherwise, throws an Error.
 */
function _validateType(variable, expectedType, name) {
    if (typeof variable !== expectedType) {
        throw new Error(name + ' parameter must be ' + expectedType + '. Received: '
                        + typeof variable);
    }
    return true;
}

/**
 * Validates that a variable is an array.
 *
 * @param  {*}      variable variable to be tested
 * @param  {string} name     verbose name of the variable
 *
 * @return {boolean} true if it is an array. Otherwise, throws an Error.
 */
function _validateArray(variable, name) {
    if (!Array.isArray(variable)) {
        throw new Error(name + ' parameter must be an array. Received: '
                        + typeof variable);
    }
    return true;
}

/**
 * Validates the type of every provided option of an options object.
 *
 * @param {object} options object holding the optional values
 * @param {array}  specs   list of [name, expected typeof] pairs
 * @param {string} prefix  text prepended to the option name in error messages
 */
function _validateOptions(options, specs, prefix) {
    specs.forEach(function(spec) {
        var key = spec[0];
        // falsy values (undefined, null, false, 0, '') count as "not provided"
        if (options[key]) {
            _validateType(options[key], spec[1], prefix + key);
        }
    });
}

/**
 * Copies the provided request options from config into msg.
 *
 * @param  {object} msg    message being built
 * @param  {object} config object with the optional request parameters
 *
 * @return {object} the same msg object
 */
function _applyRequestOptions(msg, config) {
    REQUEST_OPTIONS.forEach(function(spec) {
        var key = spec[0];
        // options that were not provided are left out of the message
        if (config[key]) {
            msg[key] = config[key];
        }
    });
    return msg;
}

/**
 * Stores the response callback under the next request id and advances the
 * id counter.
 *
 * @param  {Function} callback response callback
 *
 * @return {string} the id assigned to the request
 */
function _registerCallback(callback) {
    var id = spider._requestId;

    // always use the module-level object: responses are dispatched through
    // spider.responseMapping, so the callback must be stored there even if
    // the public method was called with a different `this`
    spider.responseMapping[id] = callback;
    spider._requestId = id + 1;

    return String(id);
}

/**
 * Converts a JS object to json and writes it to the process stdout,
 * followed by a line break.
 *
 * @param  {object} obj message to be sent
 * @return {string} the json string printed in the process stdout (without the line break)
 */
function writeJson(obj) {
    var json = JSON.stringify(obj);

    process.stdout.write(json + '\n');

    return json;
}

/**
 * Receives the exception message.
 *
 * If there is an exceptionHandler registered in the spider, the message is
 * passed to it. Otherwise, an Error is thrown.
 *
 * @param {object} msg received exception message
 */
function onException(msg) {
    if (spider.exceptionHandler) {
        spider.exceptionHandler(msg);
        return;
    }

    throw new Error ('There is a problem in the Scrapy Streaming: \n\tReceived message: '
                     + msg.received_message + '. \n\tError: ' + msg.exception + '\n\n');
}

/**
 * Validates that the spider connected successfully with Scrapy Streaming.
 *
 * @param {object} msg received ready message
 */
function checkStatus(msg) {
    if (msg.status !== 'ready') {
        throw new Error ('There is a problem in the communication channel: ' + msg.status);
    }
}

/**
 * Receives the response of a request and calls the callback registered
 * under the response id, passing the whole message to it.
 *
 * @param {object} msg response. It will be sent to the callback function
 */
function onResponse(msg) {
    var registered = Object.prototype.hasOwnProperty.call(spider.responseMapping, msg.id);
    var callback = registered ? spider.responseMapping[msg.id] : undefined;

    if (typeof callback !== 'function') {
        throw new Error('No callback registered for the response id: ' + msg.id);
    }
    callback(msg);
}

/**
 * Receives the error message and stops the execution by throwing an Error
 * that carries the received message and the error details.
 *
 * @param {object} msg error message
 */
function onError(msg) {
    throw new Error ('There is a problem in the Spider: \n\tReceived message: '
                     + msg.received_message + '. \n\tError: ' + msg.details + '\n\n');
}

/**
 * Handlers of the incoming messages, indexed by message type.
 */
var messageHandlers = {
    'ready': checkStatus,
    'response': onResponse,
    'exception': onException,
    'error': onError
};

/**
 * Receives a json in a single line, parses it, and calls the respective
 * message handler. Blank lines are ignored.
 *
 * @param {string} line string with the json message sent by Scrapy Streaming.
 */
function onLineReceive(line) {
    // NOTE(review): the line splitter presumably emits an empty chunk when
    // the input ends with a line break - confirm against the `split` docs.
    // There is nothing to parse in a blank line either way.
    if (line === undefined || line === null || String(line).trim() === '') {
        return;
    }

    var msg = JSON.parse(line);
    var msgType = msg.type;

    // own-property check so that names inherited from Object.prototype
    // (e.g. 'constructor') are not mistaken for handlers
    if (!Object.prototype.hasOwnProperty.call(messageHandlers, msgType)) {
        throw new Error('Unknown message type: ' + msgType);
    }

    messageHandlers[msgType](msg);
}

var spider = {

    exceptionHandler: null,
    responseMapping: {},
    _requestId: 1,

    /**
     * Create the spider
     *
     * @param  {string}   name           name of the spider
     * @param  {array}    startUrls      list of initial urls
     * @param  {Function} callback       callback to handle the responses from startUrls
     * @param  {array}    allowedDomains list of allowed domains (optional)
     * @param  {object}   customSettings custom settings to be used in Scrapy (optional)
     *
     * @return {string}                  json message written in the process stdout
     */
    createSpider: function(name, startUrls, callback, allowedDomains, customSettings) {
        // required fields
        _isDefined(name, 'name');
        _isDefined(startUrls, 'startUrls');
        _isDefined(callback, 'callback');

        // validation
        _validateType(name, 'string', 'name');
        _validateType(callback, 'function', 'callback');
        if (customSettings) {
            // customSettings is independent of allowedDomains: it must not
            // touch allowedDomains, which may legitimately be missing here
            _validateType(customSettings, 'object', 'customSettings');
        }
        if (startUrls) {
            _validateArray(startUrls, 'startUrls');
        }
        if (allowedDomains) {
            _validateArray(allowedDomains, 'allowedDomains');
        }

        // responses of the start urls are delivered with the 'parse' id
        spider.responseMapping.parse = callback;

        return writeJson({
            type: 'spider',
            name: name,
            start_urls: startUrls || undefined,
            allowed_domains: allowedDomains || undefined,
            custom_settings: customSettings || undefined
        });
    },

    /**
     * Send the close message to Scrapy Streaming
     *
     * @return {string} json message written in the process stdout
     */
    closeSpider: function() {
        return writeJson({
            'type': 'close'
        });
    },

    /**
     * Send a log message to Scrapy Streaming
     *
     * @param  {string} message log message
     * @param  {string} level   log level, must be one of 'CRITICAL', 'ERROR', 'WARNING', 'INFO', and 'DEBUG'
     *                          (case insensitive, it is sent in upper case)
     *
     * @return {string}         json message written in the process stdout
     */
    sendLog: function(message, level) {
        // required
        _isDefined(message, 'message');
        _isDefined(level, 'level');
        // validation
        _validateType(message, 'string', 'message');
        _validateType(level, 'string', 'level');

        var acceptedLevels = ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'];
        var upperLevel = level.toUpperCase();
        if (acceptedLevels.indexOf(upperLevel) === -1) {
            throw new Error('Invalid log level. Must be one of '
                            + '\'CRITICAL\', \'ERROR\', \'WARNING\', \'INFO\', \'DEBUG\'');
        }

        return writeJson({
            type: 'log',
            message: message,
            level: upperLevel
        });
    },

    /**
     * Opens a new request
     *
     * @param  {string}   url                 request url
     * @param  {Function} callback            response callback
     * @param  {object}   config              object with extra request parameters (optional)
     * @param  {boolean}  config.base64       if true, converts the response body to base64. (optional)
     * @param  {string}   config.method       request method (optional)
     * @param  {object}   config.meta         request extra data (optional)
     * @param  {string}   config.body         request body (optional)
     * @param  {object}   config.headers      request headers (optional)
     * @param  {object}   config.cookies      request extra cookies (optional)
     * @param  {string}   config.encoding     default encoding (optional)
     * @param  {int}      config.priority     request priority (optional)
     * @param  {boolean}  config.dont_filter  if true, the request doesn't pass through the request duplicate filter (optional)
     *
     * @return {string}                       json message written in the process stdout
     */
    sendRequest: function(url, callback, config) {
        // required fields
        _isDefined(url, 'url');
        _isDefined(callback, 'callback');

        var options = config || {};

        // validation
        _validateType(url, 'string', 'url');
        _validateType(callback, 'function', 'callback');
        _validateOptions(options, REQUEST_OPTIONS, '');

        var msg = {
            type: 'request',
            url: url,
            id: _registerCallback(callback)
        };

        return writeJson(_applyRequestOptions(msg, options));
    },

    /**
     * Opens a new request to url, sending the fromResponseRequest data in the
     * from_response_request field of the message, to create a new request
     * using the response.
     *
     * @param  {string}   url                             request url
     * @param  {Function} callback                        response callback
     * @param  {object}   fromResponseRequest             data of the request created using the response
     * @param  {string}   fromResponseRequest.url         request url (optional)
     * @param  {string}   fromResponseRequest.method      request method (optional)
     * @param  {object}   fromResponseRequest.meta        request extra data (optional)
     * @param  {string}   fromResponseRequest.body        request body (optional)
     * @param  {object}   fromResponseRequest.headers     request headers (optional)
     * @param  {object}   fromResponseRequest.cookies     request extra cookies (optional)
     * @param  {string}   fromResponseRequest.encoding    default encoding (optional)
     * @param  {int}      fromResponseRequest.priority    request priority (optional)
     * @param  {boolean}  fromResponseRequest.dont_filter if true, the request doesn't pass through the request duplicate filter (optional)
     * @param  {string}   fromResponseRequest.formname    FormRequest.formname parameter (optional)
     * @param  {string}   fromResponseRequest.formxpath   FormRequest.formxpath parameter (optional)
     * @param  {string}   fromResponseRequest.formcss     FormRequest.formcss parameter (optional)
     * @param  {int}      fromResponseRequest.formnumber  FormRequest.formnumber parameter (optional)
     * @param  {object}   fromResponseRequest.formdata    FormRequest.formdata parameter (optional)
     * @param  {object}   fromResponseRequest.clickdata   FormRequest.clickdata parameter (optional)
     * @param  {boolean}  fromResponseRequest.dont_click  FormRequest.dont_click parameter (optional)
     *
     * @param  {object}   config                          object with extra request parameters (optional)
     * @param  {boolean}  config.base64                   if true, converts the response body to base64. (optional)
     * @param  {string}   config.method                   request method (optional)
     * @param  {object}   config.meta                     request extra data (optional)
     * @param  {string}   config.body                     request body (optional)
     * @param  {object}   config.headers                  request headers (optional)
     * @param  {object}   config.cookies                  request extra cookies (optional)
     * @param  {string}   config.encoding                 default encoding (optional)
     * @param  {int}      config.priority                 request priority (optional)
     * @param  {boolean}  config.dont_filter              if true, the request doesn't pass through the request duplicate filter (optional)
     *
     * @return {string}                                   json message written in the process stdout
     */
    sendFromResponseRequest: function(url, callback, fromResponseRequest, config) {
        // required fields
        _isDefined(url, 'url');
        _isDefined(callback, 'callback');
        _isDefined(fromResponseRequest, 'fromResponseRequest');

        var options = config || {};

        // validation - request
        _validateType(url, 'string', 'url');
        _validateType(callback, 'function', 'callback');
        _validateType(fromResponseRequest, 'object', 'fromResponseRequest');
        _validateOptions(options, REQUEST_OPTIONS, '');

        // validation - fromResponseRequest
        _validateOptions(fromResponseRequest, FROM_RESPONSE_OPTIONS, 'fromResponseRequest.');

        var msg = {
            type: 'from_response_request',
            url: url,
            id: _registerCallback(callback),
            from_response_request: fromResponseRequest
        };

        return writeJson(_applyRequestOptions(msg, options));
    },

    /**
     * Starts the spider execution. This will bind the process stdin to read data
     * from Scrapy Streaming, and process each line received.
     *
     * If you want to handle the exception messages, pass a function that receives
     * a single parameter as an argument. Without it, an exception message makes
     * the spider throw an Error.
     *
     * @param {Function} exceptionHandler function to handle exceptions. Must receive a single parameter, the received json with the exception. (optional)
     */
    runSpider: function(exceptionHandler) {
        // only undefined means "no handler": null and other values are rejected
        if (exceptionHandler !== undefined) {
            _validateType(exceptionHandler, 'function', 'exceptionHandler');
            spider.exceptionHandler = exceptionHandler;
        }
        // `split` is loaded lazily, so it is only needed when the spider runs
        process.stdin.pipe(require('split')()).on('data', onLineReceive);
    }
};

// Export through CommonJS. The guard keeps the file loadable when it is
// evaluated somewhere `module` does not exist.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = spider;
}
+describe('Initialization', function() { + spider = require('../index.js'); + it('start with null / empty values', function(){ + assert.equal(spider.exceptionHandler, null); + }); +}); + + +describe('runSpider', function() { + spider = require('../index.js'); + + it('raises exception with wrong exceptionHandler', function() { + assert.throws(function(){spider.runSpider(1)}, Error); + assert.throws(function(){spider.runSpider('test')}, Error); + assert.throws(function(){spider.runSpider(null)}, Error); + }); + + it('updates exceptionHandler', function() { + var testFunction = function(){}; + spider.runSpider(testFunction); + assert.equal(spider.exceptionHandler, testFunction); + }); +}); + + +describe('closeSpider', function() { + spider = require('../index.js'); + msg = spider.closeSpider(); + var expected_msg = { + type: 'close' + }; + + it('correct close message', function() { + assert.deepEqual(JSON.parse(msg), expected_msg); + }); +}); + + +describe('createSpider', function() { + spider = require('../index.js'); + + it('check required fields', function() { + assert.throws(function(){spider.createSpider()}, 'Error: missing name'); + assert.throws(function(){spider.createSpider('name')}, 'Error: missing startUrls'); + assert.throws(function(){spider.createSpider('name', [])}, 'Error: missing callback'); + }); + + it('validate field type', function() { + //name + assert.throws(function(){spider.createSpider(1, [], function(){})}, 'name parameter must be string'); + assert.doesNotThrow(function(){spider.createSpider('name', [], function(){})}); + + //startUrls + assert.throws(function(){spider.createSpider('name', 1, function(){})}, 'startUrls parameter must be an array'); + assert.doesNotThrow(function(){spider.createSpider('name', [], function(){})}); + + //callback + assert.throws(function(){spider.createSpider('name', [], 11)}, 'callback parameter must be function'); + assert.doesNotThrow(function(){spider.createSpider('name', [], function(){})}); + + 
//allowedDomains + assert.throws(function(){spider.createSpider('name', [], function(){}, 1)}, 'allowedDomains parameter must be an array'); + assert.doesNotThrow(function(){spider.createSpider('name', [], function(){}), []}); + + //customSettings + assert.throws(function(){spider.createSpider('name', [], function(){}, null, 3)}, 'customSettings parameter must be object'); + assert.doesNotThrow(function(){spider.createSpider('name', [], function(){}), null, {test: 1}}); + }); + + it('generate the json message', function() { + var expected_msg = { + type: 'spider', + name: 'name', + start_urls: ['http://example.com'], + allowed_domains: ['example.com'], + custom_settings: {test: 1} + }; + + var msg = spider.createSpider(expected_msg.name, expected_msg.start_urls, function(){}, + expected_msg.allowed_domains, expected_msg.custom_settings); + + assert.deepEqual(JSON.parse(msg), expected_msg); + }); +}); + + +describe('sendLog', function() { + spider = require('../index.js'); + + it('check required fields', function() { + assert.throws(function(){spider.sendLog()}, 'Error: missing message'); + assert.throws(function(){spider.sendLog('name')}, 'Error: missing level'); + }); + + it('validate field type', function() { + //message + assert.throws(function(){spider.sendLog(1, 'debug')}, 'message parameter must be string'); + assert.doesNotThrow(function(){spider.sendLog('name', 'debug')}); + + //level + assert.throws(function(){spider.sendLog('name', 1)}, 'level parameter must be string'); + assert.doesNotThrow(function(){spider.sendLog('name', 'debug')}); + }); + + it('check debug level', function() { + assert.doesNotThrow(function(){spider.sendLog('name', 'critical')}); + assert.doesNotThrow(function(){spider.sendLog('name', 'error')}); + assert.doesNotThrow(function(){spider.sendLog('name', 'warning')}); + assert.doesNotThrow(function(){spider.sendLog('name', 'info')}); + assert.doesNotThrow(function(){spider.sendLog('name', 'debug')}); + 
assert.throws(function(){spider.sendLog('name', 'aaa')}, 'Invalid log level. Must be one of '); + }); + + it('generate the json message', function() { + var expected_msg = { + type: 'log', + message: 'message', + level: 'DEBUG' + }; + + var msg = spider.sendLog(expected_msg.message, expected_msg.level); + + assert.deepEqual(JSON.parse(msg), expected_msg); + }); +}); + + +describe('sendRequest', function() { + spider = require('../index.js'); + + // url, callback, base64, method, meta, body, headers, cookies, encoding, priority, dontFilter + it('check required fields', function() { + assert.throws(function(){spider.sendRequest()}, 'Error: missing url'); + assert.throws(function(){spider.sendRequest('http://example.com')}, 'Error: missing callback'); + }); + + it('validate field type', function() { + //url + assert.throws(function(){spider.sendRequest(1, function(){})}, 'url parameter must be string'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){})}); + + //callback + assert.throws(function(){spider.sendRequest('http://example.com', 1)}, 'callback parameter must be function'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){})}); + + //base64 + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {base64: 1})}, + 'base64 parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {base64: false})}); + + //method + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {method: 1})}, + 'method parameter must be string'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {method: 'get'})}); + + //meta + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {meta: 1})}, + 'meta parameter must be object'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {meta: {}})}); + + //body + 
assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {body: 1})}, + 'body parameter must be string'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {body: 'body'})}); + + //headers + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {headers: 'Content-type'})}, + 'headers parameter must be object'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {headers: {}})}); + + //cookies + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {cookies: 'a'})}, + 'cookies parameter must be object'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {cookies: {}})}); + + //encoding + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {encoding: 1})}, + 'encoding parameter must be string'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {encoding: 'utf8'})}); + + //priority + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {priority: 'high'})}, + 'priority parameter must be number'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {priority: 1})}); + + //dont_filter + assert.throws(function(){spider.sendRequest('http://example.com', function(){}, {dont_filter: 1})}, + 'dont_filter parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendRequest('http://example.com', function(){}, {dont_filter: true})}); + }); + + it('generate the json message', function() { + + var config = { + base64: true, + method: 'get', + meta: {a: 1}, + body: 'body', + headers: {b: 2}, + cookies: {c: 3}, + encoding: 'utf8', + priority: 1, + dont_filter: true + }; + + var expected_msg = { + type: 'request', + url: 'http://example.com', + id: '' + spider._requestId, + base64: config.base64, + method: config.method, + meta: config.meta, + body: 
config.body, + headers: config.headers, + cookies: config.cookies, + encoding: config.encoding, + priority: config.priority, + dont_filter: config.dont_filter + }; + + + var msg = spider.sendRequest(expected_msg.url, function(){}, config); + + assert.deepEqual(JSON.parse(msg), expected_msg); + }); + + it('register the callback', function(){ + var responseId = spider._requestId; + var callback = function() {}; + + var msg = spider.sendRequest('http://example.com', callback); + assert.equal(spider.responseMapping[responseId], callback); + }) +}); + + +describe('sendFromResponseRequest', function() { + spider = require('../index.js'); + + // url, callback, base64, method, meta, body, headers, cookies, encoding, priority, dontFilter + it('check required fields', function() { + assert.throws(function(){spider.sendFromResponseRequest()}, 'Error: missing url'); + assert.throws(function(){spider.sendFromResponseRequest('http://example.com')}, 'Error: missing callback'); + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){})}, 'Error: missing fromResponseRequest'); + }); + + it('validate field type', function() { + //url + assert.throws(function(){spider.sendFromResponseRequest(1, function(){}, {})}, 'url parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {})}); + + //callback + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', 1, {})}, 'callback parameter must be function'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {})}); + + //fromResponseRequest + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, 1)}, 'fromResponseRequest parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {})}); + + //base64 + 
assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {base64: 1})}, + 'base64 parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {base64: false})}); + + //method + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {method: 1})}, + 'method parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {method: 'get'})}); + + //meta + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {meta: 1})}, + 'meta parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {meta: {}})}); + + //body + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {body: 1})}, + 'body parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {body: 'body'})}); + + //headers + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {headers: 'Content-type'})}, + 'headers parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {headers: {}})}); + + //cookies + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {cookies: 'a'})}, + 'cookies parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {cookies: {}})}); + + //encoding + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {encoding: 1})}, + 'encoding parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {encoding: 
'utf8'})}); + + //priority + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {priority: 'high'})}, + 'priority parameter must be number'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {priority: 1})}); + + //dont_filter + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {dont_filter: 1})}, + 'dont_filter parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {}, {dont_filter: true})}); + }); + + it('validate fromResponseRequest fields type', function() { + //url + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {url: 1})}, 'url parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {url: 'http://example.com'})}); + + //method + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {method: 1})}, + 'method parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {method: 'get'})}); + + //meta + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {meta: 1})}, + 'meta parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {meta: {}})}); + + //body + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {body: 1})}, + 'body parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {body: 'body'})}); +//url + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {method: 'get'})}); + + //meta + 
assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {meta: 1})}, + 'meta parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {meta: {}})}); + + //body + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {body: 1})}, + 'body parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {body: 'body'})}); + + //headers + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {headers: 'Content-type'})}, + 'headers parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {headers: {}})}); + + //cookies + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {cookies: 'a'})}, + 'cookies parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {cookies: {}})}); + + //encoding + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {encoding: 1})}, + 'encoding parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {encoding: 'utf8'})}); + + //priority + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {priority: 'high'})}, + 'priority parameter must be number'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {priority: 1})}); + + //dontFilter + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {dont_filter: 1})}, + 'dont_filter parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {dont_filter: true})}); + //headers + 
assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {headers: 'Content-type'})}, + 'headers parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {headers: {}})}); + + //cookies + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {cookies: 'a'})}, + 'cookies parameter must be object'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {cookies: {}})}); + + //encoding + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {encoding: 1})}, + 'encoding parameter must be string'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {encoding: 'utf8'})}); + + //priority + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {priority: 'high'})}, + 'priority parameter must be number'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {priority: 1})}); + + //dontFilter + assert.throws(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {dont_filter: 1})}, + 'dont_filter parameter must be boolean'); + assert.doesNotThrow(function(){spider.sendFromResponseRequest('http://example.com', function(){}, {dont_filter: true})}); + }); + + + it('generate the json message', function() { + var config = { + base64: true, + method: 'get', + meta: {a: 1}, + body: 'body', + headers: {b: 2}, + cookies: {c: 3}, + encoding: 'utf8', + priority: 1, + dont_filter: true + }; + + var expected_msg = { + type: 'from_response_request', + url: 'http://example.com', + id: '' + spider._requestId, + from_response_request: { + url: 'http://example.com/login', + method: 'get', + meta: {a: 1}, + body: 'body', + headers: {b: 2}, + cookies: {c: 3}, + encoding: 'utf8', + priority: 1, + dont_filter: true, + 
formname: 'name', + formxpath: 'xpath', + formcss: 'css', + formnumber: 1, + formdata: {a: 1}, + clickdata: {b: 2}, + dont_click: true, + }, + base64: config.base64, + method: config.method, + meta: config.meta, + body: config.body, + headers: config.headers, + cookies: config.cookies, + encoding: config.encoding, + priority: config.priority, + dont_filter: config.dont_filter + }; + + var msg = spider.sendFromResponseRequest(expected_msg.url, function(){}, + expected_msg.from_response_request, config); + + assert.deepEqual(JSON.parse(msg), expected_msg); + }); + + it('register the callback', function(){ + var responseId = spider._requestId; + var callback = function() {}; + + var msg = spider.sendFromResponseRequest('http://example.com', callback, {}); + assert.equal(spider.responseMapping[responseId], callback); + }) +});