From 63948bd8554a2316517f10c8aa4901a7e65903ea Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Tue, 28 Jun 2016 23:07:51 -0300 Subject: [PATCH 01/11] init java --- helpers/java/pom.xml | 24 ++++++ helpers/java/scrapystreaming.iml | 18 +++++ .../scrapystreaming/FromResponseRequest.java | 5 ++ .../org/scrapy/scrapystreaming/Logger.java | 36 +++++++++ .../org/scrapy/scrapystreaming/Request.java | 5 ++ .../org/scrapy/scrapystreaming/Spider.java | 27 +++++++ .../messages/CloseMessage.java | 6 ++ .../scrapystreaming/messages/LogMessage.java | 13 ++++ .../scrapystreaming/messages/Message.java | 25 +++++++ .../messages/ResponseMessage.java | 5 ++ .../messages/SpiderMessage.java | 21 ++++++ .../scrapy/scrapystreaming/utils/Utils.java | 8 ++ .../org/scrapy/scrapystreaming/BaseStd.java | 20 +++++ .../scrapy/scrapystreaming/LoggerTest.java | 74 +++++++++++++++++++ .../scrapy/scrapystreaming/SpiderTest.java | 71 ++++++++++++++++++ 15 files changed, 358 insertions(+) create mode 100644 helpers/java/pom.xml create mode 100644 helpers/java/scrapystreaming.iml create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java create mode 100644 
helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/BaseStd.java create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java diff --git a/helpers/java/pom.xml b/helpers/java/pom.xml new file mode 100644 index 0000000..1d0afbb --- /dev/null +++ b/helpers/java/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + + org.scrapy.scrapystreaming + scrapystreaming + 0.1-SNAPSHOT + + + + com.google.code.gson + gson + 2.7 + + + junit + junit + 4.12 + test + + + \ No newline at end of file diff --git a/helpers/java/scrapystreaming.iml b/helpers/java/scrapystreaming.iml new file mode 100644 index 0000000..084719c --- /dev/null +++ b/helpers/java/scrapystreaming.iml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java new file mode 100644 index 0000000..e86798e --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java @@ -0,0 +1,5 @@ +package org.scrapy.scrapystreaming; + + +public class FromResponseRequest { +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java new file mode 100644 index 0000000..372da29 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java @@ -0,0 +1,36 @@ +package org.scrapy.scrapystreaming; + + +import org.scrapy.scrapystreaming.messages.LogMessage; + +public class Logger { + + public enum LEVEL { + CRITICAL, ERROR, WARNING, INFO, DEBUG + } + + public static void log(String message, LEVEL level) { + new LogMessage(message, level.name()).sendMessage(); + } + + public static void logCritical(String 
message) { + log(message, LEVEL.CRITICAL); + } + + public static void logError(String message) { + log(message, LEVEL.ERROR); + } + + public static void logWarning(String message) { + log(message, LEVEL.WARNING); + } + + public static void logInfo(String message) { + log(message, LEVEL.INFO); + } + + public static void logDebug(String message) { + log(message, LEVEL.DEBUG); + } +} + diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java new file mode 100644 index 0000000..946a215 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java @@ -0,0 +1,5 @@ +package org.scrapy.scrapystreaming; + + +public class Request { +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java new file mode 100644 index 0000000..50522ce --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -0,0 +1,27 @@ +package org.scrapy.scrapystreaming; + +import org.scrapy.scrapystreaming.messages.CloseMessage; +import org.scrapy.scrapystreaming.messages.ResponseMessage; +import org.scrapy.scrapystreaming.messages.SpiderMessage; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + + +public abstract class Spider { + public String name = "ExternalSpider"; + public List start_urls = new ArrayList(0); + public List allowed_domains; + public HashMap custom_settings; + + public void start() { + new SpiderMessage(name, start_urls, allowed_domains, custom_settings).sendMessage(); + } + + public void close() { + new CloseMessage().sendMessage(); + } + + public abstract void parse(ResponseMessage response); +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java new file mode 100644 index 0000000..8951318 --- 
/dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java @@ -0,0 +1,6 @@ +package org.scrapy.scrapystreaming.messages; + + +public class CloseMessage extends Message { + public final String type = "close"; +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java new file mode 100644 index 0000000..14508f5 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java @@ -0,0 +1,13 @@ +package org.scrapy.scrapystreaming.messages; + + +public class LogMessage extends Message{ + public final String type = "log"; + public final String message; + public final String level; + + public LogMessage(String message, String level) { + this.message = message; + this.level = level; + } +} \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java new file mode 100644 index 0000000..3773602 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java @@ -0,0 +1,25 @@ +package org.scrapy.scrapystreaming.messages; + + +import org.scrapy.scrapystreaming.utils.Utils; + +public class Message { + + public String sendMessage() { + String message = this.toString(); + System.out.println(message); + System.out.flush(); + + return message; + } + + @Override + public String toString() { + return Utils.gson.toJson(this); + } + + @Override + public boolean equals(Object compare) { + return this.toString().equals(compare.toString()); + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java new file mode 100644 index 0000000..84d0342 --- /dev/null +++ 
b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java @@ -0,0 +1,5 @@ +package org.scrapy.scrapystreaming.messages; + + +public class ResponseMessage { +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java new file mode 100644 index 0000000..3920b30 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java @@ -0,0 +1,21 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.List; +import java.util.HashMap; + +public class SpiderMessage extends Message { + public final String type = "spider"; + public final String name; + public final List start_urls; + public final List allowed_domains; + public final HashMap custom_settings; + + public SpiderMessage(String name, List start_urls, List allowed_domains, + HashMap custom_settings) { + this.name = name; + this.start_urls = start_urls; + this.allowed_domains = allowed_domains; + this.custom_settings = custom_settings; + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java new file mode 100644 index 0000000..7eba46f --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java @@ -0,0 +1,8 @@ +package org.scrapy.scrapystreaming.utils; + + +import com.google.gson.Gson; + +public class Utils { + public static Gson gson = new Gson(); +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/BaseStd.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/BaseStd.java new file mode 100644 index 0000000..e66414d --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/BaseStd.java @@ -0,0 +1,20 @@ +package org.scrapy.scrapystreaming; + + +import com.google.gson.Gson; +import org.junit.Before; + +import java.io.ByteArrayOutputStream; +import 
java.io.PrintStream; + +public class BaseStd { + ByteArrayOutputStream out; + Gson gson = new Gson(); + + @Before + public void setUp() { + out = new ByteArrayOutputStream(); + System.setOut(new PrintStream(out)); + } + +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java new file mode 100644 index 0000000..43444af --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java @@ -0,0 +1,74 @@ +package org.scrapy.scrapystreaming; + + +import org.junit.Assert; +import org.junit.Test; +import org.scrapy.scrapystreaming.messages.LogMessage; + + + +public class LoggerTest extends BaseStd { + + @Test + public void log() { + for (Logger.LEVEL level: Logger.LEVEL.values()) { + Logger.log("message", level); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("message", level.name()); + out.reset(); + + Assert.assertEquals(logExpected, logMessage); + } + } + + @Test + public void logCritical() throws Exception { + Logger.logCritical("critical"); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("critical", Logger.LEVEL.CRITICAL.name()); + + Assert.assertEquals(logMessage, logExpected); + } + + @Test + public void logError() throws Exception { + Logger.logError("error"); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("error", Logger.LEVEL.ERROR.name()); + + Assert.assertEquals(logMessage, logExpected); + } + + @Test + public void logWarning() throws Exception { + Logger.logWarning("warn"); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("warn", Logger.LEVEL.WARNING.name()); + + Assert.assertEquals(logMessage, logExpected); + } + + @Test + public void logInfo() throws 
Exception { + Logger.logInfo("info"); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("info", Logger.LEVEL.INFO.name()); + + Assert.assertEquals(logMessage, logExpected); + } + + @Test + public void logDebug() throws Exception { + Logger.logDebug("debug"); + + LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); + LogMessage logExpected = new LogMessage("debug", Logger.LEVEL.DEBUG.name()); + + Assert.assertEquals(logMessage, logExpected); + } +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java new file mode 100644 index 0000000..ba7efce --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java @@ -0,0 +1,71 @@ +package org.scrapy.scrapystreaming; + + +import org.junit.Assert; +import org.junit.Test; +import org.scrapy.scrapystreaming.messages.CloseMessage; +import org.scrapy.scrapystreaming.messages.ResponseMessage; +import org.scrapy.scrapystreaming.messages.SpiderMessage; + +import java.util.ArrayList; +import java.util.HashMap; + +public class SpiderTest extends BaseStd { + + @Test + public void start() { + final ArrayList urls = new ArrayList(0); + urls.add("http://example.com"); + urls.add("http://test.com"); + final ArrayList domains = new ArrayList(0); + domains.add("example.com"); + domains.add("test.com"); + final HashMap settings = new HashMap(0); + settings.put("setting 1", "value"); + settings.put("setting 2", "value"); + settings.put("setting 3", "value"); + + class TestSpider extends Spider { + + TestSpider() { + name = "test"; + start_urls = urls; + allowed_domains = domains; + custom_settings = settings; + } + + public void parse(ResponseMessage response) { + + } + } + new TestSpider().start(); + SpiderMessage spider = gson.fromJson(out.toString(), SpiderMessage.class); + SpiderMessage spiderExpected = new 
SpiderMessage("test", urls, domains, settings); + + Assert.assertEquals(spiderExpected, spider); + + String json = "{\"type\":\"spider\",\"name\":\"test\",\"start_urls\":[\"http://example.com\"," + + "\"http://test.com\"],\"allowed_domains\":[\"example.com\",\"test.com\"]," + + "\"custom_settings\":{\"setting 1\":\"value\",\"setting 2\":\"value\",\"setting 3\":\"value\"}}"; + Assert.assertEquals(json.trim(), out.toString().trim()); + } + + @Test + public void close() { + class TestSpider extends Spider{ + public void parse(ResponseMessage response) {} + } + TestSpider spider = new TestSpider(); + spider.start(); + out.reset(); + spider.close(); + + CloseMessage close = gson.fromJson(out.toString(), CloseMessage.class); + CloseMessage closeExpected = new CloseMessage(); + + Assert.assertEquals(close, closeExpected); + + String json = "{\"type\":\"close\"}"; + Assert.assertEquals(json.trim(), out.toString().trim()); + } +} From c66aac700ddb7370f15f1b92099e6d0842bbd545 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Thu, 30 Jun 2016 22:45:53 -0300 Subject: [PATCH 02/11] tmp --- .../org/scrapy/scrapystreaming/Spider.java | 29 ++++++++++++++++--- .../scrapystreaming/messages/LogMessage.java | 4 +-- .../messages/SpiderMessage.java | 8 ++--- .../scrapy/scrapystreaming/SpiderTest.java | 4 +-- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index 50522ce..4d72895 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -1,8 +1,7 @@ package org.scrapy.scrapystreaming; -import org.scrapy.scrapystreaming.messages.CloseMessage; -import org.scrapy.scrapystreaming.messages.ResponseMessage; -import org.scrapy.scrapystreaming.messages.SpiderMessage; +import org.scrapy.scrapystreaming.core.CommunicationProtocol; +import 
org.scrapy.scrapystreaming.messages.*; import java.util.ArrayList; import java.util.HashMap; @@ -14,9 +13,19 @@ public abstract class Spider { public List start_urls = new ArrayList(0); public List allowed_domains; public HashMap custom_settings; + protected boolean isRunning = false; + protected CommunicationProtocol protocol; + + + public final void start() throws Exception { + if (isRunning) + throw new Exception("Spider already running"); - public void start() { new SpiderMessage(name, start_urls, allowed_domains, custom_settings).sendMessage(); + protocol = new CommunicationProtocol(); + protocol.start(); + + isRunning = true; } public void close() { @@ -24,4 +33,16 @@ public void close() { } public abstract void parse(ResponseMessage response); + } + +class Teste extends Spider { + + public void parse(ResponseMessage response) { + + } + + public static void main (String args[]) throws Exception { + new Teste().start(); + } +} \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java index 14508f5..86eb35a 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java @@ -3,8 +3,8 @@ public class LogMessage extends Message{ public final String type = "log"; - public final String message; - public final String level; + public String message; + public String level; public LogMessage(String message, String level) { this.message = message; diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java index 3920b30..343e58d 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java +++ 
b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java @@ -6,10 +6,10 @@ public class SpiderMessage extends Message { public final String type = "spider"; - public final String name; - public final List start_urls; - public final List allowed_domains; - public final HashMap custom_settings; + public String name; + public List start_urls; + public List allowed_domains; + public HashMap custom_settings; public SpiderMessage(String name, List start_urls, List allowed_domains, HashMap custom_settings) { diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java index ba7efce..3bcd939 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java @@ -13,7 +13,7 @@ public class SpiderTest extends BaseStd { @Test - public void start() { + public void start() throws Exception { final ArrayList urls = new ArrayList(0); urls.add("http://example.com"); urls.add("http://test.com"); @@ -51,7 +51,7 @@ public void parse(ResponseMessage response) { } @Test - public void close() { + public void close() throws Exception { class TestSpider extends Spider{ public void parse(ResponseMessage response) {} } From 71023f51892bcb632dbf2a101dd2f1ea1981a920 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Sat, 2 Jul 2016 12:57:03 -0300 Subject: [PATCH 03/11] basic lib --- .../org/scrapy/scrapystreaming/Logger.java | 13 ++-- .../org/scrapy/scrapystreaming/Request.java | 22 +++++- .../org/scrapy/scrapystreaming/Spider.java | 26 +++---- .../scrapy/scrapystreaming/core/Callback.java | 19 +++++ .../core/CommunicationProtocol.java | 74 +++++++++++++++++++ .../scrapystreaming/core/SpiderException.java | 12 +++ .../messages/CloseMessage.java | 6 ++ .../messages/ErrorMessage.java | 14 ++++ .../messages/ExceptionMessage.java | 14 ++++ .../scrapystreaming/messages/LogMessage.java | 9 
++- .../scrapystreaming/messages/Message.java | 31 +++++++- .../messages/ReceivedMessage.java | 12 +++ .../messages/RequestMessage.java | 25 +++++++ .../messages/ResponseMessage.java | 17 ++++- .../messages/SpiderMessage.java | 14 ++-- .../messages/StatusMessage.java | 13 ++++ .../scrapy/scrapystreaming/utils/Utils.java | 4 + .../CommunicationProtocolTest.java | 5 ++ .../scrapy/scrapystreaming/LoggerTest.java | 3 +- .../scrapy/scrapystreaming/SpiderTest.java | 34 +++++---- 20 files changed, 317 insertions(+), 50 deletions(-) create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/core/SpiderException.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ErrorMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ExceptionMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReceivedMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java index 372da29..f0fa178 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java @@ -1,6 +1,7 @@ package org.scrapy.scrapystreaming; +import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.messages.LogMessage; public class Logger { @@ -9,27 +10,27 @@ public enum LEVEL { CRITICAL, 
ERROR, WARNING, INFO, DEBUG } - public static void log(String message, LEVEL level) { + public static void log(String message, LEVEL level) throws SpiderException { new LogMessage(message, level.name()).sendMessage(); } - public static void logCritical(String message) { + public static void logCritical(String message) throws SpiderException { log(message, LEVEL.CRITICAL); } - public static void logError(String message) { + public static void logError(String message) throws SpiderException { log(message, LEVEL.ERROR); } - public static void logWarning(String message) { + public static void logWarning(String message) throws SpiderException { log(message, LEVEL.WARNING); } - public static void logInfo(String message) { + public static void logInfo(String message) throws SpiderException { log(message, LEVEL.INFO); } - public static void logDebug(String message) { + public static void logDebug(String message) throws SpiderException { log(message, LEVEL.DEBUG); } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java index 946a215..840cefd 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java @@ -1,5 +1,25 @@ package org.scrapy.scrapystreaming; -public class Request { +import org.scrapy.scrapystreaming.core.Callback; +import org.scrapy.scrapystreaming.messages.RequestMessage; +import org.scrapy.scrapystreaming.core.SpiderException; +import org.scrapy.scrapystreaming.utils.Utils; + + +public class Request extends RequestMessage { + + public Request(String url) { + this.url = url; + } + + public void open(Callback callback) throws SpiderException { + String id = this.id; + if (id == null) + id = callback.toString(); + this.id = id; + Utils.responseMapping.put(id, callback); + + sendMessage(); + } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java 
b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index 4d72895..75f1e83 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -2,34 +2,30 @@ import org.scrapy.scrapystreaming.core.CommunicationProtocol; import org.scrapy.scrapystreaming.messages.*; +import org.scrapy.scrapystreaming.core.SpiderException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - - -public abstract class Spider { - public String name = "ExternalSpider"; - public List start_urls = new ArrayList(0); - public List allowed_domains; - public HashMap custom_settings; - protected boolean isRunning = false; - protected CommunicationProtocol protocol; +public abstract class Spider extends SpiderMessage { + protected transient boolean isRunning = false; + protected transient CommunicationProtocol protocol; public final void start() throws Exception { if (isRunning) throw new Exception("Spider already running"); - new SpiderMessage(name, start_urls, allowed_domains, custom_settings).sendMessage(); - protocol = new CommunicationProtocol(); + sendMessage(); + protocol = new CommunicationProtocol(this); protocol.start(); isRunning = true; } public void close() { - new CloseMessage().sendMessage(); + try { + new CloseMessage().sendMessage(); + } catch (SpiderException e) { + e.printStackTrace(); + } } public abstract void parse(ResponseMessage response); diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java new file mode 100644 index 0000000..aaddf6a --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java @@ -0,0 +1,19 @@ +package org.scrapy.scrapystreaming.core; + + +import org.scrapy.scrapystreaming.messages.ResponseMessage; + + +public abstract class Callback { + ResponseMessage response; + + public void 
setResponse(ResponseMessage response) { + this.response = response; + } + + public ResponseMessage getResponse() { + return response; + } + + public abstract void onResponse(ResponseMessage response); +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java new file mode 100644 index 0000000..ccc7b3c --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java @@ -0,0 +1,74 @@ +package org.scrapy.scrapystreaming.core; + + +import org.scrapy.scrapystreaming.Spider; +import org.scrapy.scrapystreaming.messages.*; +import org.scrapy.scrapystreaming.utils.Utils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +public class CommunicationProtocol extends Thread { + BufferedReader in; + Spider spider; + + public CommunicationProtocol(Spider spider) { + this.spider = spider; + in = new BufferedReader(new InputStreamReader(System.in)); + } + + @Override + public void run() { + while (true) { + try { + String line = in.readLine(); + ReceivedMessage msg = Utils.gson.fromJson(line, ReceivedMessage.class); + + messageReceived(msg, line); + } catch (IOException e) { + System.err.println("There is a problem in the communication channel: " + e.getMessage()); + } catch (SpiderException e) { + System.err.println(e.getMessage()); + } + } + } + + protected void messageReceived(ReceivedMessage msg, String line) throws SpiderException { + if (msg.type.equals("status")) { + StatusMessage status = Utils.gson.fromJson(line, StatusMessage.class); + onStatus(status); + } else if (msg.type.equals("response")) { + ResponseMessage response = Utils.gson.fromJson(line, ResponseMessage.class); + onResponse(response); + } else if (msg.type.equals("exception")) { + ExceptionMessage exception = Utils.gson.fromJson(line, ExceptionMessage.class); + onException(exception); + } 
else if (msg.type.equals("error")) { + ErrorMessage error = Utils.gson.fromJson(line, ErrorMessage.class); + onError(error); + } + } + + protected void onStatus(StatusMessage status) throws SpiderException { + if (!status.status.equals("ready")) { + throw new SpiderException("There is a problem in the communication channel. Received status: " + status.status); + } + } + + protected void onResponse(ResponseMessage response) { + if (response.id.equals("parse")) { + spider.parse(response); + } else { + Utils.responseMapping.get(response.id).onResponse(response); + } + } + + protected void onException(ExceptionMessage exception) { + + } + + protected void onError(ErrorMessage error) { + + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/SpiderException.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/SpiderException.java new file mode 100644 index 0000000..eaf92fa --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/SpiderException.java @@ -0,0 +1,12 @@ +package org.scrapy.scrapystreaming.core; + + +public class SpiderException extends Exception { + public SpiderException(){ + + } + + public SpiderException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java index 8951318..846e786 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/CloseMessage.java @@ -1,6 +1,12 @@ package org.scrapy.scrapystreaming.messages; +import java.util.List; + public class CloseMessage extends Message { public final String type = "close"; + + public List validator() { + return null; + } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ErrorMessage.java 
b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ErrorMessage.java new file mode 100644 index 0000000..6f59955 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ErrorMessage.java @@ -0,0 +1,14 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.List; + +public class ErrorMessage extends Message { + public final String type = "error"; + public String received_message; + public String details; + + public List validator() { + return null; + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ExceptionMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ExceptionMessage.java new file mode 100644 index 0000000..2aa621c --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ExceptionMessage.java @@ -0,0 +1,14 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.List; + +public class ExceptionMessage extends Message { + public final String type = "exception"; + public String received_message; + public String exception; + + public List validator() { + return null; + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java index 86eb35a..529f076 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/LogMessage.java @@ -1,7 +1,10 @@ package org.scrapy.scrapystreaming.messages; -public class LogMessage extends Message{ +import java.util.Arrays; +import java.util.List; + +public class LogMessage extends Message { public final String type = "log"; public String message; public String level; @@ -10,4 +13,8 @@ public LogMessage(String message, String level) { this.message = message; this.level = level; } + + public List validator() { + return Arrays.asList("type", "message", "level"); + } } \ 
No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java index 3773602..f39dd23 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/Message.java @@ -1,18 +1,43 @@ package org.scrapy.scrapystreaming.messages; +import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.utils.Utils; -public class Message { +import java.lang.reflect.Field; +import java.util.List; - public String sendMessage() { - String message = this.toString(); +public abstract class Message { + protected transient List requiredFields = validator(); + + public String sendMessage() throws SpiderException{ + validate(); + String message = Utils.gson.toJson(this); System.out.println(message); System.out.flush(); return message; } + public void validate() throws SpiderException { + try { + if (requiredFields != null) { + for (String name : requiredFields) { + Field field = this.getClass().getField(name); + Object value = field.get(this); + if (value == null) + throw new SpiderException("Required field not provided: " + name); + } + } + } catch (NoSuchFieldException e) { + e.printStackTrace(); + } catch (IllegalAccessException e) { + e.printStackTrace(); + } + } + + public abstract List validator(); + @Override public String toString() { return Utils.gson.toJson(this); diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReceivedMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReceivedMessage.java new file mode 100644 index 0000000..2271018 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReceivedMessage.java @@ -0,0 +1,12 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.List; + +public class ReceivedMessage extends Message { + 
public String type; + + public List validator() { + return null; + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java new file mode 100644 index 0000000..39dc079 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java @@ -0,0 +1,25 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +public class RequestMessage extends Message { + public final String type = "request"; + public String id; + public String url; + public Boolean base64; + public String method; + public HashMap meta; + public String body; + public HashMap headers; + public HashMap cookies; + public String encoding; + public Integer priority; + public Boolean dont_filter; + + public List validator() { + return Arrays.asList("id", "url"); + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java index 84d0342..b84f23f 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java @@ -1,5 +1,20 @@ package org.scrapy.scrapystreaming.messages; -public class ResponseMessage { +import java.util.HashMap; +import java.util.List; + +public class ResponseMessage extends Message { + public final String type = "response"; + public String id; + public String url; + public HashMap headers; + public Integer status; + public String body; + public HashMap meta; + public List flags; + + public List validator() { + return null; + } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java index 
343e58d..a623f4b 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java @@ -1,21 +1,19 @@ package org.scrapy.scrapystreaming.messages; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.HashMap; public class SpiderMessage extends Message { public final String type = "spider"; - public String name; - public List start_urls; + public String name = "ExternalSpider"; + public List start_urls = new ArrayList(0); public List allowed_domains; public HashMap custom_settings; - public SpiderMessage(String name, List start_urls, List allowed_domains, - HashMap custom_settings) { - this.name = name; - this.start_urls = start_urls; - this.allowed_domains = allowed_domains; - this.custom_settings = custom_settings; + public List validator() { + return Arrays.asList("type", "name", "start_urls"); } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java new file mode 100644 index 0000000..d28ec2c --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java @@ -0,0 +1,13 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.List; + +public class StatusMessage extends Message { + public final String type = "status"; + public String status; + + public List validator() { + return null; + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java index 7eba46f..c9c6f05 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/utils/Utils.java @@ -2,7 +2,11 @@ import com.google.gson.Gson; +import org.scrapy.scrapystreaming.core.Callback; + +import 
java.util.HashMap; public class Utils { public static Gson gson = new Gson(); + public static HashMap responseMapping = new HashMap(0); } diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java new file mode 100644 index 0000000..8edd73a --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java @@ -0,0 +1,5 @@ +package org.scrapy.scrapystreaming; + + +public class CommunicationProtocolTest { +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java index 43444af..ddc4ffd 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java @@ -3,6 +3,7 @@ import org.junit.Assert; import org.junit.Test; +import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.messages.LogMessage; @@ -10,7 +11,7 @@ public class LoggerTest extends BaseStd { @Test - public void log() { + public void log() throws SpiderException { for (Logger.LEVEL level: Logger.LEVEL.values()) { Logger.log("message", level); diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java index 3bcd939..cec669f 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java @@ -10,6 +10,20 @@ import java.util.ArrayList; import java.util.HashMap; +class TestSpider extends Spider { + + TestSpider(ArrayList urls, ArrayList domains, HashMap settings) { + name = "test"; + start_urls = urls; + allowed_domains = domains; + custom_settings = settings; + } + + public void parse(ResponseMessage response) { + + } +} + public class 
SpiderTest extends BaseStd { @Test @@ -25,22 +39,14 @@ public void start() throws Exception { settings.put("setting 2", "value"); settings.put("setting 3", "value"); - class TestSpider extends Spider { - - TestSpider() { - name = "test"; - start_urls = urls; - allowed_domains = domains; - custom_settings = settings; - } + new TestSpider(urls, domains, settings).start(); - public void parse(ResponseMessage response) { - - } - } - new TestSpider().start(); SpiderMessage spider = gson.fromJson(out.toString(), SpiderMessage.class); - SpiderMessage spiderExpected = new SpiderMessage("test", urls, domains, settings); + SpiderMessage spiderExpected = new SpiderMessage(); + spiderExpected.name = "test"; + spiderExpected.start_urls = urls; + spiderExpected.allowed_domains = domains; + spiderExpected.custom_settings = settings; Assert.assertEquals(spiderExpected, spider); From cd87928c9d75425f54b7dc917f30a39e7eb41545 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Mon, 4 Jul 2016 03:06:48 -0300 Subject: [PATCH 04/11] initial lib --- .../org/scrapy/scrapystreaming/Spider.java | 16 +++----- .../core/CommunicationProtocol.java | 11 +++-- .../CommunicationProtocolTest.java | 5 --- .../scrapy/scrapystreaming/RequestTest.java | 41 +++++++++++++++++++ .../scrapy/scrapystreaming/SpiderTest.java | 9 ++++ 5 files changed, 62 insertions(+), 20 deletions(-) delete mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index 75f1e83..f6bfe6e 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -9,9 +9,9 @@ public abstract class Spider extends SpiderMessage { protected transient boolean isRunning = false; protected 
transient CommunicationProtocol protocol; - public final void start() throws Exception { + public final void start() throws SpiderException { if (isRunning) - throw new Exception("Spider already running"); + throw new SpiderException("Spider already running"); sendMessage(); protocol = new CommunicationProtocol(this); @@ -30,15 +30,9 @@ public void close() { public abstract void parse(ResponseMessage response); -} - -class Teste extends Spider { - - public void parse(ResponseMessage response) { - + public void onException(ExceptionMessage exception) throws SpiderException { + throw new SpiderException("Scrapy raised an exception. Message sent: " + exception.received_message + + "; Exception message: " + exception.exception); } - public static void main (String args[]) throws Exception { - new Teste().start(); - } } \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java index ccc7b3c..e267d65 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java @@ -47,6 +47,8 @@ protected void messageReceived(ReceivedMessage msg, String line) throws SpiderEx } else if (msg.type.equals("error")) { ErrorMessage error = Utils.gson.fromJson(line, ErrorMessage.class); onError(error); + } else { + throw new SpiderException("Invalid message type: " + msg.type); } } @@ -64,11 +66,12 @@ protected void onResponse(ResponseMessage response) { } } - protected void onException(ExceptionMessage exception) { - + protected void onException(ExceptionMessage exception) throws SpiderException { + spider.onException(exception); } - protected void onError(ErrorMessage error) { - + protected void onError(ErrorMessage error) throws SpiderException { + throw new SpiderException("Spider error. 
Message sent: " + error.received_message + + "; Error details: " + error.details); } } diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java deleted file mode 100644 index 8edd73a..0000000 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/CommunicationProtocolTest.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.scrapy.scrapystreaming; - - -public class CommunicationProtocolTest { -} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java new file mode 100644 index 0000000..b1da630 --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java @@ -0,0 +1,41 @@ +package org.scrapy.scrapystreaming; + + +import org.junit.Assert; +import org.junit.Test; +import org.scrapy.scrapystreaming.core.Callback; +import org.scrapy.scrapystreaming.core.SpiderException; +import org.scrapy.scrapystreaming.messages.ResponseMessage; + +public class RequestTest extends BaseStd { + + @Test + public void openGeneratesID() throws SpiderException { + Request r = new Request("http://example.com"); + Assert.assertEquals(r.id, null); + + r.open(new Callback() { + @Override + public void onResponse(ResponseMessage response) { + // + } + }); + + Assert.assertNotEquals(r.id, null); + } + + @Test + public void openKeepsID() throws SpiderException { + Request r = new Request("http://example.com"); + r.id = "test"; + + r.open(new Callback() { + @Override + public void onResponse(ResponseMessage response) { + // + } + }); + + Assert.assertEquals(r.id, "test"); + } +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java index cec669f..d6e3ecd 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java +++ 
b/helpers/java/src/test/java/org/scrapy/scrapystreaming/SpiderTest.java @@ -3,6 +3,7 @@ import org.junit.Assert; import org.junit.Test; +import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.messages.CloseMessage; import org.scrapy.scrapystreaming.messages.ResponseMessage; import org.scrapy.scrapystreaming.messages.SpiderMessage; @@ -39,6 +40,7 @@ public void start() throws Exception { settings.put("setting 2", "value"); settings.put("setting 3", "value"); + // creates spider new TestSpider(urls, domains, settings).start(); SpiderMessage spider = gson.fromJson(out.toString(), SpiderMessage.class); @@ -56,6 +58,13 @@ public void start() throws Exception { Assert.assertEquals(json.trim(), out.toString().trim()); } + @Test(expected = SpiderException.class) + public void startRunsOnce() throws SpiderException{ + TestSpider s = new TestSpider(new ArrayList(), null, null); + s.start(); + s.start(); + } + @Test public void close() throws Exception { class TestSpider extends Spider{ From ad27775c458d4a67eeac088160dd1064db62a53d Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Tue, 5 Jul 2016 15:08:11 -0300 Subject: [PATCH 05/11] comments comments comments --- .../scrapystreaming/FromResponseRequest.java | 23 ++++++++++++++- .../org/scrapy/scrapystreaming/Logger.java | 29 +++++++++++++++++++ .../org/scrapy/scrapystreaming/Request.java | 14 ++++++++- .../org/scrapy/scrapystreaming/Spider.java | 25 ++++++++++++++-- .../scrapy/scrapystreaming/core/Callback.java | 15 ++++++++++ .../core/CommunicationProtocol.java | 6 ++-- .../messages/FromResponseMessage.java | 13 +++++++++ .../messages/FromResponseRequestMessage.java | 27 +++++++++++++++++ .../{StatusMessage.java => ReadyMessage.java} | 4 +-- 9 files changed, 147 insertions(+), 9 deletions(-) create mode 100644 helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java create mode 100644 
helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java rename helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/{StatusMessage.java => ReadyMessage.java} (65%) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java index e86798e..fe7ed74 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/FromResponseRequest.java @@ -1,5 +1,26 @@ package org.scrapy.scrapystreaming; -public class FromResponseRequest { +import org.scrapy.scrapystreaming.core.Callback; +import org.scrapy.scrapystreaming.core.SpiderException; +import org.scrapy.scrapystreaming.messages.FromResponseMessage; +import org.scrapy.scrapystreaming.messages.FromResponseRequestMessage; +import org.scrapy.scrapystreaming.utils.Utils; + +public class FromResponseRequest extends FromResponseRequestMessage{ + + public FromResponseRequest(String url, FromResponseMessage from_response_request) { + this.url = url; + this.from_response_request = from_response_request; + } + + public void open(Callback callback) throws SpiderException { + String id = this.id; + if (id == null) + id = callback.toString(); + this.id = id; + + Utils.responseMapping.put(id, callback); + sendMessage(); + } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java index f0fa178..dccca0c 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java @@ -4,32 +4,61 @@ import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.messages.LogMessage; +/** + * Helper class to handle log messages + */ public class Logger { public enum LEVEL { CRITICAL, ERROR, WARNING, INFO, 
DEBUG } + /** + * Print a log message in the scrapy streaming logger + * @param message message + * @param level log level + * @throws SpiderException + */ public static void log(String message, LEVEL level) throws SpiderException { new LogMessage(message, level.name()).sendMessage(); } + /** + * Print a critical message in the scrapy streaming logger + * @param message message + **/ public static void logCritical(String message) throws SpiderException { log(message, LEVEL.CRITICAL); } + /** + * Print a error message in the scrapy streaming logger + * @param message message + **/ public static void logError(String message) throws SpiderException { log(message, LEVEL.ERROR); } + /** + * Print a warning in the scrapy streaming logger + * @param message message + **/ public static void logWarning(String message) throws SpiderException { log(message, LEVEL.WARNING); } + /** + * Print a info message in the scrapy streaming logger + * @param message message + **/ public static void logInfo(String message) throws SpiderException { log(message, LEVEL.INFO); } + /** + * Print a debug message in the scrapy streaming logger + * @param message message + **/ public static void logDebug(String message) throws SpiderException { log(message, LEVEL.DEBUG); } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java index 840cefd..634a6ed 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Request.java @@ -6,13 +6,25 @@ import org.scrapy.scrapystreaming.core.SpiderException; import org.scrapy.scrapystreaming.utils.Utils; - +/** + * Open a new request + */ public class Request extends RequestMessage { + /** + * Creates the request object, passing its url + * @param url request URL + */ public Request(String url) { this.url = url; } + /** + * Open the request given its callback. 
+ * The callback function will be called with the response as soon as it's available. + * @param callback response callback + * @throws SpiderException + */ public void open(Callback callback) throws SpiderException { String id = this.id; if (id == null) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index f6bfe6e..e8bf8a8 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -5,10 +5,17 @@ import org.scrapy.scrapystreaming.core.SpiderException; +/** + * This class lets you create the External Spider and run / stop it. + */ public abstract class Spider extends SpiderMessage { - protected transient boolean isRunning = false; - protected transient CommunicationProtocol protocol; + private transient boolean isRunning = false; + private transient CommunicationProtocol protocol; + /** + * Start the Spider execution + * @throws SpiderException + */ public final void start() throws SpiderException { if (isRunning) throw new SpiderException("Spider already running"); @@ -20,6 +27,10 @@ public final void start() throws SpiderException { isRunning = true; } + /** + * Stop the spider execution, sending the close message. + * The process will be killed as soon as Scrapy Streaming receives this message. + */ public void close() { try { new CloseMessage().sendMessage(); @@ -28,8 +39,18 @@ public void close() { } } + /** + * The callback of initial_urls responses. + * @param response response data + */ public abstract void parse(ResponseMessage response); + /** + * This method is called when Scrapy raises an exception and sends the exception message. + * If you want to analyze the exception, or just ignore the problem, override this function. 
+ * @param exception exception message sent by Scrapy Streaming + * @throws SpiderException + */ public void onException(ExceptionMessage exception) throws SpiderException { throw new SpiderException("Scrapy raised an exception. Message sent: " + exception.received_message + "; Exception message: " + exception.exception); diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java index aaddf6a..a8d4e71 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java @@ -4,16 +4,31 @@ import org.scrapy.scrapystreaming.messages.ResponseMessage; +/** + * Represents a callback function to handle a response + */ public abstract class Callback { ResponseMessage response; + /** + * Set the response content + * @param response response message + */ public void setResponse(ResponseMessage response) { this.response = response; } + /** + * Get the response content + * @return resoponse data + */ public ResponseMessage getResponse() { return response; } + /** + * Method to handle to response content + * @param response + */ public abstract void onResponse(ResponseMessage response); } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java index e267d65..e8d3bfb 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java @@ -35,8 +35,8 @@ public void run() { } protected void messageReceived(ReceivedMessage msg, String line) throws SpiderException { - if (msg.type.equals("status")) { - StatusMessage status = Utils.gson.fromJson(line, StatusMessage.class); + if (msg.type.equals("ready")) { + ReadyMessage status = 
Utils.gson.fromJson(line, ReadyMessage.class); onStatus(status); } else if (msg.type.equals("response")) { ResponseMessage response = Utils.gson.fromJson(line, ResponseMessage.class); @@ -52,7 +52,7 @@ protected void messageReceived(ReceivedMessage msg, String line) throws SpiderEx } } - protected void onStatus(StatusMessage status) throws SpiderException { + protected void onStatus(ReadyMessage status) throws SpiderException { if (!status.status.equals("ready")) { throw new SpiderException("There is a problem in the communication channel. Received status: " + status.status); } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java new file mode 100644 index 0000000..4aba9d7 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java @@ -0,0 +1,13 @@ +package org.scrapy.scrapystreaming.messages; + +import java.util.HashMap; + +public class FromResponseMessage { + public String formname; + public String formxpath; + public String formcss; + public Integer formnumber; + public HashMap formdata; + public HashMap clickdata; + public Boolean dont_click; +} \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java new file mode 100644 index 0000000..7ac2f19 --- /dev/null +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java @@ -0,0 +1,27 @@ +package org.scrapy.scrapystreaming.messages; + + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + + +public class FromResponseRequestMessage extends Message { + public final String type = "from_response_request"; + public String id; + public String url; + public Boolean base64; + public String method; + 
public HashMap meta; + public String body; + public HashMap headers; + public HashMap cookies; + public String encoding; + public Integer priority; + public Boolean dont_filter; + public FromResponseMessage from_response_request; + + public List validator() { + return Arrays.asList("id", "url", "from_response_request"); + } +} diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReadyMessage.java similarity index 65% rename from helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java rename to helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReadyMessage.java index d28ec2c..8d12d64 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/StatusMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ReadyMessage.java @@ -3,8 +3,8 @@ import java.util.List; -public class StatusMessage extends Message { - public final String type = "status"; +public class ReadyMessage extends Message { + public final String type = "ready"; public String status; public List validator() { From 699b5d3ccd40ea1ffe886ec9387faa3a22d48efe Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Tue, 5 Jul 2016 16:24:12 -0300 Subject: [PATCH 06/11] fixes --- .../main/java/org/scrapy/scrapystreaming/Spider.java | 1 + .../scrapystreaming/core/CommunicationProtocol.java | 12 ++++++------ .../scrapystreaming/messages/ResponseMessage.java | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index e8bf8a8..cbad4a3 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -1,5 +1,6 @@ package org.scrapy.scrapystreaming; +import org.scrapy.scrapystreaming.core.Callback; import 
org.scrapy.scrapystreaming.core.CommunicationProtocol; import org.scrapy.scrapystreaming.messages.*; import org.scrapy.scrapystreaming.core.SpiderException; diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java index e8d3bfb..9d2850b 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java @@ -5,9 +5,7 @@ import org.scrapy.scrapystreaming.messages.*; import org.scrapy.scrapystreaming.utils.Utils; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; +import java.io.*; public class CommunicationProtocol extends Thread { BufferedReader in; @@ -23,9 +21,11 @@ public void run() { while (true) { try { String line = in.readLine(); - ReceivedMessage msg = Utils.gson.fromJson(line, ReceivedMessage.class); - - messageReceived(msg, line); + line = line.trim(); + if (line.length() > 0) { + ReceivedMessage msg = Utils.gson.fromJson(line, ReceivedMessage.class); + messageReceived(msg, line); + } } catch (IOException e) { System.err.println("There is a problem in the communication channel: " + e.getMessage()); } catch (SpiderException e) { diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java index b84f23f..318b4de 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java @@ -8,7 +8,7 @@ public class ResponseMessage extends Message { public final String type = "response"; public String id; public String url; - public HashMap headers; + public HashMap> headers; public Integer status; public String body; public HashMap meta; 
From 96ac02faede64de8e1b226f368d70e2d5a6e02e1 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Wed, 6 Jul 2016 15:06:50 -0300 Subject: [PATCH 07/11] renamed log methods renamed log methods --- .../org/scrapy/scrapystreaming/Logger.java | 19 ++++---- .../FromResponseRequestTest.java | 44 +++++++++++++++++++ .../scrapy/scrapystreaming/LoggerTest.java | 10 ++--- 3 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java index dccca0c..9df778f 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Logger.java @@ -17,17 +17,20 @@ public enum LEVEL { * Print a log message in the scrapy streaming logger * @param message message * @param level log level - * @throws SpiderException */ - public static void log(String message, LEVEL level) throws SpiderException { - new LogMessage(message, level.name()).sendMessage(); + public static void log(String message, LEVEL level) { + try { + new LogMessage(message, level.name()).sendMessage(); + } catch (SpiderException e) { + // logger doesn't validate data + } } /** * Print a critical message in the scrapy streaming logger * @param message message **/ - public static void logCritical(String message) throws SpiderException { + public static void critical(String message) { log(message, LEVEL.CRITICAL); } @@ -35,7 +38,7 @@ public static void logCritical(String message) throws SpiderException { * Print a error message in the scrapy streaming logger * @param message message **/ - public static void logError(String message) throws SpiderException { + public static void error(String message) { log(message, LEVEL.ERROR); } @@ -43,7 +46,7 @@ public static void logError(String message) throws SpiderException { * Print a 
warning in the scrapy streaming logger * @param message message **/ - public static void logWarning(String message) throws SpiderException { + public static void warning(String message) { log(message, LEVEL.WARNING); } @@ -51,7 +54,7 @@ public static void logWarning(String message) throws SpiderException { * Print a info message in the scrapy streaming logger * @param message message **/ - public static void logInfo(String message) throws SpiderException { + public static void info(String message) { log(message, LEVEL.INFO); } @@ -59,7 +62,7 @@ public static void logInfo(String message) throws SpiderException { * Print a debug message in the scrapy streaming logger * @param message message **/ - public static void logDebug(String message) throws SpiderException { + public static void debug(String message) { log(message, LEVEL.DEBUG); } } diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java new file mode 100644 index 0000000..5e98887 --- /dev/null +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java @@ -0,0 +1,44 @@ +package org.scrapy.scrapystreaming; + + +import org.junit.Assert; +import org.junit.Test; +import org.scrapy.scrapystreaming.core.Callback; +import org.scrapy.scrapystreaming.core.SpiderException; +import org.scrapy.scrapystreaming.messages.FromResponseMessage; +import org.scrapy.scrapystreaming.messages.ResponseMessage; + +public class FromResponseRequestTest extends BaseStd { + + @Test + public void openGeneratesID() throws SpiderException { + FromResponseMessage fromResponseMessage = new FromResponseMessage(); + FromResponseRequest r = new FromResponseRequest("http://example.com", fromResponseMessage); + Assert.assertEquals(r.id, null); + + r.open(new Callback() { + @Override + public void onResponse(ResponseMessage response) { + // + } + }); + + Assert.assertNotEquals(r.id, null); + } + + 
@Test + public void openKeepsID() throws SpiderException { + FromResponseMessage fromResponseMessage = new FromResponseMessage(); + FromResponseRequest r = new FromResponseRequest("http://example.com", fromResponseMessage); + r.id = "test"; + + r.open(new Callback() { + @Override + public void onResponse(ResponseMessage response) { + // + } + }); + + Assert.assertEquals(r.id, "test"); + } +} diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java index ddc4ffd..27d2cd6 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/LoggerTest.java @@ -25,7 +25,7 @@ public void log() throws SpiderException { @Test public void logCritical() throws Exception { - Logger.logCritical("critical"); + Logger.critical("critical"); LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); LogMessage logExpected = new LogMessage("critical", Logger.LEVEL.CRITICAL.name()); @@ -35,7 +35,7 @@ public void logCritical() throws Exception { @Test public void logError() throws Exception { - Logger.logError("error"); + Logger.error("error"); LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); LogMessage logExpected = new LogMessage("error", Logger.LEVEL.ERROR.name()); @@ -45,7 +45,7 @@ public void logError() throws Exception { @Test public void logWarning() throws Exception { - Logger.logWarning("warn"); + Logger.warning("warn"); LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); LogMessage logExpected = new LogMessage("warn", Logger.LEVEL.WARNING.name()); @@ -55,7 +55,7 @@ public void logWarning() throws Exception { @Test public void logInfo() throws Exception { - Logger.logInfo("info"); + Logger.info("info"); LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); LogMessage logExpected = new LogMessage("info", 
Logger.LEVEL.INFO.name()); @@ -65,7 +65,7 @@ public void logInfo() throws Exception { @Test public void logDebug() throws Exception { - Logger.logDebug("debug"); + Logger.debug("debug"); LogMessage logMessage = gson.fromJson(out.toString(), LogMessage.class); LogMessage logExpected = new LogMessage("debug", Logger.LEVEL.DEBUG.name()); From 32e5df249e81e743e8ccca6055d0d25e67c28a63 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Wed, 6 Jul 2016 16:28:11 -0300 Subject: [PATCH 08/11] fix --- .../scrapy/scrapystreaming/messages/ResponseMessage.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java index 318b4de..8f8a33c 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/ResponseMessage.java @@ -1,17 +1,17 @@ package org.scrapy.scrapystreaming.messages; -import java.util.HashMap; import java.util.List; +import java.util.Map; public class ResponseMessage extends Message { public final String type = "response"; public String id; public String url; - public HashMap> headers; + public Map headers; public Integer status; public String body; - public HashMap meta; + public Map meta; public List flags; public List validator() { From 73850b8979d1f50b58b8e10a7b245a329c7b1c86 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Fri, 8 Jul 2016 11:25:24 -0300 Subject: [PATCH 09/11] removed spider extends --- .../org/scrapy/scrapystreaming/Spider.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index cbad4a3..30aa269 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ 
b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -5,13 +5,23 @@ import org.scrapy.scrapystreaming.messages.*; import org.scrapy.scrapystreaming.core.SpiderException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + /** * This class lets you create the External Spider and run / stop it. */ -public abstract class Spider extends SpiderMessage { - private transient boolean isRunning = false; - private transient CommunicationProtocol protocol; +public abstract class Spider { + public String name = "ExternalSpider"; + public List start_urls = new ArrayList(0); + public List allowed_domains; + public HashMap custom_settings; + + private SpiderMessage spiderMessage; + private boolean isRunning = false; + private CommunicationProtocol protocol; /** * Start the Spider execution @@ -21,7 +31,13 @@ public final void start() throws SpiderException { if (isRunning) throw new SpiderException("Spider already running"); - sendMessage(); + spiderMessage = new SpiderMessage(); + spiderMessage.name = name; + spiderMessage.start_urls = start_urls; + spiderMessage.allowed_domains = allowed_domains; + spiderMessage.custom_settings = custom_settings; + spiderMessage.sendMessage(); + protocol = new CommunicationProtocol(this); protocol.start(); From 323d47703199b1882f4c462881c0e62cb78fdf0f Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Fri, 8 Jul 2016 11:41:23 -0300 Subject: [PATCH 10/11] added generic types to maps.
made callback an interface --- .../org/scrapy/scrapystreaming/Spider.java | 5 ++++- .../scrapy/scrapystreaming/core/Callback.java | 21 ++----------------- .../core/CommunicationProtocol.java | 2 +- .../messages/FromResponseMessage.java | 5 +++-- .../messages/FromResponseRequestMessage.java | 7 ++++--- .../messages/RequestMessage.java | 7 ++++--- .../messages/SpiderMessage.java | 8 +++---- .../FromResponseRequestTest.java | 4 ++-- .../scrapy/scrapystreaming/RequestTest.java | 4 ++-- 9 files changed, 25 insertions(+), 38 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index 30aa269..261b51d 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -13,7 +13,7 @@ /** * This class lets you create the External Spider and run / stop it. */ -public abstract class Spider { +public abstract class Spider implements Callback { public String name = "ExternalSpider"; public List start_urls = new ArrayList(0); public List allowed_domains; @@ -25,6 +25,7 @@ public abstract class Spider { /** * Start the Spider execution + * * @throws SpiderException */ public final void start() throws SpiderException { @@ -58,6 +59,7 @@ public void close() { /** * The callback of initial_urls responses. + * * @param response response data */ public abstract void parse(ResponseMessage response); @@ -65,6 +67,7 @@ public void close() { /** * This method is called when Scrapy raises an exception and sends the exception message. * If you want to analyze the exception, or just ignore the problem, override this function. 
+ * * @param exception exception message sent by Scrapy Streaming * @throws SpiderException */ diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java index a8d4e71..5640da2 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/Callback.java @@ -7,28 +7,11 @@ /** * Represents a callback function to handle a response */ -public abstract class Callback { - ResponseMessage response; - - /** - * Set the response content - * @param response response message - */ - public void setResponse(ResponseMessage response) { - this.response = response; - } - - /** - * Get the response content - * @return resoponse data - */ - public ResponseMessage getResponse() { - return response; - } +public interface Callback { /** * Method to handle to response content * @param response */ - public abstract void onResponse(ResponseMessage response); + public void parse(ResponseMessage response); } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java index 9d2850b..bd4887a 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/core/CommunicationProtocol.java @@ -62,7 +62,7 @@ protected void onResponse(ResponseMessage response) { if (response.id.equals("parse")) { spider.parse(response); } else { - Utils.responseMapping.get(response.id).onResponse(response); + Utils.responseMapping.get(response.id).parse(response); } } diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java index 4aba9d7..dc198d7 100644 --- 
a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseMessage.java @@ -1,13 +1,14 @@ package org.scrapy.scrapystreaming.messages; import java.util.HashMap; +import java.util.Map; public class FromResponseMessage { public String formname; public String formxpath; public String formcss; public Integer formnumber; - public HashMap formdata; - public HashMap clickdata; + public Map formdata; + public Map clickdata; public Boolean dont_click; } \ No newline at end of file diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java index 7ac2f19..e6c75c6 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/FromResponseRequestMessage.java @@ -4,6 +4,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; public class FromResponseRequestMessage extends Message { @@ -12,10 +13,10 @@ public class FromResponseRequestMessage extends Message { public String url; public Boolean base64; public String method; - public HashMap meta; + public Map meta; public String body; - public HashMap headers; - public HashMap cookies; + public Map headers; + public Map cookies; public String encoding; public Integer priority; public Boolean dont_filter; diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java index 39dc079..b094867 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/RequestMessage.java @@ -4,6 +4,7 @@ import 
java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; public class RequestMessage extends Message { public final String type = "request"; @@ -11,10 +12,10 @@ public class RequestMessage extends Message { public String url; public Boolean base64; public String method; - public HashMap meta; + public Map meta; public String body; - public HashMap headers; - public HashMap cookies; + public Map headers; + public Map cookies; public String encoding; public Integer priority; public Boolean dont_filter; diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java index a623f4b..5e4e581 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/messages/SpiderMessage.java @@ -1,17 +1,15 @@ package org.scrapy.scrapystreaming.messages; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.HashMap; + +import java.util.*; public class SpiderMessage extends Message { public final String type = "spider"; public String name = "ExternalSpider"; public List start_urls = new ArrayList(0); public List allowed_domains; - public HashMap custom_settings; + public Map custom_settings; public List validator() { return Arrays.asList("type", "name", "start_urls"); diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java index 5e98887..2197743 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/FromResponseRequestTest.java @@ -18,7 +18,7 @@ public void openGeneratesID() throws SpiderException { r.open(new Callback() { @Override - public void onResponse(ResponseMessage response) { 
+ public void parse(ResponseMessage response) { // } }); @@ -34,7 +34,7 @@ public void openKeepsID() throws SpiderException { r.open(new Callback() { @Override - public void onResponse(ResponseMessage response) { + public void parse(ResponseMessage response) { // } }); diff --git a/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java b/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java index b1da630..c9f8791 100644 --- a/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java +++ b/helpers/java/src/test/java/org/scrapy/scrapystreaming/RequestTest.java @@ -16,7 +16,7 @@ public void openGeneratesID() throws SpiderException { r.open(new Callback() { @Override - public void onResponse(ResponseMessage response) { + public void parse(ResponseMessage response) { // } }); @@ -31,7 +31,7 @@ public void openKeepsID() throws SpiderException { r.open(new Callback() { @Override - public void onResponse(ResponseMessage response) { + public void parse(ResponseMessage response) { // } }); From 9ea877a25064a45c82779d22a18631ca03d00ee1 Mon Sep 17 00:00:00 2001 From: Aron Bordin Date: Sat, 9 Jul 2016 02:09:24 -0300 Subject: [PATCH 11/11] fix custom_settings type --- .../java/src/main/java/org/scrapy/scrapystreaming/Spider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java index 261b51d..21525a2 100644 --- a/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java +++ b/helpers/java/src/main/java/org/scrapy/scrapystreaming/Spider.java @@ -6,8 +6,8 @@ import org.scrapy.scrapystreaming.core.SpiderException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; +import java.util.Map; /** @@ -17,7 +17,7 @@ public abstract class Spider implements Callback { public String name = "ExternalSpider"; public List start_urls = new ArrayList(0); public List
allowed_domains; - public HashMap custom_settings; + public Map custom_settings; private SpiderMessage spiderMessage; private boolean isRunning = false;