Skip to content

Commit 43ce1a0

Browse files
committed
Merge branch 'release/0.8.0'
2 parents f45a8ff + 7d091de commit 43ce1a0

File tree

11 files changed

+53
-21
lines changed

11 files changed

+53
-21
lines changed

pom.xml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
33
<groupId>us.codecraft</groupId>
4-
<version>0.7.6</version>
4+
<version>0.8.0</version>
55
<modelVersion>4.0.0</modelVersion>
66
<packaging>pom</packaging>
77
<properties>
@@ -14,14 +14,14 @@
1414
<commons-collections4.version>4.4</commons-collections4.version>
1515
<commons-io.version>2.11.0</commons-io.version>
1616
<commons-lang3.version>3.12.0</commons-lang3.version>
17-
<fastjson.version>2.0.14.graal</fastjson.version>
17+
<fastjson.version>2.0.19.graal</fastjson.version>
1818
<groovy-all.version>3.0.13</groovy-all.version>
1919
<guava.version>31.1-jre</guava.version>
2020
<htmlcleaner.version>2.26</htmlcleaner.version>
2121
<httpclient.version>4.5.13</httpclient.version>
2222
<httpcore.version>4.4.15</httpcore.version>
2323
<jedis.version>3.7.1</jedis.version>
24-
<jruby.version>9.3.8.0</jruby.version>
24+
<jruby.version>9.3.9.0</jruby.version>
2525
<json-path.version>2.7.0</json-path.version>
2626
<junit.version>4.13.2</junit.version>
2727
<jython.version>2.7.3</jython.version>
@@ -31,7 +31,7 @@
3131
<phantomjsdriver.version>1.2.0</phantomjsdriver.version>
3232
<saxon-he.version>11.4</saxon-he.version>
3333
<selenium-java.version>3.141.59</selenium-java.version>
34-
<slf4j.version>2.0.3</slf4j.version>
34+
<slf4j.version>2.0.4</slf4j.version>
3535
<spring-version>4.0.0.RELEASE</spring-version>
3636
<xsoup.version>0.3.5</xsoup.version>
3737
</properties>
@@ -232,7 +232,7 @@
232232
<configuration>
233233
<rules>
234234
<requireMavenVersion>
235-
<version>3.3.9</version>
235+
<version>3.5.0</version>
236236
</requireMavenVersion>
237237
</rules>
238238
</configuration>

webmagic-core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<groupId>us.codecraft</groupId>
55
<artifactId>webmagic-parent</artifactId>
6-
<version>0.7.6</version>
6+
<version>0.8.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99

webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -333,9 +333,10 @@ public void run() {
333333
}
334334
} else {
335335
// wait until a new url is added
336-
if (waitNewUrl())
337-
//if interrupted
336+
if (waitNewUrl()) {
337+
//if interrupted
338338
break;
339+
}
339340
continue;
340341
}
341342
}
@@ -805,11 +806,13 @@ public Scheduler getScheduler() {
805806
* Set wait time when no url is polled.<br><br>
806807
*
807808
* @param emptySleepTime In MILLISECONDS.
809+
* @return this
808810
*/
809-
public void setEmptySleepTime(long emptySleepTime) {
811+
public Spider setEmptySleepTime(long emptySleepTime) {
810812
if(emptySleepTime<=0){
811813
throw new IllegalArgumentException("emptySleepTime should be more than zero!");
812814
}
813815
this.emptySleepTime = emptySleepTime;
816+
return this;
814817
}
815818
}

webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,16 @@ public Page download(Request request, Task task) {
8282
try {
8383
httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
8484
page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task);
85+
8586
onSuccess(request, task);
8687
logger.info("downloading page success {}", request.getUrl());
88+
8789
return page;
8890
} catch (IOException e) {
89-
logger.warn("download page {} error", request.getUrl(), e);
91+
9092
onError(request, task, e);
93+
logger.info("download page {} error", request.getUrl(), e);
94+
9195
return page;
9296
} finally {
9397
if (httpResponse != null) {

webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,51 @@
11
package us.codecraft.webmagic.scheduler;
22

3-
import us.codecraft.webmagic.Request;
4-
import us.codecraft.webmagic.Task;
5-
63
import java.util.concurrent.BlockingQueue;
74
import java.util.concurrent.LinkedBlockingQueue;
85

6+
import us.codecraft.webmagic.Request;
7+
import us.codecraft.webmagic.Site;
8+
import us.codecraft.webmagic.Task;
99

1010
/**
1111
* Basic Scheduler implementation.<br>
1212
* Stores URLs to fetch in a LinkedBlockingQueue and removes duplicate URLs using a HashMap.
1313
*
14+
* Note: if you use this {@link QueueScheduler}
15+
* with {@link Site#getCycleRetryTimes()} enabled, you may encounter a deadlock
16+
* when the queue is full.
17+
*
1418
* @author [email protected] <br>
1519
* @since 0.1.0
1620
*/
1721
public class QueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler {
1822

19-
private BlockingQueue<Request> queue = new LinkedBlockingQueue<Request>();
23+
private final BlockingQueue<Request> queue;
24+
25+
public QueueScheduler() {
26+
this.queue = new LinkedBlockingQueue<>();
27+
}
28+
29+
/**
30+
* Creates a {@code QueueScheduler} with the given (fixed) capacity.
31+
*
32+
* @param capacity the capacity of this queue,
33+
* see {@link LinkedBlockingQueue#LinkedBlockingQueue(int)}
34+
* @since 0.8.0
35+
*/
36+
public QueueScheduler(int capacity) {
37+
this.queue = new LinkedBlockingQueue<>(capacity);
38+
}
2039

2140
@Override
2241
public void pushWhenNoDuplicate(Request request, Task task) {
23-
queue.add(request);
42+
logger.trace("Remaining capacity: {}", this.queue.remainingCapacity());
43+
44+
try {
45+
queue.put(request);
46+
} catch (InterruptedException e) {
47+
Thread.currentThread().interrupt();
48+
}
2449
}
2550

2651
@Override

webmagic-coverage/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
<parent>
99
<groupId>us.codecraft</groupId>
1010
<artifactId>webmagic-parent</artifactId>
11-
<version>0.7.6</version>
11+
<version>0.8.0</version>
1212
</parent>
1313

1414
<artifactId>webmagic-coverage</artifactId>

webmagic-extension/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<groupId>us.codecraft</groupId>
55
<artifactId>webmagic-parent</artifactId>
6-
<version>0.7.6</version>
6+
<version>0.8.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99

webmagic-samples/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>webmagic-parent</artifactId>
55
<groupId>us.codecraft</groupId>
6-
<version>0.7.6</version>
6+
<version>0.8.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99

webmagic-saxon/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>webmagic-parent</artifactId>
55
<groupId>us.codecraft</groupId>
6-
<version>0.7.6</version>
6+
<version>0.8.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99

webmagic-scripts/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>webmagic-parent</artifactId>
55
<groupId>us.codecraft</groupId>
6-
<version>0.7.6</version>
6+
<version>0.8.0</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99

0 commit comments

Comments
 (0)