Skip to content

Commit 9d197ec

Browse files
committed
release notes and docs
1 parent 7515bf3 commit 9d197ec

File tree

3 files changed

+31
-5
lines changed

3 files changed

+31
-5
lines changed

release-note.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ Release Notes
2828

2929
}
3030

31+
增加一个Spider.test(String... urls)方法,可传入一个或多个URL,用于开发爬虫时进行调试。
32+
3133
增加基于redis的分布式支持。
3234

3335
增加XPath2.0语法支持(webmagic-saxon模块)。
3436

3537
增加基于Selenium的浏览器渲染支持,用于抓取动态加载内容(webmagic-selenium模块)。
3638

37-
修复一些已有bug
39+
修复了不支持https的bug
3840

3941
补充了文档:[webmagic-0.2.0用户手册](http://code4craft.github.io/webmagic/)
4042

webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,17 @@ private void destroyEach(Object object) {
220220
}
221221
}
222222

223-
public void test(String url){
223+
/**
224+
* Runs the spider against specific URLs for testing: calls checkComponent() once, then wraps each URL in a Request and passes it to processRequest, in the order given.
225+
* @param urls the URLs to crawl; when none are passed, only checkComponent() runs
226+
*/
227+
public void test(String... urls){
224228
checkComponent();
225-
processRequest(new Request(url));
229+
if (urls.length>0){
230+
for (String url : urls) {
231+
processRequest(new Request(url));
232+
}
233+
}
226234
}
227235

228236
private void processRequest(Request request) {

webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import us.codecraft.webmagic.model.annotation.HelpUrl;
99
import us.codecraft.webmagic.model.annotation.TargetUrl;
1010
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
11+
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
1112

1213
import java.util.List;
1314

@@ -32,12 +33,19 @@ public class GithubRepo implements HasKey {
3233
@ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']",multi = true)
3334
private List<String> language;
3435

36+
// NOTE(review): the star and fork fields below are declared with the same XPath expression; confirm that each one selects the intended element.
@ExtractBy("//a[@class='social-count js-social-count']/text()")
37+
private String star;
38+
39+
@ExtractBy("//a[@class='social-count js-social-count']/text()")
40+
private String fork;
41+
3542
@ExtractByUrl
3643
private String url;
3744

3845
/**
 * Sample entry point. As configured by the added lines, it creates an OOSpider for
 * GithubRepo from a Site whose start URL is https://github.com/explore (sleep time 0,
 * retry times 3) together with a JsonFilePageModelPipeline, sets a
 * FileCacheQueueScheduler constructed with "/data/webmagic/cache/", calls thread(15)
 * and then run().
 * NOTE(review): the scheduler path is a hard-coded absolute path; confirm it suits the
 * machine running this sample.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
39-
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0),
40-
new JsonFilePageModelPipeline(), GithubRepo.class).thread(15).run();
46+
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0).setRetryTimes(3),
47+
new JsonFilePageModelPipeline(), GithubRepo.class)
48+
.scheduler(new FileCacheQueueScheduler("/data/webmagic/cache/")).thread(15).run();
4149
}
4250

4351
@Override
@@ -64,4 +72,12 @@ public List<String> getLanguage() {
6472
/** Returns the url field (declared with the @ExtractByUrl annotation). */
public String getUrl() {
6573
return url;
6674
}
75+
76+
/** Returns the star field (declared with an @ExtractBy XPath annotation). */
public String getStar() {
77+
return star;
78+
}
79+
80+
/** Returns the fork field (declared with an @ExtractBy XPath annotation). */
public String getFork() {
81+
return fork;
82+
}
6783
}

0 commit comments

Comments
 (0)