File tree: 2 files changed (+11 -3 lines changed)
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component: 2 files changed (+11 -3 lines changed)
Original file line number | Diff line number | Diff line change
@@ -46,14 +46,18 @@ protected BloomFilter<CharSequence> rebuildBloomFilter() {
46
46
47
47
@ Override
48
48
public boolean isDuplicate (Request request , Task task ) {
49
- boolean isDuplicate = bloomFilter .mightContain (request . getUrl ());
49
+ boolean isDuplicate = bloomFilter .mightContain (getUrl (request ));
50
50
if (!isDuplicate ) {
51
- bloomFilter .put (request . getUrl ());
51
+ bloomFilter .put (getUrl (request ));
52
52
counter .incrementAndGet ();
53
53
}
54
54
return isDuplicate ;
55
55
}
56
56
57
+ protected String getUrl (Request request ) {
58
+ return request .getUrl ();
59
+ }
60
+
57
61
@ Override
58
62
public void resetDuplicateCheck (Task task ) {
59
63
rebuildBloomFilter ();
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
16
16
17
17
@ Override
18
18
public boolean isDuplicate (Request request , Task task ) {
19
- return !urls .add (request .getUrl ());
19
+ return !urls .add (getUrl (request ));
20
+ }
21
+
22
+ protected String getUrl (Request request ) {
23
+ return request .getUrl ();
20
24
}
21
25
22
26
@ Override
You can’t perform that action at this time.
0 commit comments