Skip to content

Commit ec1c2e8

Browse files
committed
test and so on
1 parent 4f22f12 commit ec1c2e8

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component/BloomFilterDuplicateRemover.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,14 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
2323
private AtomicInteger counter;
2424

2525
public BloomFilterDuplicateRemover(int expectedInsertions) {
26-
this(expectedInsertions, 0.03);
26+
this(expectedInsertions, 0.01);
2727
}
2828

29+
/**
30+
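* Creates a BloomFilterDuplicateRemover with the given expected insertions and false positive probability.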
31+
* @param expectedInsertions the number of expected insertions to the constructed bloom filter
32+
* @param fpp the desired false positive probability (must be positive and less than 1.0)
33+
*/
2934
public BloomFilterDuplicateRemover(int expectedInsertions, double fpp) {
3035
this.expectedInsertions = expectedInsertions;
3136
this.fpp = fpp;

webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/BloomFilterDuplicateRemoverTest.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package us.codecraft.webmagic.scheduler;
22

3+
import org.junit.Ignore;
34
import org.junit.Test;
45
import us.codecraft.webmagic.Request;
56
import us.codecraft.webmagic.scheduler.component.BloomFilterDuplicateRemover;
7+
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
8+
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
69

710
import static org.assertj.core.api.Assertions.assertThat;
811

@@ -24,4 +27,54 @@ public void testRemove() throws Exception {
2427
assertThat(isDuplicate).isTrue();
2528

2629
}
30+
31+
@Ignore("long time")
32+
@Test
33+
public void testMemory() throws Exception {
34+
int times = 5000000;
35+
DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times,0.005);
36+
long freeMemory = Runtime.getRuntime().freeMemory();
37+
long time = System.currentTimeMillis();
38+
for (int i = 0; i < times; i++) {
39+
duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
40+
}
41+
System.out.println("Time used by bloomfilter:" + (System.currentTimeMillis() - time));
42+
System.out.println("Memory used by bloomfilter:" + (freeMemory - Runtime.getRuntime().freeMemory()));
43+
44+
duplicateRemover = new HashSetDuplicateRemover();
45+
System.gc();
46+
freeMemory = Runtime.getRuntime().freeMemory();
47+
time = System.currentTimeMillis();
48+
for (int i = 0; i < times; i++) {
49+
duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
50+
}
51+
System.out.println("Time used by hashset:" + (System.currentTimeMillis() - time));
52+
System.out.println("Memory used by hashset:" + (freeMemory - Runtime.getRuntime().freeMemory()));
53+
}
54+
55+
@Ignore("long time")
56+
@Test
57+
public void testMissHit() throws Exception {
58+
int times = 5000000;
59+
DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times, 0.01);
60+
int right = 0;
61+
int wrong = 0;
62+
int missCheck = 0;
63+
for (int i = 0; i < times; i++) {
64+
boolean duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
65+
if (duplicate) {
66+
wrong++;
67+
} else {
68+
right++;
69+
}
70+
duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
71+
if (!duplicate) {
72+
missCheck++;
73+
}
74+
}
75+
76+
System.out.println("Right count: " + right + " Wrong count: " + wrong + " Miss check: " + missCheck);
77+
}
78+
79+
2780
}

0 commit comments

Comments
 (0)