@@ -12,35 +12,49 @@ Currently it is only possible to implement custom weighting schemes in C++.
12
12
The API could probably be wrapped with a bit of effort, but performance is
13
13
likely to be disappointing as the :xapian-just-method:`get_sumpart()` method
14
14
gets called a lot (approximately once per matching term in each considered
15
- document), so the overhead of routing a method call from C++ to the wrapped
16
- language will matter.
15
+ document), so the overhead of routing a virtual method call from C++ to the
16
+ wrapped language will matter.
17
17
18
18
For example, here's an implementation of "coordinate matching" - each matching
19
- term scores one point:
19
+ term scores one point (this is provided in the API as
20
+ :xapian-class:`Xapian::CoordWeight` but is an illustrative example of
21
+ implementing a simple weighting scheme):
20
22
21
23
.. code-block:: c++
22
24
23
- class CoordinateWeight : public Xapian::Weight {
25
+ class CoordWeight : public Xapian::Weight {
26
+ double factor = 1.0;
27
+
24
28
public:
25
- CoordinateWeight * clone() const { return new CoordinateWeight; }
26
- CoordinateWeight() { }
27
- ~CoordinateWeight() { }
28
-
29
- std::string name() const { return "Coord"; }
30
- std::string serialise() const { return std::string(); }
31
- CoordinateWeight * unserialise(const std::string &) const {
32
- return new CoordinateWeight;
33
- }
29
+ CoordWeight() { }
30
+
31
+ ~CoordWeight() { }
32
+
33
+ CoordWeight* clone() const override { return new CoordWeight; }
34
+
35
+ void init(double factor_) override { factor = factor_; }
34
36
35
- double get_sumpart(Xapian::termcount, Xapian::doclength) const {
36
- return 1;
37
+ std::string name() const override { return "Coord"; }
38
+
39
+ // No parameters to serialise.
40
+ std::string serialise() const override { return std::string(); }
41
+
42
+ CoordWeight* unserialise(const std::string&) const override {
43
+ return new CoordWeight;
37
44
}
38
- double get_maxpart() const { return 1; }
39
45
40
- double get_sumextra(Xapian::doclength) const { return 0; }
41
- double get_maxextra() const { return 0; }
46
+ double get_sumpart(Xapian::termcount,
47
+ Xapian::termcount,
48
+ Xapian::termcount) const override {
49
+ return factor;
50
+ }
51
+ double get_maxpart() const override { return factor; }
42
52
43
- bool get_sumpart_needs_doclength() const { return false; }
53
+ double get_sumextra(Xapian::termcount,
54
+ Xapian::termcount) const override {
55
+ return 0;
56
+ }
57
+ double get_maxextra() const override { return 0; }
44
58
};
45
59
46
60
@@ -80,36 +94,50 @@ The implementation will be as follows:
80
94
.. code-block:: c++
81
95
82
96
class TfIdfWeight : public Xapian::Weight {
97
+ double factor = 1.0;
98
+
83
99
public:
84
- TfIdfWeight * clone() const { return new TfIdfWeight; }
85
100
TfIdfWeight() {
86
101
need_stat(WDF);
87
102
need_stat(TERMFREQ);
88
103
need_stat(WDF_MAX);
89
104
}
105
+
90
106
~TfIdfWeight() { }
91
107
92
- std::string name() const { return "TfIdf"; }
93
- std::string serialise() const { return std::string(); }
94
- TfIdfWeight * unserialise(const std::string &) const {
108
+ TfIdfWeight* clone() const override { return new TfIdfWeight; }
109
+
110
+ void init(double factor_) override { factor = factor_; }
111
+
112
+ std::string name() const override { return "TfIdf"; }
113
+
114
+ // No parameters to serialise.
115
+ std::string serialise() const override { return std::string(); }
116
+
117
+ TfIdfWeight* unserialise(const std::string&) const override {
95
118
return new TfIdfWeight;
96
119
}
97
120
98
- double get_sumpart(Xapian::termcount wdf, Xapian::doclength) const {
121
+ double get_sumpart(Xapian::termcount wdf,
122
+ Xapian::termcount,
123
+ Xapian::termcount) const override {
99
124
Xapian::doccount df = get_termfreq();
100
125
double wdf_double(wdf);
101
126
double wt = wdf_double / df;
102
- return wt;
127
+ return wt * factor;
103
128
}
104
129
105
- double get_maxpart() const {
130
+ double get_maxpart() const override {
106
131
Xapian::doccount df = get_termfreq();
107
132
double max_wdf(get_wdf_upper_bound());
108
133
double max_weight = max_wdf / df;
109
- return max_weight;
134
+ return max_weight * factor;
110
135
}
111
- double get_sumextra(Xapian::doclength) const { return 0; }
112
- double get_maxextra() const { return 0; }
136
+
137
+ double get_sumextra(Xapian::termcount,
138
+ Xapian::termcount) const override { return 0; }
139
+
140
+ double get_maxextra() const override { return 0; }
113
141
};
114
142
115
143
0 commit comments