Skip to content

Commit b94b042

Browse files
committed
Update custom weighting scheme examples
They were still for the Xapian 1.2 API.
1 parent dd70e33 commit b94b042

File tree

1 file changed

+57
-29
lines changed

1 file changed

+57
-29
lines changed

advanced/custom_weighting.rst

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,49 @@ Currently it is only possible to implement custom weighting schemes in C++.
1212
The API could probably be wrapped with a bit of effort, but performance is
1313
likely to be disappointing as the :xapian-just-method:`get_sumpart()` method
1414
gets called a lot (approximately once per matching term in each considered
15-
document), so the overhead of routing a method call from C++ to the wrapped
16-
language will matter.
15+
document), so the overhead of routing a virtual method call from C++ to the
16+
wrapped language will matter.
1717

1818
For example, here's an implementation of "coordinate matching" - each matching
19-
term scores one point:
19+
term scores one point (this is provided in the API as
20+
:xapian-class:`Xapian::CoordWeight` but it still serves as an illustrative example of
21+
implementing a simple weighting scheme):
2022

2123
.. code-block:: c++
2224

23-
class CoordinateWeight : public Xapian::Weight {
25+
class CoordWeight : public Xapian::Weight {
26+
double factor = 1.0;
27+
2428
public:
25-
CoordinateWeight * clone() const { return new CoordinateWeight; }
26-
CoordinateWeight() { }
27-
~CoordinateWeight() { }
28-
29-
std::string name() const { return "Coord"; }
30-
std::string serialise() const { return std::string(); }
31-
CoordinateWeight * unserialise(const std::string &) const {
32-
return new CoordinateWeight;
33-
}
29+
CoordWeight() { }
30+
31+
~CoordWeight() { }
32+
33+
CoordWeight* clone() const override { return new CoordWeight; }
34+
35+
void init(double factor_) override { factor = factor_; }
3436

35-
double get_sumpart(Xapian::termcount, Xapian::doclength) const {
36-
return 1;
37+
std::string name() const override { return "Coord"; }
38+
39+
// No parameters to serialise.
40+
std::string serialise() const override { return std::string(); }
41+
42+
CoordWeight* unserialise(const std::string&) const override {
43+
return new CoordWeight;
3744
}
38-
double get_maxpart() const { return 1; }
3945

40-
double get_sumextra(Xapian::doclength) const { return 0; }
41-
double get_maxextra() const { return 0; }
46+
double get_sumpart(Xapian::termcount,
47+
Xapian::termcount,
48+
Xapian::termcount) const override {
49+
return factor;
50+
}
51+
double get_maxpart() const override { return factor; }
4252

43-
bool get_sumpart_needs_doclength() const { return false; }
53+
double get_sumextra(Xapian::termcount,
54+
Xapian::termcount) const override {
55+
return 0;
56+
}
57+
double get_maxextra() const override { return 0; }
4458
};
4559

4660

@@ -80,36 +94,50 @@ The implementation will be as follows:
8094
.. code-block:: c++
8195

8296
class TfIdfWeight : public Xapian::Weight {
97+
double factor = 1.0;
98+
8399
public:
84-
TfIdfWeight * clone() const { return new TfIdfWeight; }
85100
TfIdfWeight() {
86101
need_stat(WDF);
87102
need_stat(TERMFREQ);
88103
need_stat(WDF_MAX);
89104
}
105+
90106
~TfIdfWeight() { }
91107

92-
std::string name() const { return "TfIdf"; }
93-
std::string serialise() const { return std::string(); }
94-
TfIdfWeight * unserialise(const std::string &) const {
108+
TfIdfWeight* clone() const override { return new TfIdfWeight; }
109+
110+
void init(double factor_) override { factor = factor_; }
111+
112+
std::string name() const override { return "TfIdf"; }
113+
114+
// No parameters to serialise.
115+
std::string serialise() const override { return std::string(); }
116+
117+
TfIdfWeight* unserialise(const std::string&) const override {
95118
return new TfIdfWeight;
96119
}
97120

98-
double get_sumpart(Xapian::termcount wdf, Xapian::doclength) const {
121+
double get_sumpart(Xapian::termcount wdf,
122+
Xapian::termcount,
123+
Xapian::termcount) const override {
99124
Xapian::doccount df = get_termfreq();
100125
double wdf_double(wdf);
101126
double wt = wdf_double / df;
102-
return wt;
127+
return wt * factor;
103128
}
104129

105-
double get_maxpart() const {
130+
double get_maxpart() const override {
106131
Xapian::doccount df = get_termfreq();
107132
double max_wdf(get_wdf_upper_bound());
108133
double max_weight = max_wdf / df;
109-
return max_weight;
134+
return max_weight * factor;
110135
}
111-
double get_sumextra(Xapian::doclength) const { return 0; }
112-
double get_maxextra() const { return 0; }
136+
137+
double get_sumextra(Xapian::termcount,
138+
Xapian::termcount) const override { return 0; }
139+
140+
double get_maxextra() const override { return 0; }
113141
};
114142

115143

0 commit comments

Comments
 (0)