Skip to content

Commit 1344be0

Browse files
committed
Tests: enable Solr content extraction handler
This is needed for the test_content_extraction test to pass
1 parent 4f9a0ab commit 1344be0

File tree

1 file changed

+69
-46
lines changed

1 file changed

+69
-46
lines changed

tests/solrconfig.xml

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
<config>
1919
<abortOnConfigurationError>true</abortOnConfigurationError>
2020

21+
<!--
22+
Set the path for the ExtractingRequestHandler's libraries. See /update/extract handler
23+
declaration below
24+
-->
25+
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
26+
<lib dir="../../contrib/extraction/lib" />
27+
2128
<!-- Used to specify an alternate directory to hold all index data
2229
other than the default ./data under the Solr home.
2330
If replication is in use, this should match the replication configuration. -->
@@ -75,7 +82,7 @@
7582

7683
<!--
7784
This option specifies which Lucene LockFactory implementation to use.
78-
85+
7986
single = SingleInstanceLockFactory - suggested for a read-only index
8087
or when there is no possibility of another process trying
8188
to modify the index.
@@ -98,15 +105,15 @@
98105
<maxMergeDocs>2147483647</maxMergeDocs>
99106
<maxFieldLength>10000</maxFieldLength>
100107

101-
<!-- If true, unlock any held write or commit locks on startup.
108+
<!-- If true, unlock any held write or commit locks on startup.
102109
This defeats the locking mechanism that allows multiple
103110
processes to safely access a lucene index, and should be
104111
used with care.
105112
This is not needed if lock type is 'none' or 'single'
106113
-->
107114
<unlockOnStartup>false</unlockOnStartup>
108115
</mainIndex>
109-
116+
110117
<updateHandler class="solr.DirectUpdateHandler2">
111118
<!-- A prefix of "solr." for class names is an alias that
112119
causes solr to search appropriate packages, including
@@ -116,9 +123,9 @@
116123
<!-- Perform a <commit/> automatically under certain conditions:
117124
maxDocs - number of updates since last commit is greater than this
118125
maxTime - oldest uncommitted update (in ms) is this long ago
119-
<autoCommit>
126+
<autoCommit>
120127
<maxDocs>10000</maxDocs>
121-
<maxTime>1000</maxTime>
128+
<maxTime>1000</maxTime>
122129
</autoCommit>
123130
-->
124131

@@ -139,22 +146,22 @@
139146
</listener>
140147
-->
141148
<!-- A postOptimize event is fired only after every optimize command, useful
142-
in conjunction with index distribution to only distribute optimized indices
149+
in conjunction with index distribution to only distribute optimized indices
143150
<listener event="postOptimize" class="solr.RunExecutableListener">
144151
<str name="exe">snapshooter</str>
145152
<str name="dir">solr/bin</str>
146153
<bool name="wait">true</bool>
147154
</listener>
148155
-->
149156
</updateHandler>
150-
157+
151158
<query>
152159
<!-- Maximum number of clauses in a boolean query... can affect
153160
range or prefix queries that expand to big boolean
154161
queries. An exception is thrown if exceeded. -->
155162
<maxBooleanClauses>1024</maxBooleanClauses>
156163

157-
164+
158165
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
159166
unordered sets of *all* documents that match a query.
160167
When a new searcher is opened, its caches may be prepopulated
@@ -230,7 +237,7 @@
230237
then documents 0 through 49 will be collected and cached. Any further
231238
requests in that range can be satisfied via the cache. -->
232239
<queryResultWindowSize>50</queryResultWindowSize>
233-
240+
234241
<!-- Maximum number of documents to cache for any entry in the
235242
queryResultCache. -->
236243
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
@@ -276,17 +283,17 @@
276283

277284
</query>
278285

279-
<!--
286+
<!--
280287
Let the dispatch filter handler /select?qt=XXX
281288
handleSelect=true will use consistent error handling for /select and /update
282289
handleSelect=false will use solr1.1 style error formatting
283290
-->
284291
<requestDispatcher handleSelect="true" >
285292
<!--Make sure your system has some authentication before enabling remote streaming! -->
286293
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
287-
294+
288295
<!-- Set HTTP caching related parameters (for proxy caches and clients).
289-
296+
290297
To get the behaviour of Solr 1.2 (ie: no caching related headers)
291298
use the never304="true" option and do not specify a value for
292299
<cacheControl>
@@ -300,7 +307,7 @@
300307
You can change it to lastModFrom="dirLastMod" if you want the
301308
value to exactly correspond to when the physical index was last
302309
modified.
303-
310+
304311
etagSeed="..." is an option you can change to force the ETag
305312
header (and validation against If-None-Match requests) to be
306313
different even if the index has not changed (ie: when making
@@ -312,7 +319,7 @@
312319
<!-- If you include a <cacheControl> directive, it will be used to
313320
generate a Cache-Control header, as well as an Expires header
314321
if the value contains "max-age="
315-
322+
316323
By default, no Cache-Control header is generated.
317324
318325
You can use the <cacheControl> option even if you have set
@@ -321,10 +328,10 @@
321328
<!-- <cacheControl>max-age=30, public</cacheControl> -->
322329
</httpCaching>
323330
</requestDispatcher>
324-
331+
325332
<!-- requestHandler plugins... incoming queries will be dispatched to the
326333
correct handler based on the path or the qt (query type) param.
327-
Names starting with a '/' are accessed with a path equal to the
334+
Names starting with a '/' are accessed with a path equal to the
328335
registered name. Names without a leading '/' are accessed with:
329336
http://host/app/select?qt=name
330337
If no qt is defined, the requestHandler that declares default="true"
@@ -335,14 +342,14 @@
335342
<str>spellcheck</str>
336343
</arr>
337344
</requestHandler>
338-
345+
339346
<requestHandler name="/mlt" class="solr.MoreLikeThisHandler" />
340-
347+
341348
<!--
342349
Search components are registered to SolrCore and used by Search Handlers
343-
350+
344351
By default, the following components are available:
345-
352+
346353
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
347354
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
348355
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
@@ -360,11 +367,11 @@
360367
361368
If you register a searchComponent to one of the standard names, that will be used instead.
362369
To insert handlers before or after the 'standard' components, use:
363-
370+
364371
<arr name="first-components">
365372
<str>myFirstComponentName</str>
366373
</arr>
367-
374+
368375
<arr name="last-components">
369376
<str>myLastComponentName</str>
370377
</arr>
@@ -416,13 +423,13 @@
416423
<str>spellcheck</str>
417424
</arr>
418425
</requestHandler>
419-
420-
<!-- Update request handler.
421-
422-
Note: Since solr1.1 requestHandlers requires a valid content type header if posted in
426+
427+
<!-- Update request handler.
428+
429+
Note: Since solr1.1 requestHandlers requires a valid content type header if posted in
423430
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
424431
The response format differs from solr1.1 formatting and returns a standard error code.
425-
432+
426433
To enable solr1.1 behavior, remove the /update handler or change its path
427434
-->
428435
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
@@ -432,32 +439,48 @@
432439
for debugging and as a token server for other types of applications
433440
-->
434441
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
435-
442+
436443

437444
<!-- CSV update handler, loaded on demand -->
438445
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
439-
440-
<!--
441-
Admin Handlers - This will register all the standard admin RequestHandlers. Adding
446+
447+
<!--
448+
Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler
449+
450+
NOTE: this requires some extra JAR dependencies and, unlike /update/csv above, is not declared with startup="lazy", so it is loaded at startup rather than on demand. These are satisfied in
451+
the standard example project but might require adjusting in a multicore or other
452+
configuration where the relative directories above are no longer correct.
453+
-->
454+
<requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler">
455+
<lst name="defaults">
456+
<str name="fmap.content">text</str>
457+
<str name="lowernames">true</str>
458+
<str name="uprefix">attr_</str>
459+
<str name="captureAttr">false</str>
460+
</lst>
461+
</requestHandler>
462+
463+
<!--
464+
Admin Handlers - This will register all the standard admin RequestHandlers. Adding
442465
this single handler is equivalent to registering:
443-
466+
444467
<requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" />
445468
<requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" />
446469
<requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" />
447470
<requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" />
448471
<requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" />
449472
<requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" >
450-
473+
451474
If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using:
452475
<requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" >
453476
<lst name="invariants">
454-
<str name="hidden">synonyms.txt</str>
455-
<str name="hidden">anotherfile.txt</str>
477+
<str name="hidden">synonyms.txt</str>
478+
<str name="hidden">anotherfile.txt</str>
456479
</lst>
457480
</requestHandler>
458481
-->
459482
<requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" />
460-
483+
461484
<!-- ping/healthcheck -->
462485
<requestHandler name="/admin/ping" class="PingRequestHandler">
463486
<lst name="defaults">
@@ -466,7 +489,7 @@
466489
<str name="echoParams">all</str>
467490
</lst>
468491
</requestHandler>
469-
492+
470493
<highlighting>
471494
<!-- Configure the standard fragmenter -->
472495
<!-- This could most likely be commented out in the "default" case -->
@@ -482,12 +505,12 @@
482505
<!-- slightly smaller fragsizes work better because of slop -->
483506
<int name="hl.fragsize">70</int>
484507
<!-- allow 50% slop on fragment sizes -->
485-
<float name="hl.regex.slop">0.5</float>
508+
<float name="hl.regex.slop">0.5</float>
486509
<!-- a basic sentence pattern -->
487510
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
488511
</lst>
489512
</fragmenter>
490-
513+
491514
<!-- Configure the standard formatter -->
492515
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
493516
<lst name="defaults">
@@ -496,12 +519,12 @@
496519
</lst>
497520
</formatter>
498521
</highlighting>
499-
500-
522+
523+
501524
<!-- queryResponseWriter plugins... query responses will be written using the
502525
writer specified by the 'wt' request parameter matching the name of a registered
503526
writer.
504-
The "default" writer is the default and will be used if 'wt' is not specified
527+
The "default" writer is the default and will be used if 'wt' is not specified
505528
in the request. XMLResponseWriter will be used if nothing is specified here.
506529
The json, python, and ruby writers are also available by default.
507530
@@ -516,19 +539,19 @@
516539
-->
517540
<queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/>
518541
<queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
519-
542+
520543
<!-- example of registering a query parser
521544
<queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/>
522545
-->
523546

524-
<!-- example of registering a custom function parser
547+
<!-- example of registering a custom function parser
525548
<valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" />
526549
-->
527-
550+
528551
<!-- config for the admin interface -->
529552
<admin>
530553
<defaultQuery>solr</defaultQuery>
531-
554+
532555
<!-- configure a healthcheck file for servers behind a loadbalancer
533556
<healthcheck type="file">server-enabled</healthcheck>
534557
-->

0 commit comments

Comments
 (0)