-
Notifications
You must be signed in to change notification settings - Fork 0
81 lines (79 loc) · 2.2 KB
/
Copy pathtypesense.yml
File metadata and controls
81 lines (79 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Workflow that rebuilds the search index.
name: Rebuild Search Index

# Triggers:
#   workflow_dispatch - manual run.
#   workflow_run      - runs whenever a run of the "pages-build-deployment"
#                       workflow completes (the "completed" activity type
#                       fires whatever that run's conclusion was).
on:
  workflow_dispatch:
  workflow_run:
    workflows: [pages-build-deployment]
    types: [completed]
# One job: run the Typesense DocSearch scraper from its container image,
# configured through environment variables.
jobs:
  scrape:
    name: Rebuild Index
    # The workflow_run trigger fires on "completed" regardless of outcome, so
    # only re-index when the upstream run succeeded. Manual dispatches carry no
    # workflow_run payload and are always allowed through.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    container:
      # NOTE(review): ":latest" is a floating tag; consider pinning a version
      # so scraper upgrades are deliberate.
      image: typesense/docsearch-scraper:latest
      # Start the container as uid 0; the step below sets HOME to /root and
      # runs with /root as its working directory.
      options: --user 0
    strategy:
      # Let the remaining matrix leg finish even if one fails.
      fail-fast: false
      matrix:
        # NOTE(review): matrix.node is not referenced anywhere in this job, so
        # both legs run the same scrape against the same TYPESENSE_HOST.
        # Confirm whether each node was meant to select its own host secret.
        node:
          - hel
          - fsn
    steps:
      - name: Run scraper
        env:
          HOME: /root
          # Connection settings for the Typesense instance; key and host come
          # from repository secrets.
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_SCRAPER_API_KEY }}
          TYPESENSE_HOST: ${{ secrets.TYPESENSE_INSTANCE_URL_01 }}
          # Quoted so the value stays a string rather than an integer.
          TYPESENSE_PORT: '443'
          TYPESENSE_PROTOCOL: https
          # Scraper configuration, passed as a JSON document in a literal block
          # scalar. The index name is injected from a secret when the workflow
          # expression is evaluated.
          CONFIG: |
            {
              "index_name": "${{ secrets.TYPESENSE_COLLECTION_NAME }}",
              "start_urls": [
                "https://getcert.org/"
              ],
              "sitemap_urls": [
                "https://getcert.org/sitemap.xml"
              ],
              "sitemap_alternate_links": true,
              "stop_urls": [],
              "selectors": {
                "lvl0": {
                  "selector": ".menu__link--sublist.menu__link--active",
                  "global": true,
                  "default_value": "Documentation"
                },
                "lvl1": "header h1",
                "lvl2": "article h2",
                "lvl3": "article h3",
                "lvl4": "article h4",
                "lvl5": "article h5, article td:first-child",
                "lvl6": "article h6",
                "text": "article p, article li, article td:last-child"
              },
              "strip_chars": " .,;:#",
              "custom_settings": {
                "separatorsToIndex": "_",
                "attributesForFaceting": [
                  "language",
                  "version",
                  "type"
                ],
                "attributesToRetrieve": [
                  "hierarchy",
                  "content",
                  "anchor",
                  "url",
                  "url_without_anchor",
                  "type"
                ]
              },
              "conversation_id": [
                "1090805758"
              ],
              "nb_hits": 8687
            }
        run: pipenv run python -m src.index
        working-directory: /root