1
+ const fs = require ( "fs" ) ;
2
+ const path = require ( "path" ) ;
3
+ const util = require ( "util" ) ;
4
+
5
+ const readFileAsync = util . promisify ( fs . readFile ) ;
6
+ const writeFileAsync = util . promisify ( fs . writeFile ) ;
7
+
8
+ const lunr = require ( "lunr" ) ;
9
+ const htmlparser2 = require ( "htmlparser2" ) ;
10
+
11
/**
 * Extracts the page title and the heading-delimited sections of a rendered page.
 *
 * The markup is streamed through an htmlparser2 `Parser`. Text is only collected
 * while inside the `main` element. Every h1/h2/h3 opening tag finalizes the
 * content of the previous section, and every h1/h2/h3 closing tag (inside `main`)
 * starts a new section whose title is the heading text and whose hash is the
 * `href` of the `hash-link` anchor seen for that heading.
 *
 * The page title is the text of the `title` element seen outside `main`,
 * replaced by the text of a heading that closes while inside both `article`
 * and `header`.
 *
 * @param {string} html - Markup handed to `parser.write`.
 * @returns {{pageTitle: string, sections: Object[]}} The page title and the list
 *   of sections; each section has `title`, `hash` and `content` string fields.
 */
function html2text(html) {
  const HEADING_TAGS = ["h1", "h2", "h3"];

  let pageTitle = "";
  const sections = [];
  let sectionTitle = "";
  let sectionContent = "";
  let hashLink = "";
  let insideHashLink = false;

  // One open/closed flag per element whose presence changes how text is handled.
  const inside = {
    main: false,
    article: false,
    header: false,
    pre: false,
    h1: false,
    h2: false,
    h3: false,
    title: false,
    svg: false,
  };

  // Own-property check so tag names such as "constructor" are never treated as tracked.
  const isTracked = tagname => Object.prototype.hasOwnProperty.call(inside, tagname);
  const isHeading = tagname => HEADING_TAGS.includes(tagname);

  // The class attribute is a whitespace-separated token list, so match on tokens
  // rather than on the whole attribute value.
  const isHashLinkAnchor = (tagname, attributes) =>
    tagname === "a" &&
    String(attributes["class"] || "").split(/\s+/).includes("hash-link");

  // Stores the text accumulated so far as the content of the most recent section.
  function sectionEnd() {
    if (sections.length === 0) {
      return;
    }
    sections[sections.length - 1].content = sectionContent.trim();
    sectionContent = "";
  }

  // Opens a new section from the heading text and anchor collected so far.
  function sectionStart() {
    sections.push({
      title: sectionTitle.trim(),
      hash: hashLink,
    });
    sectionTitle = "";
    // Reset so a later heading without its own hash-link anchor does not
    // inherit the anchor of this heading.
    hashLink = "";
  }

  const parser = new htmlparser2.Parser(
    {
      onopentag(tagname, attributes) {
        if (isHeading(tagname)) {
          sectionEnd();
        }

        if (isTracked(tagname)) {
          inside[tagname] = true;
        } else if (isHashLinkAnchor(tagname, attributes)) {
          insideHashLink = true;
          // A missing href falls back to "" so callers can concatenate it safely.
          hashLink = attributes["href"] || "";
        }
      },
      ontext(text) {
        if (inside.svg) {
          return;
        }
        if (inside.main && !insideHashLink) {
          if (inside.h1 || inside.h2 || inside.h3) {
            sectionTitle += text;
          } else {
            sectionContent += text;
          }
        } else if (inside.title) {
          pageTitle += text;
        }
      },
      onclosetag(tagname) {
        if (isTracked(tagname)) {
          inside[tagname] = false;
        } else if (tagname === "a" && insideHashLink) {
          insideHashLink = false;
        }

        if (inside.main && isHeading(tagname)) {
          if (inside.article && inside.header) {
            pageTitle = sectionTitle.trim();
          }
          sectionStart();
        } else if (!inside.pre) {
          // Separate the text of adjacent elements; skipped while inside `pre`
          // so no extra spaces are inserted between its child elements.
          sectionTitle += " ";
          sectionContent += " ";
        }
      },
    },
    { decodeEntities: true, lowerCaseTags: true }
  );
  parser.write(html);
  parser.end();

  // Flush the content of the last section.
  sectionEnd();

  return { pageTitle, sections };
}
134
+
135
+ module . exports = function ( context , options ) {
136
+ return {
137
+ name : 'docusaurus-plugin' ,
138
+ getThemePath ( ) {
139
+ return path . resolve ( __dirname , './theme' ) ;
140
+ } ,
141
+ async postBuild ( { routesPaths = [ ] , outDir, baseUrl } ) {
142
+ const data = routesPaths . map ( route => {
143
+ let file = route ;
144
+ if ( file . startsWith ( baseUrl ) ) {
145
+ file = file . replace ( baseUrl , "" ) ;
146
+ }
147
+ if ( ! file . endsWith ( ".html" ) ) {
148
+ if ( ! file . endsWith ( "/" ) ) {
149
+ file += "/" ;
150
+ }
151
+ file += "index.html" ;
152
+ }
153
+ file = path . join ( outDir , file ) ;
154
+
155
+ return {
156
+ file,
157
+ route
158
+ } ;
159
+ } ) ;
160
+
161
+ const documents = ( await Promise . all ( data . map ( async ( { file, route } ) => {
162
+ const html = await readFileAsync ( file , { encoding : "utf8" } ) ;
163
+
164
+ const { pageTitle, sections } = html2text ( html ) ;
165
+
166
+ return sections . map ( section => ( {
167
+ pageTitle,
168
+ pageRoute : route ,
169
+ sectionRoute : route + section . hash ,
170
+ sectionTitle : section . title ,
171
+ sectionContent : section . content
172
+ } ) ) ;
173
+ } ) ) ) . reduce ( ( acc , val ) => acc . concat ( val ) , [ ] ) ; // .flat()
174
+
175
+ const index = lunr ( function ( ) {
176
+ this . ref ( "route" ) ;
177
+ this . field ( "title" ) ;
178
+ this . field ( "content" ) ;
179
+ documents . forEach ( function ( { sectionRoute, sectionTitle, sectionContent } ) {
180
+ this . add ( {
181
+ route : sectionRoute ,
182
+ title : sectionTitle ,
183
+ content : sectionContent
184
+ } ) ;
185
+ } , this )
186
+ } ) ;
187
+
188
+ await writeFileAsync (
189
+ path . join ( outDir , "search-index.json" ) ,
190
+ JSON . stringify ( {
191
+ documents : documents . map ( ( { pageTitle, sectionTitle, sectionRoute } ) => ( { pageTitle, sectionTitle, sectionRoute } ) ) ,
192
+ index
193
+ } ) ,
194
+ { encoding : "utf8" }
195
+ ) ;
196
+ } ,
197
+ } ;
198
+ } ;
0 commit comments