-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdisplay.go
More file actions
124 lines (111 loc) · 3.64 KB
/
display.go
File metadata and controls
124 lines (111 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package defuddle
import (
"log/slog"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
)
// removeHiddenElements removes elements that are hidden via an inline style
// attribute or a utility "hidden"/"invisible" class.
//
// The JavaScript original checks computed styles:
//
//	private removeHiddenElements(doc: Document) {
//	  // ... (checks computed styles for display:none, visibility:hidden, opacity:0)
//	}
//
// This port only looks at the element's own style and class attributes.
// Math-related elements (<math>, anything carrying data-mathml, and classes
// containing "katex-mathml" or "MathJax") are never removed, even when hidden.
//
// Matching elements are collected during traversal and removed afterwards.
// When d.debug is set, the number of removed elements is logged.
func (d *Defuddle) removeHiddenElements(doc *goquery.Document) {
	var toRemove []*goquery.Selection

	doc.Find("*").Each(func(_ int, element *goquery.Selection) {
		// Protect math elements from removal.
		if goquery.NodeName(element) == "math" {
			return
		}
		if _, hasMathML := element.Attr("data-mathml"); hasMathML {
			return
		}
		className := element.AttrOr("class", "")
		if strings.Contains(className, "katex-mathml") || strings.Contains(className, "MathJax") {
			return
		}

		// Check inline styles for hidden elements.
		if style, exists := element.Attr("style"); exists && inlineStyleHidesElement(style) {
			toRemove = append(toRemove, element)
			return
		}

		// Check class tokens for Tailwind/utility hidden classes. An empty
		// class attribute yields no tokens, so no separate guard is needed.
		for _, token := range strings.Fields(className) {
			// Exact matches: "hidden", "invisible".
			// Variants: "sm:hidden", "md:hidden", "lg:invisible", and any
			// other prefix:hidden / prefix:invisible token.
			if token == "hidden" || token == "invisible" ||
				strings.HasSuffix(token, ":hidden") || strings.HasSuffix(token, ":invisible") {
				toRemove = append(toRemove, element)
				return
			}
		}
	})

	for _, el := range toRemove {
		el.Remove()
	}

	if d.debug {
		slog.Debug("Removed hidden elements", "count", len(toRemove))
	}
}

// inlineStyleHidesElement reports whether an inline style attribute value
// contains a declaration that hides the element: display:none,
// visibility:hidden, or an opacity of zero.
//
// The style is split into declarations and each property/value pair is compared
// exactly, so partial matches such as "opacity:0.5", "fill-opacity:0" or
// "--x-display:none" are not treated as hidden. Matching is case-insensitive
// and tolerant of whitespace and a trailing "!important".
func inlineStyleHidesElement(style string) bool {
	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		prop, value, ok := strings.Cut(decl, ":")
		if !ok {
			continue
		}
		// Drop any "!important" suffix before comparing the value.
		value, _, _ = strings.Cut(value, "!")
		value = strings.TrimSpace(value)

		switch strings.TrimSpace(prop) {
		case "display":
			if value == "none" {
				return true
			}
		case "visibility":
			if value == "hidden" {
				return true
			}
		case "opacity":
			if isZeroOpacityValue(value) {
				return true
			}
		}
	}
	return false
}

// isZeroOpacityValue reports whether value is a plain zero written as a number
// or percentage, e.g. "0", "0.0", ".0", "0.00" or "0%". Any non-zero digit,
// sign, or other character makes it return false.
func isZeroOpacityValue(value string) bool {
	value = strings.TrimSuffix(value, "%")
	// Allow at most one decimal point; what remains must be only zeros.
	digits := strings.Replace(value, ".", "", 1)
	return digits != "" && strings.Trim(digits, "0") == ""
}
// reactRCPattern matches a $RC("B:X","S:X") call (single or double quoted
// arguments, optional whitespace) and captures the two quoted arguments: the
// boundary ID first, the slot ID second.
var reactRCPattern = regexp.MustCompile(`\$RC\(\s*["']([^"']+)["']\s*,\s*["']([^"']+)["']\s*\)`)

// resolveReactStreaming resolves React SSR streaming placeholders.
//
// React's streaming SSR emits <template id="B:X"> as Suspense boundaries, then
// later provides content in <div hidden id="S:X"> with a $RC("B:X","S:X") call.
// For every $RC call found in a script element, the boundary template is
// replaced with the inner HTML of the slot element and the slot is removed.
// Calls whose boundary or slot cannot be found, or whose slot has no inner
// HTML, are skipped and leave the document unchanged.
func resolveReactStreaming(doc *goquery.Document) {
	doc.Find("script").Each(func(_ int, script *goquery.Selection) {
		for _, call := range reactRCPattern.FindAllStringSubmatch(script.Text(), -1) {
			placeholder := doc.Find(`template[id="` + call[1] + `"]`)
			resolved := doc.Find(`[id="` + call[2] + `"]`)
			if placeholder.Length() == 0 || resolved.Length() == 0 {
				continue
			}

			// The serialisation error is deliberately dropped; an empty
			// result is treated as "nothing to swap in" below.
			content, _ := resolved.Html()
			if content == "" {
				continue
			}

			placeholder.ReplaceWithHtml(content)
			resolved.Remove()
		}
	})
}
// flattenShadowDOM inlines declarative Shadow DOM templates into the main document.
// Browsers use <template shadowrootmode="open"> for SSR shadow DOM; the content
// inside is invisible to goquery without flattening.
//
// Every <template> carrying a shadowrootmode or shadowroot attribute is
// replaced in place by its own inner HTML; a template with no inner HTML is
// simply removed.
//
// Templates are processed from the last match to the first so that nested
// shadow roots are flattened before the template that contains them. Going
// front to back would serialise and re-parse the outer template first, leaving
// the freshly parsed copy of the inner template in the document untouched.
func flattenShadowDOM(doc *goquery.Document) {
	templates := doc.Find(`template[shadowrootmode], template[shadowroot]`)

	// NOTE(review): relies on Find returning matches in document order
	// (ancestors before descendants), so reverse order visits descendants first.
	for i := templates.Length() - 1; i >= 0; i-- {
		tmpl := templates.Eq(i)
		if tmpl.Parent().Length() == 0 {
			continue
		}

		// The serialisation error is deliberately dropped; an empty result
		// falls through to plain removal of the template.
		inner, _ := tmpl.Html()
		if inner == "" {
			tmpl.Remove()
			continue
		}

		// Move template children into the parent, replacing the template.
		tmpl.ReplaceWithHtml(inner)
	}
}