From 5e686f0ab91cba1154c40f0f7830d400cd6562ce Mon Sep 17 00:00:00 2001 From: Rajesh Malviya Date: Tue, 17 Jun 2025 19:00:10 +0530 Subject: [PATCH 1/5] content [nfc]: Make MathNode a sealed class --- lib/model/content.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index 768031ae9a..7a670ae5b1 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -341,7 +341,7 @@ class CodeBlockSpanNode extends ContentNode { } } -abstract class MathNode extends ContentNode { +sealed class MathNode extends ContentNode { const MathNode({ super.debugHtmlNode, required this.texSource, From 88ebff29c2ed7690c05c2514c68e74a8fd0e7858 Mon Sep 17 00:00:00 2001 From: Rajesh Malviya Date: Tue, 17 Jun 2025 19:26:55 +0530 Subject: [PATCH 2/5] content [nfc]: Make `KatexHtmlParseError` private --- lib/model/katex.dart | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/model/katex.dart b/lib/model/katex.dart index 64c5eea82b..b5782e9485 100644 --- a/lib/model/katex.dart +++ b/lib/model/katex.dart @@ -93,7 +93,7 @@ MathParserResult? parseMath(dom.Element element, { required bool block }) { final parser = _KatexParser(); try { nodes = parser.parseKatexHtml(katexHtmlElement); - } on KatexHtmlParseError catch (e, st) { + } on _KatexHtmlParseError catch (e, st) { assert(debugLog('$e\n$st')); } @@ -123,7 +123,7 @@ class _KatexParser { if (node case dom.Element(localName: 'span')) { return _parseSpan(node); } else { - throw KatexHtmlParseError(); + throw _KatexHtmlParseError(); } })); } @@ -303,14 +303,14 @@ class _KatexParser { case 'fontsize-ensurer': // .sizing, // .fontsize-ensurer { ... } - if (index + 2 > spanClasses.length) throw KatexHtmlParseError(); + if (index + 2 > spanClasses.length) throw _KatexHtmlParseError(); final resetSizeClass = spanClasses[index++]; final sizeClass = spanClasses[index++]; final resetSizeClassSuffix = _resetSizeClassRegExp.firstMatch(resetSizeClass)?.group(1); - if (resetSizeClassSuffix == null) throw KatexHtmlParseError(); + if (resetSizeClassSuffix == null) throw _KatexHtmlParseError(); final sizeClassSuffix = _sizeClassRegExp.firstMatch(sizeClass)?.group(1); - if (sizeClassSuffix == null) throw KatexHtmlParseError(); + if (sizeClassSuffix == null) throw _KatexHtmlParseError(); const sizes = [0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.44, 1.728, 2.074, 2.488]; @@ -318,13 +318,13 @@ class _KatexParser { final sizeIdx = int.parse(sizeClassSuffix, radix: 10); // These indexes start at 1. - if (resetSizeIdx > sizes.length) throw KatexHtmlParseError(); - if (sizeIdx > sizes.length) throw KatexHtmlParseError(); + if (resetSizeIdx > sizes.length) throw _KatexHtmlParseError(); + if (sizeIdx > sizes.length) throw _KatexHtmlParseError(); fontSizeEm = sizes[sizeIdx - 1] / sizes[resetSizeIdx - 1]; case 'delimsizing': // .delimsizing { ... } - if (index + 1 > spanClasses.length) throw KatexHtmlParseError(); + if (index + 1 > spanClasses.length) throw _KatexHtmlParseError(); fontFamily = switch (spanClasses[index++]) { 'size1' => 'KaTeX_Size1', 'size2' => 'KaTeX_Size2', @@ -332,19 +332,19 @@ class _KatexParser { 'size4' => 'KaTeX_Size4', 'mult' => // TODO handle nested spans with `.delim-size{1,4}` class. - throw KatexHtmlParseError(), - _ => throw KatexHtmlParseError(), + throw _KatexHtmlParseError(), + _ => throw _KatexHtmlParseError(), }; // TODO handle .nulldelimiter and .delimcenter . case 'op-symbol': // .op-symbol { ... } - if (index + 1 > spanClasses.length) throw KatexHtmlParseError(); + if (index + 1 > spanClasses.length) throw _KatexHtmlParseError(); fontFamily = switch (spanClasses[index++]) { 'small-op' => 'KaTeX_Size1', 'large-op' => 'KaTeX_Size2', - _ => throw KatexHtmlParseError(), + _ => throw _KatexHtmlParseError(), }; // TODO handle more classes from katex.scss @@ -392,7 +392,7 @@ class _KatexParser { } else { spans = _parseChildSpans(element.nodes); } - if (text == null && spans == null) throw KatexHtmlParseError(); + if (text == null && spans == null) throw _KatexHtmlParseError(); return KatexSpanNode( styles: inlineStyles != null @@ -429,7 +429,7 @@ class _KatexParser { ' ${expression.toDebugString()}')); _hasError = true; } else { - throw KatexHtmlParseError(); + throw _KatexHtmlParseError(); } } @@ -437,7 +437,7 @@ class _KatexParser { heightEm: heightEm, ); } else { - throw KatexHtmlParseError(); + throw _KatexHtmlParseError(); } } return null; @@ -540,9 +540,10 @@ class KatexSpanStyles { } } -class KatexHtmlParseError extends Error { +class _KatexHtmlParseError extends Error { final String? message; - KatexHtmlParseError([this.message]); + + _KatexHtmlParseError([this.message]); @override String toString() { From b11f758c3a6c584e5b75edaed70ee63175284d02 Mon Sep 17 00:00:00 2001 From: Rajesh Malviya Date: Tue, 17 Jun 2025 19:42:44 +0530 Subject: [PATCH 3/5] content: Allow KaTeX parser to report failure reasons --- lib/model/content.dart | 21 +++++++++++++--- lib/model/katex.dart | 57 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index 7a670ae5b1..e4273f1b3a 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -346,6 +346,8 @@ sealed class MathNode extends ContentNode { super.debugHtmlNode, required this.texSource, required this.nodes, + this.debugHardFailReason, + this.debugSoftFailReason, }); final String texSource; @@ -357,6 +359,9 @@ sealed class MathNode extends ContentNode { /// fallback instead. final List? nodes; + final KatexParserHardFailReason? debugHardFailReason; + final KatexParserSoftFailReason? debugSoftFailReason; + @override void debugFillProperties(DiagnosticPropertiesBuilder properties) { super.debugFillProperties(properties); @@ -411,6 +416,8 @@ class MathBlockNode extends MathNode implements BlockContentNode { super.debugHtmlNode, required super.texSource, required super.nodes, + super.debugHardFailReason, + super.debugSoftFailReason, }); } @@ -880,6 +887,8 @@ class MathInlineNode extends MathNode implements InlineContentNode { super.debugHtmlNode, required super.texSource, required super.nodes, + super.debugHardFailReason, + super.debugSoftFailReason, }); } @@ -921,7 +930,9 @@ class _ZulipInlineContentParser { return MathInlineNode( texSource: parsed.texSource, nodes: parsed.nodes, - debugHtmlNode: debugHtmlNode); + debugHtmlNode: debugHtmlNode, + debugHardFailReason: kDebugMode ? parsed.hardFailReason : null, + debugSoftFailReason: kDebugMode ? parsed.softFailReason : null); } UserMentionNode? parseUserMention(dom.Element element) { @@ -1628,7 +1639,9 @@ class _ZulipContentParser { result.add(MathBlockNode( texSource: parsed.texSource, nodes: parsed.nodes, - debugHtmlNode: kDebugMode ? firstChild : null)); + debugHtmlNode: kDebugMode ? firstChild : null, + debugHardFailReason: kDebugMode ? parsed.hardFailReason : null, + debugSoftFailReason: kDebugMode ? parsed.softFailReason : null)); } else { result.add(UnimplementedBlockContentNode(htmlNode: firstChild)); } @@ -1664,7 +1677,9 @@ class _ZulipContentParser { result.add(MathBlockNode( texSource: parsed.texSource, nodes: parsed.nodes, - debugHtmlNode: debugHtmlNode)); + debugHtmlNode: debugHtmlNode, + debugHardFailReason: kDebugMode ? parsed.hardFailReason : null, + debugSoftFailReason: kDebugMode ? parsed.softFailReason : null)); continue; } } diff --git a/lib/model/katex.dart b/lib/model/katex.dart index b5782e9485..922546c676 100644 --- a/lib/model/katex.dart +++ b/lib/model/katex.dart @@ -9,10 +9,40 @@ import 'binding.dart'; import 'content.dart'; import 'settings.dart'; +/// The failure reason in case the KaTeX parser encountered a +/// `_KatexHtmlParseError` exception. +/// +/// Generally this means that parser encountered an unexpected HTML structure, +/// an unsupported HTML node, or an unexpected inline CSS style or CSS class on +/// a specific node. +class KatexParserHardFailReason { + const KatexParserHardFailReason({ + required this.error, + required this.stackTrace, + }); + + final String error; + final StackTrace stackTrace; +} + +/// The failure reason in case the KaTeX parser found an unsupported +/// CSS class or unsupported inline CSS style property. +class KatexParserSoftFailReason { + const KatexParserSoftFailReason({ + this.unsupportedCssClasses = const [], + this.unsupportedInlineCssProperties = const [], + }); + + final List unsupportedCssClasses; + final List unsupportedInlineCssProperties; +} + class MathParserResult { const MathParserResult({ required this.texSource, required this.nodes, + this.hardFailReason, + this.softFailReason, }); final String texSource; @@ -23,6 +53,9 @@ class MathParserResult { /// CSS style, indicating that the widget should render the [texSource] as a /// fallback instead. final List? nodes; + + final KatexParserHardFailReason? hardFailReason; + final KatexParserSoftFailReason? softFailReason; } /// Parses the HTML spans containing KaTeX HTML tree. @@ -88,6 +121,8 @@ MathParserResult? parseMath(dom.Element element, { required bool block }) { final flagForceRenderKatex = globalSettings.getBool(BoolGlobalSetting.forceRenderKatex); + KatexParserHardFailReason? hardFailReason; + KatexParserSoftFailReason? softFailReason; List? nodes; if (flagRenderKatex) { final parser = _KatexParser(); @@ -95,14 +130,24 @@ MathParserResult? parseMath(dom.Element element, { required bool block }) { nodes = parser.parseKatexHtml(katexHtmlElement); } on _KatexHtmlParseError catch (e, st) { assert(debugLog('$e\n$st')); + hardFailReason = KatexParserHardFailReason( + error: e.message ?? 'unknown', + stackTrace: st); } if (parser.hasError && !flagForceRenderKatex) { nodes = null; + softFailReason = KatexParserSoftFailReason( + unsupportedCssClasses: parser.unsupportedCssClasses, + unsupportedInlineCssProperties: parser.unsupportedInlineCssProperties); } } - return MathParserResult(nodes: nodes, texSource: texSource); + return MathParserResult( + nodes: nodes, + texSource: texSource, + hardFailReason: hardFailReason, + softFailReason: softFailReason); } else { return null; } @@ -112,6 +157,9 @@ class _KatexParser { bool get hasError => _hasError; bool _hasError = false; + final unsupportedCssClasses = []; + final unsupportedInlineCssProperties = []; + List parseKatexHtml(dom.Element element) { assert(element.localName == 'span'); assert(element.className == 'katex-html'); @@ -123,7 +171,10 @@ class _KatexParser { if (node case dom.Element(localName: 'span')) { return _parseSpan(node); } else { - throw _KatexHtmlParseError(); + throw _KatexHtmlParseError( + node is dom.Element + ? 'unsupported html node: ${node.localName}' + : 'unsupported html node'); } })); } @@ -374,6 +425,7 @@ class _KatexParser { default: assert(debugLog('KaTeX: Unsupported CSS class: $spanClass')); + unsupportedCssClasses.add(spanClass); _hasError = true; } } @@ -427,6 +479,7 @@ class _KatexParser { // TODO handle more CSS properties assert(debugLog('KaTeX: Unsupported CSS expression:' ' ${expression.toDebugString()}')); + unsupportedInlineCssProperties.add(property); _hasError = true; } else { throw _KatexHtmlParseError(); From 4b90a9a889d558b445c5d904ac5d222b5d785cd5 Mon Sep 17 00:00:00 2001 From: Rajesh Malviya Date: Tue, 17 Jun 2025 20:05:57 +0530 Subject: [PATCH 4/5] tools/content: Support surveying unimplemented KaTeX features --- tools/content/check-features | 14 +- tools/content/unimplemented_katex_test.dart | 159 ++++++++++++++++++++ 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 tools/content/unimplemented_katex_test.dart diff --git a/tools/content/check-features b/tools/content/check-features index 76c00f1ce9..747924dc86 100755 --- a/tools/content/check-features +++ b/tools/content/check-features @@ -29,6 +29,11 @@ The steps are: file. This wraps around tools/content/unimplemented_features_test.dart. + katex-check + Check for unimplemented KaTeX features. This requires the corpus + directory \`CORPUS_DIR\` to contain at least one corpus file. + This wraps around tools/content/unimplemented_katex_test.dart. + Options: --config @@ -50,7 +55,7 @@ opt_verbose= opt_steps=() while (( $# )); do case "$1" in - fetch|check) opt_steps+=("$1"); shift;; + fetch|check|katex-check) opt_steps+=("$1"); shift;; --config) shift; opt_zuliprc="$1"; shift;; --verbose) opt_verbose=1; shift;; --help) usage; exit 0;; @@ -98,11 +103,18 @@ run_check() { || return 1 } +run_katex_check() { + flutter test tools/content/unimplemented_katex_test.dart \ + --dart-define=corpusDir="$opt_corpus_dir" \ + || return 1 +} + for step in "${opt_steps[@]}"; do echo "Running ${step}" case "${step}" in fetch) run_fetch ;; check) run_check ;; + katex-check) run_katex_check ;; *) echo >&2 "Internal error: unknown step ${step}" ;; esac done diff --git a/tools/content/unimplemented_katex_test.dart b/tools/content/unimplemented_katex_test.dart new file mode 100644 index 0000000000..ad3769e78f --- /dev/null +++ b/tools/content/unimplemented_katex_test.dart @@ -0,0 +1,159 @@ +// Override `flutter test`'s default timeout +@Timeout(Duration(minutes: 10)) +library; + +import 'dart:io'; +import 'dart:math'; + +import 'package:checks/checks.dart'; +import 'package:collection/collection.dart'; +import 'package:flutter/foundation.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:zulip/model/content.dart'; +import 'package:zulip/model/settings.dart'; + +import '../../test/model/binding.dart'; +import 'model.dart'; + +void main() async { + TestZulipBinding.ensureInitialized(); + await testBinding.globalStore.settings.setBool( + BoolGlobalSetting.renderKatex, true); + + Future checkForKatexFailuresInFile(File file) async { + int totalMessageCount = 0; + final Set katexMessageIds = {}; + final Set failedKatexMessageIds = {}; + int totalMathBlockNodes = 0; + int failedMathBlockNodes = 0; + int totalMathInlineNodes = 0; + int failedMathInlineNodes = 0; + + final failedMessageIdsByReason = >{}; + final failedMathNodesByReason = >{}; + + void walk(int messageId, DiagnosticsNode node) { + final value = node.value; + if (value is UnimplementedNode) return; + + for (final child in node.getChildren()) { + walk(messageId, child); + } + + if (value is! MathNode) return; + katexMessageIds.add(messageId); + switch (value) { + case MathBlockNode(): totalMathBlockNodes++; + case MathInlineNode(): totalMathInlineNodes++; + } + + if (value.nodes != null) return; + failedKatexMessageIds.add(messageId); + switch (value) { + case MathBlockNode(): failedMathBlockNodes++; + case MathInlineNode(): failedMathInlineNodes++; + } + + final hardFailReason = value.debugHardFailReason; + final softFailReason = value.debugSoftFailReason; + int failureCount = 0; + + if (hardFailReason != null) { + final firstLine = hardFailReason.stackTrace.toString().split('\n').first; + final reason = 'hard fail: ${hardFailReason.error} "$firstLine"'; + (failedMessageIdsByReason[reason] ??= {}).add(messageId); + (failedMathNodesByReason[reason] ??= {}).add(value); + failureCount++; + } + + if (softFailReason != null) { + for (final cssClass in softFailReason.unsupportedCssClasses) { + final reason = 'unsupported css class: $cssClass'; + (failedMessageIdsByReason[reason] ??= {}).add(messageId); + (failedMathNodesByReason[reason] ??= {}).add(value); + failureCount++; + } + for (final cssProp in softFailReason.unsupportedInlineCssProperties) { + final reason = 'unsupported inline css property: $cssProp'; + (failedMessageIdsByReason[reason] ??= {}).add(messageId); + (failedMathNodesByReason[reason] ??= {}).add(value); + failureCount++; + } + } + + if (failureCount == 0) { + final reason = 'unknown'; + (failedMessageIdsByReason[reason] ??= {}).add(messageId); + (failedMathNodesByReason[reason] ??= {}).add(value); + } + } + + await for (final message in readMessagesFromJsonl(file)) { + totalMessageCount++; + walk(message.id, parseContent(message.content).toDiagnosticsNode()); + } + + final buf = StringBuffer(); + buf.writeln(); + buf.writeln('Out of $totalMessageCount total messages,' + ' ${katexMessageIds.length} of them were KaTeX containing messages' + ' and ${failedKatexMessageIds.length} of those failed.'); + buf.writeln('There were $totalMathBlockNodes math block nodes out of which $failedMathBlockNodes failed.'); + buf.writeln('There were $totalMathInlineNodes math inline nodes out of which $failedMathInlineNodes failed.'); + buf.writeln(); + + for (final MapEntry(key: reason, value: messageIds) in failedMessageIdsByReason.entries.sorted( + (a, b) => b.value.length.compareTo(a.value.length), + )) { + final failedMathNodes = failedMathNodesByReason[reason]!.toList(); + failedMathNodes.shuffle(); + final oldestId = messageIds.reduce(min); + final newestId = messageIds.reduce(max); + + buf.writeln('Because of $reason:'); + buf.writeln(' ${messageIds.length} messages failed.'); + buf.writeln(' Oldest message: $oldestId, Newest message: $newestId'); + buf.writeln(' Message IDs (up to 100): ${messageIds.take(100).join(', ')}'); + buf.writeln(' TeX source (up to 30):'); + for (final node in failedMathNodes.take(30)) { + switch (node) { + case MathBlockNode(): + buf.writeln(' ```math'); + for (final line in node.texSource.split('\n')) { + buf.writeln(' $line'); + } + buf.writeln(' ```'); + case MathInlineNode(): + buf.writeln(' \$\$ ${node.texSource} \$\$'); + } + } + buf.writeln(' HTML (up to 3):'); + for (final node in failedMathNodes.take(3)) { + buf.writeln(' ${node.debugHtmlText}'); + } + buf.writeln(); + } + + check(failedKatexMessageIds.length, because: buf.toString()).equals(0); + } + + final corpusFiles = _getCorpusFiles(); + + if (corpusFiles.isEmpty) { + throw Exception('No corpus found in directory "$_corpusDirPath" to check' + ' for katex failures.'); + } + + group('Check for katex failures in', () { + for (final file in corpusFiles) { + test(file.path, () => checkForKatexFailuresInFile(file)); + } + }); +} + +const String _corpusDirPath = String.fromEnvironment('corpusDir'); + +Iterable _getCorpusFiles() { + final corpusDir = Directory(_corpusDirPath); + return corpusDir.existsSync() ? corpusDir.listSync().whereType() : []; +} From 80dcd472a5dadf7fcbe267bb792dc4c10db96820 Mon Sep 17 00:00:00 2001 From: Rajesh Malviya Date: Wed, 18 Jun 2025 22:25:23 +0530 Subject: [PATCH 5/5] tools/content: Add a flag to control verbosity of KaTeX check result --- tools/content/check-features | 1 + tools/content/unimplemented_katex_test.dart | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tools/content/check-features b/tools/content/check-features index 747924dc86..7b4698c099 100755 --- a/tools/content/check-features +++ b/tools/content/check-features @@ -106,6 +106,7 @@ run_check() { run_katex_check() { flutter test tools/content/unimplemented_katex_test.dart \ --dart-define=corpusDir="$opt_corpus_dir" \ + --dart-define=verbose="$opt_verbose" \ || return 1 } diff --git a/tools/content/unimplemented_katex_test.dart b/tools/content/unimplemented_katex_test.dart index ad3769e78f..80b0f482a7 100644 --- a/tools/content/unimplemented_katex_test.dart +++ b/tools/content/unimplemented_katex_test.dart @@ -113,6 +113,11 @@ void main() async { buf.writeln('Because of $reason:'); buf.writeln(' ${messageIds.length} messages failed.'); buf.writeln(' Oldest message: $oldestId, Newest message: $newestId'); + if (!_verbose) { + buf.writeln(); + continue; + } + buf.writeln(' Message IDs (up to 100): ${messageIds.take(100).join(', ')}'); buf.writeln(' TeX source (up to 30):'); for (final node in failedMathNodes.take(30)) { @@ -153,6 +158,8 @@ void main() async { const String _corpusDirPath = String.fromEnvironment('corpusDir'); +const bool _verbose = int.fromEnvironment('verbose', defaultValue: 0) != 0; + Iterable _getCorpusFiles() { final corpusDir = Directory(_corpusDirPath); return corpusDir.existsSync() ? corpusDir.listSync().whereType() : [];