Skip to content

Commit 0930a8c

Browse files
committed
fix: Harden SVG Parser
1 parent 3075935 commit 0930a8c

1 file changed

Lines changed: 86 additions & 22 deletions

File tree

src/vector/mod.rs

Lines changed: 86 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -207,22 +207,38 @@ fn try_extract(svg: &str, seed: [u8; 32], actual_bits: usize) -> Option<ExtractR
207207

208208
// ── SVG Text-Level Processing ────────────────────────────────────────────
209209

210-
/// Find all `d="..."` attribute values in the SVG text.
211-
fn find_path_d_attrs(svg: &str) -> Vec<String> {
210+
/// Find all `d="..."` attribute positions in the SVG text.
211+
/// Returns a vector of `(start, end)` byte offsets into `svg` delimiting the text between the quotes (`end` is exclusive: it is the index of the closing quote).
212+
fn find_path_d_positions(svg: &str) -> Vec<(usize, usize)> {
212213
let mut results = Vec::new();
213-
let pattern = " d=\"";
214-
let mut search_from = 0;
214+
let bytes = svg.as_bytes();
215+
let mut i = 0;
215216

216-
while let Some(start) = svg[search_from..].find(pattern) {
217-
let attr_start = search_from + start + pattern.len();
218-
if let Some(end) = svg[attr_start..].find('"') {
219-
results.push(svg[attr_start..attr_start + end].to_string());
217+
while i + 3 < bytes.len() {
218+
// Match any whitespace followed by d="
219+
if bytes[i].is_ascii_whitespace() && bytes[i + 1] == b'd' && bytes[i + 2] == b'=' && bytes[i + 3] == b'"' {
220+
let attr_start = i + 4;
221+
if let Some(end) = svg[attr_start..].find('"') {
222+
results.push((attr_start, attr_start + end));
223+
i = attr_start + end + 1;
224+
} else {
225+
break;
226+
}
227+
} else {
228+
i += 1;
220229
}
221-
search_from = attr_start;
222230
}
223231
results
224232
}
225233

234+
/// Find all `d="..."` attribute values in the SVG text.
235+
fn find_path_d_attrs(svg: &str) -> Vec<String> {
236+
find_path_d_positions(svg)
237+
.into_iter()
238+
.map(|(start, end)| svg[start..end].to_string())
239+
.collect()
240+
}
241+
226242
/// Count paths that have at least `min_coords` numeric values.
227243
fn count_qualifying_paths(svg: &str, min_coords: usize) -> usize {
228244
find_path_d_attrs(svg)
@@ -289,25 +305,13 @@ fn parse_numbers(d: &str) -> Vec<(f64, usize, usize)> {
289305

290306
/// Embed watermark into all qualifying paths in the SVG text.
291307
fn embed_in_svg(svg: &str, scrambled: &[bool], seed: [u8; 32]) -> Result<(String, usize), String> {
292-
let pattern = " d=\"";
293308
let mut num_paths = 0;
294309

295310
// Pre-generate all PN values
296311
let pn_values: Vec<f64> = (0..scrambled.len()).map(|i| generate_pn(seed, i)).collect();
297312

298313
// Collect all (attr_start, attr_end) positions first
299-
let d_positions: Vec<(usize, usize)> = {
300-
let mut positions = Vec::new();
301-
let mut search_from = 0;
302-
while let Some(start) = svg[search_from..].find(pattern) {
303-
let attr_start = search_from + start + pattern.len();
304-
if let Some(end) = svg[attr_start..].find('"') {
305-
positions.push((attr_start, attr_start + end));
306-
}
307-
search_from = attr_start + 1;
308-
}
309-
positions
310-
};
314+
let d_positions = find_path_d_positions(svg);
311315

312316
// Build result using a single pass with collected replacements
313317
let mut replacements: Vec<(usize, usize, String)> = Vec::new();
@@ -538,6 +542,66 @@ mod tests {
538542
assert_eq!(nums[1].0, -20.3);
539543
}
540544

545+
#[test]
546+
fn test_parse_numbers_scientific_notation() {
547+
let d = "M 1.5e3 2E-4 L 3.0e+2 -1e1";
548+
let nums = parse_numbers(d);
549+
assert_eq!(nums.len(), 4);
550+
assert!((nums[0].0 - 1500.0).abs() < 0.01);
551+
assert!((nums[1].0 - 0.0002).abs() < 0.0001);
552+
assert!((nums[2].0 - 300.0).abs() < 0.01);
553+
assert!((nums[3].0 - (-10.0)).abs() < 0.01);
554+
}
555+
556+
#[test]
557+
fn test_parse_numbers_implicit_separator() {
558+
// SVG allows "100-50" to mean "100, -50"
559+
let d = "M100-50L200-100";
560+
let nums = parse_numbers(d);
561+
assert_eq!(nums.len(), 4);
562+
assert_eq!(nums[0].0, 100.0);
563+
assert_eq!(nums[1].0, -50.0);
564+
assert_eq!(nums[2].0, 200.0);
565+
assert_eq!(nums[3].0, -100.0);
566+
}
567+
568+
#[test]
569+
fn test_parse_numbers_empty_path() {
570+
assert!(parse_numbers("").is_empty());
571+
assert!(parse_numbers("M Z").is_empty());
572+
}
573+
574+
#[test]
575+
fn test_parse_numbers_small_values() {
576+
let d = "M 0.001 -0.0025";
577+
let nums = parse_numbers(d);
578+
assert_eq!(nums.len(), 2);
579+
assert!((nums[0].0 - 0.001).abs() < 1e-6);
580+
assert!((nums[1].0 - (-0.0025)).abs() < 1e-6);
581+
}
582+
583+
#[test]
584+
fn test_find_path_d_attrs_whitespace_variants() {
585+
// Tab before d=
586+
let svg = "<svg><path\td=\"M 0 0 L 10 10\"/></svg>";
587+
assert_eq!(find_path_d_attrs(svg).len(), 1);
588+
589+
// Newline directly before d=
590+
let svg = "<svg><path\nd=\"M 5 5 L 15 15\"/></svg>";
591+
assert_eq!(find_path_d_attrs(svg).len(), 1);
592+
593+
// Carriage return before d=
594+
let svg = "<svg><path\rd=\"M 0 0 L 10 10\"/></svg>";
595+
assert_eq!(find_path_d_attrs(svg).len(), 1);
596+
597+
// No paths
598+
assert!(find_path_d_attrs("<svg></svg>").is_empty());
599+
600+
// Multiple paths with mixed whitespace
601+
let svg = "<svg><path d=\"M 0 0\"/><path\td=\"M 1 1\"/></svg>";
602+
assert_eq!(find_path_d_attrs(svg).len(), 2);
603+
}
604+
541605
#[test]
542606
fn test_find_path_d_attrs() {
543607
let svg = r#"<svg><path d="M 0 0 L 10 10 Z"/><path d="M 5 5 L 15 15 Z"/></svg>"#;

0 commit comments

Comments
 (0)