diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs
index 3eb4d843..71347027 100644
--- a/crates/common/src/auction/formats.rs
+++ b/crates/common/src/auction/formats.rs
@@ -217,15 +217,17 @@ pub fn convert_to_openrtb_response(
})
})?;
- // Process creative HTML if present - rewrite URLs and return inline
+ // Process creative HTML if present — sanitize dangerous markup first, then rewrite URLs.
let creative_html = if let Some(ref raw_creative) = bid.creative {
- // Rewrite creative HTML with proxy URLs for first-party delivery
- let rewritten = creative::rewrite_creative_html(settings, raw_creative);
+ let sanitized = creative::sanitize_creative_html(raw_creative);
+ let rewritten = creative::rewrite_creative_html(settings, &sanitized);
log::debug!(
- "Rewritten creative for auction {} slot {} ({} bytes)",
+ "Processed creative for auction {} slot {} ({} → {} → {} bytes)",
auction_request.id,
slot_id,
+ raw_creative.len(),
+ sanitized.len(),
rewritten.len()
);
diff --git a/crates/common/src/creative.rs b/crates/common/src/creative.rs
index 63db7e6f..0b9e1cc5 100644
--- a/crates/common/src/creative.rs
+++ b/crates/common/src/creative.rs
@@ -303,6 +303,205 @@ pub fn rewrite_css_body(settings: &Settings, css: &str) -> String {
rewrite_style_urls(settings, css)
}
+/// Upper bound, in bytes, on the creative HTML that [`sanitize_creative_html`] will process.
+///
+/// Anything longer is rejected and an empty string is returned in its place, which keeps
+/// allocations bounded on the hot path. Fastly Compute already caps upstream request
+/// bodies; this check additionally covers internal callers.
+const MAX_CREATIVE_SIZE: usize = 1 << 20; // 1 MiB
+
+/// Returns `true` if a lowercased `data:` URI points to a safe, non-executable MIME type.
+///
+/// Only well-known raster image formats are allowed. `data:image/svg+xml` is **excluded**
+/// because SVG documents can contain `"#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains("">"#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains("data:text/html"), "should strip data: src");
+ }
+
+    #[test]
+    fn sanitize_strips_dangerous_data_src_attribute() {
+        // Lazy-loaders read data-src, so a javascript: URI there must not survive sanitizing.
+        let html = r#"<img data-src="javascript:alert(1)">"#;
+        let out = sanitize_creative_html(html);
+        assert!(
+            !out.contains("javascript:"),
+            "should strip javascript: in data-src"
+        );
+    }
+
+    #[test]
+    fn sanitize_strips_dangerous_srcset_leading_entry() {
+        // A javascript: URI at the start of srcset must be stripped.
+        // The whole attribute is expected to go, not just the offending candidate.
+        let html =
+            r#"<img srcset="javascript:alert(1) 1x, https://cdn.example.com/img.png 2x">"#;
+        let out = sanitize_creative_html(html);
+        assert!(
+            !out.contains("srcset"),
+            "should remove srcset with leading dangerous URL"
+        );
+        assert!(
+            !out.contains("javascript:"),
+            "should strip javascript: from srcset"
+        );
+    }
+
+ #[test]
+ fn sanitize_strips_dangerous_srcset_non_leading_entry() {
+ // A javascript: URI that is NOT the first entry must also be stripped.
+ // This was the gap in the previous starts_with-only check.
+ let html =
+ r#"

