diff --git a/proxy-bin-l7/src/client/mock_server/malware_list.rs b/proxy-bin-l7/src/client/mock_server/malware_list.rs index 2ac72c32..c852a1b8 100644 --- a/proxy-bin-l7/src/client/mock_server/malware_list.rs +++ b/proxy-bin-l7/src/client/mock_server/malware_list.rs @@ -27,6 +27,7 @@ pub(super) fn web_svc() -> impl Service impl Service impl IntoResponse { reason: Reason::Malware, }]) } + +pub const MALWARE_PACKAGIST_VENDOR: &str = "safechain"; +pub const MALWARE_PACKAGIST_PACKAGE: &str = "packagist-test"; +pub const MALWARE_PACKAGIST_VERSION: &str = "1.0.0"; +async fn malware_packagist() -> impl IntoResponse { + Json([ListDataEntry { + package_name: format!("{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}"), + version: PackageVersion::Semver(PragmaticSemver::new_semver(1, 0, 0)), + reason: Reason::Malware, + }]) +} + +pub const FRESH_PACKAGIST_VENDOR: &str = "safechain"; +pub const FRESH_PACKAGIST_PACKAGE: &str = "fresh-packagist-pkg"; +pub const FRESH_PACKAGIST_VERSION: &str = "2.0.0"; +async fn released_packagist() -> impl IntoResponse { + // Timestamp far in the future (year ~2255) so this entry is always "recently released" + // relative to any realistic `now - 48h` cutoff used in tests. 
+ Json([ReleasedPackageData { + package_name: format!("{FRESH_PACKAGIST_VENDOR}/{FRESH_PACKAGIST_PACKAGE}"), + version: PackageVersion::Semver(PragmaticSemver::parse(FRESH_PACKAGIST_VERSION).unwrap()), + released_on: 9_000_000_000, + }]) +} diff --git a/proxy-bin-l7/src/client/mock_server/mod.rs b/proxy-bin-l7/src/client/mock_server/mod.rs index fb02e0ba..7e86df7f 100644 --- a/proxy-bin-l7/src/client/mock_server/mod.rs +++ b/proxy-bin-l7/src/client/mock_server/mod.rs @@ -46,6 +46,7 @@ mod assert_endpoint; mod endpoint_protection_callbacks; pub mod malware_list; mod npm_registry; +mod packagist_registry; mod pypi_registry; mod vscode_marketplace; @@ -128,6 +129,10 @@ fn new_mock_server() -> impl Service impl Service + Clone { + service_fn(handle) +} + +/// Handles Packagist v2 metadata requests for the mock server. +/// +/// Serves `/p2/vendor/package.json` responses in Composer 2.x minified format. +/// Known test packages carry specific version entries used by the e2e firewall tests; +/// all other paths return `200 OK` with an empty packages object. +async fn handle(req: Request) -> Result { + let path = req.uri().path(); + + let malware_p2_path = + format!("/p2/{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}.json"); + let fresh_p2_path = format!("/p2/{FRESH_PACKAGIST_VENDOR}/{FRESH_PACKAGIST_PACKAGE}.json"); + + if path == malware_p2_path { + // Version 1.0.0 is in the malware list; 0.9.0 is safe and old. 
+ let body = json!({ + "minified": "composer/2.0", + "packages": { + format!("{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}"): [ + { + "name": format!("{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}"), + "description": "Test package for SafeChain malware blocking", + "version": MALWARE_PACKAGIST_VERSION, + "version_normalized": "1.0.0.0", + "dist": { + "url": "https://api.github.com/repos/test/test/zipball/abc", + "type": "zip", + "reference": "abc", + "shasum": "" + }, + "source": { + "url": "https://github.com/test/test.git", + "type": "git", + "reference": "abc" + }, + "time": "2020-06-01T00:00:00+00:00", + "require": {"php": "^8.0"} + }, + { + "version": "0.9.0", + "version_normalized": "0.9.0.0", + "dist": { + "url": "https://api.github.com/repos/test/test/zipball/def", + "type": "zip", + "reference": "def", + "shasum": "" + }, + "source": { + "url": "https://github.com/test/test.git", + "type": "git", + "reference": "def" + }, + "time": "2020-01-01T00:00:00+00:00" + } + ] + } + }); + return Ok(Json(body).into_response()); + } + + if path == fresh_p2_path { + // Version 2.0.0 has a far-future `time` (always "too new"); 1.0.0 is old and safe. + let body = json!({ + "minified": "composer/2.0", + "packages": { + format!("{FRESH_PACKAGIST_VENDOR}/{FRESH_PACKAGIST_PACKAGE}"): [ + { + "name": format!("{FRESH_PACKAGIST_VENDOR}/{FRESH_PACKAGIST_PACKAGE}"), + "description": "Test package for SafeChain min-age blocking", + "version": FRESH_PACKAGIST_VERSION, + "version_normalized": "2.0.0.0", + "dist": { + "url": "https://api.github.com/repos/test/fresh/zipball/ghi", + "type": "zip", + "reference": "ghi", + "shasum": "" + }, + "source": { + "url": "https://github.com/test/fresh.git", + "type": "git", + "reference": "ghi" + }, + // Far-future timestamp so this is always considered "recently released". 
+ "time": "2255-01-01T00:00:00+00:00", + "require": {"php": "^8.1"} + }, + { + "version": "1.0.0", + "version_normalized": "1.0.0.0", + "dist": { + "url": "https://api.github.com/repos/test/fresh/zipball/jkl", + "type": "zip", + "reference": "jkl", + "shasum": "" + }, + "source": { + "url": "https://github.com/test/fresh.git", + "type": "git", + "reference": "jkl" + }, + "time": "2020-01-01T00:00:00+00:00" + } + ] + } + }); + return Ok(Json(body).into_response()); + } + + Ok(StatusCode::OK.into_response()) +} diff --git a/proxy-bin-l7/src/test/e2e/test_proxy/firewall_packagist.rs b/proxy-bin-l7/src/test/e2e/test_proxy/firewall_packagist.rs new file mode 100644 index 00000000..99540227 --- /dev/null +++ b/proxy-bin-l7/src/test/e2e/test_proxy/firewall_packagist.rs @@ -0,0 +1,197 @@ +use rama::{ + http::{BodyExtractExt as _, StatusCode, service::client::HttpClientExt as _}, + telemetry::tracing, +}; + +use crate::{ + client::mock_server::malware_list::{ + FRESH_PACKAGIST_PACKAGE, FRESH_PACKAGIST_VENDOR, FRESH_PACKAGIST_VERSION, + MALWARE_PACKAGIST_PACKAGE, MALWARE_PACKAGIST_VENDOR, MALWARE_PACKAGIST_VERSION, + }, + test::e2e, +}; + +fn versions_in(body: &serde_json::Value, vendor: &str, package: &str) -> Vec { + body["packages"][format!("{vendor}/{package}")] + .as_array() + .unwrap_or(&vec![]) + .iter() + .filter_map(|v| v["version"].as_str().map(str::to_owned)) + .collect() +} + +// --- malware version suppression --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_malware_version_removed_from_metadata() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + let resp = client + .get(format!( + "https://repo.packagist.org/p2/{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}.json" + )) + .send() + .await + .unwrap(); + + assert_eq!( + StatusCode::OK, + resp.status(), + "proxy must not block metadata requests" + ); + let body: serde_json::Value = resp.try_into_json().await.unwrap(); + let 
versions = versions_in(&body, MALWARE_PACKAGIST_VENDOR, MALWARE_PACKAGIST_PACKAGE); + + assert!( + !versions.contains(&MALWARE_PACKAGIST_VERSION.to_owned()), + "malware version {MALWARE_PACKAGIST_VERSION} must be absent from rewritten metadata; got: {versions:?}" + ); + assert!( + versions.contains(&"0.9.0".to_owned()), + "safe version 0.9.0 must remain in rewritten metadata; got: {versions:?}" + ); +} + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_malware_version_removed_via_http() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + let resp = client + .get(format!( + "http://repo.packagist.org/p2/{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}.json" + )) + .send() + .await + .unwrap(); + + assert_eq!(StatusCode::OK, resp.status()); + let body: serde_json::Value = resp.try_into_json().await.unwrap(); + let versions = versions_in(&body, MALWARE_PACKAGIST_VENDOR, MALWARE_PACKAGIST_PACKAGE); + + assert!(!versions.contains(&MALWARE_PACKAGIST_VERSION.to_owned())); +} + +// --- min-age version suppression --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_new_package_version_removed_from_metadata() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + let resp = client + .get(format!( + "https://repo.packagist.org/p2/{FRESH_PACKAGIST_VENDOR}/{FRESH_PACKAGIST_PACKAGE}.json" + )) + .send() + .await + .unwrap(); + + assert_eq!( + StatusCode::OK, + resp.status(), + "proxy must not block metadata requests" + ); + let body: serde_json::Value = resp.try_into_json().await.unwrap(); + let versions = versions_in(&body, FRESH_PACKAGIST_VENDOR, FRESH_PACKAGIST_PACKAGE); + + assert!( + !versions.contains(&FRESH_PACKAGIST_VERSION.to_owned()), + "too-new version {FRESH_PACKAGIST_VERSION} must be absent; got: {versions:?}" + ); + assert!( + versions.contains(&"1.0.0".to_owned()), + "older version 1.0.0 must remain; got: 
{versions:?}" + ); +} + +// --- clean package passthrough --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_clean_package_passes_through() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + // Any package not in the malware list or releases list passes through unchanged. + let resp = client + .get("https://repo.packagist.org/p2/vendor/clean-package.json") + .send() + .await + .unwrap(); + + assert_eq!(StatusCode::OK, resp.status()); +} + +// --- dev variant path --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_dev_variant_path_handled() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + // The ~dev.json path must also be intercepted and rewritten. + let resp = client + .get(format!( + "https://repo.packagist.org/p2/{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}~dev.json" + )) + .send() + .await + .unwrap(); + + // Mock server returns 200 for unknown paths; the proxy should not block it. + assert_eq!(StatusCode::OK, resp.status()); +} + +// --- non-packagist path passthrough --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_unrelated_path_passes_through() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + // /packages.json is not a package-specific metadata endpoint; proxy should not rewrite it. + let resp = client + .get("https://repo.packagist.org/packages.json") + .send() + .await + .unwrap(); + + assert_eq!(StatusCode::OK, resp.status()); +} + +// --- de-minification: inherited fields survive --- + +#[tokio::test] +#[tracing_test::traced_test] +async fn test_packagist_inherited_require_present_after_rewrite() { + let runtime = e2e::runtime::get().await; + let client = runtime.client_with_http_proxy().await; + + // The malware version (1.0.0) carries `require`; 0.9.0 inherits it. 
+ // After rewriting (removing 1.0.0), 0.9.0 must still expose `require`. + let resp = client + .get(format!( + "https://repo.packagist.org/p2/{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}.json" + )) + .send() + .await + .unwrap(); + + assert_eq!(StatusCode::OK, resp.status()); + let body: serde_json::Value = resp.try_into_json().await.unwrap(); + let pkg_key = format!("{MALWARE_PACKAGIST_VENDOR}/{MALWARE_PACKAGIST_PACKAGE}"); + let remaining = &body["packages"][&pkg_key][0]; + + assert_eq!(remaining["version"], "0.9.0"); + assert!( + !remaining["require"].is_null(), + "require field must be inherited from the removed first entry; got: {remaining}" + ); +} diff --git a/proxy-bin-l7/src/test/e2e/test_proxy/mod.rs b/proxy-bin-l7/src/test/e2e/test_proxy/mod.rs index a1399691..f068d37b 100644 --- a/proxy-bin-l7/src/test/e2e/test_proxy/mod.rs +++ b/proxy-bin-l7/src/test/e2e/test_proxy/mod.rs @@ -5,6 +5,7 @@ mod firewall_maven; mod firewall_npm; mod firewall_nuget; mod firewall_open_vsx; +mod firewall_packagist; mod firewall_pypi; mod firewall_skills_sh; mod firewall_vscode; diff --git a/proxy-lib/src/http/firewall/mod.rs b/proxy-lib/src/http/firewall/mod.rs index 927a8847..7f885bda 100644 --- a/proxy-lib/src/http/firewall/mod.rs +++ b/proxy-lib/src/http/firewall/mod.rs @@ -242,6 +242,17 @@ impl Firewall { .await .context("create block rule: pypi")? .into_dyn(), + self::rule::packagist::RulePackagist::try_new( + guard.clone(), + layered_client.clone(), + data.clone(), + policy_evaluator.clone(), + notifier.clone(), + remote_endpoint_config.clone(), + ) + .await + .context("create block rule: packagist")? 
+ .into_dyn(), self::rule::maven::RuleMaven::try_new( guard.clone(), layered_client.clone(), diff --git a/proxy-lib/src/http/firewall/rule/mod.rs b/proxy-lib/src/http/firewall/rule/mod.rs index f9438aff..75d99e13 100644 --- a/proxy-lib/src/http/firewall/rule/mod.rs +++ b/proxy-lib/src/http/firewall/rule/mod.rs @@ -70,6 +70,7 @@ pub mod maven; pub mod npm; pub mod nuget; pub mod open_vsx; +pub mod packagist; pub mod pypi; pub mod skills_sh; pub mod vscode; diff --git a/proxy-lib/src/http/firewall/rule/packagist/json.rs b/proxy-lib/src/http/firewall/rule/packagist/json.rs new file mode 100644 index 00000000..c6ca45dc --- /dev/null +++ b/proxy-lib/src/http/firewall/rule/packagist/json.rs @@ -0,0 +1,87 @@ +use std::{str::FromStr, time::UNIX_EPOCH}; + +use rama::telemetry::tracing; +use serde_json::{Map, Value}; + +use crate::package::version::PackageVersion; + +/// Parse a Packagist `/p2/vendor/package.json` body and de-minify its entries. +/// +/// Returns the canonical JSON key for the package (may differ in casing from the +/// URL-derived name) and the de-minified entry list, or `None` if the body cannot +/// be parsed or the package key is not found. +pub(super) fn parse_and_deminify( + bytes: &[u8], + package_name: &str, +) -> Option<(String, Vec>)> { + let json: Value = serde_json::from_slice(bytes) + .inspect_err(|e| tracing::debug!("packagist: failed to parse metadata JSON: {e}")) + .ok()?; + + let packages_obj = json.get("packages")?.as_object()?; + + let package_key = packages_obj + .keys() + .find(|k| k.to_ascii_lowercase() == package_name)? + .clone(); + + let versions_array = packages_obj.get(&package_key)?.as_array()?; + + Some((package_key, deminify(versions_array))) +} + +/// Serialize a filtered package entry list back to Packagist metadata JSON. +/// +/// Produces `{ "packages": { "": [...kept...] } }` without the `minified` +/// key so that every entry is self-contained. 
+pub(super) fn serialize(package_key: String, kept: Vec<Value>) -> Option<Vec<u8>> {
+    // Build a one-key `packages` object. The `minified` marker is deliberately
+    // not emitted: callers pass entries that were already expanded.
+    let mut packages = Map::new();
+    packages.insert(package_key, Value::Array(kept));
+    let output = serde_json::json!({ "packages": packages });
+    // Serialization failure is logged and reported as `None`; the caller decides
+    // how to fall back.
+    serde_json::to_vec(&output)
+        .inspect_err(|e| tracing::debug!("packagist: failed to serialize rewritten metadata: {e}"))
+        .ok()
+}
+
+/// Parse the `time` field of a de-minified entry as Unix seconds.
+///
+/// Packagist metadata carries timestamps with a numeric UTC offset
+/// (e.g. `2020-06-01T00:00:00+00:00`), while `humantime::parse_rfc3339` only
+/// accepts the `Z` designator. The offset is therefore split off first, the
+/// remaining date-time is parsed as UTC, and the offset is applied afterwards.
+///
+/// Returns `None` when the field is absent, is not a string, is not a valid
+/// RFC 3339 timestamp, or when its date-time part lies before the Unix epoch.
+pub(super) fn time_from_entry(entry: &Map<String, Value>) -> Option<i64> {
+    let ts = entry.get("time")?.as_str()?;
+    let (wall_clock, offset_secs) = split_utc_offset(ts)?;
+    // Re-append `Z` so humantime's strict parser validates the date-time part.
+    let t = humantime::parse_rfc3339(&format!("{wall_clock}Z")).ok()?;
+    let secs = t.duration_since(UNIX_EPOCH).ok()?.as_secs();
+    // Checked conversion instead of `as i64`, which would silently wrap.
+    let secs = i64::try_from(secs).ok()?;
+    // Local time = UTC + offset, hence UTC = local time - offset.
+    secs.checked_sub(offset_secs)
+}
+
+/// Split an RFC 3339 timestamp into its date-time part and its UTC offset in seconds.
+///
+/// Accepts a trailing `Z` (offset `0`) or a trailing `+HH:MM` / `-HH:MM`.
+/// Returns `None` for any other suffix or an out-of-range offset.
+fn split_utc_offset(ts: &str) -> Option<(&str, i64)> {
+    if let Some(wall_clock) = ts.strip_suffix('Z') {
+        return Some((wall_clock, 0));
+    }
+
+    // A numeric offset is always exactly six ASCII bytes: sign, HH, ':', MM.
+    let split_at = ts.len().checked_sub(6)?;
+    let &[sign, h1, h0, b':', m1, m0] = &ts.as_bytes()[split_at..] else {
+        return None;
+    };
+
+    let digit = |b: u8| b.is_ascii_digit().then(|| i64::from(b - b'0'));
+    let hours = digit(h1)? * 10 + digit(h0)?;
+    let minutes = digit(m1)? * 10 + digit(m0)?;
+    if hours > 23 || minutes > 59 {
+        return None;
+    }
+    let magnitude = hours * 3600 + minutes * 60;
+
+    // Slicing only happens once the byte at `split_at` is known to be an ASCII
+    // sign, so `split_at` is guaranteed to be a char boundary.
+    match sign {
+        b'+' => Some((&ts[..split_at], magnitude)),
+        b'-' => Some((&ts[..split_at], -magnitude)),
+        _ => None,
+    }
+}
+
+/// Parse a `version` string from a de-minified entry into a `PackageVersion`.
+///
+/// Returns `None` when the `version` field is missing or is not a string.
+/// A string that `PackageVersion::from_str` rejects is not dropped; it is
+/// preserved as `PackageVersion::Unknown` so the entry can still be matched.
+pub(super) fn version_from_entry(entry: &Map<String, Value>) -> Option<PackageVersion> {
+    let s = entry.get("version")?.as_str()?;
+    Some(PackageVersion::from_str(s).unwrap_or_else(|_| PackageVersion::Unknown(s.into())))
+}
+
+/// Expand minified Composer 2.x package entries to full field sets.
+///
+/// Walks entries in order, maintaining a running accumulator. Each entry patches
+/// the accumulator: present fields override, `"__unset"` fields are removed.
+/// The returned vec contains one fully-expanded map per input entry.
+pub(super) fn deminify(entries: &[Value]) -> Vec> { + let mut accumulator: Map = Map::new(); + let mut result = Vec::with_capacity(entries.len()); + + for entry in entries { + if let Some(obj) = entry.as_object() { + for (key, value) in obj { + if value.as_str() == Some("__unset") { + accumulator.remove(key); + } else { + accumulator.insert(key.clone(), value.clone()); + } + } + } + result.push(accumulator.clone()); + } + + result +} + +#[cfg(test)] +#[path = "json_tests.rs"] +mod tests; diff --git a/proxy-lib/src/http/firewall/rule/packagist/json_tests.rs b/proxy-lib/src/http/firewall/rule/packagist/json_tests.rs new file mode 100644 index 00000000..a3148a6d --- /dev/null +++ b/proxy-lib/src/http/firewall/rule/packagist/json_tests.rs @@ -0,0 +1,133 @@ +use std::time::{Duration, SystemTime}; + +use serde_json::{Map, Value, json}; + +use super::*; + +fn ts(hours_from_now: i64) -> String { + if hours_from_now >= 0 { + let t = SystemTime::now() + Duration::from_secs(hours_from_now as u64 * 3600); + humantime::format_rfc3339(t).to_string() + } else { + let t = SystemTime::now() - Duration::from_secs((-hours_from_now) as u64 * 3600); + humantime::format_rfc3339(t).to_string() + } +} + +fn entry(fields: Value) -> Map { + fields.as_object().unwrap().clone() +} + +// --- parse_and_deminify --- + +#[test] +fn parse_returns_none_for_wrong_package_key() { + let body = json!({ "packages": { "other/pkg": [{"version": "1.0.0"}] } }); + let bytes = serde_json::to_vec(&body).unwrap(); + assert!(parse_and_deminify(&bytes, "vendor/pkg").is_none()); +} + +#[test] +fn parse_finds_key_case_insensitively() { + let body = json!({ "packages": { "Vendor/Pkg": [{"version": "1.0.0"}] } }); + let bytes = serde_json::to_vec(&body).unwrap(); + let (key, entries) = parse_and_deminify(&bytes, "vendor/pkg").unwrap(); + assert_eq!(key, "Vendor/Pkg"); + assert_eq!(entries.len(), 1); +} + +#[test] +fn parse_returns_none_for_invalid_json() { + assert!(parse_and_deminify(b"not json", 
"vendor/pkg").is_none()); +} + +// --- deminify --- + +#[test] +fn deminify_propagates_fields_from_first_entry() { + let entries = vec![ + json!({"name": "vendor/pkg", "version": "2.0.0", "require": {"php": "^8.0"}}), + json!({"version": "1.0.0"}), + ]; + let expanded = deminify(&entries); + assert_eq!(expanded.len(), 2); + assert_eq!(expanded[1]["version"], "1.0.0"); + assert_eq!( + expanded[1]["require"]["php"], "^8.0", + "require must be inherited from first entry" + ); +} + +#[test] +fn deminify_handles_unset_sentinel() { + let entries = vec![ + json!({"version": "2.0.0", "homepage": "https://example.com"}), + json!({"version": "1.0.0", "homepage": "__unset"}), + ]; + let expanded = deminify(&entries); + assert_eq!(expanded[1]["version"], "1.0.0"); + assert!( + expanded[1].get("homepage").is_none(), + "homepage should be removed by __unset" + ); +} + +#[test] +fn deminify_later_entry_overrides_earlier_value() { + let entries = vec![ + json!({"version": "2.0.0", "description": "old"}), + json!({"version": "1.0.0", "description": "new"}), + ]; + let expanded = deminify(&entries); + assert_eq!(expanded[1]["description"], "new"); +} + +// --- time_from_entry --- + +#[test] +fn time_from_entry_parses_valid_timestamp() { + let e = entry(json!({"time": "2020-06-01T00:00:00+00:00"})); + assert!(time_from_entry(&e).is_some()); +} + +#[test] +fn time_from_entry_returns_none_when_field_absent() { + let e = entry(json!({"version": "1.0.0"})); + assert!(time_from_entry(&e).is_none()); +} + +#[test] +fn time_from_entry_returns_none_for_invalid_timestamp() { + let e = entry(json!({"time": "not-a-date"})); + assert!(time_from_entry(&e).is_none()); +} + +#[test] +fn time_from_entry_future_timestamp_is_greater_than_past() { + let past = entry(json!({"time": ts(-72)})); + let future = entry(json!({"time": ts(1)})); + let past_secs = time_from_entry(&past).unwrap(); + let future_secs = time_from_entry(&future).unwrap(); + assert!(future_secs > past_secs); +} + +// --- serialize 
--- + +#[test] +fn serialize_produces_packages_object_without_minified_key() { + let out = serialize("vendor/pkg".to_owned(), vec![json!({"version": "1.0.0"})]).unwrap(); + let parsed: Value = serde_json::from_slice(&out).unwrap(); + assert!( + parsed.get("minified").is_none(), + "minified key must be absent" + ); + assert!(parsed["packages"]["vendor/pkg"].is_array()); +} + +#[test] +fn serialize_preserves_empty_array_when_all_filtered() { + let out = serialize("vendor/pkg".to_owned(), vec![]).unwrap(); + let parsed: Value = serde_json::from_slice(&out).unwrap(); + let arr = parsed["packages"]["vendor/pkg"].as_array().unwrap(); + assert!(arr.is_empty()); +} diff --git a/proxy-lib/src/http/firewall/rule/packagist/mod.rs b/proxy-lib/src/http/firewall/rule/packagist/mod.rs new file mode 100644 index 00000000..ffdcec58 --- /dev/null +++ b/proxy-lib/src/http/firewall/rule/packagist/mod.rs @@ -0,0 +1,267 @@ +use std::fmt; + +use rama::{ + Service, + error::{BoxError, ErrorContext as _, extra::OpaqueError}, + graceful::ShutdownGuard, + http::{ + Body, Request, Response, Uri, + body::util::BodyExt as _, + headers::{ContentType, HeaderMapExt as _}, + }, + net::address::Domain, + telemetry::tracing, + utils::{str::arcstr::ArcStr, time::now_unix_ms}, +}; +use serde_json::Value; + +use crate::{ + endpoint_protection::{PackagePolicyDecision, PolicyEvaluator, RemoteEndpointConfig}, + http::{ + KnownContentType, + firewall::{ + domain_matcher::DomainMatcher, + events::{Artifact, BlockReason, BlockedEvent, MinPackageAgeEvent}, + notifier::EventNotifier, + }, + }, + package::{ + malware_list::{LowerCaseEntryFormatter, RemoteMalwareList}, + released_packages_list::{LowerCaseReleasedPackageFormatter, RemoteReleasedPackagesList}, + version::PackageVersion, + }, + storage::SyncCompactDataStorage, +}; + +#[cfg(feature = "pac")] +use crate::http::firewall::pac::PacScriptGenerator; + +use super::{HttpRequestMatcherView, Rule, make_response_uncacheable}; + +mod json; +mod path; + +pub(in 
crate::http::firewall) struct RulePackagist { + target_domains: DomainMatcher, + remote_malware_list: RemoteMalwareList, + remote_released_packages_list: RemoteReleasedPackagesList, + remote_endpoint_config: Option, + policy_evaluator: Option, + notifier: Option, +} + +impl RulePackagist { + pub(in crate::http::firewall) async fn try_new( + guard: ShutdownGuard, + remote_malware_list_https_client: C, + sync_storage: SyncCompactDataStorage, + policy_evaluator: Option, + notifier: Option, + remote_endpoint_config: Option, + ) -> Result + where + C: Service + Clone, + { + let remote_malware_list = RemoteMalwareList::try_new( + guard.clone(), + Uri::from_static("https://malware-list.aikido.dev/malware_packagist.json"), + sync_storage.clone(), + remote_malware_list_https_client.clone(), + LowerCaseEntryFormatter, + ) + .await + .context("create remote malware list for packagist block rule")?; + + let remote_released_packages_list = RemoteReleasedPackagesList::try_new( + guard.clone(), + Uri::from_static("https://malware-list.aikido.dev/releases/packagist.json"), + sync_storage, + remote_malware_list_https_client, + LowerCaseReleasedPackageFormatter, + ) + .await + .context("create remote released packages list for packagist block rule")?; + + Ok(Self { + target_domains: ["repo.packagist.org"].into_iter().collect(), + remote_malware_list, + remote_released_packages_list, + remote_endpoint_config, + policy_evaluator, + notifier, + }) + } + + const DEFAULT_MIN_PACKAGE_AGE_SECS: i64 = 48 * 3600; + + fn get_package_age_cutoff_secs(&self) -> i64 { + let maybe_ts = self.remote_endpoint_config.as_ref().and_then(|c| { + c.get_ecosystem_config("packagist") + .config() + .and_then(|cfg| cfg.minimum_allowed_age_timestamp) + }); + if let Some(ts_secs) = maybe_ts { + return ts_secs; + } + (now_unix_ms() / 1000) - Self::DEFAULT_MIN_PACKAGE_AGE_SECS + } + + fn is_version_malware(&self, package: &str, version: &PackageVersion) -> bool { + self.remote_malware_list + 
.has_entries_with_version(package, version.clone()) + } +} + +impl fmt::Debug for RulePackagist { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RulePackagist").finish() + } +} + +impl Rule for RulePackagist { + #[inline(always)] + fn match_domain(&self, domain: &Domain) -> bool { + self.target_domains.is_match(domain) + } + + #[cfg(feature = "pac")] + #[inline(always)] + fn collect_pac_domains(&self, generator: &mut PacScriptGenerator) { + for domain in self.target_domains.iter() { + generator.write_domain(&domain); + } + } + + #[inline(always)] + fn match_http_response_payload_inspection_request( + &self, + req: HttpRequestMatcherView<'_>, + ) -> bool { + path::parse_package_name_from_path(req.uri.path()).is_some() + } + + async fn evaluate_response(&self, resp: Response, req_uri: &Uri) -> Result { + let Some(package_name) = path::parse_package_name_from_path(req_uri.path()) else { + return Ok(resp); + }; + + let Some(KnownContentType::Json) = resp + .headers() + .typed_get::() + .and_then(KnownContentType::detect_from_content_type_header) + else { + return Ok(resp); + }; + + let cutoff_secs = self.get_package_age_cutoff_secs(); + let (mut parts, body) = resp.into_parts(); + + let bytes = body + .collect() + .await + .context("collect packagist metadata response body")? 
+ .to_bytes(); + + let Some((package_key, entries)) = json::parse_and_deminify(&bytes, &package_name) else { + return Ok(Response::from_parts(parts, Body::from(bytes))); + }; + + if let Some(evaluator) = self.policy_evaluator.as_ref() + && matches!( + evaluator.evaluate_package_install("packagist", &package_name), + PackagePolicyDecision::Allow + ) + { + return Ok(Response::from_parts(parts, Body::from(bytes))); + } + + let mut suppressed_malware: Vec = Vec::new(); + let mut suppressed_min_age: Vec = Vec::new(); + let mut kept: Vec = Vec::new(); + + for entry in &entries { + let Some(version) = json::version_from_entry(entry) else { + kept.push(Value::Object(entry.clone())); + continue; + }; + + if self.is_version_malware(&package_name, &version) { + tracing::info!( + package = %package_name, + version = %version, + "packagist: suppressing malware version from metadata response" + ); + if let Some(notifier) = &self.notifier { + notifier + .notify_blocked(BlockedEvent { + ts_ms: now_unix_ms(), + artifact: Artifact { + product: "packagist".into(), + identifier: ArcStr::from(package_name.as_str()), + display_name: Some(ArcStr::from(package_name.as_str())), + version: Some(version.clone()), + }, + block_reason: BlockReason::Malware, + }) + .await; + } + suppressed_malware.push(version); + continue; + } + + let is_recent = json::time_from_entry(entry).is_some_and(|t| t > cutoff_secs) + || self.remote_released_packages_list.is_recently_released( + &package_name, + Some(&version), + cutoff_secs, + ); + + if is_recent { + tracing::info!( + package = %package_name, + version = %version, + "packagist: suppressing too-new version from metadata response" + ); + suppressed_min_age.push(version); + continue; + } + + kept.push(Value::Object(entry.clone())); + } + + if suppressed_malware.is_empty() && suppressed_min_age.is_empty() { + return Ok(Response::from_parts(parts, Body::from(bytes))); + } + + let Some(new_bytes) = json::serialize(package_key, kept) else { + return 
Ok(Response::from_parts(parts, Body::from(bytes))); + }; + + tracing::info!( + package = %package_name, + suppressed_malware = ?suppressed_malware, + suppressed_min_age = ?suppressed_min_age, + "packagist metadata rewritten: suppressed versions" + ); + + make_response_uncacheable(&mut parts.headers); + + if !suppressed_min_age.is_empty() + && let Some(notifier) = &self.notifier + { + let event = MinPackageAgeEvent { + ts_ms: now_unix_ms(), + artifact: Artifact { + product: "packagist".into(), + identifier: ArcStr::from(package_name.as_str()), + display_name: Some(ArcStr::from(package_name.as_str())), + version: None, + }, + suppressed_versions: suppressed_min_age, + }; + notifier.notify_min_package_age(event).await; + } + + Ok(Response::from_parts(parts, Body::from(new_bytes))) + } +} diff --git a/proxy-lib/src/http/firewall/rule/packagist/path.rs b/proxy-lib/src/http/firewall/rule/packagist/path.rs new file mode 100644 index 00000000..fd44e86b --- /dev/null +++ b/proxy-lib/src/http/firewall/rule/packagist/path.rs @@ -0,0 +1,17 @@ +/// Extract the vendor/package name from a Packagist v2 metadata path. +/// +/// Recognises `/p2/vendor/package.json` and `/p2/vendor/package~dev.json`. +/// Returns the name lowercased, e.g. `"league/flysystem-local"`. +pub(super) fn parse_package_name_from_path(path: &str) -> Option { + let rest = path.strip_prefix("/p2/")?; + let rest = rest + .strip_suffix("~dev.json") + .or_else(|| rest.strip_suffix(".json"))?; + // Must be exactly "vendor/package" (one slash, two non-empty segments). 
+ (rest.matches('/').count() == 1 && !rest.starts_with('/') && !rest.ends_with('/')) + .then(|| rest.to_ascii_lowercase()) +} + +#[cfg(test)] +#[path = "path_tests.rs"] +mod tests; diff --git a/proxy-lib/src/http/firewall/rule/packagist/path_tests.rs b/proxy-lib/src/http/firewall/rule/packagist/path_tests.rs new file mode 100644 index 00000000..7bf3a168 --- /dev/null +++ b/proxy-lib/src/http/firewall/rule/packagist/path_tests.rs @@ -0,0 +1,50 @@ +use super::parse_package_name_from_path; + +#[test] +fn parses_standard_path() { + assert_eq!( + parse_package_name_from_path("/p2/league/flysystem-local.json"), + Some("league/flysystem-local".to_owned()) + ); +} + +#[test] +fn parses_dev_path() { + assert_eq!( + parse_package_name_from_path("/p2/league/flysystem-local~dev.json"), + Some("league/flysystem-local".to_owned()) + ); +} + +#[test] +fn lowercases_name() { + assert_eq!( + parse_package_name_from_path("/p2/League/Flysystem-Local.json"), + Some("league/flysystem-local".to_owned()) + ); +} + +#[test] +fn rejects_missing_p2_prefix() { + assert!(parse_package_name_from_path("/p/vendor/pkg.json").is_none()); +} + +#[test] +fn rejects_no_vendor_separator() { + assert!(parse_package_name_from_path("/p2/vendor.json").is_none()); +} + +#[test] +fn rejects_too_many_segments() { + assert!(parse_package_name_from_path("/p2/a/b/c.json").is_none()); +} + +#[test] +fn rejects_missing_json_suffix() { + assert!(parse_package_name_from_path("/p2/vendor/pkg").is_none()); +} + +#[test] +fn rejects_packages_json() { + assert!(parse_package_name_from_path("/packages.json").is_none()); +}