diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index 2a75ac729..486430146 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -3,8 +3,7 @@ use std::path::PathBuf;
 
 use cratesfyi::db::{self, add_path_into_database, connect_db};
 use cratesfyi::utils::{add_crate_to_queue, remove_crate_priority, set_crate_priority};
-use cratesfyi::Server;
-use cratesfyi::{DocBuilder, DocBuilderOptions, RustwideBuilder};
+use cratesfyi::{DocBuilder, DocBuilderOptions, Limits, RustwideBuilder, Server};
 use structopt::StructOpt;
 
 pub fn main() {
@@ -412,7 +411,7 @@ impl DatabaseSubcommand {
         Self::AddDirectory { directory, prefix } => {
             let conn = db::connect_db().expect("failed to connect to the database");
 
-            add_path_into_database(&conn, &prefix, directory)
+            add_path_into_database(&conn, &prefix, directory, &Limits::default())
                 .expect("Failed to add directory into database");
         }
diff --git a/src/db/file.rs b/src/db/file.rs
index 3280d1e40..427b43ec2 100644
--- a/src/db/file.rs
+++ b/src/db/file.rs
@@ -4,8 +4,7 @@
 //! They are using so many inodes and it is better to store them in database instead of
 //! filesystem. This module is adding files into database and retrieving them.
 
-use crate::error::Result;
-use crate::storage::Storage;
+use crate::{docbuilder::Limits, error::Result, storage::Storage};
 use postgres::Connection;
 use serde_json::Value;
 
@@ -30,9 +29,11 @@ pub fn add_path_into_database<P: AsRef<Path>>(
     conn: &Connection,
     prefix: &str,
     path: P,
+    limits: &Limits,
 ) -> Result<Value> {
     let mut backend = Storage::new(conn);
-    let file_list = backend.store_all(conn, prefix, path.as_ref())?;
+    let file_list = backend.store_all(conn, prefix, path.as_ref(), limits)?;
+
     file_list_to_json(file_list.into_iter().collect())
 }
diff --git a/src/db/migrate.rs b/src/db/migrate.rs
index d996d99cc..4a43ac0e5 100644
--- a/src/db/migrate.rs
+++ b/src/db/migrate.rs
@@ -325,6 +325,17 @@ pub fn migrate(version: Option<Version>, conn: &Connection) -> CratesfyiResult<()> {
                 ALTER TABLE releases ALTER COLUMN doc_targets DROP NOT NULL;
             "
         ),
+        migration!(
+            context,
+            // version
+            13,
+            // description
+            "Allow max file upload size to be overridden",
+            // upgrade query
+            "ALTER TABLE sandbox_overrides ADD COLUMN upload_size BIGINT;",
+            // downgrade query
+            "ALTER TABLE sandbox_overrides DROP COLUMN upload_size;"
+        ),
     ];
 
     for migration in migrations {
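Since migration 13 adds `upload_size` as a nullable column, existing `sandbox_overrides` rows (and crates with no row at all) keep the built-in default. A hypothetical operator snippet for giving one crate a larger cap, written in the same style as the crate's own tests; the crate name and the 100 MB figure are placeholders, not part of this patch:

```rust
use cratesfyi::db::connect_db;

fn main() -> Result<(), failure::Error> {
    let conn = connect_db()?;

    // Hypothetical override: give `big-docs-crate` a 100 MB per-file cap.
    // A NULL (or absent) upload_size keeps the 500 MB default added below.
    conn.query(
        "INSERT INTO sandbox_overrides (crate_name, upload_size) VALUES ($1, $2)",
        &[&"big-docs-crate", &(100i64 * 1024 * 1024)],
    )?;

    Ok(())
}
```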
diff --git a/src/docbuilder/limits.rs b/src/docbuilder/limits.rs
index 3db3c45fc..ae99f68b2 100644
--- a/src/docbuilder/limits.rs
+++ b/src/docbuilder/limits.rs
@@ -3,13 +3,21 @@ use postgres::Connection;
 use std::collections::BTreeMap;
 use std::time::Duration;
 
+/// The limits imposed on a crate for building its documentation
 #[derive(Debug, Clone, PartialEq, Eq)]
-pub(crate) struct Limits {
+pub struct Limits {
+    /// The maximum memory usage allowed for a crate
     memory: usize,
+    /// The maximum number of targets
     targets: usize,
+    /// The build timeout
     timeout: Duration,
+    /// Whether networking is enabled
     networking: bool,
+    /// The maximum log size kept
     max_log_size: usize,
+    /// The maximum allowed size of a file upload
+    upload_size: usize,
 }
 
 impl Default for Limits {
@@ -17,9 +25,10 @@ impl Default for Limits {
         Self {
             memory: 3 * 1024 * 1024 * 1024,        // 3 GB
             timeout: Duration::from_secs(15 * 60), // 15 minutes
-            targets: 10,
-            networking: false,
-            max_log_size: 100 * 1024, // 100 KB
+            targets: 10,                           // 10 documentation targets
+            networking: false,                     // Networking disabled
+            max_log_size: 100 * 1024,              // 100 KB
+            upload_size: 500 * 1024 * 1024,        // 500 MB
         }
     }
 }
@@ -32,17 +41,25 @@ impl Limits {
             "SELECT *
              FROM sandbox_overrides WHERE crate_name = $1;",
             &[&name],
         )?;
+
         if !res.is_empty() {
            let row = res.get(0);
+
            if let Some(memory) = row.get::<_, Option<i64>>("max_memory_bytes") {
                limits.memory = memory as usize;
            }
+
            if let Some(timeout) = row.get::<_, Option<i32>>("timeout_seconds") {
                limits.timeout = Duration::from_secs(timeout as u64);
            }
+
            if let Some(targets) = row.get::<_, Option<i32>>("max_targets") {
                limits.targets = targets as usize;
            }
+
+           if let Some(upload_size) = row.get::<_, Option<i64>>("upload_size") {
+               limits.upload_size = upload_size as usize;
+           }
        }
 
        Ok(limits)
@@ -68,6 +85,15 @@ impl Limits {
         self.targets
     }
 
+    pub(crate) fn upload_size(&self) -> usize {
+        self.upload_size
+    }
+
+    #[cfg(test)]
+    pub(crate) fn set_upload_size(&mut self, size: usize) {
+        self.upload_size = size;
+    }
+
     pub(crate) fn for_website(&self) -> BTreeMap<String, String> {
         let mut res = BTreeMap::new();
         res.insert("Available RAM".into(), SIZE_SCALE(self.memory));
@@ -88,6 +114,10 @@ impl Limits {
             "Maximum number of build targets".into(),
             self.targets.to_string(),
         );
+        res.insert(
+            "Maximum uploaded file size".into(),
+            SIZE_SCALE(self.upload_size),
+        );
 
         res
     }
 }
@@ -119,22 +149,23 @@ fn scale(value: usize, interval: usize, labels: &[&str]) -> String {
 mod test {
     use super::*;
     use crate::test::*;
+
     #[test]
     fn retrieve_limits() {
         wrapper(|env| {
             let db = env.db();
-            let krate = "hexponent";
 
             // limits work if no crate has limits set
-            let hexponent = Limits::for_crate(&db.conn(), krate)?;
+            let hexponent = Limits::for_crate(&db.conn(), "hexponent")?;
             assert_eq!(hexponent, Limits::default());
 
             db.conn().query(
                 "INSERT INTO sandbox_overrides (crate_name, max_targets) VALUES ($1, 15)",
-                &[&krate],
+                &[&"hexponent"],
             )?;
+
             // limits work if crate has limits set
-            let hexponent = Limits::for_crate(&db.conn(), krate)?;
+            let hexponent = Limits::for_crate(&db.conn(), "hexponent")?;
             assert_eq!(
                 hexponent,
                 Limits {
@@ -149,42 +180,65 @@ mod test {
                 memory: 100_000,
                 timeout: Duration::from_secs(300),
                 targets: 1,
+                upload_size: 32,
                 ..Limits::default()
             };
-            db.conn().query("INSERT INTO sandbox_overrides (crate_name, max_memory_bytes, timeout_seconds, max_targets)
-                             VALUES ($1, $2, $3, $4)",
-                            &[&krate, &(limits.memory as i64), &(limits.timeout.as_secs() as i32), &(limits.targets as i32)])?;
+
+            db.conn().query(
+                "INSERT INTO sandbox_overrides (
+                    crate_name,
+                    max_memory_bytes,
+                    timeout_seconds,
+                    max_targets,
+                    upload_size
+                )
+                VALUES ($1, $2, $3, $4, $5)",
+                &[
+                    &krate,
+                    &(limits.memory as i64),
+                    &(limits.timeout.as_secs() as i32),
+                    &(limits.targets as i32),
+                    &(limits.upload_size as i64),
+                ],
+            )?;
+
             assert_eq!(limits, Limits::for_crate(&db.conn(), krate)?);
+
             Ok(())
         });
     }
+
     #[test]
     fn display_limits() {
         let limits = Limits {
             memory: 102400,
             timeout: Duration::from_secs(300),
             targets: 1,
+            upload_size: 32,
             ..Limits::default()
         };
         let display = limits.for_website();
+
+        assert_eq!(display.get("Network access"), Some(&"blocked".to_string()));
         assert_eq!(
-            display.get("Network access".into()),
-            Some(&"blocked".into())
-        );
-        assert_eq!(
-            display.get("Maximum size of a build log".into()),
-            Some(&"100 KB".into())
+            display.get("Maximum size of a build log"),
+            Some(&"100 KB".to_string())
         );
         assert_eq!(
-            display.get("Maximum number of build targets".into()),
+            display.get("Maximum number of build targets"),
             Some(&limits.targets.to_string())
         );
         assert_eq!(
-            display.get("Maximum rustdoc execution time".into()),
-            Some(&"5 minutes".into())
+            display.get("Maximum rustdoc execution time"),
+            Some(&"5 minutes".to_string())
         );
-        assert_eq!(display.get("Available RAM".into()), Some(&"100 KB".into()));
+        assert_eq!(display.get("Available RAM"), Some(&"100 KB".to_string()));
+        assert_eq!(
+            display.get("Maximum uploaded file size"),
+            Some(&"32 bytes".to_string())
+        )
     }
+
     #[test]
     fn scale_limits() {
         // time
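One nuance worth calling out: `upload_size` caps each individual file, not the whole upload, since `store_all` (below) checks every file's length against it separately. A condensed, crate-internal sketch of just the new column's round-trip, in the style of `retrieve_limits`; the test name and crate name are illustrative:

```rust
use crate::{docbuilder::Limits, test::*};

#[test]
fn upload_size_roundtrip() {
    wrapper(|env| {
        let db = env.db();

        // No row (or a NULL upload_size) falls back to the 500 MB default
        assert_eq!(
            Limits::for_crate(&db.conn(), "some-crate")?.upload_size(),
            500 * 1024 * 1024
        );

        // An explicit BIGINT override becomes the per-file cap in bytes
        db.conn().query(
            "INSERT INTO sandbox_overrides (crate_name, upload_size) VALUES ($1, 1024)",
            &[&"some-crate"],
        )?;
        assert_eq!(
            Limits::for_crate(&db.conn(), "some-crate")?.upload_size(),
            1024
        );

        Ok(())
    });
}
```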
diff --git a/src/docbuilder/mod.rs b/src/docbuilder/mod.rs
index 0a0dac3d8..52c8b17fd 100644
--- a/src/docbuilder/mod.rs
+++ b/src/docbuilder/mod.rs
@@ -5,7 +5,7 @@ pub(crate) mod options;
 mod queue;
 mod rustwide_builder;
 
-pub(crate) use self::limits::Limits;
+pub use self::limits::Limits;
 pub(self) use self::metadata::Metadata;
 pub(crate) use self::rustwide_builder::BuildResult;
 pub use self::rustwide_builder::RustwideBuilder;
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index 5b9e05251..cd776418a 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -242,7 +242,7 @@ impl RustwideBuilder {
             })?;
         }
 
-        add_path_into_database(&conn, "", &dest)?;
+        add_path_into_database(&conn, "", &dest, &limits)?;
         conn.query(
             "INSERT INTO config (name, value) VALUES ('rustc_version', $1) \
              ON CONFLICT (name) DO UPDATE SET value = $1;",
@@ -349,6 +349,7 @@ impl RustwideBuilder {
                     &conn,
                     &prefix,
                     build.host_source_dir(),
+                    &limits,
                 )?);
 
                 if let Some(name) = res.cargo_metadata.root().library_name() {
@@ -376,7 +377,7 @@ impl RustwideBuilder {
                         &metadata,
                     )?;
                 }
-                self.upload_docs(&conn, name, version, local_storage.path())?;
+                self.upload_docs(&conn, name, version, local_storage.path(), &limits)?;
             }
 
             let has_examples = build.host_source_dir().join("examples").is_dir();
@@ -572,13 +573,16 @@ impl RustwideBuilder {
         name: &str,
         version: &str,
         local_storage: &Path,
+        limits: &Limits,
     ) -> Result<()> {
         debug!("Adding documentation into database");
         add_path_into_database(
             conn,
             &format!("rustdoc/{}/{}", name, version),
             local_storage,
+            limits,
         )?;
+
         Ok(())
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 2a5769307..b5b3eca19 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@
 
 pub use self::docbuilder::options::DocBuilderOptions;
 pub use self::docbuilder::DocBuilder;
-pub use self::docbuilder::RustwideBuilder;
+pub use self::docbuilder::{Limits, RustwideBuilder};
 pub use self::web::Server;
 
 pub mod db;
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 7d10e076f..c62e227fc 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -3,16 +3,19 @@ pub(crate) mod s3;
 
 pub(crate) use self::database::DatabaseBackend;
 pub(crate) use self::s3::S3Backend;
 
-use failure::Error;
-use time::Timespec;
+use crate::docbuilder::Limits;
 use failure::err_msg;
+use failure::Error;
 use postgres::{transaction::Transaction, Connection};
-use std::collections::HashMap;
-use std::ffi::OsStr;
-use std::fs;
-use std::io::Read;
-use std::path::{Path, PathBuf};
+use std::{
+    collections::HashMap,
+    ffi::OsStr,
+    fs::File,
+    io::Read,
+    path::{Path, PathBuf},
+};
+use time::Timespec;
 
 const MAX_CONCURRENT_UPLOADS: usize = 1000;
@@ -72,6 +75,7 @@ impl<'a> Storage<'a> {
             DatabaseBackend::new(conn).into()
         }
     }
+
     pub(crate) fn get(&self, path: &str) -> Result<Blob, Error> {
         match self {
             Self::Database(db) => db.get(path),
@@ -97,6 +101,7 @@ impl<'a> Storage<'a> {
         conn: &Connection,
         prefix: &str,
         root_dir: &Path,
+        limits: &Limits,
     ) -> Result<HashMap<PathBuf, String>, Error> {
         let trans = conn.transaction()?;
         let mut file_paths_and_mimes = HashMap::new();
@@ -107,9 +112,19 @@
                 // Some files have insufficient permissions
                 // (like .lock file created by cargo in documentation directory).
                 // Skip these files.
-                fs::File::open(root_dir.join(&file_path))
+                let (path, file) = File::open(root_dir.join(&file_path))
                     .ok()
-                    .map(|file| (file_path, file))
+                    .map(|file| (file_path, file))?;
+
+                // Filter out files which are larger than the current crate's upload limit
+                let file_size = file.metadata().expect("Failed to get file metadata").len();
+                if file_size <= limits.upload_size() as u64 {
+                    Some((path, file))
+                } else {
+                    log::error!("Failed to upload {:?}, {} bytes is larger than the crate limit of {} bytes", path, file_size, limits.upload_size());
+
+                    None
+                }
             })
             .map(|(file_path, mut file)| -> Result<_, Error> {
                 let mut content: Vec<u8> = Vec::new();
@@ -188,7 +203,7 @@ impl<'a> From<DatabaseBackend<'a>> for Storage<'a> {
 mod test {
     use super::*;
     use crate::test::wrapper;
-    use std::env;
+    use std::{env, fs};
 
     pub(crate) fn assert_blob_eq(blob: &Blob, actual: &Blob) {
         assert_eq!(blob.path, actual.path);
         // NOTE: this does _not_ compare the upload time since min.io doesn't allow this to be configured
     }
 
-    pub(crate) fn test_roundtrip(blobs: &[Blob]) {
+    pub(crate) fn test_roundtrip(blobs: &[Blob], limits: &Limits) {
         let dir = tempfile::Builder::new()
             .prefix("docs.rs-upload-test")
             .tempdir()
             .unwrap();
+
         for blob in blobs {
             let path = dir.path().join(&blob.path);
             if let Some(parent) = path.parent() {
                 fs::create_dir_all(parent).unwrap();
             }
+
             fs::write(path, &blob.content).expect("failed to write to file");
         }
+
         wrapper(|env| {
             let db = env.db();
             let conn = db.conn();
+
             let mut backend = Storage::Database(DatabaseBackend::new(&conn));
-            let stored_files = backend.store_all(&conn, "", dir.path()).unwrap();
+            let stored_files = backend.store_all(&conn, "", dir.path(), limits).unwrap();
             assert_eq!(stored_files.len(), blobs.len());
+
             for blob in blobs {
                 let name = Path::new(&blob.path);
                 assert!(stored_files.contains_key(name));
@@ -230,23 +250,32 @@
     #[test]
     fn test_uploads() {
         use std::fs;
+
+        let limits = Limits::default();
         let dir = tempfile::Builder::new()
             .prefix("docs.rs-upload-test")
             .tempdir()
             .unwrap();
+
         let files = ["Cargo.toml", "src/main.rs"];
         for &file in &files {
             let path = dir.path().join(file);
             if let Some(parent) = path.parent() {
                 fs::create_dir_all(parent).unwrap();
             }
+
             fs::write(path, "data").expect("failed to write to file");
         }
+
         wrapper(|env| {
             let db = env.db();
             let conn = db.conn();
+
             let mut backend = Storage::Database(DatabaseBackend::new(&conn));
-            let stored_files = backend.store_all(&conn, "rustdoc", dir.path()).unwrap();
+            let stored_files = backend
+                .store_all(&conn, "rustdoc", dir.path(), &limits)
+                .unwrap();
+
             assert_eq!(stored_files.len(), files.len());
             for name in &files {
                 let name = Path::new(name);
@@ -276,6 +305,7 @@
     #[test]
     fn test_batched_uploads() {
+        let limits = Limits::default();
         let uploads: Vec<_> = (0..=MAX_CONCURRENT_UPLOADS + 1)
             .map(|i| Blob {
                 mime: "text/rust".into(),
                 content: "fn main() {}".into(),
                 path: format!("{}.rs", i),
                 date_updated: Timespec::new(42, 0),
             })
             .collect();
-        test_roundtrip(&uploads);
+
+        test_roundtrip(&uploads, &limits);
     }
@@ -297,6 +328,7 @@
         let files = get_file_list(env::current_dir().unwrap().join("Cargo.toml")).unwrap();
         assert_eq!(files[0], std::path::Path::new("Cargo.toml"));
     }
+
     #[test]
     fn test_mime_types() {
         check_mime(".gitignore", "text/plain");
@@ -317,4 +349,34 @@
         let detected_mime = detected_mime.expect("no mime was given");
         assert_eq!(detected_mime, expected_mime);
     }
+
+    #[test]
+    fn uploads_limited() {
+        let mut limits = Limits::default();
+        limits.set_upload_size(64);
+        let dir = tempfile::Builder::new()
+            .prefix("docs.rs-max-upload-test")
+            .tempdir()
+            .unwrap();
+
+        let files = [("over-limit", 128), ("at-limit", 64), ("under-limit", 20)];
+        for (file, size) in files.iter() {
+            fs::write(dir.path().join(file), b"\x00".repeat(*size)).unwrap();
+        }
+
+        wrapper(|env| {
+            let db = env.db();
+            let conn = db.conn();
+
+            let mut backend = Storage::Database(DatabaseBackend::new(&conn));
+            let stored_files = backend.store_all(&conn, "", dir.path(), &limits).unwrap();
+
+            // The only files that should have been uploaded are the ones <= the upload limit
+            assert!(stored_files.contains_key(&PathBuf::from("under-limit")));
+            assert!(stored_files.contains_key(&PathBuf::from("at-limit")));
+            assert_eq!(stored_files.len(), 2);
+
+            Ok(())
+        });
+    }
 }
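The behavioural core of the storage change: an unreadable file is skipped as before, and an oversized file is now skipped too instead of failing the whole batch. A self-contained restatement of that predicate (illustrative only; `store_all` inlines this logic in its `filter_map`, and `open_if_within_limit` is a made-up name):

```rust
use std::{fs::File, io, path::Path};

/// Illustrative restatement of the new per-file check in `store_all`:
/// `Ok(None)` means "skip this file", mirroring how oversized files are
/// logged and dropped rather than aborting the upload.
fn open_if_within_limit(path: &Path, upload_size: u64) -> io::Result<Option<File>> {
    let file = File::open(path)?;
    let file_size = file.metadata()?.len();

    Ok(if file_size <= upload_size {
        Some(file)
    } else {
        None
    })
}
```

Note that the comparison is inclusive, which is exactly what the `at-limit` case in the new `uploads_limited` test pins down.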
diff --git a/src/test/fakes.rs b/src/test/fakes.rs
index 227a9e255..7f50d6dd2 100644
--- a/src/test/fakes.rs
+++ b/src/test/fakes.rs
@@ -1,7 +1,9 @@
 use super::TestDatabase;
-use crate::docbuilder::BuildResult;
-use crate::index::api::RegistryCrateData;
-use crate::utils::{Dependency, MetadataPackage, Target};
+use crate::{
+    docbuilder::{BuildResult, Limits},
+    index::api::RegistryCrateData,
+    utils::{Dependency, MetadataPackage, Target},
+};
 use failure::Error;
 
 #[must_use = "FakeRelease does nothing until you call .create()"]
@@ -181,8 +183,14 @@ impl<'a> FakeRelease<'a> {
                 package.version,
                 target.unwrap_or("")
             );
+
             log::debug!("adding directory {} from {}", prefix, path_prefix.display());
-            crate::db::add_path_into_database(&db.conn(), &prefix, path_prefix)
+            crate::db::add_path_into_database(
+                &db.conn(),
+                &prefix,
+                path_prefix,
+                &Limits::default(),
+            )
         };
 
         let index = [&package.name, "index.html"].join("/");
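From outside the crate, the visible change is the extra `&Limits` argument on `add_path_into_database` plus the now-public `Limits` re-export. A minimal sketch of a library caller mirroring the updated `database add-directory` subcommand, assuming a database configured for `connect_db`; the prefix and directory are placeholders:

```rust
use cratesfyi::{
    db::{add_path_into_database, connect_db},
    Limits,
};

fn main() -> Result<(), failure::Error> {
    let conn = connect_db()?;

    // With the default limits, any file over 500 MB is logged and skipped
    // instead of failing the entire upload.
    add_path_into_database(&conn, "rustdoc", "path/to/docs", &Limits::default())?;

    Ok(())
}
```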