Skip to content

Commit be5d521

Browse files
committed
fix content-type for zstd files, additionally build & serve gzip compressed rustdoc json
1 parent d932948 commit be5d521

File tree

15 files changed

+564
-95
lines changed

15 files changed

+564
-95
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ prometheus = { version = "0.14.0", default-features = false }
4444
rustwide = { version = "0.19.0", features = ["unstable-toolchain-ci", "unstable"] }
4545
mime_guess = "2"
4646
zstd = "0.13.0"
47+
flate2 = "1.1.1"
4748
hostname = "0.4.0"
4849
path-slash = "0.2.0"
4950
once_cell = { version = "1.4.0", features = ["parking_lot"] }

benches/compression.rs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,18 @@ pub fn regex_capture_matches(c: &mut Criterion) {
3333
5 * 1024 * 1024,
3434
)
3535
});
36+
})
37+
.bench_function("compress gzip", |b| {
38+
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Gzip));
39+
})
40+
.bench_function("decompress gzip", |b| {
41+
b.iter(|| {
42+
decompress(
43+
black_box(html_slice),
44+
CompressionAlgorithm::Gzip,
45+
5 * 1024 * 1024,
46+
)
47+
});
3648
});
3749
}
3850

src/db/delete.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ mod tests {
222222
use super::*;
223223
use crate::db::ReleaseId;
224224
use crate::registry_api::{CrateOwner, OwnerKind};
225-
use crate::storage::rustdoc_json_path;
225+
use crate::storage::{CompressionAlgorithm, rustdoc_json_path};
226226
use crate::test::{async_wrapper, fake_release_that_failed_before_build};
227227
use test_case::test_case;
228228

@@ -413,6 +413,7 @@ mod tests {
413413
version,
414414
"x86_64-unknown-linux-gnu",
415415
crate::storage::RustdocJsonFormatVersion::Latest,
416+
Some(CompressionAlgorithm::Zstd),
416417
))
417418
.await
418419
}

src/db/file.rs

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ pub(crate) fn detect_mime(file_path: impl AsRef<Path>) -> Mime {
4747
Some("toml") => mimes::TEXT_TOML.clone(),
4848
Some("js") => mime::TEXT_JAVASCRIPT,
4949
Some("json") => mime::APPLICATION_JSON,
50+
Some("gz") => mimes::APPLICATION_GZIP.clone(),
51+
Some("zst") => mimes::APPLICATION_ZSTD.clone(),
5052
_ => mime,
5153
}
5254
}
@@ -103,3 +105,27 @@ pub(crate) fn file_list_to_json(files: impl IntoIterator<Item = FileEntry>) -> V
103105
.collect(),
104106
)
105107
}
108+
109+
#[cfg(test)]
110+
mod tests {
111+
use super::*;
112+
use test_case::test_case;
113+
114+
// some standard mime types that mime-guess handles
115+
#[test_case("txt", &mime::TEXT_PLAIN)]
116+
#[test_case("html", &mime::TEXT_HTML)]
117+
// overrides of other mime types and defaults for
118+
// types mime-guess doesn't know about
119+
#[test_case("md", &mimes::TEXT_MARKDOWN)]
120+
#[test_case("rs", &mimes::TEXT_RUST)]
121+
#[test_case("markdown", &mimes::TEXT_MARKDOWN)]
122+
#[test_case("css", &mime::TEXT_CSS)]
123+
#[test_case("toml", &mimes::TEXT_TOML)]
124+
#[test_case("js", &mime::TEXT_JAVASCRIPT)]
125+
#[test_case("json", &mime::APPLICATION_JSON)]
126+
#[test_case("zst", &mimes::APPLICATION_ZSTD)]
127+
#[test_case("gz", &mimes::APPLICATION_GZIP)]
128+
fn test_detect_mime(ext: &str, expected: &Mime) {
129+
assert_eq!(&detect_mime(format!("something.{ext}")), expected);
130+
}
131+
}

src/db/mimes.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ macro_rules! mime {
88
}
99

1010
mime!(APPLICATION_ZIP, "application/zip");
11+
mime!(APPLICATION_ZSTD, "application/zstd");
12+
mime!(APPLICATION_GZIP, "application/gzip");
1113
mime!(TEXT_MARKDOWN, "text/markdown");
1214
mime!(TEXT_RUST, "text/rust");
1315
mime!(TEXT_TOML, "text/toml");

src/docbuilder/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,6 @@ mod rustwide_builder;
44
pub(crate) use self::limits::Limits;
55
pub(crate) use self::rustwide_builder::DocCoverage;
66
pub use self::rustwide_builder::{BuildPackageSummary, PackageKind, RustwideBuilder};
7+
8+
#[cfg(test)]
9+
pub use self::rustwide_builder::RUSTDOC_JSON_COMPRESSION_ALGORITHMS;

src/docbuilder/rustwide_builder.rs

Lines changed: 57 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ use crate::docbuilder::Limits;
1313
use crate::error::Result;
1414
use crate::repositories::RepositoryStatsUpdater;
1515
use crate::storage::{
16-
RustdocJsonFormatVersion, get_file_list, rustdoc_archive_path, rustdoc_json_path,
17-
source_archive_path,
16+
CompressionAlgorithm, RustdocJsonFormatVersion, compress, get_file_list, rustdoc_archive_path,
17+
rustdoc_json_path, source_archive_path,
1818
};
1919
use crate::utils::{
2020
CargoMetadata, ConfigName, copy_dir_all, get_config, parse_rustc_version, report_error,
@@ -45,6 +45,9 @@ const COMPONENTS: &[&str] = &["llvm-tools-preview", "rustc-dev", "rustfmt"];
4545
const DUMMY_CRATE_NAME: &str = "empty-library";
4646
const DUMMY_CRATE_VERSION: &str = "1.0.0";
4747

48+
pub const RUSTDOC_JSON_COMPRESSION_ALGORITHMS: &[CompressionAlgorithm] =
49+
&[CompressionAlgorithm::Zstd, CompressionAlgorithm::Gzip];
50+
4851
/// read the format version from a rustdoc JSON file.
4952
fn read_format_version_from_rustdoc_json(
5053
reader: impl std::io::Read,
@@ -909,12 +912,25 @@ impl RustwideBuilder {
909912
.context("couldn't parse rustdoc json to find format version")?
910913
};
911914

912-
for format_version in [format_version, RustdocJsonFormatVersion::Latest] {
913-
let _span = info_span!("store_json", %format_version).entered();
914-
let path = rustdoc_json_path(name, version, target, format_version);
915+
for alg in RUSTDOC_JSON_COMPRESSION_ALGORITHMS {
916+
let compressed_json: Vec<u8> = {
917+
let _span =
918+
info_span!("compress_json", file_size = json_filename.metadata()?.len(), algorithm=%alg)
919+
.entered();
920+
921+
compress(BufReader::new(File::open(&json_filename)?), *alg)?
922+
};
915923

916-
self.storage.store_path(&path, &json_filename)?;
917-
self.storage.set_public_access(&path, true)?;
924+
for format_version in [format_version, RustdocJsonFormatVersion::Latest] {
925+
let path = rustdoc_json_path(name, version, target, format_version, Some(*alg));
926+
let _span =
927+
info_span!("store_json", %format_version, algorithm=%alg, target_path=%path)
928+
.entered();
929+
930+
self.storage
931+
.store_one_uncompressed(&path, compressed_json.clone())?;
932+
self.storage.set_public_access(&path, true)?;
933+
}
918934
}
919935

920936
Ok(())
@@ -1279,7 +1295,7 @@ mod tests {
12791295
use super::*;
12801296
use crate::db::types::Feature;
12811297
use crate::registry_api::ReleaseData;
1282-
use crate::storage::CompressionAlgorithm;
1298+
use crate::storage::{CompressionAlgorithm, compression};
12831299
use crate::test::{AxumRouterTestExt, TestEnvironment, wrapper};
12841300
use std::{io, iter};
12851301
use test_case::test_case;
@@ -1467,29 +1483,39 @@ mod tests {
14671483

14681484
// other targets too
14691485
for target in DEFAULT_TARGETS {
1470-
// check if rustdoc json files exist for all targets
1471-
let path = rustdoc_json_path(
1472-
crate_,
1473-
version,
1474-
target,
1475-
RustdocJsonFormatVersion::Latest,
1476-
);
1477-
assert!(storage.exists(&path)?);
1478-
assert!(storage.get_public_access(&path)?);
1479-
1480-
let json_prefix = format!("rustdoc-json/{crate_}/{version}/{target}/");
1481-
let mut json_files: Vec<_> = storage
1482-
.list_prefix(&json_prefix)
1483-
.filter_map(|res| res.ok())
1484-
.map(|f| f.strip_prefix(&json_prefix).unwrap().to_owned())
1485-
.collect();
1486-
json_files.sort();
1487-
assert!(json_files[0].starts_with(&format!("empty-library_1.0.0_{target}_")));
1488-
assert!(json_files[0].ends_with(".json"));
1489-
assert_eq!(
1490-
json_files[1],
1491-
format!("empty-library_1.0.0_{target}_latest.json")
1492-
);
1486+
for alg in RUSTDOC_JSON_COMPRESSION_ALGORITHMS {
1487+
// check if rustdoc json files exist for all targets
1488+
let path = rustdoc_json_path(
1489+
crate_,
1490+
version,
1491+
target,
1492+
RustdocJsonFormatVersion::Latest,
1493+
Some(*alg),
1494+
);
1495+
assert!(storage.exists(&path)?);
1496+
assert!(storage.get_public_access(&path)?);
1497+
1498+
let ext = compression::file_extension_for(*alg);
1499+
1500+
let json_prefix = format!("rustdoc-json/{crate_}/{version}/{target}/");
1501+
let mut json_files: Vec<_> = storage
1502+
.list_prefix(&json_prefix)
1503+
.filter_map(|res| res.ok())
1504+
.map(|f| f.strip_prefix(&json_prefix).unwrap().to_owned())
1505+
.collect();
1506+
json_files.retain(|f| f.ends_with(&format!(".json.{ext}")));
1507+
json_files.sort();
1508+
dbg!(&json_files);
1509+
assert!(
1510+
json_files[0].starts_with(&format!("empty-library_1.0.0_{target}_"))
1511+
);
1512+
1513+
assert!(json_files[0].ends_with(&format!(".json.{ext}")));
1514+
assert_eq!(
1515+
json_files[1],
1516+
format!("empty-library_1.0.0_{target}_latest.json.{ext}")
1517+
);
1518+
}
14931519

14941520
if target == &default_target {
14951521
continue;

src/storage/compression.rs

Lines changed: 49 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
use anyhow::Error;
2-
use bzip2::Compression;
32
use bzip2::read::{BzDecoder, BzEncoder};
3+
use flate2::read::{GzDecoder, GzEncoder};
44
use serde::{Deserialize, Serialize};
55
use std::{
66
collections::HashSet,
@@ -29,6 +29,13 @@ pub enum CompressionAlgorithm {
2929
#[default]
3030
Zstd = 0,
3131
Bzip2 = 1,
32+
Gzip = 2,
33+
}
34+
35+
impl CompressionAlgorithm {
36+
pub fn file_extension(&self) -> &'static str {
37+
file_extension_for(*self)
38+
}
3239
}
3340

3441
impl std::convert::TryFrom<i32> for CompressionAlgorithm {
@@ -45,17 +52,40 @@ impl std::convert::TryFrom<i32> for CompressionAlgorithm {
4552
}
4653
}
4754

55+
pub(crate) fn file_extension_for(algorithm: CompressionAlgorithm) -> &'static str {
56+
match algorithm {
57+
CompressionAlgorithm::Zstd => "zst",
58+
CompressionAlgorithm::Bzip2 => "bz2",
59+
CompressionAlgorithm::Gzip => "gz",
60+
}
61+
}
62+
63+
pub(crate) fn compression_from_file_extension(ext: &str) -> Option<CompressionAlgorithm> {
64+
match ext {
65+
"zst" => Some(CompressionAlgorithm::Zstd),
66+
"bz2" => Some(CompressionAlgorithm::Bzip2),
67+
"gz" => Some(CompressionAlgorithm::Gzip),
68+
_ => None,
69+
}
70+
}
71+
4872
// public for benchmarking
4973
pub fn compress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<Vec<u8>, Error> {
5074
match algorithm {
5175
CompressionAlgorithm::Zstd => Ok(zstd::encode_all(content, 9)?),
5276
CompressionAlgorithm::Bzip2 => {
53-
let mut compressor = BzEncoder::new(content, Compression::best());
77+
let mut compressor = BzEncoder::new(content, bzip2::Compression::best());
5478

5579
let mut data = vec![];
5680
compressor.read_to_end(&mut data)?;
5781
Ok(data)
5882
}
83+
CompressionAlgorithm::Gzip => {
84+
let mut compressor = GzEncoder::new(content, flate2::Compression::default());
85+
let mut data = vec![];
86+
compressor.read_to_end(&mut data)?;
87+
Ok(data)
88+
}
5989
}
6090
}
6191

@@ -72,6 +102,9 @@ pub fn decompress(
72102
CompressionAlgorithm::Bzip2 => {
73103
io::copy(&mut BzDecoder::new(content), &mut buffer)?;
74104
}
105+
CompressionAlgorithm::Gzip => {
106+
io::copy(&mut GzDecoder::new(content), &mut buffer)?;
107+
}
75108
}
76109

77110
Ok(buffer.into_inner())
@@ -81,6 +114,7 @@ pub fn decompress(
81114
mod tests {
82115
use super::*;
83116
use strum::IntoEnumIterator;
117+
use test_case::test_case;
84118

85119
#[test]
86120
fn test_compression() {
@@ -134,9 +168,18 @@ mod tests {
134168
}
135169
}
136170

137-
#[test]
138-
fn test_enum_display() {
139-
assert_eq!(CompressionAlgorithm::Zstd.to_string(), "Zstd");
140-
assert_eq!(CompressionAlgorithm::Bzip2.to_string(), "Bzip2");
171+
#[test_case(CompressionAlgorithm::Zstd, "Zstd")]
172+
#[test_case(CompressionAlgorithm::Bzip2, "Bzip2")]
173+
#[test_case(CompressionAlgorithm::Gzip, "Gzip")]
174+
fn test_enum_display(alg: CompressionAlgorithm, expected: &str) {
175+
assert_eq!(alg.to_string(), expected);
176+
}
177+
178+
#[test_case(CompressionAlgorithm::Zstd, "zst")]
179+
#[test_case(CompressionAlgorithm::Bzip2, "bz2")]
180+
#[test_case(CompressionAlgorithm::Gzip, "gz")]
181+
fn test_file_extensions(alg: CompressionAlgorithm, expected: &str) {
182+
assert_eq!(file_extension_for(alg), expected);
183+
assert_eq!(compression_from_file_extension(expected), Some(alg));
141184
}
142185
}

0 commit comments

Comments
 (0)