From 1160bfd51fea618ab20b52659d0727443a286e24 Mon Sep 17 00:00:00 2001 From: Marco A L Barbosa Date: Tue, 29 Oct 2019 15:28:26 -0300 Subject: [PATCH] Add features to allow choosing between reqwest or curl for downloading --- Cargo.lock | 44 ++++++++++++++++ Cargo.toml | 7 +-- dist/travis.sh | 8 ++- src/config.rs | 4 +- src/errors.rs | 6 +-- src/io/cached_itarbundle.rs | 74 +++----------------------- src/io/download_curl.rs | 102 ++++++++++++++++++++++++++++++++++++ src/io/download_none.rs | 74 ++++++++++++++++++++++++++ src/io/download_reqwest.rs | 77 +++++++++++++++++++++++++++ src/io/mod.rs | 12 +++++ 10 files changed, 333 insertions(+), 75 deletions(-) create mode 100644 src/io/download_curl.rs create mode 100644 src/io/download_none.rs create mode 100644 src/io/download_reqwest.rs diff --git a/Cargo.lock b/Cargo.lock index 2ec167bb9..67f96f078 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,6 +255,34 @@ dependencies = [ "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "curl" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "curl-sys 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)", + "schannel 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", + "socket2 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "curl-sys" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "libz-sys 1.0.25 
(registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)", + "vcpkg 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "digest" version = "0.8.1" @@ -1232,6 +1260,17 @@ name = "smallvec" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "socket2" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "string" version = "0.2.1" @@ -1294,6 +1333,7 @@ dependencies = [ "app_dirs 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "cc 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "curl 0.4.25 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", "fs2 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1315,6 +1355,7 @@ dependencies = [ "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "tokio 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "vcpkg 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", "zip 0.5.3 
(registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1797,6 +1838,8 @@ dependencies = [ "checksum crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b" "checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" "checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" +"checksum curl 0.4.25 (registry+https://github.com/rust-lang/crates.io-index)" = "06aa71e9208a54def20792d877bc663d6aae0732b9852e612c4a933177c31283" +"checksum curl-sys 0.4.24 (registry+https://github.com/rust-lang/crates.io-index)" = "f659f3ffac9582d6177bb86d1d2aa649f4eb9d0d4de9d03ccc08b402832ea340" "checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" @@ -1907,6 +1950,7 @@ dependencies = [ "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" "checksum smallvec 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86" +"checksum socket2 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "e8b74de517221a2cb01a53349cf54182acdc31a074727d3079068448c0676d85" "checksum string 0.2.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "d24114bfcceb867ca7f71a0d3fe45d45619ec47a6fbfa98cb14e14250bfa5d6d" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum structopt 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "30b3a3e93f5ad553c38b3301c8a0a0cec829a36783f6a0c467fc4bf553a5f5bf" diff --git a/Cargo.toml b/Cargo.toml index eceb8ff06..271aca950 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,12 +48,13 @@ structopt = "0.3" error-chain = "^0.12" flate2 = { version = "^1.0", default-features = false, features = ["zlib"] } fs2 = "^0.4" -headers = "^0.2" lazy_static = "^1.4" libc = "^0.2" tempfile = "^3.1" md-5 = "^0.8" -reqwest = "^0.9" +url = { version = "^1.7" } +reqwest = { version = "^0.9", optional = true } +curl = { version = "^0.4", optional = true} sha2 = "^0.8" serde = { version = "^1.0", features = ["derive"], optional = true } tectonic_xdv = { path = "xdv", version = "0.1.9-dev" } @@ -62,7 +63,7 @@ toml = { version = "^0.5", optional = true } zip = { version = "^0.5", default-features = false, features = ["deflate"] } [features] -default = ["serialization"] +default = ["serialization", "reqwest"] # Note: we used to have this to couple "serde" and "serde-derive", but we've # adopted the newer scheme to avoid having to depend on both -- should maybe # just get rid of this feature: diff --git a/dist/travis.sh b/dist/travis.sh index e560216e2..ef66c5712 100644 --- a/dist/travis.sh +++ b/dist/travis.sh @@ -242,7 +242,9 @@ if $is_main_build ; then travis_fold_end cargo_fmt travis_fold_start cargo_clippy "cargo clippy" verbose travis_retry rustup component add clippy - cargo clippy --all --all-targets --all-features -- --deny warnings + cargo clippy --all --all-targets -- --deny warnings + cargo clippy --all --all-targets --no-default-features --features serialization -- --deny warnings + cargo clippy --all --all-targets 
--no-default-features --features curl -- --deny warnings travis_fold_end cargo_clippy fi @@ -261,6 +263,10 @@ else travis_fold_start cargo_build_no_default_features "cargo build --no-default-features" verbose cargo build --no-default-features --verbose travis_fold_end cargo_build_no_default_features + travis_fold_start cargo_build_curl \ + "cargo build --no-default-features --features serialization,curl" verbose + cargo build --no-default-features --features serialization,curl + travis_fold_end cargo_build_curl travis_fold_start cargo_build "cargo build" verbose cargo build --verbose travis_fold_end cargo_build diff --git a/src/config.rs b/src/config.rs index 78f795c76..83a225d9e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,7 +12,6 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use std::fs::File; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, Ordering}; @@ -59,6 +58,7 @@ impl PersistentConfig { /// false, the default configuration is returned and the filesystem is not /// modified. pub fn open(auto_create_config_file: bool) -> Result { + use std::fs::File; use std::io::ErrorKind as IoErrorKind; use std::io::{Read, Write}; let mut cfg_path = if auto_create_config_file { @@ -135,8 +135,8 @@ impl PersistentConfig { only_cached: bool, status: &mut dyn StatusBackend, ) -> Result> { - use reqwest::Url; use std::io; + use url::Url; if CONFIG_TEST_MODE_ACTIVATED.load(Ordering::SeqCst) { return Ok(Box::new(crate::test_util::TestBundle::default())); diff --git a/src/errors.rs b/src/errors.rs index f8f142d14..da176a442 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -10,9 +10,9 @@ // hopefully show up in a future version. #![allow(deprecated)] +use crate::io::download; use app_dirs; use error_chain::error_chain; -use reqwest::StatusCode; use std::io::Write; use std::result::Result as StdResult; use std::{convert, ffi, io, num, str}; @@ -63,7 +63,7 @@ error_chain! 
{ Nul(ffi::NulError); ParseInt(num::ParseIntError); Persist(tempfile::PersistError); - Reqwest(reqwest::Error); + Download(download::Error); ConfigRead(ReadError); ConfigWrite(WriteError); Utf8(str::Utf8Error); @@ -97,7 +97,7 @@ error_chain! { display("the {} engine had an unrecoverable error", engine) } - UnexpectedHttpResponse(url: String, status: StatusCode) { + UnexpectedHttpResponse(url: String, status: download::StatusCode) { description("unexpected HTTP response to URL") display("unexpected HTTP response to URL {}: {}", url, status) } diff --git a/src/io/cached_itarbundle.rs b/src/io/cached_itarbundle.rs index 9c6e9a328..0de3a9f68 100644 --- a/src/io/cached_itarbundle.rs +++ b/src/io/cached_itarbundle.rs @@ -5,7 +5,6 @@ use error_chain::bail; use flate2::read::GzDecoder; use fs2::FileExt; -use reqwest::{header::HeaderMap, Client, RedirectPolicy, Response, StatusCode}; use std::collections::HashMap; use std::ffi::OsStr; use std::fs::{self, File}; @@ -21,13 +20,14 @@ use crate::errors::{Error, ErrorKind, Result, ResultExt}; use crate::status::StatusBackend; use crate::{ctry, tt_note, tt_warning}; -const MAX_HTTP_REDIRECTS_ALLOWED: usize = 10; +use crate::io::download::{self, Client, Response, StatusCode}; + const MAX_HTTP_ATTEMPTS: usize = 4; /// A simple way to read chunks out of a big seekable byte stream. You could /// implement this for io::File pretty trivially but that's not currently /// needed. 
-#[derive(Clone, Debug)] +#[derive(Debug)] pub struct HttpRangeReader { url: String, client: Client, @@ -44,13 +44,8 @@ impl HttpRangeReader { impl HttpRangeReader { fn read_range(&mut self, offset: u64, length: usize) -> Result { - let end_inclusive = offset + length as u64 - 1; - - let mut headers = HeaderMap::new(); - use headers::HeaderMapExt; - headers.typed_insert(headers::Range::bytes(offset..=end_inclusive).unwrap()); - - let res = self.client.get(&self.url).headers(headers).send()?; + let end = offset + length as u64 - 1; + let res = download::get_range_inclusive(&mut self.client, &self.url, offset, end)?; if res.status() != StatusCode::PARTIAL_CONTENT { return Err(Error::from(ErrorKind::UnexpectedHttpResponse( @@ -81,7 +76,7 @@ fn get_index(url: &str, status: &mut dyn StatusBackend) -> Result Result Result { - tt_note!(status, "connecting to {}", url); - - // First, we actually do a HEAD request on the URL for the data file. - // If it's redirected, we update our URL to follow the redirects. If - // we didn't do this separately, the index file would have to be the - // one with the redirect setup, which would be confusing and annoying. - - let redirect_policy = RedirectPolicy::custom(|attempt| { - // In the process of resolving the file url it might be neccesary - // to stop at a certain level of redirection. This might be required - // because some hosts might redirect to a version of the url where - // it isn't possible to select the index file by appending .index.gz. - // (This mostly happens because CDNs redirect to the file hash.) 
- if attempt.previous().len() >= MAX_HTTP_REDIRECTS_ALLOWED { - attempt.too_many_redirects() - } else if let Some(segments) = attempt.url().clone().path_segments() { - let follow = segments - .last() - .map(|file| file.contains('.')) - .unwrap_or(true); - if follow { - attempt.follow() - } else { - attempt.stop() - } - } else { - attempt.follow() - } - }); - let res = Client::builder() - .redirect(redirect_policy) - .build()? - .head(url) - .send()?; - - if !(res.status().is_success() || res.status() == StatusCode::FOUND) { - return Err(Error::from(ErrorKind::UnexpectedHttpResponse( - url.to_string(), - res.status(), - ))) - .chain_err(|| "couldn\'t probe".to_string()); - } - - let final_url = res.url().clone().into_string(); - - if final_url != url { - tt_note!(status, "resolved to {}", final_url); - } - - Ok(final_url) -} - /// Attempts to download a file from the bundle. fn get_file( data: &mut HttpRangeReader, @@ -224,7 +166,7 @@ fn parse_index_line(line: &str) -> Result> { /// Attempts to find the redirected url, download the index and digest. fn get_everything(url: &str, status: &mut dyn StatusBackend) -> Result<(String, String, String)> { - let url = resolve_url(url, status)?; + let url = download::resolve_url(url, status)?; let index = { let mut index = String::new(); @@ -325,7 +267,7 @@ fn make_txt_path(base: &Path, digest_text: &str) -> PathBuf { } /// Bundle provided by an indexed tar file over http with a local cache. 
-#[derive(Clone, Debug)] +#[derive(Debug)] pub struct CachedITarBundle { url: String, redirect_url: String, diff --git a/src/io/download_curl.rs b/src/io/download_curl.rs new file mode 100644 index 000000000..2bcd95324 --- /dev/null +++ b/src/io/download_curl.rs @@ -0,0 +1,102 @@ +use std::fmt; +use std::io; + +use crate::errors::Result; +use crate::status::StatusBackend; +use curl::easy::Easy; + +pub use curl::Error; + +const MAX_HTTP_REDIRECTS_ALLOWED: u32 = 10; + +#[derive(Debug)] +pub struct Client { + handle: Easy, +} + +pub struct Response { + data: io::Cursor>, + status: StatusCode, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct StatusCode(u32); + +impl Default for Client { + fn default() -> Client { + Client { + handle: Easy::new(), + } + } +} + +impl Client { + pub fn new() -> Client { + Client::default() + } + + fn get(&mut self, url: &str, range: Option<(u64, u64)>) -> Result { + let handle = &mut self.handle; + handle.url(url)?; + handle.follow_location(true)?; + handle.max_redirections(MAX_HTTP_REDIRECTS_ALLOWED)?; + if let Some((start, end)) = range { + handle.range(&format!("{}-{}", start, end))?; + } + let mut buf = Vec::new(); + { + let mut transfer = handle.transfer(); + transfer.write_function(|data| { + buf.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + } + let data = io::Cursor::new(buf); + let status = StatusCode(handle.response_code()?); + Ok(Response { data, status }) + } +} + +impl Response { + pub fn status(&self) -> StatusCode { + self.status + } +} + +impl io::Read for Response { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.data.read(buf) + } +} + +impl StatusCode { + pub const PARTIAL_CONTENT: StatusCode = StatusCode(206); + + pub fn is_success(self) -> bool { + 200 <= self.0 && self.0 <= 299 + } +} + +impl fmt::Display for StatusCode { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "StatusCode({})", self.0) + } +} + +pub fn get(url: &str) -> Result { + 
Client::new().get(url, None) +} + +pub fn get_range_inclusive( + client: &mut Client, + url: &str, + start: u64, + end: u64, +) -> Result { + client.get(url, Some((start, end))) +} + +pub fn resolve_url(url: &str, _status: &mut dyn StatusBackend) -> Result { + Ok(url.into()) +} diff --git a/src/io/download_none.rs b/src/io/download_none.rs new file mode 100644 index 000000000..f925ea506 --- /dev/null +++ b/src/io/download_none.rs @@ -0,0 +1,74 @@ +use std::error; +use std::fmt; +use std::io; + +use crate::errors::Result; +use crate::status::StatusBackend; + +#[derive(Default, Debug)] +pub struct Client {} + +pub struct Response {} + +#[derive(Debug)] +pub struct Error {} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct StatusCode(u32); + +impl error::Error for Error {} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Tectonic was compiled without a download backend.") + } +} + +impl Client { + pub fn new() -> Client { + Client {} + } +} + +impl Response { + pub fn status(&self) -> StatusCode { + unreachable!() + } +} + +impl io::Read for Response { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + unreachable!() + } +} + +impl StatusCode { + pub const PARTIAL_CONTENT: StatusCode = StatusCode(206); + + pub fn is_success(self) -> bool { + unreachable!() + } +} + +impl fmt::Display for StatusCode { + fn fmt(&self, _fmt: &mut fmt::Formatter) -> fmt::Result { + unreachable!() + } +} + +pub fn get(_url: &str) -> Result { + Err(Error {}.into()) +} + +pub fn get_range_inclusive( + _client: &mut Client, + _url: &str, + _start: u64, + _end: u64, +) -> Result { + Err(Error {}.into()) +} + +pub fn resolve_url(_url: &str, _status: &mut dyn StatusBackend) -> Result { + Err(Error {}.into()) +} diff --git a/src/io/download_reqwest.rs b/src/io/download_reqwest.rs new file mode 100644 index 000000000..711270a22 --- /dev/null +++ b/src/io/download_reqwest.rs @@ -0,0 +1,77 @@ +const MAX_HTTP_REDIRECTS_ALLOWED: 
usize = 10; + +use crate::errors::{self, ErrorKind, Result, ResultExt}; +use crate::status::StatusBackend; +use crate::tt_note; +use reqwest::RedirectPolicy; + +pub use reqwest::{Client, Error, Response, StatusCode}; + +pub fn get(url: &str) -> Result { + Ok(Client::new().get(url).send()?) +} + +pub fn get_range_inclusive( + client: &mut Client, + url: &str, + start: u64, + end: u64, +) -> Result { + Ok(client + .get(url) + .header("Range", &format!("bytes={}-{}", start, end)) + .send()?) +} + +pub fn resolve_url(url: &str, status: &mut dyn StatusBackend) -> Result { + tt_note!(status, "connecting to {}", url); + + // First, we actually do a HEAD request on the URL for the data file. + // If it's redirected, we update our URL to follow the redirects. If + // we didn't do this separately, the index file would have to be the + // one with the redirect setup, which would be confusing and annoying. + + let redirect_policy = RedirectPolicy::custom(|attempt| { + // In the process of resolving the file url it might be necessary + // to stop at a certain level of redirection. This might be required + // because some hosts might redirect to a version of the url where + // it isn't possible to select the index file by appending .index.gz. + // (This mostly happens because CDNs redirect to the file hash.) + if attempt.previous().len() >= MAX_HTTP_REDIRECTS_ALLOWED { + attempt.too_many_redirects() + } else if let Some(segments) = attempt.url().clone().path_segments() { + let follow = segments + .last() + .map(|file| file.contains('.')) + .unwrap_or(true); + if follow { + attempt.follow() + } else { + attempt.stop() + } + } else { + attempt.follow() + } + }); + let res = Client::builder() + .redirect(redirect_policy) + .build()? 
+ .head(url) + .send()?; + + if !(res.status().is_success() || res.status() == StatusCode::FOUND) { + return Err(errors::Error::from(ErrorKind::UnexpectedHttpResponse( + url.to_string(), + res.status(), + ))) + .chain_err(|| "couldn\'t probe".to_string()); + } + + let final_url = res.url().clone().into_string(); + + if final_url != url { + tt_note!(status, "resolved to {}", final_url); + } + + Ok(final_url) +} diff --git a/src/io/mod.rs b/src/io/mod.rs index ba7e2221b..3ac3ef7fc 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -17,6 +17,20 @@ use crate::digest::{self, Digest, DigestData}; use crate::errors::{Error, ErrorKind, Result}; use crate::status::StatusBackend; +// Exactly one `download` backend module is compiled in. `reqwest` takes +// precedence over `curl` so enabling both features does not declare it twice. +#[cfg(feature = "reqwest")] +#[path = "download_reqwest.rs"] +pub mod download; + +#[cfg(all(feature = "curl", not(feature = "reqwest")))] +#[path = "download_curl.rs"] +pub mod download; + +#[cfg(all(not(feature = "curl"), not(feature = "reqwest")))] +#[path = "download_none.rs"] +pub mod download; + pub mod cached_itarbundle; pub mod dirbundle; pub mod filesystem;