From 3e519f606f953dbfbdc7e59b79c84cde992aeec0 Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 17:32:40 -0700 Subject: [PATCH 1/8] Move index_result to it's own module --- src/index_result.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++ src/mod.rs | 2 ++ 2 files changed, 49 insertions(+) create mode 100644 src/index_result.rs create mode 100644 src/mod.rs diff --git a/src/index_result.rs b/src/index_result.rs new file mode 100644 index 0000000..5830bcc --- /dev/null +++ b/src/index_result.rs @@ -0,0 +1,47 @@ +use std::{collections::HashMap, fmt}; + +pub struct IndexResult { + pub cid: String, + pub title: String, + pub excerpt: String, + pub keywords: HashMap, +} + +impl IndexResult { + pub fn new( + cid: String, + title: String, + excerpt: String, + keywords: HashMap, + ) -> IndexResult { + IndexResult { + cid: cid, + title: title, + excerpt: excerpt, + keywords: keywords, + } + } + + /** + * Returns the top n keywords. Todo: use a tree structure to store the rankings of the keywords + * so that this is faster + */ + pub fn top_n_keywords(&self, n: u32) -> Vec<(&String, &u32)> { + let mut hash_vec: Vec<(&String, &u32)> = self.keywords.iter().collect(); + hash_vec.sort_by(|a, b| b.1.cmp(a.1)); + hash_vec.iter().take(n as usize).cloned().collect() + } +} + +impl fmt::Display for IndexResult { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CID: {} \nTitle: {}\n{}\nKeywords: {:?}", + self.cid, + self.title, + self.excerpt, + self.top_n_keywords(10) + ) + } +} diff --git a/src/mod.rs b/src/mod.rs new file mode 100644 index 0000000..4975874 --- /dev/null +++ b/src/mod.rs @@ -0,0 +1,2 @@ +mod index_result; +pub use index_result::*; From 18dcfc4c78de8f95e7d4722b7674acc74b2e6930 Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 17:38:29 -0700 Subject: [PATCH 2/8] Formatting and check changes from cargo fmt and cargo check --- src/indexer.rs | 64 +++++++++++++++++++++++++++++++++----------------- src/main.rs | 20 +++++++--------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/indexer.rs b/src/indexer.rs index 10aacbd..1e747d5 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,13 +1,13 @@ use chashmap::CHashMap; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::{sync, thread, time}; -use std::sync::atomic::{AtomicBool, Ordering}; use cid::multihash::{Code, MultihashDigest}; use cid::Cid; -use log::{trace,info,warn}; -use std::fmt; -use scraper::{Html,Selector}; +use log::{info, trace, warn}; +use scraper::{Html, Selector}; use std::collections::HashMap; +use std::fmt; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::{sync, thread, time}; pub const RAW: u64 = 0x55; @@ -15,16 +15,21 @@ struct IndexResult { pub cid: String, pub title: String, pub excerpt: String, - pub keywords: HashMap + pub keywords: HashMap, } impl IndexResult { - pub fn new(cid: String, title: String, excerpt: String, keywords: HashMap) -> IndexResult { + pub fn new( + cid: String, + title: String, + excerpt: String, + keywords: HashMap, + ) -> IndexResult { IndexResult { cid: cid, title: title, excerpt: excerpt, - keywords: keywords + keywords: keywords, } } @@ -32,7 +37,7 @@ impl IndexResult { * Returns the top n keywords. Todo: use a tree structure to store the rankings of the keywords * so that this is faster */ - pub fn top_n_keywords(&self, n: u32) -> Vec<(&String, &u32)>{ + pub fn top_n_keywords(&self, n: u32) -> Vec<(&String, &u32)> { let mut hash_vec: Vec<(&String, &u32)> = self.keywords.iter().collect(); hash_vec.sort_by(|a, b| b.1.cmp(a.1)); hash_vec.iter().take(n as usize).cloned().collect() @@ -41,7 +46,14 @@ impl IndexResult { impl fmt::Display for IndexResult { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CID: {} \nTitle: {}\n{}\nKeywords: {:?}", self.cid, self.title, self.excerpt, self.top_n_keywords(10)) + write!( + f, + "CID: {} \nTitle: {}\n{}\nKeywords: {:?}", + self.cid, + self.title, + self.excerpt, + self.top_n_keywords(10) + ) } } @@ -70,7 +82,7 @@ impl Indexer { running: sync::Arc::new(AtomicBool::new(false)), handle: None, poison_pill: Cid::new_v1(RAW, Code::Sha2_256.digest(b"Poison Pill")), - ipfs_gateway: ipfs_gateway + ipfs_gateway: ipfs_gateway, } } @@ -127,7 +139,12 @@ impl Indexer { if res.0.is_some() { map.insert(cid.clone(), res.0.unwrap()); } - info!("indexed cid {}. Have {} entries. Have {} more cids to add to the queue", cid, map.len(), res.1.len()); + info!( + "indexed cid {}. Have {} entries. Have {} more cids to add to the queue", + cid, + map.len(), + res.1.len() + ); for new_cid in res.1 { if map.contains_key(&new_cid) { trace!("cid {} already in map", new_cid); @@ -152,7 +169,7 @@ impl Indexer { let url = format!("http://{}/ipfs/{}", gateway, cid); info!("retreiving content from {}", url); let client = reqwest::blocking::Client::new(); - + let result = client.get(&url).send(); let result = match result { Ok(r) => r, @@ -184,7 +201,10 @@ impl Indexer { } else { info!("found meta http-equiv=\"refresh\""); let start_bytes = inner_html.find("url=").unwrap_or(0); - let end_bytes = inner_html[start_bytes..].find("\"").unwrap_or(inner_html.len()) + start_bytes; + let end_bytes = inner_html[start_bytes..] + .find("\"") + .unwrap_or(inner_html.len()) + + start_bytes; let redirect_url = &inner_html[start_bytes + 4..end_bytes]; // assuming relative let newurl = format!("{}/{}", url, redirect_url); @@ -220,11 +240,11 @@ impl Indexer { for element in document.select(&selector) { let link = element.value().attr("href").unwrap_or(""); if link.starts_with(format!("http://{}/ipfs/", gateway).as_str()) { - let cid = link[12 + gateway.len() ..].to_string(); + let cid = link[12 + gateway.len()..].to_string(); info!("found link to {}", cid); cids.push(cid); } else if link.starts_with(format!("https://{}/ipfs/", gateway).as_str()) { - let cid = link[13 + gateway.len() ..].to_string(); + let cid = link[13 + gateway.len()..].to_string(); info!("found link to {}", cid); cids.push(cid); } else if link.starts_with("http") || link.starts_with("https") { @@ -245,14 +265,14 @@ impl Indexer { let selector = Selector::parse("body").unwrap(); let body = document.select(&selector).next(); let mut excerpt = "".to_string(); - let mut keywords : HashMap = HashMap::new(); + let mut keywords: HashMap = HashMap::new(); if body.is_some() { // collect up the tags in the body, and get the contents within them without their tags let inner = body.unwrap().text().collect::>(); let mut content = inner.join(" "); // this leaves a ton of whitespace between things, so do this next step to remove that let iter = content.split_whitespace(); - content = iter.fold(String::new(), | a,b| a + b + " "); + content = iter.fold(String::new(), |a, b| a + b + " "); content = content.trim_start().trim_end().to_string(); // get the frequency of words and turn it into a btree @@ -269,7 +289,7 @@ impl Indexer { } } } - + if content.contains("no link named") { warn!("ipfs error on page {}, likely doesn't exist", fullcid); } @@ -282,7 +302,7 @@ impl Indexer { info!("retreived content for cid {}:\n{}", cid, result); (Some(result), cids) } - + pub fn stop(&mut self) { if !self.running.load(Ordering::SeqCst) { warn!("trying to stop before indexer started"); @@ -292,4 +312,4 @@ impl Indexer { self.running.store(false, Ordering::SeqCst); self.handle.take().unwrap().join().unwrap(); } -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index c095629..7e30f79 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,11 @@ +use cid::Cid; use futures::executor::block_on; -use futures::prelude::*; -use libp2p::ping::{Ping, PingConfig}; -use libp2p::swarm::{Swarm, SwarmEvent}; use libp2p::{identity, PeerId}; -use std::error::Error; -use std::task::Poll; -use cid::Cid; -use log::{info,warn}; +use log::{info, warn}; use simple_logger::SimpleLogger; use std::convert::TryFrom; use std::env; +use std::error::Error; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -31,7 +27,6 @@ fn main() -> Result<(), Box> { gateway = &args[1]; } - let mut index = Indexer::new(gateway.to_string()); index.start(); @@ -39,16 +34,16 @@ fn main() -> Result<(), Box> { // note: delays are so that we don't stop before the indexer has a chance to work, in reality we don't need them let wikipedia_cid = Cid::try_from("QmXoypizjW3WknFiJnKLwHCnL72vedxjQkDDP1mXWo6uco").unwrap(); index.enqueue_cid(wikipedia_cid); - + let local_key = identity::Keypair::generate_ed25519(); let local_peer_id = PeerId::from(local_key.public()); info!("Local peer id: {:?}", local_peer_id); - let transport = block_on(libp2p::development_transport(local_key))?; + let _transport = block_on(libp2p::development_transport(local_key))?; // this stuff conflicts with the running ipfs node, // so need to rejig it otherwise it panics before indexing starts - + // Create a ping network behaviour. // // For illustrative purposes, the ping protocol is configured to @@ -87,7 +82,8 @@ fn main() -> Result<(), Box> { ctrlc::set_handler(move || { r.store(false, Ordering::SeqCst); - }).expect("Error setting Ctrl-C handler"); + }) + .expect("Error setting Ctrl-C handler"); info!("Waiting for Ctrl-C..."); while running.load(Ordering::SeqCst) {} From dfca58b6720b9fbb410f8ea9dabf7b3e276951f4 Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 17:49:16 -0700 Subject: [PATCH 3/8] Undo mod.rs --- src/mod.rs | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 src/mod.rs diff --git a/src/mod.rs b/src/mod.rs deleted file mode 100644 index 4975874..0000000 --- a/src/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod index_result; -pub use index_result::*; From 4e51b13786e79411ae956b9c1315f56f2f8f8b7b Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 18:06:24 -0700 Subject: [PATCH 4/8] Pull out IndexResult into module --- src/indexer.rs | 48 +----------------------------------------------- src/main.rs | 2 ++ 2 files changed, 3 insertions(+), 47 deletions(-) diff --git a/src/indexer.rs b/src/indexer.rs index 1e747d5..8c6c8e2 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,62 +1,16 @@ +use super::index_result::IndexResult; use chashmap::CHashMap; use cid::multihash::{Code, MultihashDigest}; use cid::Cid; use log::{info, trace, warn}; use scraper::{Html, Selector}; use std::collections::HashMap; -use std::fmt; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::{channel, Receiver, Sender}; use std::{sync, thread, time}; pub const RAW: u64 = 0x55; -struct IndexResult { - pub cid: String, - pub title: String, - pub excerpt: String, - pub keywords: HashMap, -} - -impl IndexResult { - pub fn new( - cid: String, - title: String, - excerpt: String, - keywords: HashMap, - ) -> IndexResult { - IndexResult { - cid: cid, - title: title, - excerpt: excerpt, - keywords: keywords, - } - } - - /** - * Returns the top n keywords. Todo: use a tree structure to store the rankings of the keywords - * so that this is faster - */ - pub fn top_n_keywords(&self, n: u32) -> Vec<(&String, &u32)> { - let mut hash_vec: Vec<(&String, &u32)> = self.keywords.iter().collect(); - hash_vec.sort_by(|a, b| b.1.cmp(a.1)); - hash_vec.iter().take(n as usize).cloned().collect() - } -} - -impl fmt::Display for IndexResult { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "CID: {} \nTitle: {}\n{}\nKeywords: {:?}", - self.cid, - self.title, - self.excerpt, - self.top_n_keywords(10) - ) - } -} - pub struct Indexer { // this map is for keeping track of which entries have been indexed map: sync::Arc>, diff --git a/src/main.rs b/src/main.rs index 7e30f79..bcde204 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,9 @@ use std::error::Error; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; +mod index_result; mod indexer; + use indexer::Indexer; fn main() -> Result<(), Box> { From ddda28f233c5f2a7541c5be1fbbc3e226e4b7a4c Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 18:25:05 -0700 Subject: [PATCH 5/8] Add placeholder test --- src/index_result.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/index_result.rs b/src/index_result.rs index 5830bcc..6a5120f 100644 --- a/src/index_result.rs +++ b/src/index_result.rs @@ -45,3 +45,11 @@ impl fmt::Display for IndexResult { ) } } + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} From 49db2944e290fff0251dfa9b0a607f5962e79281 Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 19:23:07 -0700 Subject: [PATCH 6/8] Basic test --- src/index_result.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/index_result.rs b/src/index_result.rs index 6a5120f..5843f9f 100644 --- a/src/index_result.rs +++ b/src/index_result.rs @@ -48,8 +48,21 @@ impl fmt::Display for IndexResult { #[cfg(test)] mod tests { + use std::array::IntoIter; + use std::{collections::HashMap, iter::FromIterator}; + + use crate::index_result::IndexResult; + #[test] - fn it_works() { - assert_eq!(2 + 2, 4); + fn single_keyword() { + let keywords = HashMap::<_, _>::from_iter(IntoIter::new([("key1".to_string(), 1)])); + + let result = IndexResult::new( + "1".to_string(), + "title".to_string(), + "excerpt".to_string(), + keywords, + ); + assert_eq!(result.top_n_keywords(10).len(), 1); } } From 64eaa991f760e12f53fc42410925ffdccf24bc4b Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Wed, 27 Oct 2021 19:32:23 -0700 Subject: [PATCH 7/8] Peppered with some tests --- src/index_result.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/index_result.rs b/src/index_result.rs index 5843f9f..c151e2a 100644 --- a/src/index_result.rs +++ b/src/index_result.rs @@ -65,4 +65,37 @@ mod tests { ); assert_eq!(result.top_n_keywords(10).len(), 1); } + #[test] + + fn all_keywords() { + let keywords = HashMap::<_, _>::from_iter(IntoIter::new([ + ("key1".to_string(), 1), + ("key2".to_string(), 2), + ])); + + let result = IndexResult::new( + "1".to_string(), + "title".to_string(), + "excerpt".to_string(), + keywords, + ); + assert_eq!(result.top_n_keywords(2).len(), 2); + } + + #[test] + fn subset_of_keywords() { + let keywords = HashMap::<_, _>::from_iter(IntoIter::new([ + ("key1".to_string(), 1), + ("key2".to_string(), 2), + ("key2".to_string(), 3), + ])); + + let result = IndexResult::new( + "1".to_string(), + "title".to_string(), + "excerpt".to_string(), + keywords, + ); + assert_eq!(result.top_n_keywords(2).len(), 2); + } } From f67c1959312f05542892bf4d422b465a4ba4865c Mon Sep 17 00:00:00 2001 From: Conor Brady Date: Fri, 29 Oct 2021 16:29:35 -0700 Subject: [PATCH 8/8] Adds criterion and benchmark setup --- Cargo.lock | 275 +++++++++++++++++++++++++++++++++++++- Cargo.toml | 9 +- benches/test_benchmark.rs | 17 +++ 3 files changed, 297 insertions(+), 4 deletions(-) create mode 100644 benches/test_benchmark.rs diff --git a/Cargo.lock b/Cargo.lock index a689c90..f499f18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -389,6 +389,18 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "771fe0050b883fcc3ea2359b1a96bcfbc090b7116eae7c3c512c7a083fdf23d3" +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.8.0" @@ -419,6 +431,15 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "631ae5198c9be5e753e5cc215e1bd73c2b466a3565173db433f52bb9d3e66dba" +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version 0.4.0", +] + [[package]] name = "cc" version = "1.0.71" @@ -505,6 +526,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + [[package]] name = "colored" version = "1.9.3" @@ -571,6 +603,76 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "criterion" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.5" @@ -624,6 +726,28 @@ dependencies = [ "syn", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "ctor" version = "0.1.21" @@ -712,7 +836,7 @@ dependencies = [ "convert_case", "proc-macro2", "quote", - "rustc_version", + "rustc_version 0.3.3", "syn", ] @@ -1106,6 +1230,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.11.2" @@ -1347,6 +1477,7 @@ version = "0.1.0" dependencies = [ "chashmap", "cid", + "criterion", "ctrlc", "futures", "libp2p", @@ -2233,6 +2364,12 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "opaque-debug" version = "0.3.0" @@ -2483,6 +2620,34 @@ version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" +[[package]] +name = "plotters" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + [[package]] name = "polling" version = "2.1.0" @@ -2782,6 +2947,31 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "rdrand" version = "0.4.0" @@ -2811,6 +3001,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.25" @@ -2894,7 +3090,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver 1.0.4", ] [[package]] @@ -2936,6 +3141,15 @@ dependencies = [ "cipher", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.19" @@ -3030,6 +3244,12 @@ dependencies = [ "semver-parser", ] +[[package]] +name = "semver" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" + [[package]] name = "semver-parser" version = "0.10.2" @@ -3048,6 +3268,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.130" @@ -3188,7 +3418,7 @@ dependencies = [ "rand 0.8.4", "rand_core 0.6.3", "ring", - "rustc_version", + "rustc_version 0.3.3", "sha2", "subtle", "x25519-dalek", @@ -3329,6 +3559,15 @@ dependencies = [ "utf-8", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thin-slice" version = "0.1.1" @@ -3365,6 +3604,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.5.0" @@ -3641,6 +3890,17 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -3815,6 +4075,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 760b100..6d8022b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,4 +14,11 @@ log = "0.4.14" simple_logger = "1.13.0" reqwest = { version = "0.11.6", features = ["gzip", "blocking"] } scraper = "0.12.0" -ctrlc = "3.2.1" \ No newline at end of file +ctrlc = "3.2.1" + +[dev-dependencies] +criterion = "0.3" + +[[bench]] +name = "test_benchmark" +harness = false diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs new file mode 100644 index 0000000..4aa1e5f --- /dev/null +++ b/benches/test_benchmark.rs @@ -0,0 +1,17 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn fibonacci(n: u64) -> u64 { + match n { + 0 => 1, + 1 => 1, + n => fibonacci(n - 1) + fibonacci(n - 2), + } +} + +fn criterion_benchmark(c: &mut Criterion) { + // TODO: replace with real benchmarks + c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches);