From 051ef62d6f5370749db2aed028c8d497f92c0c77 Mon Sep 17 00:00:00 2001 From: Stephane Raux Date: Thu, 12 Jun 2025 22:36:54 -0500 Subject: [PATCH] Initial implementation --- .gitignore | 1 + Cargo.lock | 335 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 12 ++ README.md | 36 ++++-- src/main.rs | 185 +++++++++++++++++++++++++++++ 5 files changed, 561 insertions(+), 8 deletions(-) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..8f8d469 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,335 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "clap" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rust-etl-code-test" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "csv", + "serdapt", + "serde", + "serde_json", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serdapt" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9ac629b791a8c244dbe0dbca6569ed9e6284de7c98c41911e87ac0185c435f5" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 
+dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version 
= "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a78aa47 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "rust-etl-code-test" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.98" +clap = { version = "4.5.40", features = ["derive"] } +csv = "1.3.1" +serdapt = "0.1.1" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" diff --git a/README.md b/README.md index 1003148..d454263 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,33 @@ # Rust ETL Code Test -Fork this repo for this test. When you are done submit a PR against this repo. +## Purpose -Given the sample data provided, convert to csv in the format specified: +This command-line tool transforms a JSONL billing report into a CSV file containing the average rate +for each record, excluding records with an average rate greater than 30. -`name, billing_code, avg_rate` where `avg_rate` is the average of all `negotiated_rate` values for each record. Exclude records with an `avg_rate` greater than 30. +## Requirements -- Feel free to use any tools or libraries of your choice. -- The program should be as fast as possible. -- The program should accept inputs of unbounded size. -- The program should accept input from a file or STDIN. -- Output should be written to a file or STDOUT. +- Rust 1.87.0 or newer +- Internet connection for cargo to fetch dependencies + +## Run + +```sh +cargo run --release < sample.jsonl +``` + +## Run tests + +```sh +cargo test +``` + +## Help + +```sh +cargo run -- --help +``` + +## Notes + +- Every error in the input is considered fatal. This could easily be changed if that is undesirable. 
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..84489e6
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,206 @@
+use anyhow::Context;
+use clap::Parser;
+use serde::{Deserialize, Serialize};
+use std::{
+    fs::File,
+    io::{BufRead, BufReader, BufWriter, Write},
+    ops::Add,
+    path::{Path, PathBuf},
+};
+
+/// One output row: parsed from a JSONL record and written out as a CSV row.
+#[derive(Debug, Deserialize, PartialEq, Serialize)]
+struct Record {
+    name: String,
+    billing_code: String,
+    /// Average of all `negotiated_rate` values found under the input's `negotiated_rates`
+    /// key, or `None` when there are none.
+    // NOTE(review): adapter type arguments inferred from the `From`/`Add` impls in this file;
+    // confirm against the serdapt documentation.
+    #[serde(
+        deserialize_with = "serdapt::From::<AccumulatedRate, serdapt::Fold<NegotiatedRate>>::deserialize",
+        rename(deserialize = "negotiated_rates")
+    )]
+    avg_rate: Option<f64>,
+}
+
+/// Running sum and count of negotiated rates, from which the average is derived.
+#[derive(Debug, Default)]
+struct AccumulatedRate {
+    rate: f64,
+    count: u64,
+}
+
+/// Turns the accumulated total into an average; `None` if nothing was accumulated.
+impl From<AccumulatedRate> for Option<f64> {
+    fn from(value: AccumulatedRate) -> Self {
+        if value.count == 0 {
+            None
+        } else {
+            Some(value.rate / value.count as f64)
+        }
+    }
+}
+
+/// Merges the sum and count already accumulated for one `negotiated_rates` entry.
+impl Add<NegotiatedRate> for AccumulatedRate {
+    type Output = Self;
+
+    fn add(self, rhs: NegotiatedRate) -> Self::Output {
+        Self {
+            rate: self.rate + rhs.negotiated_prices.rate,
+            count: self.count + rhs.negotiated_prices.count,
+        }
+    }
+}
+
+/// Adds a single price to the running sum and bumps the count by one.
+impl Add<NegotiatedPrice> for AccumulatedRate {
+    type Output = Self;
+
+    fn add(self, rhs: NegotiatedPrice) -> Self::Output {
+        Self {
+            rate: self.rate + rhs.negotiated_rate,
+            count: self.count + 1,
+        }
+    }
+}
+
+/// One element of a record's `negotiated_rates` array.
+#[derive(Debug, Deserialize)]
+struct NegotiatedRate {
+    // NOTE(review): adapter type argument inferred from `Add<NegotiatedPrice>`; confirm.
+    #[serde(with = "serdapt::Fold::<NegotiatedPrice>")]
+    negotiated_prices: AccumulatedRate,
+}
+
+/// One element of a `negotiated_prices` array.
+#[derive(Debug, Deserialize)]
+struct NegotiatedPrice {
+    negotiated_rate: f64,
+}
+
+/// Extract billing information from JSONL input and outputs records in CSV format
+#[derive(Debug, Parser)]
+struct Cli {
+    /// Input file to read JSONL from (defaults to stdin)
+    #[arg(short, long)]
+    input: Option<PathBuf>,
+    /// Output file to write CSV to (defaults to stdout)
+    #[arg(short, long)]
+    output: Option<PathBuf>,
+}
+
+/// Parses the command line and runs the conversion on the selected input and output.
+fn main() -> anyhow::Result<()> {
+    let cli = Cli::parse();
+    // Each arm calls `process` with a different concrete reader/writer pair.
+    match (cli.input, cli.output) {
+        (None, None) => process(std::io::stdin().lock(), std::io::stdout().lock()),
+        (None, Some(output)) => process(std::io::stdin().lock(), open_output(&output)?),
+        (Some(input), None) => process(open_input(&input)?, std::io::stdout().lock()),
+        (Some(input), Some(output)) => process(open_input(&input)?, open_output(&output)?),
+    }
+}
+
+/// Opens `p` for buffered reading; the error names the path on failure.
+fn open_input(p: &Path) -> anyhow::Result<BufReader<File>> {
+    Ok(BufReader::new(File::open(p).with_context(|| {
+        format!("failed to open {}", p.display())
+    })?))
+}
+
+/// Creates `p` via `File::create` for buffered writing; the error names the path on failure.
+fn open_output(p: &Path) -> anyhow::Result<BufWriter<File>> {
+    Ok(BufWriter::new(File::create(p).with_context(|| {
+        format!("failed to open {}", p.display())
+    })?))
+}
+
+/// Writes every record of `input` whose average rate is at most 30 to `output` as CSV.
+///
+/// Records without an average are skipped. The first read or parse error aborts processing
+/// and is reported together with its 1-based line number.
+fn process<I, O>(input: I, output: O) -> anyhow::Result<()>
+where
+    I: BufRead,
+    O: Write,
+{
+    let mut output = csv::Writer::from_writer(output);
+    for (i, r) in records(input).enumerate() {
+        let r = r.with_context(|| format!("error on line {}", i + 1))?;
+        if r.avg_rate.is_some_and(|r| r <= 30.0) {
+            output.serialize(r).context("failed to write record")?;
+        }
+    }
+    output.flush()?;
+    Ok(())
+}
+
+/// Lazily parses each line of `input` as one JSON-encoded `Record`.
+fn records<I>(input: I) -> impl Iterator<Item = anyhow::Result<Record>>
+where
+    I: BufRead,
+{
+    input.lines().map(|line| {
+        let line = line.context("failed to read line")?;
+        serde_json::from_str(&line).context("failed to parse record")
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Record;
+    use serde_json::json;
+
+    #[test]
+    fn average_is_calculated() {
+        let input = json!({
+            "name": "alpha",
+            "billing_code": "1",
+            "negotiated_rates": [
+                {
+                    "negotiated_prices": [
+                        {
+                            "negotiated_rate": 10,
+                        },
+                    ],
+                },
+                {
+                    "negotiated_prices": [],
+                },
+                {
+                    "negotiated_prices": [
+                        {
+                            "negotiated_rate": 20,
+                        },
+                        {
+                            "negotiated_rate": 60,
+                        },
+                    ],
+                },
+            ],
+        });
+
+        let expected = Record {
+            name: "alpha".into(),
+            billing_code: "1".into(),
+            avg_rate: Some(30.0),
+        };
+
+        let actual = serde_json::from_value::<Record>(input).unwrap();
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn average_is_none_when_no_rates() {
+        let input = json!({
+            "name": "alpha",
+            "billing_code": "1",
+            "negotiated_rates": [],
+        });
+
+        let actual = serde_json::from_value::<Record>(input).unwrap();
+        assert_eq!(actual.avg_rate, None);
+    }
+}