23 changes: 23 additions & 0 deletions .gitignore
@@ -0,0 +1,23 @@
# Generated by Cargo
/target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# Output files
*.csv
output/

# IDE specific files
.idea/
.vscode/
*.swp
*.swo
.DS_Store
12 changes: 12 additions & 0 deletions Cargo.toml
@@ -0,0 +1,12 @@
[package]
name = "rust-etl-code-test"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
csv = "1.2"
clap = { version = "4.4", features = ["derive"] }
tokio = { version = "1.0", features = ["full"] }
futures = "0.3"
54 changes: 54 additions & 0 deletions README.md
@@ -11,3 +11,57 @@ Given the sample data provided, convert to csv in the format specified:
- The program should accept inputs of unbounded size.
- The program should accept input from a file or STDIN.
- Output should be written to a file or STDOUT.

## Project Documentation

### Requirements

- Rust 1.70 or higher
- Cargo (Rust's package manager)

### Usage

The program can be run in several ways:

1. Process a file and output to a file:

```bash
cargo run --release -- -i sample.jsonl -o output.csv
```

2. Process from STDIN and output to STDOUT:

```bash
cargo run --release
```

3. Process a file and output to STDOUT:

```bash
cargo run --release -- -i sample.jsonl
```

4. Process from STDIN and output to a file:

```bash
cargo run --release -- -o output.csv
```

#### Performance Optimizations

- **Streaming Processing**: Records are handled one line at a time, so input size is effectively unbounded
- **Buffered I/O**: Input is read through a buffered reader, and the CSV writer buffers output internally
- **Memory Management**: Only the current line and record are held in memory at any point (see the sketch after this list)
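
A condensed sketch of this streaming pattern, written as a hypothetical standalone program: the file name and the per-line placeholder work are illustrative only; the real per-line work is the JSON-to-CSV conversion in `src/main.rs`.

```rust
use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Write};

fn main() -> io::Result<()> {
    // Buffered reader: input is pulled from the OS in large chunks, but only
    // one line is materialized in memory at a time.
    let reader = BufReader::new(File::open("sample.jsonl")?);
    // Buffered writer: output is accumulated and flushed in batches.
    let mut writer = BufWriter::new(io::stdout());

    for line in reader.lines() {
        let line = line?;
        // Placeholder per-line work; the actual program parses the JSON line
        // and serializes a CSV row here.
        writeln!(writer, "{} bytes", line.len())?;
    }
    writer.flush()
}
```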

#### Error Handling

- Malformed JSON lines are skipped instead of aborting the run
- Each parse error is reported to stderr with its line number
- A summary of lines processed, parse errors, and emitted records is printed when processing finishes (see the sketch after this list)
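
A minimal illustration of the skip-and-continue pattern, assuming an inline sample and a trimmed-down `Record` struct (both are placeholders; the real definitions are in `src/main.rs`).

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Record {
    name: String,
    billing_code: String,
}

fn main() {
    // One valid line and one malformed line: the malformed one is reported
    // on stderr and skipped, and processing continues.
    let lines = [
        r#"{"name":"example","billing_code":"0001"}"#,
        r#"{"name": 12"#,
    ];

    for (i, line) in lines.iter().enumerate() {
        match serde_json::from_str::<Record>(line) {
            Ok(record) => println!("parsed: {:?}", record),
            Err(e) => eprintln!("Error parsing line {}: {}", i + 1, e),
        }
    }
}
```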

### Dependencies

- `serde` / `serde_json`: For JSON deserialization
- `csv`: For CSV output
- `clap`: For command-line argument parsing
- `tokio`: For async runtime support
129 changes: 129 additions & 0 deletions src/main.rs
@@ -0,0 +1,129 @@
use clap::Parser;
use csv::Writer;
use serde::{Deserialize, Serialize};
use std::{
fs::File,
io::{self, BufRead, BufReader, Write},
path::PathBuf,
};

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Input file path (if not provided, reads from stdin)
#[arg(short, long)]
input: Option<PathBuf>,

/// Output file path (if not provided, writes to stdout)
#[arg(short, long)]
output: Option<PathBuf>,
}

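/// Minimal projection of one input JSONL record; fields not listed here are
/// ignored by serde's default deserialization behavior.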
#[derive(Debug, Deserialize)]
struct InputRecord {
name: String,
billing_code: String,
negotiated_rates: Vec<NegotiatedRate>,
}

#[derive(Debug, Deserialize)]
struct NegotiatedRate {
negotiated_prices: Vec<NegotiatedPrice>,
}

#[derive(Debug, Deserialize)]
struct NegotiatedPrice {
negotiated_rate: f64,
}

#[derive(Debug, Serialize)]
struct OutputRecord {
name: String,
billing_code: String,
avg_rate: f64,
}

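/// Returns the mean of every `negotiated_rate` across all `negotiated_rates`
/// entries of a record, or `None` if the record contains no prices.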
fn calculate_average_rate(record: &InputRecord) -> Option<f64> {
let mut total_rate = 0.0;
let mut rate_count = 0;

for negotiated_rate in &record.negotiated_rates {
for price in &negotiated_rate.negotiated_prices {
total_rate += price.negotiated_rate;
rate_count += 1;
}
}

if rate_count > 0 {
Some(total_rate / rate_count as f64)
} else {
None
}
}

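/// Streams JSONL records from `reader`, writing each record whose average
/// negotiated rate is at most 30.0 to `writer` as a CSV row. Malformed lines
/// are counted and reported without stopping the run.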
fn process_records(reader: Box<dyn BufRead>, writer: &mut Box<dyn Write>) -> io::Result<()> {
let mut csv_writer = Writer::from_writer(writer);
let mut stats = ProcessingStats::default();

for line in reader.lines() {
stats.total_lines += 1;
let line = line?;

match serde_json::from_str::<InputRecord>(&line) {
Ok(record) => {
if let Some(avg_rate) = calculate_average_rate(&record) {
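                    // Emit only records at or below the 30.0 average-rate threshold.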
if avg_rate <= 30.0 {
csv_writer.serialize(OutputRecord {
name: record.name,
billing_code: record.billing_code,
avg_rate,
})?;
stats.successful_records += 1;
}
}
}
            Err(e) => {
                stats.error_count += 1;
                // Report parse errors on stderr so they never interleave with CSV on stdout.
                eprintln!("Error parsing line {}: {}", stats.total_lines, e);
            }
}
}

print_processing_summary(&stats);

csv_writer.flush()?;
Ok(())
}

#[derive(Default)]
struct ProcessingStats {
total_lines: usize,
error_count: usize,
successful_records: usize,
}

/// Prints the run summary to stderr so it never interleaves with CSV written to stdout.
fn print_processing_summary(stats: &ProcessingStats) {
    eprintln!("\nProcessing Summary:");
    eprintln!("Total lines processed: {}", stats.total_lines);
    eprintln!("Parsing errors: {}", stats.error_count);
    eprintln!("Records with avg_rate <= 30: {}", stats.successful_records);
}

#[tokio::main]
async fn main() -> io::Result<()> {
let args = Args::parse();

// Setup input
let input: Box<dyn BufRead> = match args.input {
Some(path) => Box::new(BufReader::new(File::open(path)?)),
None => Box::new(BufReader::new(io::stdin())),
};

// Setup output
let mut output: Box<dyn Write> = match args.output {
Some(path) => Box::new(File::create(path)?),
None => Box::new(io::stdout()),
};

process_records(input, &mut output)
}