Skip to content

Commit

Permalink
Add format and make path positional
Browse files Browse the repository at this point in the history
  • Loading branch information
guywaldman committed Jul 13, 2019
1 parent fc9b082 commit 81f2029
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 98 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2018"

[dependencies]
avro-rs = "0.6"
csv = "1.1"
failure = "0.1.5"
glob = "0.3.0"
prettytable-rs = "0.8"
Expand Down
81 changes: 47 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,48 +33,61 @@ They can be downloaded from the [releases](https://github.com/guywald1/ravro/rel
> # Retrieve all columns for a list of records
> ravro get .\test_assets\bttf.avro

+---------------+--------------+-------------+
| firstName | lastName | nickname |
+---------------+--------------+-------------+
| Marty | McFly | Marty |
+---------------+--------------+-------------+
| Emmett | Brown | Doc |
+---------------+--------------+-------------+
| Biff | Tannen | Biff |
+---------------+--------------+-------------+
+-----------+--------------+-----+
| firstName | lastName | age |
+-----------+--------------+-----+
| Marty | McFly | 24 |
+-----------+--------------+-----+
| Biff | Tannen | 72 |
+-----------+--------------+-----+
| Emmett | Brown | 65 |
+-----------+--------------+-----+
| Loraine | Baines-McFly | 62 |
+-----------+--------------+-----+

> # Search (using regular expressions)
> ravro get .\test_assets\bttf.avro --search McFly

+---------------+--------------+-------------+
| firstName | lastName | nickname |
+---------------+--------------+-------------+
| Marty | McFly | Marty | # McFly should appear in bold green here
+---------------+--------------+-------------+
+-----------+--------------+-----+
| firstName | lastName | age |
+-----------+--------------+-----+
| Marty | McFly | 24 | # the second field will appear in bold green here
+-----------+--------------+-----+
| Loraine | Baines-McFly | 62 | # the second field will appear in bold green here
+-----------+--------------+-----+

> # Select only some columns
> ravro get .\test_assets\bttf.avro --fields firstName nickname

+---------------+--------------+
| firstName | nickname |
+---------------+--------------+
| Marty | Marty |
+---------------+--------------+
| Emmett | Doc |
+---------------+--------------+
| Biff | Biff |
+---------------+--------------+
> ravro get .\test_assets\bttf.avro --fields firstName age

+-----------+-----+
| firstName | age |
+-----------+-----+
| Marty | 24 |
+-----------+-----+
| Biff | 72 |
+-----------+-----+
| Emmett | 65 |
+-----------+-----+
| Loraine | 62 |
+-----------+-----+

> # Select only the first 2 records
> ravro get .\test_assets\bttf*.avro --fields firstName nickname --take 2

+---------------+--------------+
| firstName | nickname |
+---------------+--------------+
| Marty | Marty |
+---------------+--------------+
| Emmett | Doc |
+---------------+--------------+
> ravro get .\test_assets\bttf*.avro --fields firstName age --take 2

+-----------+-----+
| firstName | age |
+-----------+-----+
| Marty | 24 |
+-----------+-----+
| Biff | 72 |
+-----------+-----+

> # Output as CSV
> ravro get .\test_assets\bttf*.avro --fields firstName age --take 2 --format csv

firstName,age
Marty,24
Biff,72
```

## Options
Expand Down
6 changes: 4 additions & 2 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use glob::glob;
use std::fs;
use std::path::PathBuf;
use crate::avro_value::AvroValue;

pub(crate) const CODEC_DEFLATE: &'static str = "deflate";
pub(crate) type AvroData = Vec<Vec<AvroColumnarValue>>;

#[derive(Debug)]
pub(crate) struct AvroFile {
Expand Down Expand Up @@ -98,7 +100,7 @@ impl CliService {
/// # Arguments
/// * `fields_to_get` - Names of the columns to retrieve
/// * `take` - Number of rows to take
pub fn get_fields(&self, fields_to_get: Vec<String>, take: Option<u32>) -> Vec<Vec<AvroColumnarValue>> {
pub fn get_fields(&self, fields_to_get: &[String], take: Option<u32>) -> Vec<Vec<AvroColumnarValue>> {
let mut extracted_fields = Vec::new();
for file in &self.files {
let reader = Reader::new(&file.data[..])
Expand All @@ -112,7 +114,7 @@ impl CliService {
let row = row.expect(&format!("Could not parse row {} from the Avro", i));
if let Value::Record(fields) = row {
let mut extracted_fields_for_row = Vec::new();
for field_name in &fields_to_get {
for field_name in fields_to_get {
let field_value_to_insert =
match fields.iter().find(|(n, _)| n == field_name) {
Some((field_name, field_value)) => {
Expand Down
154 changes: 92 additions & 62 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use failure::Error;
use prettytable::{color, Attr, Cell, Row, Table};
use regex::Regex;
use structopt::StructOpt;
use cli::{CliService, AvroColumnarValue};
use cli::{CliService, AvroColumnarValue, AvroData};
use avro_value::AvroValue;

mod avro_value;
Expand All @@ -19,14 +19,13 @@ enum RavroArgs {
#[structopt(name = "get")]
/// Get fields from an Avro file
Get {
/// Files to process
path: String,

/// Names of the fields to get
#[structopt(short = "f", long = "fields")]
fields_to_get: Vec<String>,

/// Files to process
#[structopt(short = "p", long = "path")]
path: String,

/// Codec to uncompress with.
/// Can be omitted or "deflate"
#[structopt(short = "c", long = "codec")]
Expand All @@ -38,7 +37,12 @@ enum RavroArgs {

/// Maximum number of records to show
#[structopt(short = "t", long = "take")]
take: Option<u32>
take: Option<u32>,

/// Output format.
/// Omit for pretty table output, or specify: "csv"
#[structopt(short = "p", long = "format")]
output_format: Option<String>
},
}

Expand All @@ -49,7 +53,8 @@ fn main() -> Result<(), Error> {
path,
search,
codec,
take
take,
output_format
} => {
let avro = CliService::from(path, codec);
let fields_to_get = if fields_to_get.is_empty() {
Expand All @@ -58,65 +63,90 @@ fn main() -> Result<(), Error> {
fields_to_get
};

let mut table = Table::new();
let data = avro.get_fields(&fields_to_get, take);

let header_cells: Vec<Cell> = fields_to_get
.iter()
.map(|f| {
Cell::new(f)
.with_style(Attr::Bold)
.with_style(Attr::ForegroundColor(color::BLUE))
.with_style(Attr::Underline(true))
})
.collect();
table.add_row(Row::new(header_cells));

let rows = avro.get_fields(fields_to_get, take);
let filtered_rows: Vec<Vec<AvroColumnarValue>> = rows
.into_iter()
.filter(|r| {
r.iter()
.find(|v| match &search {
None => true,
Some(search) => {
let search =
Regex::new(&search).expect("Regular expression is invalid");
search.is_match(&v.value().to_string())
}
})
.is_some()
})
.collect();

for fields_for_row in filtered_rows {
let row_cells: Vec<Cell> = fields_for_row
.iter()
.filter_map(|v: &AvroColumnarValue| {
let value_str = v.value().to_string();
let mut cell = Cell::new(&value_str);
if let Some(search) = &search {
let search =
Regex::new(&search).expect("Regular expression is invalid");
if search.is_match(&value_str) {
cell.style(Attr::Bold);
cell.style(Attr::ForegroundColor(color::GREEN));
}
}

match v.value() {
AvroValue::Na => cell.style(Attr::ForegroundColor(color::RED)),
_ => {}
}

Some(cell)
})
.collect();
table.add_row(Row::new(row_cells));
match output_format {
None => print_as_table(&fields_to_get, data, search),
Some(format_option) => match format_option.as_ref() {
"csv" => print_as_csv(&fields_to_get, data).expect("Could not print Avro as CSV"),
_ => panic!("Output format not recognized")
}
}

table.printstd();
}
}

Ok(())
}

/// Prints the extracted Avro data to standard output as a pretty table.
///
/// # Arguments
/// * `field_names` - Column names, rendered as a bold, blue, underlined header row
/// * `data` - Rows of extracted values, one inner `Vec` per record
/// * `search` - Optional regular expression; when present, only rows with at
///   least one matching value are printed and matching cells are shown in
///   bold green
///
/// # Panics
/// Panics with "Regular expression is invalid" if `search` is not a valid
/// regular expression. The pattern is compiled once up front, so an invalid
/// pattern is reported even when `data` is empty.
fn print_as_table(field_names: &[String], data: AvroData, search: Option<String>) {
    // Compile the pattern a single time instead of once per cell.
    let search_regex = search
        .as_ref()
        .map(|pattern| Regex::new(pattern).expect("Regular expression is invalid"));

    let mut table = Table::new();

    let header_cells: Vec<Cell> = field_names
        .iter()
        .map(|f| {
            Cell::new(f)
                .with_style(Attr::Bold)
                .with_style(Attr::ForegroundColor(color::BLUE))
                .with_style(Attr::Underline(true))
        })
        .collect();
    table.add_row(Row::new(header_cells));

    // A row is kept when at least one of its values matches the search (or,
    // without a search, when it has at least one value at all). Rows with no
    // values are therefore never printed.
    let matching_rows = data.into_iter().filter(|row| {
        row.iter().any(|v| match &search_regex {
            None => true,
            Some(re) => re.is_match(&v.value().to_string()),
        })
    });

    for fields_for_row in matching_rows {
        let row_cells: Vec<Cell> = fields_for_row
            .iter()
            .map(|v: &AvroColumnarValue| {
                let value_str = v.value().to_string();
                let mut cell = Cell::new(&value_str);

                // Highlight the cells that caused the row to match.
                if let Some(re) = &search_regex {
                    if re.is_match(&value_str) {
                        cell.style(Attr::Bold);
                        cell.style(Attr::ForegroundColor(color::GREEN));
                    }
                }

                // Missing values are flagged in red.
                if let AvroValue::Na = v.value() {
                    cell.style(Attr::ForegroundColor(color::RED));
                }

                cell
            })
            .collect();
        table.add_row(Row::new(row_cells));
    }

    table.printstd();
}

/// Writes the extracted Avro data to standard output as CSV.
///
/// The first record written is the header made of `field_names`; every row of
/// `data` follows as one CSV record, each value rendered via `to_string()`.
///
/// # Errors
/// Returns an error if writing a record or flushing the writer fails.
fn print_as_csv(field_names: &[String], data: AvroData) -> Result<(), Box<dyn std::error::Error>> {
    let stdout = std::io::stdout();
    let mut writer = csv::Writer::from_writer(stdout);

    // Header record
    writer.write_record(field_names)?;

    // One CSV record per Avro row
    for record in data {
        let cells = record
            .iter()
            .map(|column: &AvroColumnarValue| column.value().to_string());
        writer.write_record(cells)?;
    }

    // Flush explicitly so that write errors surface instead of being lost on drop.
    writer.flush()?;
    Ok(())
}

0 comments on commit 81f2029

Please sign in to comment.