Skip to content

Commit

Permalink
Refactor and add take
Browse files Browse the repository at this point in the history
  • Loading branch information
guywaldman committed Jul 12, 2019
1 parent 989f273 commit fc9b082
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 74 deletions.
25 changes: 19 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ They can be downloaded from the [releases](https://github.com/guywald1/ravro/rel

```shell
> # Retrieve all columns for a list of records
> ravro get .\bttf.avro
> ravro get .\test_assets\bttf.avro

+---------------+--------------+-------------+
| firstName | lastName | nickname |
Expand All @@ -44,7 +44,7 @@ They can be downloaded from the [releases](https://github.com/guywald1/ravro/rel
+---------------+--------------+-------------+

> # Search (using regular expressions)
> ravro get .\bttf.avro --search McFly
> ravro get .\test_assets\bttf.avro --search McFly

+---------------+--------------+-------------+
| firstName | lastName | nickname |
Expand All @@ -53,7 +53,7 @@ They can be downloaded from the [releases](https://github.com/guywald1/ravro/rel
+---------------+--------------+-------------+

> # Select only some columns
> ravro get .\bttf.avro --fields firstName nickname
> ravro get .\test_assets\bttf.avro --fields firstName nickname

+---------------+--------------+
| firstName | nickname |
Expand All @@ -64,17 +64,30 @@ They can be downloaded from the [releases](https://github.com/guywald1/ravro/rel
+---------------+--------------+
| Biff | Biff |
+---------------+--------------+

> # Select the first 2 records
> ravro get .\test_assets\bttf*.avro --fields firstName nickname --take 2

+---------------+--------------+
| firstName | nickname |
+---------------+--------------+
| Marty | Marty |
+---------------+--------------+
| Emmett | Doc |
+---------------+--------------+
```

## Options

- `fields (f)` - select only the fields you wish to retrieve
- `path (p)` - a glob to one or multiple Avro files
- `search (s)` - A regular expression to filter and display only rows with columns that contain matching values. The matching fields will be highlighed
- `fields (f)` - The list (separated by spaces) of the fields you wish to retrieve
- `path (p)` - The glob to one or multiple Avro files
- `search (s)` - The regular expression to filter and display only rows with columns that contain matching values. The matching fields will be highlighted
- `take (t)` - The number of records you wish to retrieve
- `codec (c)` - The codec for decompression - omit for no codec, or specify "deflate"

## TODO

- Extract CLI functionality into a library
- Configurable display formats (CSV, JSON, etc.)
- Avro generation from JSON
- Schema
Expand Down
84 changes: 84 additions & 0 deletions src/avro_value.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use avro_rs::types::Value;
use std::fmt;

/// Text rendered for an Avro `null` value.
pub(crate) const NULL: &str = "null";
/// Text rendered for a requested field that a record does not contain.
pub(crate) const NA: &str = "N/A";

/// A cell value produced when extracting fields from an Avro record.
///
/// Either wraps the decoded Avro value, or marks that the requested field
/// was not found in the record.
#[derive(Debug, Clone)]
pub(crate) enum AvroValue {
/// A value that was found in the record.
Value(Value),
/// Placeholder for a field that is missing from the record; displayed as `NA`.
Na
}


impl AvroValue {
    /// Wraps a decoded Avro `Value`.
    pub fn from(value: Value) -> Self {
        AvroValue::Value(value)
    }

    /// Creates the placeholder used when a requested field is absent.
    pub fn na() -> Self {
        AvroValue::Na
    }

    /// Returns the display text of this value.
    ///
    /// Deliberately goes through `format!` and the `Display` impl: calling
    /// `self.to_string()` here would resolve to this inherent method and
    /// recurse forever.
    pub fn to_string(&self) -> String {
        format!("{}", self)
    }
}

/// Formats a wrapped value through `format_avro_value` and the missing-field
/// placeholder as `NA`.
impl fmt::Display for AvroValue {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // The text is already fully rendered, so write it straight through.
            AvroValue::Value(v) => f.write_str(&format_avro_value(v)),
            AvroValue::Na => f.write_str(NA),
        }
    }
}

/// Renders an Avro `Value` as human-readable text.
///
/// * Scalars use their `Display` text; `Null` becomes `NULL`.
/// * `Enum` is rendered as `"<id> (<description>)"`.
/// * `Bytes` and `Fixed` list their bytes as decimal numbers joined by `", "`.
/// * `Array` elements are rendered recursively and joined by `", "`.
/// * `Map` and `Record` entries are rendered as `"<key>: <value>"` and joined
///   by `", "`.
/// * `Union` renders the value it wraps.
fn format_avro_value(value: &Value) -> String {
    match value {
        Value::Null => NULL.to_owned(),
        Value::Boolean(b) => b.to_string(),
        Value::Int(i) => i.to_string(),
        Value::Long(l) => l.to_string(),
        Value::Float(f) => f.to_string(),
        Value::Double(d) => d.to_string(),
        Value::String(s) => s.clone(),
        Value::Enum(id, desc) => format!("{} ({})", id, desc),
        Value::Bytes(b) => join_bytes(b),
        Value::Fixed(_, f) => join_bytes(f),
        Value::Array(a) => a
            .iter()
            .map(format_avro_value)
            .collect::<Vec<String>>()
            .join(", "),
        // NOTE(review): entry order follows the map's own iteration order,
        // which may not be stable between runs - confirm against avro_rs.
        Value::Map(m) => m
            .iter()
            .map(|(k, v)| format_entry(k, v))
            .collect::<Vec<String>>()
            .join(", "),
        Value::Record(m) => m
            .iter()
            .map(|(k, v)| format_entry(k, v))
            .collect::<Vec<String>>()
            .join(", "),
        Value::Union(u) => format_avro_value(u),
    }
}

/// Renders raw bytes as decimal numbers separated by `", "`.
fn join_bytes(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|n| n.to_string())
        .collect::<Vec<String>>()
        .join(", ")
}

/// Renders one `key: value` entry of a map or record.
fn format_entry(key: &str, value: &Value) -> String {
    format!("{}: {}", key, format_avro_value(value))
}
108 changes: 53 additions & 55 deletions src/avro_cli.rs → src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ use avro_rs::{Codec, Reader};
use glob::glob;
use std::fs;
use std::path::PathBuf;

pub(crate) const NULL: &'static str = "null";
pub(crate) const NA: &'static str = "N/A";
use crate::avro_value::AvroValue;
pub(crate) const CODEC_DEFLATE: &'static str = "deflate";

#[derive(Debug)]
Expand All @@ -15,11 +13,27 @@ pub(crate) struct AvroFile {
}

#[derive(Debug)]
pub(crate) struct AvroCli {
pub(crate) struct CliService {
files: Vec<AvroFile>,
}

impl AvroCli {
/// One cell of an extracted row: a field name paired with the value found
/// for that field (or the missing-field placeholder).
#[derive(Debug, Clone)]
pub(crate) struct AvroColumnarValue {
/// Name of the field this cell belongs to.
name: String,
/// Value extracted for the field.
value: AvroValue
}

impl AvroColumnarValue {
pub fn from(name: String, value: AvroValue) -> Self {
AvroColumnarValue { name, value }
}

pub fn value(&self) -> &AvroValue {
&self.value
}
}

impl CliService {
/// Creates an `Avro` as a union of all avros in the received paths
///
/// # Arguments
Expand Down Expand Up @@ -55,7 +69,7 @@ impl AvroCli {
files.push(AvroFile { data, path });
}

AvroCli { files }
CliService { files }
}

/// Get all the names of the columns.
Expand Down Expand Up @@ -83,21 +97,29 @@ impl AvroCli {
///
/// # Arguments
/// * `fields_to_get` - Names of the columns to retrieve
pub fn get_fields(&self, fields_to_get: Vec<String>) -> Vec<Vec<String>> {
/// * `take` - Number of rows to take
pub fn get_fields(&self, fields_to_get: Vec<String>, take: Option<u32>) -> Vec<Vec<AvroColumnarValue>> {
let mut extracted_fields = Vec::new();
for file in &self.files {
let reader = Reader::new(&file.data[..])
.expect(&format!("Could not read Avro file {}", file.path.display()));

for (i, row) in reader.enumerate() {
if extracted_fields.len() as u32 >= take.unwrap_or(u32::max_value()) {
break;
}

let row = row.expect(&format!("Could not parse row {} from the Avro", i));
if let Value::Record(fields) = row {
let mut extracted_fields_for_row = Vec::new();
for field_name in &fields_to_get {
let field_value_to_insert =
match fields.iter().find(|(n, _)| n == field_name) {
Some((_, val)) => format_avro_value(&val),
None => NA.to_owned(),
Some((field_name, field_value)) => {
let v = field_value.clone();
AvroColumnarValue::from(field_name.to_owned(), AvroValue::from(v))
},
None => AvroColumnarValue::from(field_name.to_owned(), AvroValue::na())
};
extracted_fields_for_row.push(field_value_to_insert);
}
Expand All @@ -109,51 +131,27 @@ impl AvroCli {
}
}

pub(crate) fn format_avro_value(value: &Value) -> String {
match value {
Value::Array(a) => format!(
"{}",
a.iter()
.map(|v| format_avro_value(v))
.collect::<Vec<String>>()
.join(", ")
),
Value::Bytes(b) => format!(
"{}",
b.iter()
.map(|n| format!("{}", n))
.collect::<Vec<String>>()
.join(", ")
),
Value::Boolean(b) => format!("{}", b),
Value::Double(d) => format!("{}", d),
Value::Enum(id, desc) => format!("{} ({})", id, desc),
Value::Fixed(_, f) => format!(
"{}",
f.iter()
.map(|n| format!("{}", n))
.collect::<Vec<String>>()
.join(", ")
),
Value::Float(f) => format!("{}", f),
Value::Int(i) => format!("{}", i),
Value::Long(l) => format!("{}", l),
Value::Map(m) => format!(
"{}",
m.iter()
.map(|(k, v)| format!("{}: {}", k, format_avro_value(v)))
.collect::<Vec<String>>()
.join(", ")
),
Value::Null => NULL.to_owned(),
Value::Record(m) => format!(
"{}",
m.iter()
.map(|(k, v)| format!("{}: {}", k, format_avro_value(v)))
.collect::<Vec<String>>()
.join(", ")
),
Value::String(s) => s.clone(),
Value::Union(u) => format_avro_value(&*u),

#[cfg(test)]
mod tests {
    use super::*;

    /// Avro fixture used by the tests; resolved against the working
    /// directory of the test run.
    const TEST_AVRO_PATH: &str = "./test_assets/bttf.avro";

    /// Builds a service over the fixture without a codec.
    fn test_service() -> CliService {
        CliService::from(TEST_AVRO_PATH.to_owned(), None)
    }

    #[test]
    fn test_get_all_field_names() {
        let cli = test_service();
        let field_names = cli.get_all_field_names();
        assert_eq!(field_names, vec!["firstName", "lastName", "age"]);
    }

    #[test]
    fn test_get_fields() {
        let cli = test_service();
        let fields_to_get = vec!["firstName".to_owned(), "age".to_owned()];
        let rows = cli.get_fields(fields_to_get, Some(2));

        // `take` caps the number of rows returned.
        assert!(rows.len() <= 2);
        // Every row holds exactly one cell per requested field.
        assert!(rows.iter().all(|row| row.len() == 2));
    }
}
34 changes: 21 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
///! A CLI for manipulating [AVRO](https://avro.apache.org/) files.
///!
///! This crate currently expects each line to be a [Record](https://avro.apache.org/docs/1.8.1/spec.html#schema_record).
use avro_cli::AvroCli;
use failure::Error;
use prettytable::{color, Attr, Cell, Row, Table};
use regex::Regex;
use structopt::StructOpt;
use cli::{CliService, AvroColumnarValue};
use avro_value::AvroValue;

mod avro_cli;
mod avro_value;
mod cli;

#[derive(StructOpt, Debug)]
#[structopt(name = "ravro")]
Expand All @@ -33,6 +35,10 @@ enum RavroArgs {
/// Regex to search. Only a row with a matching field will appear in the outputted table
#[structopt(short = "s", long = "search")]
search: Option<String>,

/// Maximum number of records to show
#[structopt(short = "t", long = "take")]
take: Option<u32>
},
}

Expand All @@ -43,8 +49,9 @@ fn main() -> Result<(), Error> {
path,
search,
codec,
take
} => {
let avro = AvroCli::from(path, codec);
let avro = CliService::from(path, codec);
let fields_to_get = if fields_to_get.is_empty() {
avro.get_all_field_names()
} else {
Expand All @@ -64,8 +71,8 @@ fn main() -> Result<(), Error> {
.collect();
table.add_row(Row::new(header_cells));

let rows = avro.get_fields(fields_to_get);
let filtered_rows: Vec<Vec<String>> = rows
let rows = avro.get_fields(fields_to_get, take);
let filtered_rows: Vec<Vec<AvroColumnarValue>> = rows
.into_iter()
.filter(|r| {
r.iter()
Expand All @@ -74,30 +81,31 @@ fn main() -> Result<(), Error> {
Some(search) => {
let search =
Regex::new(&search).expect("Regular expression is invalid");
search.is_match(v)
search.is_match(&v.value().to_string())
}
})
.is_some()
})
.collect();

for fields_for_row in filtered_rows {
let row_cells: Vec<Cell> = fields_for_row
.iter()
.filter_map(|v| {
let mut cell = Cell::new(v);
.filter_map(|v: &AvroColumnarValue| {
let value_str = v.value().to_string();
let mut cell = Cell::new(&value_str);
if let Some(search) = &search {
let search =
Regex::new(&search).expect("Regular expression is invalid");
if search.is_match(v) {
if search.is_match(&value_str) {
cell.style(Attr::Bold);
cell.style(Attr::ForegroundColor(color::GREEN));
}
}

if v == avro_cli::NULL {
cell.style(Attr::ForegroundColor(color::RED));
} else if v == avro_cli::NA {
cell.style(Attr::ForegroundColor(color::BRIGHT_RED));
match v.value() {
AvroValue::Na => cell.style(Attr::ForegroundColor(color::RED)),
_ => {}
}

Some(cell)
Expand Down
Binary file added test_assets/bttf.avro
Binary file not shown.
Binary file added test_assets/bttfCopy.avro
Binary file not shown.

0 comments on commit fc9b082

Please sign in to comment.