Skip to content

Commit

Permalink
Prep for release
Browse files Browse the repository at this point in the history
  • Loading branch information
guywaldman committed Jul 10, 2019
1 parent b32cc7d commit 8d66d58
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 36 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

temp
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ authors = ["Guy Waldman <[email protected]>"]
edition = "2018"

[dependencies]
structopt = "0.2"
prettytable-rs = "0.8"
avro-rs = "0.6"
regex = "1"
failure = "0.1.5"
serde = { version = "1.0", features = ["derive"] }
glob = "0.3.0"
prettytable-rs = "0.8"
regex = "1"
serde = { version = "1.0", features = ["derive"] }
structopt = "0.2"
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

A CLI for Avro files, written in Rust.

![Screenshot](./assets/screenshot.png)

> **⚠ Under heavy development ⚠**
>
> Please use at your own discretion.
Expand All @@ -25,6 +27,7 @@ There are existing compiled binaries for Windows at the moment.
## Usage

```shell
> # Retrieve all columns for a list of records
> rargo get .\bttf.avro

+---------------+--------------+-------------+
Expand All @@ -37,14 +40,16 @@ There are existing compiled binaries for Windows at the moment.
| Biff | Tannen | Biff |
+---------------+--------------+-------------+

> # Search (using regular expressions)
> rargo get .\bttf.avro --search McFly

+---------------+--------------+-------------+
| firstName | lastName | nickname |
+---------------+--------------+-------------+
| Marty | McFly | Marty | # McFly should appear in bold here
| Marty | McFly | Marty | # McFly should appear in bold green here
+---------------+--------------+-------------+

> # Select only some columns
> rargo get .\bttf.avro --fields firstName nickname

+---------------+--------------+
Expand All @@ -56,4 +61,16 @@ There are existing compiled binaries for Windows at the moment.
+---------------+--------------+
| Biff | Biff |
+---------------+--------------+
```
```

## Options

- `fields (f)` - select only the fields you wish to retrieve
- `path (p)` - a glob to one or multiple Avro files
- `search (s)` - A regular expression to filter and display only rows with columns that contain matching values. The matching fields will be highlighted.
- `codec (c)` - The codec for decompression - omit for no codec, or specify "deflate"

## Caveats

- Only supports top-level records right now
- Snappy not included
Binary file added assets/screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
85 changes: 61 additions & 24 deletions src/avro.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,66 @@
use avro_rs::types::Value;
use avro_rs::Reader;
use avro_rs::{Codec, Reader};
use std::fs;
use glob::glob;
use std::path::PathBuf;

pub(crate) const NULL: &'static str = "null";
pub(crate) const NA: &'static str = "N/A";
pub(crate) const CODEC_DEFLATE: &'static str = "deflate";

/// A single Avro file held in memory together with the path it came from.
#[derive(Debug)]
pub(crate) struct AvroFile {
/// Bytes read from the file at `path`, after being passed through the selected codec's `decompress`
data: Vec<u8>,
/// Path the file was read from; used when reporting read/parse errors
path: PathBuf
}

#[derive(Debug)]
pub(crate) struct Avro {
buf: Vec<u8>,
files: Vec<AvroFile>
}

impl Avro {
/// Creates an `Avro` as a union of all avros in the received paths
pub fn from(paths: Vec<PathBuf>) -> Self {
let mut buf: Vec<u8> = Vec::new();
for path in &paths {
buf.append(&mut fs::read(path).expect(&format!(
"Could not read from path {0}",
&path.to_str().unwrap()
)));
///
/// # Arguments
///
/// * `path` - A glob to match against Avro files to load
pub fn from(path: String, codec: Option<String>) -> Self {
let mut paths: Vec<PathBuf> = Vec::new();
for entry in glob(&path).expect("Failed to read glob pattern") {
match entry {
Ok(p) => paths.push(p),
Err(e) => panic!("{:?}", e),
}
}
Avro { buf }

if paths.len() == 0 {
panic!("No files found")
}

let mut codec_for_decompressing: Codec = Codec::Null;
// TODO: Add `Codec::Snappy`
if let Some(c) = codec {
if c == CODEC_DEFLATE {
codec_for_decompressing = Codec::Deflate;
}
}

let mut files: Vec<AvroFile> = Vec::new();
for path in paths {
let mut data = fs::read(&path).expect(&format!(
"Could not read from path {0}", path.display())
);
codec_for_decompressing.decompress(&mut data).expect("Could not successfully decompress Avro file. Make sure that the codec you specified is correct");
files.push(AvroFile { data, path });
}

Avro { files }
}

pub fn get_all_field_names(&self) -> Vec<String> {
let mut reader = Reader::new(&self.buf[..]).expect("Could not read joined Avro file");
let first_file = &self.files[0];
let mut reader = Reader::new(&first_file.data[..]).expect(&format!("Could not read Avro file {}", first_file.path.display()));
if let Ok(Value::Record(fields)) = reader.next().expect("Avro must have at least one record row to infer schema") {
fields.iter().map(|(f, _)| f.to_owned()).collect::<Vec<String>>()
} else {
Expand All @@ -34,21 +69,23 @@ impl Avro {
}

pub fn get_fields(&self, fields_to_get: Vec<String>) -> Vec<Vec<String>> {
let reader = Reader::new(&self.buf[..]).expect("Could not read joined Avro file");

let mut extracted_fields: Vec<Vec<String>> = Vec::new();
for (i, row) in reader.enumerate() {
let row = row.expect(&format!("Could not parse row {} from Avro", i));
if let Value::Record(fields) = row {
let mut extracted_fields_for_row: Vec<String> = Vec::new();
for field_name in &fields_to_get {
let field_value_to_insert = match fields.iter().find(|(n, _)| n == field_name) {
Some((_, val)) => format_avro_value(&val),
None => NA.to_owned()
};
extracted_fields_for_row.push(field_value_to_insert);
for file in &self.files {
let reader = Reader::new(&file.data[..]).expect(&format!("Could not read Avro file {}", file.path.display()));

for (i, row) in reader.enumerate() {
let row = row.expect(&format!("Could not parse row {} from the Avro", i));
if let Value::Record(fields) = row {
let mut extracted_fields_for_row: Vec<String> = Vec::new();
for field_name in &fields_to_get {
let field_value_to_insert = match fields.iter().find(|(n, _)| n == field_name) {
Some((_, val)) => format_avro_value(&val),
None => NA.to_owned()
};
extracted_fields_for_row.push(field_value_to_insert);
}
extracted_fields.push(extracted_fields_for_row);
}
extracted_fields.push(extracted_fields_for_row);
}
}
extracted_fields
Expand Down
18 changes: 12 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use avro::Avro;
use failure::Error;
use prettytable::{color, Attr, Cell, Row, Table};
use regex::Regex;
use std::path::PathBuf;
use structopt::StructOpt;

mod avro;
Expand All @@ -23,11 +22,16 @@ enum RavroArgs {
fields_to_get: Vec<String>,

/// Files to process
#[structopt(short = "p", long = "path", parse(from_os_str))]
paths: Vec<PathBuf>,
#[structopt(short = "p", long = "path")]
path: String,

/// Codec to uncompress with.
/// Can be omitted or "deflate"
#[structopt(short = "c", long = "codec")]
codec: Option<String>,

/// Regex to search. Only a row with a matching field will appear in the outputted table
#[structopt(short = "r", long = "search")]
#[structopt(short = "s", long = "search")]
search: Option<String>,
},
}
Expand All @@ -36,10 +40,11 @@ fn main() -> Result<(), Error> {
match RavroArgs::from_args() {
RavroArgs::Get {
fields_to_get,
paths,
path,
search,
codec
} => {
let avro = Avro::from(paths);
let avro = Avro::from(path, codec);
let fields_to_get = if fields_to_get.is_empty() {
avro.get_all_field_names()
} else {
Expand Down Expand Up @@ -84,6 +89,7 @@ fn main() -> Result<(), Error> {
let search = Regex::new(&search).expect("Regular expression is invalid");
if search.is_match(v) {
cell.style(Attr::Bold);
cell.style(Attr::ForegroundColor(color::GREEN));
}
}

Expand Down

0 comments on commit 8d66d58

Please sign in to comment.