Skip to content

Commit 3dd640b

Browse files
update done to read the static files, clippy and deepsource fixes
1 parent b9f477b commit 3dd640b

File tree

4 files changed

+211
-363
lines changed

4 files changed

+211
-363
lines changed

Dockerfile

+2
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ LABEL org.opencontainers.image.licenses="AGPL-3.0"
2323

2424
WORKDIR /parseable
2525
COPY . .
26+
2627
RUN cargo build --release
2728

2829
# final stage
@@ -32,5 +33,6 @@ WORKDIR /parseable
3233

3334
# Copy the static shell into base image.
3435
COPY --from=builder /parseable/target/release/parseable /usr/bin/parseable
36+
COPY --from=builder /parseable/src/event/known-formats /parseable/src/event/known-formats
3537

3638
CMD ["/usr/bin/parseable"]

src/event/detect_schema.rs

+35-18
Original file line number | Diff line number | Diff line change
@@ -19,32 +19,49 @@
1919
use arrow_json::reader::infer_json_schema_from_iterator;
2020
use arrow_schema::Schema;
2121
use once_cell::sync::OnceCell;
22-
use std::collections::HashMap;
22+
use serde::Deserialize;
23+
use serde_json::Value;
24+
use std::{collections::HashMap, fs, path::Path};
2325

2426
use crate::{event::format::update_data_type_to_datetime, utils::json::flatten_json_body};
2527

2628
// Expose some static variables for internal usage
2729
pub static KNOWN_SCHEMA_LIST: OnceCell<HashMap<String, Schema>> = OnceCell::new();
30+
const FORMATS_JSON: &str = include_str!("known-formats/formats.json");
31+
32+
#[derive(Debug, Deserialize)]
33+
struct Format {
34+
name: String,
35+
schema_type: String,
36+
sample_json_path: String,
37+
}
2838

2939
pub fn detect_schema() -> HashMap<String, Schema> {
3040
let mut known_schema_list: HashMap<String, Schema> = HashMap::new();
31-
//read file formats.json
32-
let formats_file = std::fs::File::open("src/event/known-formats/formats.json").unwrap();
33-
let formats_reader = std::io::BufReader::new(formats_file);
34-
let formats: serde_json::Value = serde_json::from_reader(formats_reader).unwrap();
35-
//iterate over the formats
36-
for format in formats.as_array().unwrap() {
37-
let schema_type = format["schema_type"].as_str().unwrap();
38-
let sample_json_path = format["sample_json_path"].as_str().unwrap();
39-
let sample_file = std::fs::File::open(sample_json_path).unwrap();
40-
let sample_reader = std::io::BufReader::new(sample_file);
41-
let sample_json: serde_json::Value = serde_json::from_reader(sample_reader).unwrap();
42-
let flattened_json = flatten_json_body(sample_json, None, None, None, false).unwrap();
43-
let sample_json_records = [flattened_json.clone()];
44-
let mut schema =
45-
infer_json_schema_from_iterator(sample_json_records.iter().map(Ok)).unwrap();
46-
schema = update_data_type_to_datetime(schema, flattened_json, Vec::new());
47-
known_schema_list.insert(schema_type.to_string(), schema);
41+
let json_data: serde_json::Value = serde_json::from_str(FORMATS_JSON).unwrap();
42+
43+
let formats: Vec<Format> =
44+
serde_json::from_value(json_data).expect("Failed to parse formats.json");
45+
46+
for format in &formats {
47+
let sample_path = Path::new(&format.sample_json_path);
48+
let schema_type = &format.schema_type;
49+
let _name = &format.name;
50+
match fs::read_to_string(sample_path) {
51+
Ok(content) => match serde_json::from_str::<Value>(&content) {
52+
Ok(json) => {
53+
let flattened_json = flatten_json_body(json, None, None, None, false).unwrap();
54+
let sample_json_records = [flattened_json.clone()];
55+
let mut schema =
56+
infer_json_schema_from_iterator(sample_json_records.iter().map(Ok))
57+
.unwrap();
58+
schema = update_data_type_to_datetime(schema, flattened_json, Vec::new());
59+
known_schema_list.insert(schema_type.to_string(), schema);
60+
}
61+
Err(err) => eprintln!("Invalid JSON in {}: {}", sample_path.display(), err),
62+
},
63+
Err(err) => eprintln!("Failed to read {}: {}", sample_path.display(), err),
64+
}
4865
}
4966
prepare_known_schema_list(known_schema_list.clone());
5067
known_schema_list

0 commit comments

Comments
 (0)