diff --git a/Cargo.toml b/Cargo.toml
index d3b8730..ec8556d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "dmap"
-version = "0.2.1"
+version = "0.3.0"
 edition = "2021"
 rust-version = "1.63.0"
 
diff --git a/pyproject.toml b/pyproject.toml
index b964138..bb456ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "darn-dmap"
-version = "0.2.1"
+version = "0.3.0"
 requires-python = ">=3.8"
 authors = [
     { name = "Remington Rohel" }
diff --git a/src/lib.rs b/src/lib.rs
index 2b8d1d5..75d8847 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -163,7 +163,7 @@ read_rust!(dmap);
 /// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax
 /// reading, respectively.
 macro_rules! read_py {
-    ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal, $lax_bytes_name:literal) => {
+    ($name:ident, $py_name:literal, $lax_name:literal, $bytes_name:literal, $lax_bytes_name:literal, $sniff_name:literal) => {
         paste! {
             #[doc = "Reads a `" $name:upper "` file, returning a list of dictionaries containing the fields." ]
             #[pyfunction]
@@ -219,29 +219,42 @@ macro_rules! read_py {
                     result.1,
                 ))
             }
+
+            #[doc = "Reads a `" $name:upper "` file, returning the first record." ]
+            #[pyfunction]
+            #[pyo3(name = $sniff_name)]
+            #[pyo3(text_signature = "(infile: str, /)")]
+            fn [< sniff_ $name _py >](infile: PathBuf) -> PyResult> {
+                Ok([< $name:camel Record >]::sniff_file(&infile) // parses only the file's first record
+                    .map_err(PyErr::from)? // surface the crate's error to Python as a `PyErr`
+                    .inner() // hand back the record's underlying data rather than the wrapper type
+                )
+            }
         }
     }
 }
 
-read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes", "read_iqdat_bytes_lax");
+read_py!(iqdat, "read_iqdat", "read_iqdat_lax", "read_iqdat_bytes", "read_iqdat_bytes_lax", "sniff_iqdat");
 read_py!(
     rawacf,
     "read_rawacf",
     "read_rawacf_lax",
     "read_rawacf_bytes",
-    "read_rawacf_bytes_lax"
+    "read_rawacf_bytes_lax",
+    "sniff_rawacf"
 );
 read_py!(
     fitacf,
     "read_fitacf",
     "read_fitacf_lax",
     "read_fitacf_bytes",
-    "read_fitacf_bytes_lax"
+    "read_fitacf_bytes_lax",
+    "sniff_fitacf"
 );
-read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes", "read_grid_bytes_lax");
-read_py!(map, "read_map", "read_map_lax", "read_map_bytes", "read_map_bytes_lax");
-read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes", "read_snd_bytes_lax");
-read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes", "read_dmap_bytes_lax");
+read_py!(grid, "read_grid", "read_grid_lax", "read_grid_bytes", "read_grid_bytes_lax", "sniff_grid");
+read_py!(map, "read_map", "read_map_lax", "read_map_bytes", "read_map_bytes_lax", "sniff_map");
+read_py!(snd, "read_snd", "read_snd_lax", "read_snd_bytes", "read_snd_bytes_lax", "sniff_snd");
+read_py!(dmap, "read_dmap", "read_dmap_lax", "read_dmap_bytes", "read_dmap_bytes_lax", "sniff_dmap");
 
 /// Checks that a list of dictionaries contains DMAP records, then appends to outfile.
/// @@ -359,5 +372,14 @@ fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(write_grid_bytes_py, m)?)?; m.add_function(wrap_pyfunction!(write_map_bytes_py, m)?)?; + // Sniff the first record + m.add_function(wrap_pyfunction!(sniff_dmap_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_iqdat_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_rawacf_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_fitacf_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_snd_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_grid_py, m)?)?; + m.add_function(wrap_pyfunction!(sniff_map_py, m)?)?; + Ok(()) } diff --git a/src/record.rs b/src/record.rs index 46837a2..3bc99e5 100644 --- a/src/record.rs +++ b/src/record.rs @@ -18,6 +18,36 @@ pub trait Record<'a>: /// Gets the underlying data of `self`. fn inner(self) -> IndexMap; + /// Reads from `dmap_data` and parses into `Vec`. + /// + /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. + fn read_first_record(mut dmap_data: impl Read) -> Result + where + Self: Sized, + Self: Send, + { + let mut buffer = [0; 8]; // record size should be an i32 of the data + let read_result = dmap_data.read(&mut buffer[..])?; + if read_result < buffer.len() { + return Err(DmapError::CorruptStream("Unable to read size of first record")) + } + + let rec_size = i32::from_le_bytes(buffer[4..8].try_into().unwrap()) as usize; // advance 4 bytes, skipping the "code" field + if rec_size <= 0 { + return Err(DmapError::InvalidRecord(format!( + "Record 0 starting at byte 0 has non-positive size {} <= 0", + rec_size + ))); + } + + let mut rec = vec![0; rec_size]; + rec[0..8].clone_from_slice(&buffer[..]); + dmap_data.read_exact(&mut rec[8..])?; + let first_rec = Self::parse_record(&mut Cursor::new(rec))?; + + Ok(first_rec) + } + /// Reads from `dmap_data` and parses into `Vec`. /// /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. 
@@ -171,6 +201,22 @@ pub trait Record<'a>:
         }
     }
 
+    /// Opens `infile` and parses just its first record as type `Self`.
+    fn sniff_file(infile: &PathBuf) -> Result<Self, DmapError>
+    where
+        Self: Sized,
+        Self: Send,
+    {
+        let handle = File::open(infile)?;
+        // A `.bz2` extension selects on-the-fly decompression; anything else is read as-is.
+        let compressed = matches!(infile.extension(), Some(ext) if ext == OsStr::new("bz2"));
+        if compressed {
+            Self::read_first_record(BzDecoder::new(handle))
+        } else {
+            Self::read_first_record(handle)
+        }
+    }
+
     /// Reads a record from `cursor`.
     fn parse_record(cursor: &mut Cursor>) -> Result
     where
diff --git a/tests/tests.rs b/tests/tests.rs
index 4d4aa6d..8266b66 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -92,6 +92,14 @@ macro_rules! make_test {
                 // Clean up tempfile
                 remove_file(&tempfile).expect("Unable to delete tempfile");
             }
+
+            #[test]
+            fn [< test_ $record_type _sniff >] () {
+                let path = PathBuf::from(format!("tests/test_files/test.{}", stringify!($record_type)));
+                let sniffed = [< $record_type:camel Record >]::sniff_file(&path).expect("Unable to sniff file");
+                let records = [< $record_type:camel Record >]::read_file(&path).expect("Unable to read file");
+                assert_eq!(sniffed, records[0])
+            }
         }
     };
 }