From 8b85a9ac70844d4dba5f7a96bbf00bfa295f9bae Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Wed, 3 Dec 2025 19:08:28 +0000 Subject: [PATCH 1/5] Add Cargo.lock to git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 4f5f7e3..ab5dc82 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ __pycache__/ # Built files target/ +# Rust dependency versions +Cargo.lock + From e4c8f960fb8542301cd592d90d345e58d532c75e Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 22 Jan 2026 18:43:38 +0000 Subject: [PATCH 2/5] Added ability to compress with bzip2 always. * Not limited to detecting file extension, can compress and return raw bytes as well. * Updated Python and Rust APIs accordingly. --- python/README.md | 14 ++++++++---- python/dmap/_wrapper.py | 50 +++++++++++++++++++++++++++-------------- src/compression.rs | 16 ++++++++++++- src/io.rs | 33 ++++++++++++++++----------- src/lib.rs | 47 ++++++++++++++++++++++++++------------ src/record.rs | 8 +++++-- tests/tests.rs | 6 ++--- 7 files changed, 120 insertions(+), 54 deletions(-) diff --git a/python/README.md b/python/README.md index 73e08e9..5a62240 100644 --- a/python/README.md +++ b/python/README.md @@ -80,8 +80,8 @@ fitacf_file = "path/to/file.bz2" data, _ = dmap.read_fitacf(fitacf_file) dmap.write_fitacf(data, "temp.fitacf.bz2") ``` -will read in the compressed file, then also write out a new compressed file. Note that compression on the writing side -will only be done when writing to file, as the detection is done based on the file extension of the output file. +will read in the compressed file, then also write out a new compressed file. You can also pass the argument `bz2=True` +to compress with `bzip2` regardless of file extension, or even to return compressed byte objects. ### Generic I/O dmap supports generic DMAP I/O, without verifying the field names and types. 
The file must still @@ -162,10 +162,10 @@ assert binary_data == raw_bytes ``` As a note, this binary data can be compressed ~2x typically using zlib, or with another compression utility. This is quite useful if sending data over a network where speed and bandwidth must be considered. Note that the binary writing functions -don't compress automatically, an external package like `zlib` or `bzip2` must be used. +can compress with bzip2 by passing `bz2=True` as an argument. ### File "sniffing" -If you only want to inspect a file, without actually needing access to all of the data, you can use the `read_[type]` +If you only want to inspect a file, without actually needing access to all the data, you can use the `read_[type]` functions in `"sniff"` mode. This will only read in the first record from a file, and works on both compressed and non-compressed files. Note that this mode does not work with bytes objects directly. @@ -174,3 +174,9 @@ import dmap path = "path/to/file" first_rec = dmap.read_dmap(path, mode="sniff") ``` + +### Reading only metadata fields +Each DMAP format consists of metadata and data fields. You can read only the metadata fields by passing `mode="metadata"` +to any of the reading functions. Note that the generic read function `read_dmap` will return all fields, as it by nature +has no knowledge of the underlying fields. Note also that the read functions operating on a file still read the entire +file into memory first, so reading metadata only does not significantly decrease read times. diff --git a/python/dmap/_wrapper.py b/python/dmap/_wrapper.py index d62ac8b..0a95dbb 100644 --- a/python/dmap/_wrapper.py +++ b/python/dmap/_wrapper.py @@ -73,7 +73,7 @@ def read_dispatcher( def write_dispatcher( - source: list[dict], fmt: str, outfile: Union[None, str] + source: list[dict], fmt: str, outfile: Union[None, str], bz2: bool, ) -> Union[None, bytes]: """ Writes DMAP data from `source` to either a `bytes` object or to `outfile`. 
@@ -88,15 +88,17 @@ def write_dispatcher( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]: raise ValueError( f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']" ) if outfile is None: - return getattr(dmap_rs, f"write_{fmt}_bytes")(source) + return getattr(dmap_rs, f"write_{fmt}_bytes")(source, bz2=bz2) elif isinstance(outfile, str): - getattr(dmap_rs, f"write_{fmt}")(source, outfile) + getattr(dmap_rs, f"write_{fmt}")(source, outfile, bz2=bz2) else: raise TypeError( f"invalid type for `outfile` {type(outfile)}: expected `str` or `None`" @@ -308,7 +310,7 @@ def read_snd( def write_dmap( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes DMAP data from `source` to either a `bytes` object or to `outfile`. @@ -321,12 +323,14 @@ def write_dmap( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "dmap", outfile) + return write_dispatcher(source, "dmap", outfile, bz2=bz2) def write_iqdat( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes IQDAT data from `source` to either a `bytes` object or to `outfile`. @@ -339,12 +343,14 @@ def write_iqdat( If `None`, returns the data as a `bytes` object. 
If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "iqdat", outfile) + return write_dispatcher(source, "iqdat", outfile, bz2=bz2) def write_rawacf( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes RAWACF data from `source` to either a `bytes` object or to `outfile`. @@ -357,12 +363,14 @@ def write_rawacf( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "rawacf", outfile) + return write_dispatcher(source, "rawacf", outfile, bz2=bz2) def write_fitacf( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes FITACF data from `source` to either a `bytes` object or to `outfile`. @@ -375,12 +383,14 @@ def write_fitacf( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. 
""" - return write_dispatcher(source, "fitacf", outfile) + return write_dispatcher(source, "fitacf", outfile, bz2=bz2) def write_grid( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes GRID data from `source` to either a `bytes` object or to `outfile`. @@ -393,12 +403,14 @@ def write_grid( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "grid", outfile) + return write_dispatcher(source, "grid", outfile, bz2=bz2) def write_map( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes MAP data from `source` to either a `bytes` object or to `outfile`. @@ -411,12 +423,14 @@ def write_map( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "map", outfile) + return write_dispatcher(source, "map", outfile, bz2=bz2) def write_snd( - source: list[dict], outfile: Union[None, str] = None + source: list[dict], outfile: Union[None, str] = None, bz2: bool = False, ) -> Union[None, bytes]: """ Writes SND data from `source` to either a `bytes` object or to `outfile`. @@ -429,5 +443,7 @@ def write_snd( If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path and data will be written to the filesystem. 
If the file ends in the `.bz2` extension, the data will be compressed using bzip2. + bz2: bool + If `True`, the data will be compressed with `bzip2`. """ - return write_dispatcher(source, "snd", outfile) + return write_dispatcher(source, "snd", outfile, bz2=bz2) diff --git a/src/compression.rs b/src/compression.rs index 6ba48a8..1b33268 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -2,15 +2,29 @@ //! //! Currently only supports bz2 compression detection. +use bzip2::read::BzEncoder; +use bzip2::Compression; use std::io::{Chain, Cursor, Error, Read}; +/// Compress bytes using [`bzip2::BzEncoder`]. +/// +/// # Errors +/// See [`Read::read_to_end`]. +pub(crate) fn compress_bz2(bytes: &[u8]) -> Result, Error> { + let mut out_bytes: Vec = vec![]; + let mut compressor = BzEncoder::new(bytes, Compression::best()); + compressor.read_to_end(&mut out_bytes)?; + + Ok(out_bytes) +} + type PartiallyReadStream = Chain, T>; /// Detects bz2 compression on the input `stream`. Returns a reader /// which includes all data from `stream`. /// /// # Errors -/// See [`std::io::Read::read_exact`]. +/// See [`Read::read_exact`]. pub(crate) fn detect_bz2(mut stream: T) -> Result<(bool, PartiallyReadStream), Error> where T: for<'a> Read, diff --git a/src/io.rs b/src/io.rs index bb09655..d4302ed 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,36 +1,43 @@ //! Utility functions for file operations. -use bzip2::{read::BzEncoder, Compression}; +use crate::compression::compress_bz2; use std::ffi::OsStr; use std::fs::{File, OpenOptions}; -use std::io::{Read, Write}; +use std::io::Write; use std::path::Path; /// Write bytes to file. /// /// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is -/// `.bz2`, the bytes will be compressed using bzip2 before being written. +/// `.bz2` or `bz2` is `true`, the bytes will be compressed using bzip2 before being written. 
/// /// # Errors -/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`std::fs::File::open`]. +/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`File::open`]. /// -/// If an error is encountered when compressing the bytes. -/// -/// If an error is encountered when writing the bytes to the filesystem. See [`std::io::Write::write_all`] +/// If an error is encountered when writing the bytes to the filesystem. See [`Write::write_all`] pub(crate) fn bytes_to_file>( bytes: Vec, outfile: P, + bz2: bool, ) -> Result<(), std::io::Error> { - let mut out_bytes: Vec = vec![]; let compress_file: bool = - matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2")); + bz2 || matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2")); let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?; if compress_file { - let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best()); - compressor.read_to_end(&mut out_bytes)?; + write_bytes_bz2(bytes, &mut file) } else { - out_bytes = bytes; + file.write_all(&bytes) } +} - file.write_all(&out_bytes) +/// Writes `bytes` to a [`Write`] implementor, compressing with [`bzip2::BzEncoder`] first. +/// +/// # Errors +/// From [`compress_bz2`] or [`Write::write_all`]. +pub(crate) fn write_bytes_bz2( + bytes: Vec, + writer: &mut W, +) -> Result<(), std::io::Error> { + let out_bytes: Vec = compress_bz2(&bytes)?; + writer.write_all(&out_bytes) } diff --git a/src/lib.rs b/src/lib.rs index fb70a27..83bd300 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,7 +62,7 @@ //! //! // Write the records to a file //! let out_path = PathBuf::from("tests/test_files/output.rawacf"); -//! RawacfRecord::write_to_file(&rawacf_data, &out_path)?; +//! RawacfRecord::write_to_file(&rawacf_data, &out_path, false)?; //! # std::fs::remove_file(out_path)?; //! # Ok(()) //! 
# } @@ -121,9 +121,10 @@ macro_rules! write_rust { pub fn [< try_write_ $type >]>( recs: Vec>, outfile: P, + bz2: bool, ) -> Result<(), DmapError> { let bytes = [< $type:camel Record >]::try_into_bytes(recs)?; - crate::io::bytes_to_file(bytes, outfile).map_err(DmapError::from) + crate::io::bytes_to_file(bytes, outfile, bz2).map_err(DmapError::from) } } } @@ -308,9 +309,14 @@ read_py!( /// does not know that typically `stid` is two bytes. #[pyfunction] #[pyo3(name = "write_dmap")] -#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] -fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> PyResult<()> { - try_write_dmap(recs, &outfile).map_err(PyErr::from) +#[pyo3(signature = (recs, outfile, /, bz2))] +#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")] +fn write_dmap_py( + recs: Vec>, + outfile: PathBuf, + bz2: bool, +) -> PyResult<()> { + try_write_dmap(recs, &outfile, bz2).map_err(PyErr::from) } /// Checks that a list of dictionaries contains valid DMAP records, then converts them to bytes. @@ -321,9 +327,17 @@ fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> Py /// does not know that typically `stid` is two bytes. #[pyfunction] #[pyo3(name = "write_dmap_bytes")] -#[pyo3(text_signature = "(recs: list[dict], /)")] -fn write_dmap_bytes_py(py: Python, recs: Vec>) -> PyResult> { - let bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?; +#[pyo3(signature = (recs, /, bz2))] +#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")] +fn write_dmap_bytes_py( + py: Python, + recs: Vec>, + bz2: bool, +) -> PyResult> { + let mut bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?; + if bz2 { + bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?; + } Ok(PyBytes::new(py, &bytes).into()) } @@ -334,18 +348,23 @@ macro_rules! write_py { #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then appends to outfile." 
] #[pyfunction] #[pyo3(name = $fn_name)] - #[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")] - fn [< write_ $name _py >](recs: Vec>, outfile: PathBuf) -> PyResult<()> { - [< try_write_ $name >](recs, &outfile).map_err(PyErr::from) + #[pyo3(signature = (recs, outfile, /, bz2))] + #[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")] + fn [< write_ $name _py >](recs: Vec>, outfile: PathBuf, bz2: bool) -> PyResult<()> { + [< try_write_ $name >](recs, &outfile, bz2).map_err(PyErr::from) } #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then converts them to bytes." ] #[doc = "Returns `list[bytes]`, one entry per record." ] #[pyfunction] #[pyo3(name = $bytes_name)] - #[pyo3(text_signature = "(recs: list[dict], /)")] - fn [< write_ $name _bytes_py >](py: Python, recs: Vec>) -> PyResult> { - let bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?; + #[pyo3(signature = (recs, /, bz2))] + #[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")] + fn [< write_ $name _bytes_py >](py: Python, recs: Vec>, bz2: bool) -> PyResult> { + let mut bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?; + if bz2 { + bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?; + } Ok(PyBytes::new(py, &bytes).into()) } } diff --git a/src/record.rs b/src/record.rs index 5ee25b9..7ff008f 100644 --- a/src/record.rs +++ b/src/record.rs @@ -950,9 +950,13 @@ pub trait Record<'a>: /// /// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their /// specific field checks. 
- fn write_to_file>(recs: &Vec, outfile: P) -> Result<(), DmapError> { + fn write_to_file>( + recs: &Vec, + outfile: P, + bz2: bool, + ) -> Result<(), DmapError> { let bytes: Vec = Self::into_bytes(recs)?; - io::bytes_to_file(bytes, outfile)?; + io::bytes_to_file(bytes, outfile, bz2)?; Ok(()) } } diff --git a/tests/tests.rs b/tests/tests.rs index 0c9dad6..74fb90f 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -17,7 +17,7 @@ macro_rules! make_test { let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); - _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile).expect("Unable to write to file"); + _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile, false).expect("Unable to write to file"); let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { assert_eq!(read_rec, written_rec) @@ -35,7 +35,7 @@ macro_rules! make_test { let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); - _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile).expect("Unable to write to file"); + _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile, false).expect("Unable to write to file"); let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { assert_eq!(read_rec, written_rec) @@ -75,7 +75,7 @@ macro_rules! 
make_test { tempfile.set_file_name(format!("tmp.{}.generic", stringify!($record_type))); let gen_data = DmapRecord::read_file(&filename).expect("Unable to read file"); - _ = DmapRecord::write_to_file(&gen_data, &tempfile).expect("Unable to write to file"); + _ = DmapRecord::write_to_file(&gen_data, &tempfile, false).expect("Unable to write to file"); let new_recs = DmapRecord::read_file(&tempfile).expect("Cannot read tempfile"); for (new_rec, ref_rec) in izip!(new_recs.iter(), gen_data.iter()) { assert_eq!(new_rec, ref_rec) From bec439e011b0248d640557930a4e52ebbf462248 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 22 Jan 2026 18:50:06 +0000 Subject: [PATCH 3/5] Version bump --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 025a7e7..adb76b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "darn-dmap" -version = "0.6.0" +version = "0.7.0" edition = "2021" rust-version = "1.63.0" authors = ["Remington Rohel"] diff --git a/pyproject.toml b/pyproject.toml index c96dc9e..7a7d488 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "darn-dmap" -version = "0.6.0" +version = "0.7.0" requires-python = ">=3.8" authors = [ { name = "Remington Rohel" } From b278fb32f550f304c1bb4c36ac89b037f4d40087 Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 22 Jan 2026 18:58:38 +0000 Subject: [PATCH 4/5] Updated macos runners for Actions. 
--- .github/workflows/CI.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e2d6975..88e41c5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -148,10 +148,10 @@ jobs: strategy: matrix: platform: - - runner: macos-13 + - runner: macos-15-intel target: x86_64 - - runner: macos-14 - target: aarch64 + - runner: macos-latest + target: arm64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From d634643114bf4d9847d57fb1a6872d2e811e55ea Mon Sep 17 00:00:00 2001 From: Remington Rohel Date: Thu, 22 Jan 2026 20:42:50 +0000 Subject: [PATCH 5/5] Corrected macos-latest target --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 88e41c5..cc29788 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -151,7 +151,7 @@ jobs: - runner: macos-15-intel target: x86_64 - runner: macos-latest - target: arm64 + target: aarch64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5