Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ jobs:
strategy:
matrix:
platform:
- runner: macos-13
- runner: macos-15-intel
target: x86_64
- runner: macos-14
- runner: macos-latest
target: aarch64
steps:
- uses: actions/checkout@v4
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ __pycache__/
# Built files
target/

# Rust dependency versions
Cargo.lock

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "darn-dmap"
version = "0.6.0"
version = "0.7.0"
edition = "2021"
rust-version = "1.63.0"
authors = ["Remington Rohel"]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "darn-dmap"
version = "0.6.0"
version = "0.7.0"
requires-python = ">=3.8"
authors = [
{ name = "Remington Rohel" }
Expand Down
14 changes: 10 additions & 4 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ fitacf_file = "path/to/file.bz2"
data, _ = dmap.read_fitacf(fitacf_file)
dmap.write_fitacf(data, "temp.fitacf.bz2")
```
will read in the compressed file, then also write out a new compressed file. Note that compression on the writing side
will only be done when writing to file, as the detection is done based on the file extension of the output file.
will read in the compressed file, then also write out a new compressed file. You can also pass the argument `bz2=True`
to compress with `bzip2` regardless of file extension, including when returning compressed `bytes` objects.

### Generic I/O
dmap supports generic DMAP I/O, without verifying the field names and types. The file must still
Expand Down Expand Up @@ -162,10 +162,10 @@ assert binary_data == raw_bytes
```
As a note, this binary data can be compressed ~2x typically using zlib, or with another compression utility. This is quite
useful if sending data over a network where speed and bandwidth must be considered. Note that the binary writing functions
don't compress automatically, an external package like `zlib` or `bzip2` must be used.
can compress with bzip2 by passing `bz2=True` as an argument.

### File "sniffing"
If you only want to inspect a file, without actually needing access to all of the data, you can use the `read_[type]`
If you only want to inspect a file, without actually needing access to all the data, you can use the `read_[type]`
functions in `"sniff"` mode. This will only read in the first record from a file, and works on both compressed and
non-compressed files. Note that this mode does not work with bytes objects directly.

Expand All @@ -174,3 +174,9 @@ import dmap
path = "path/to/file"
first_rec = dmap.read_dmap(path, mode="sniff")
```

### Reading only metadata fields
Each DMAP format consists of metadata and data fields. You can read only the metadata fields by passing `mode="metadata"`
to any of the reading functions. Note that the generic read function `read_dmap` will return all fields, as it inherently
has no knowledge of the underlying fields. Note also that the read functions operating on a file still read the entire
file into memory first, so reading metadata only does not significantly decrease read times.
50 changes: 33 additions & 17 deletions python/dmap/_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def read_dispatcher(


def write_dispatcher(
source: list[dict], fmt: str, outfile: Union[None, str]
source: list[dict], fmt: str, outfile: Union[None, str], bz2: bool,
) -> Union[None, bytes]:
"""
Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -88,15 +88,17 @@ def write_dispatcher(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]:
raise ValueError(
f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']"
)
if outfile is None:
return getattr(dmap_rs, f"write_{fmt}_bytes")(source)
return getattr(dmap_rs, f"write_{fmt}_bytes")(source, bz2=bz2)
elif isinstance(outfile, str):
getattr(dmap_rs, f"write_{fmt}")(source, outfile)
getattr(dmap_rs, f"write_{fmt}")(source, outfile, bz2=bz2)
else:
raise TypeError(
f"invalid type for `outfile` {type(outfile)}: expected `str` or `None`"
Expand Down Expand Up @@ -308,7 +310,7 @@ def read_snd(


def write_dmap(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -321,12 +323,14 @@ def write_dmap(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "dmap", outfile)
return write_dispatcher(source, "dmap", outfile, bz2=bz2)


def write_iqdat(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes IQDAT data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -339,12 +343,14 @@ def write_iqdat(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "iqdat", outfile)
return write_dispatcher(source, "iqdat", outfile, bz2=bz2)


def write_rawacf(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes RAWACF data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -357,12 +363,14 @@ def write_rawacf(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "rawacf", outfile)
return write_dispatcher(source, "rawacf", outfile, bz2=bz2)


def write_fitacf(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes FITACF data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -375,12 +383,14 @@ def write_fitacf(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "fitacf", outfile)
return write_dispatcher(source, "fitacf", outfile, bz2=bz2)


def write_grid(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes GRID data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -393,12 +403,14 @@ def write_grid(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "grid", outfile)
return write_dispatcher(source, "grid", outfile, bz2=bz2)


def write_map(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes MAP data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -411,12 +423,14 @@ def write_map(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "map", outfile)
return write_dispatcher(source, "map", outfile, bz2=bz2)


def write_snd(
source: list[dict], outfile: Union[None, str] = None
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
) -> Union[None, bytes]:
"""
Writes SND data from `source` to either a `bytes` object or to `outfile`.
Expand All @@ -429,5 +443,7 @@ def write_snd(
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
compressed using bzip2.
bz2: bool
If `True`, the data will be compressed with `bzip2`.
"""
return write_dispatcher(source, "snd", outfile)
return write_dispatcher(source, "snd", outfile, bz2=bz2)
16 changes: 15 additions & 1 deletion src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,29 @@
//!
//! Currently only supports bz2 compression detection.

use bzip2::read::BzEncoder;
use bzip2::Compression;
use std::io::{Chain, Cursor, Error, Read};

/// Compresses `bytes` with bzip2 at the strongest compression level,
/// returning the compressed byte stream.
///
/// # Errors
/// Propagates any I/O error surfaced by [`Read::read_to_end`] while
/// draining the encoder.
pub(crate) fn compress_bz2(bytes: &[u8]) -> Result<Vec<u8>, Error> {
    // BzEncoder wraps the input slice as a reader; pulling it to EOF
    // yields the fully compressed stream.
    let mut encoder = BzEncoder::new(bytes, Compression::best());
    let mut compressed = Vec::new();
    encoder.read_to_end(&mut compressed)?;
    Ok(compressed)
}

type PartiallyReadStream<T> = Chain<Cursor<[u8; 3]>, T>;

/// Detects bz2 compression on the input `stream`. Returns a reader
/// which includes all data from `stream`.
///
/// # Errors
/// See [`std::io::Read::read_exact`].
/// See [`Read::read_exact`].
pub(crate) fn detect_bz2<T>(mut stream: T) -> Result<(bool, PartiallyReadStream<T>), Error>
where
T: for<'a> Read,
Expand Down
33 changes: 20 additions & 13 deletions src/io.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
//! Utility functions for file operations.

use bzip2::{read::BzEncoder, Compression};
use crate::compression::compress_bz2;
use std::ffi::OsStr;
use std::fs::{File, OpenOptions};
use std::io::{Read, Write};
use std::io::Write;
use std::path::Path;

/// Write bytes to file.
///
/// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is
/// `.bz2`, the bytes will be compressed using bzip2 before being written.
/// `.bz2` or `bz2` is `true`, the bytes will be compressed using bzip2 before being written.
///
/// # Errors
/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`std::fs::File::open`].
/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`File::open`].
///
/// If an error is encountered when compressing the bytes.
///
/// If an error is encountered when writing the bytes to the filesystem. See [`std::io::Write::write_all`]
/// If an error is encountered when writing the bytes to the filesystem. See [`Write::write_all`]
pub(crate) fn bytes_to_file<P: AsRef<Path>>(
bytes: Vec<u8>,
outfile: P,
bz2: bool,
) -> Result<(), std::io::Error> {
let mut out_bytes: Vec<u8> = vec![];
let compress_file: bool =
matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
bz2 || matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?;
if compress_file {
let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best());
compressor.read_to_end(&mut out_bytes)?;
write_bytes_bz2(bytes, &mut file)
} else {
out_bytes = bytes;
file.write_all(&bytes)
}
}

file.write_all(&out_bytes)
/// Compresses `bytes` with bzip2 (via [`compress_bz2`]) and writes the
/// compressed stream to `writer`.
///
/// # Errors
/// Returns any error raised by [`compress_bz2`] or [`Write::write_all`].
pub(crate) fn write_bytes_bz2<W: Write>(
    bytes: Vec<u8>,
    writer: &mut W,
) -> Result<(), std::io::Error> {
    let compressed = compress_bz2(&bytes)?;
    writer.write_all(&compressed)
}
47 changes: 33 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
//!
//! // Write the records to a file
//! let out_path = PathBuf::from("tests/test_files/output.rawacf");
//! RawacfRecord::write_to_file(&rawacf_data, &out_path)?;
//! RawacfRecord::write_to_file(&rawacf_data, &out_path, false)?;
//! # std::fs::remove_file(out_path)?;
//! # Ok(())
//! # }
Expand Down Expand Up @@ -121,9 +121,10 @@ macro_rules! write_rust {
pub fn [< try_write_ $type >]<P: AsRef<Path>>(
recs: Vec<IndexMap<String, DmapField>>,
outfile: P,
bz2: bool,
) -> Result<(), DmapError> {
let bytes = [< $type:camel Record >]::try_into_bytes(recs)?;
crate::io::bytes_to_file(bytes, outfile).map_err(DmapError::from)
crate::io::bytes_to_file(bytes, outfile, bz2).map_err(DmapError::from)
}
}
}
Expand Down Expand Up @@ -308,9 +309,14 @@ read_py!(
/// does not know that typically `stid` is two bytes.
#[pyfunction]
#[pyo3(name = "write_dmap")]
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
try_write_dmap(recs, &outfile).map_err(PyErr::from)
#[pyo3(signature = (recs, outfile, /, bz2))]
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
fn write_dmap_py(
recs: Vec<IndexMap<String, DmapField>>,
outfile: PathBuf,
bz2: bool,
) -> PyResult<()> {
try_write_dmap(recs, &outfile, bz2).map_err(PyErr::from)
}

/// Checks that a list of dictionaries contains valid DMAP records, then converts them to bytes.
Expand All @@ -321,9 +327,17 @@ fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> Py
/// does not know that typically `stid` is two bytes.
#[pyfunction]
#[pyo3(name = "write_dmap_bytes")]
#[pyo3(text_signature = "(recs: list[dict], /)")]
fn write_dmap_bytes_py(py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
let bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
#[pyo3(signature = (recs, /, bz2))]
#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
fn write_dmap_bytes_py(
py: Python,
recs: Vec<IndexMap<String, DmapField>>,
bz2: bool,
) -> PyResult<Py<PyAny>> {
let mut bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
if bz2 {
bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
}
Ok(PyBytes::new(py, &bytes).into())
}

Expand All @@ -334,18 +348,23 @@ macro_rules! write_py {
#[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then appends to outfile." ]
#[pyfunction]
#[pyo3(name = $fn_name)]
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
[< try_write_ $name >](recs, &outfile).map_err(PyErr::from)
#[pyo3(signature = (recs, outfile, /, bz2))]
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf, bz2: bool) -> PyResult<()> {
[< try_write_ $name >](recs, &outfile, bz2).map_err(PyErr::from)
}

#[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then converts them to bytes." ]
#[doc = "Returns `list[bytes]`, one entry per record." ]
#[pyfunction]
#[pyo3(name = $bytes_name)]
#[pyo3(text_signature = "(recs: list[dict], /)")]
fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
let bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
#[pyo3(signature = (recs, /, bz2))]
#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>, bz2: bool) -> PyResult<Py<PyAny>> {
let mut bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
if bz2 {
bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
}
Ok(PyBytes::new(py, &bytes).into())
}
}
Expand Down
Loading