diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4f5f7e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# C extensions +*.so + +# Byte-compiled / optimized / DLL files +__pycache__/ + +# Built files +target/ + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..30a4fbf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,50 @@ +# Developer Guidelines + +Any contributions are welcome! Below is a brief description of the project structure. + +## Structure + +### `src/record.rs` +This file contains the `Record` trait, which defines a set of functions that specific DMAP formats must implement. +For example, `read_file(infile: &PathBuf) -> Result<Vec<Self>, DmapError>` is defined in the `Record` trait, and handles +reading in records from a file at the specified path. This function is generic, in that it doesn't know what type of records +(RAWACF, FITACF, etc.) are expected. Also, since it is a trait function, you can only use it through a struct which implements +the trait. For example, the `FitacfRecord` struct defined in `src/formats/fitacf.rs` implements the `Record` trait, and so +you can call `FitacfRecord::read_file(...)` to read a FITACF file, but you couldn't invoke `Record::read_file(...)`. + +### `src/types.rs` +This file defines necessary structs and enums for encapsulating basic types (`i8`, `u32`, `String`, etc.) into +objects like `DmapField`, `DmapScalar`, `DmapVec`, etc. that abstract over the supported underlying types. +For instance, when reading a scalar from a DMAP file, the underlying data type is inferred from the `type` field in the +scalar's metadata, so it can't be known beforehand. This requires some encapsulating type, `DmapScalar` in this case, +which contains the metadata of the field and has a known size for the stack memory. 
+ +This file defines the `Fields` struct, which is used to hold the names and types of the required and optional +scalar and vector fields for a type of DMAP record (RAWACF, FITACF, etc.). + +This file defines the `DmapType` trait and implements it for supported data types that can be in DMAP records, namely +`u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f32`, `f64`, and `String`. Implementing the trait for +only these types means that other types, e.g. `i128`, cannot be stored in DMAP records. + +Lastly, functions for parsing scalars and vectors from a byte buffer are defined in this file. + +### `src/formats` +This directory holds the files that define the DMAP record formats: IQDAT, RAWACF, FITACF, GRID, MAP, SND, and the generic DMAP. +If you are defining a new DMAP format, you will need to make a new file in this directory following the structure of the +existing files. Essentially, you define the scalar and vector fields, both required and optional, and the groups of vector +fields which must have identical dimensions, then call a macro to autogenerate the struct code for you. + +### `src/compression.rs` +This file contains the automatic bz2 detection function. If more compression types are to be supported, this is where you would +put them. + +### `src/io.rs` +This file contains the function `bytes_to_file`, which handles writing a `Vec<u8>` to `AsRef<Path>`. If the path +ends in `.bz2`, the function will compress first with bz2. + +### `tests` +In `tests.rs`, integration tests for reading and writing all file types are present. Small example files +are contained in `tests/test_files`. + +### `benches/io_benchmarking.rs` +This file contains benchmarking functions for checking the performance of the basic read functions. 
\ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. 
Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/Cargo.toml b/Cargo.toml index 61b1973..567bed5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,14 @@ [package] -name = "dmap" -version = "0.4.0" +name = "darn-dmap" +version = "0.5.0" edition = "2021" rust-version = "1.63.0" +authors = ["Remington Rohel"] +description = "SuperDARN DMAP file format I/O" +repository = "https://github.com/SuperDARNCanada/dmap" +license = "LGPL-3.0-or-later" +keywords = ["SuperDARN", "dmap", "I/O"] +categories = ["parser-implementations", "science"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -13,8 +19,8 @@ name = "dmap" crate-type = ["cdylib", "rlib"] [dependencies] -pyo3 = { version = "0.22.5", features = ["extension-module", "indexmap", "abi3-py38"] } -numpy = "0.22.0" +pyo3 = { version = "0.26.0", features = ["extension-module", "indexmap", "abi3-py38"] } +numpy = "0.26.0" indexmap = "2.3.0" itertools = "0.13.0" rayon = "1.10.0" diff --git a/README.md b/README.md index 88e8d2b..5d41e5e 100644 --- a/README.md +++ b/README.md @@ -1,49 +1,30 @@ -# Dmap +A library for SuperDARN DMAP file I/O +===================================== -Rust tools for SuperDARN DMAP file format operations. 
+[![github]](https://github.com/SuperDARNCanada/dmap) [![crates-io]](https://crates.io/crates/darn-dmap) [![docs-rs]](crate) + +[github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +[crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +[docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs This project exposes both Rust and Python APIs for handling DMAP I/O. -I/O can be conducted either on byte buffers, or directly to/from files. +I/O can be conducted either directly to/from files or byte buffers. The SuperDARN DMAP file formats are all supported (IQDAT, RAWACF, FITACF, GRID, MAP, and SND) -as well as a generic DMAP format that is unaware of any required fields or types -(e.g. char, int32) for any fields. +as well as a generic DMAP format that is unaware of any required fields or types (e.g. char, int32) for any fields. -## Developer Guidelines +## Installation -### `src/record.rs` -This file contains the `Record` trait, which defines a set of functions that specific DMAP formats must implement. -For example, `read_file(infile: &PathBuf) -> Result, DmapError>` is defined in the `Record` trait, and handles -reading in records from a file at the specified path. This function is generic, in that it doesn't know what type of records -(RAWACF, FITACF, etc.) are expected. Also, since it is a trait function, you can only use it through a struct which implements -the trait. For example, the `FitacfRecord` struct defined in `src/formats/fitacf.rs` implements the `Record` trait, and so -you can call `FitacfRecord::read_file(...)` to read a FITACF file, but you couldn't invoke `Record::read_file(...)`. +### Rust +1. Add the crate to your dependencies in your `Cargo.toml` file +2. Add `use dmap::*;` to your imports. 
-### `src/types.rs` -This file defines necessary structs and enums for encapsulating basic types (`i8`, `u32`, `String`, etc.) into -objects like `DmapField`, `DmapScalar`, `DmapVec`, etc. that abstract over the supported underlying types. -For instance, when reading scalar from a DMAP file, the underlying data type is inferred from the `type` field in the -scalar's metadata, so it can't be known beforehand. This requires some encapsulating type, `DmapScalar` in this case, -which contains the metadata of the field and has a known size for the stack memory. +### Python +This package is registered on PyPI as `darn-dmap`, you can install the package with your package manager. -This file defines the `Fields` struct, which is used to hold the names and types of the required and optional -scalar and vector fields for a type of DMAP record (RAWACF, FITACF, etc.). - -This file defines the `DmapType` trait and implements it for supported data types that can be in DMAP records, namely -`u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f32`, `f64`, and `String`. The implementation of the trait for -these types only means that other types, e.g. `i128`, cannot be stored in DMAP records. - -Lastly, functions for parsing scalars and vectors from a byte buffer are defined in this file. - -### `src/formats` -This directory holds the files that define the DMAP record formats: IQDAT, RAWACF, FITACF, GRID, MAP, SND, and the generic DMAP. -If you are defining a new DMAP format, you will need to make a new file in this directory following the structure of the -existing files. Essentially, you define the scalar and vector fields, both required and optional, and the groups of vector -fields which must have identical dimensions, then call a macro to autogenerate the struct code for you. - -### `tests` -In `tests.rs`, integration tests for reading and writing all file types are present. Small example files -are contained in `tests/test_files`. 
- -### `benches/io_benchmarking.rs` -This file contains benchmarking functions for checking the performance of the basic read functions. +### From source +If you want to build from source, you first need to have Rust installed on your machine. Then: +1. Clone the repository: `git clone https://github.com/SuperDARNCanada/dmap` +2. Run `cargo build` in the repository directory +3. If wanting to install the Python API, create a virtual environment and source it, then install `maturin` +4. In the project directory, run `maturin develop` to build and install the Python bindings. This will make a wheel file based on your operating system and architecture that you can install directly on any compatible machine. diff --git a/pyproject.toml b/pyproject.toml index 45f4da5..9f7f204 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "darn-dmap" -version = "0.4.0" +version = "0.5.0" requires-python = ">=3.8" authors = [ { name = "Remington Rohel" } @@ -13,11 +13,25 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Rust" ] +description = "SuperDARN DMAP file format I/O" +readme = "python/README.md" dependencies = ["numpy<3"] +license = "LGPL-3.0-or-later" + +[project.urls] +Repository = "https://github.com/SuperDARNCanada/dmap" [tool.maturin] +python-source = "python" +module-name = "dmap.dmap_rs" bindings = "pyo3" profile = "release" compatibility = "manylinux2014" auditwheel = "repair" strip = true + +[project.optional-dependencies] +dev = [ + "pytest", + "ruff", +] \ No newline at end of file diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..73e08e9 --- /dev/null +++ b/python/README.md @@ -0,0 +1,176 @@ +A library for SuperDARN DMAP file I/O +===================================== + +[github](https://github.com/SuperDARNCanada/dmap) + + +The SuperDARN DMAP file formats are all supported (IQDAT, RAWACF, FITACF, GRID, MAP, and SND) +as well as a generic DMAP 
format that is unaware of any required fields or types (e.g. char, int32) for any fields. +For more information on DMAP please see [RST Documentation](https://radar-software-toolkit-rst.readthedocs.io/en/latest/). + +## Installation + +### Package manager +This package is registered on PyPI as `darn-dmap`, you can install the package with your package manager, e.g. `pip install darn-dmap`. + +### From source +If you want to build from source, you first need to have Rust installed on your machine. Then: +1. Clone the repository: `git clone https://github.com/SuperDARNCanada/dmap` +2. Create a virtual environment and source it, then install `maturin` +3. In the project directory, run `maturin develop` to build and install the Python bindings. This will make a wheel file based on your operating system and architecture that you can install directly on any compatible machine. + +## Usage + +### The basics + +The basic code to read and write a DMAP file is: +```python +import dmap + +file = "path/to/rawacf_file" +data, _ = dmap.read_rawacf(file) # returns `tuple[list[dict], Optional[int]]` +outfile = "path/to/outfile.rawacf" +dmap.write_rawacf(data, outfile) # writes binary data to `outfile` +raw_bytes = dmap.write_rawacf(data) # returns a `bytes` object +``` +`dmap.read_rawacf(...)` reads the file into a list of dictionaries, returning the list as well as the byte where any corrupted records start. + +The supported reading functions are: + +- `read_iqdat`, +- `read_rawacf`, +- `read_fitacf`, +- `read_grid`, +- `read_map`, +- `read_snd`, and +- `read_dmap`. + +The supported writing functions are: + +- `write_iqdat`, +- `write_rawacf`, +- `write_fitacf`, +- `write_grid`, +- `write_map`, +- `write_snd`, and +- `write_dmap`. + +### Accessing data fields +To see the names of the variables you've loaded in and now have access to, try using the `keys()` method: +```python +print(data[0].keys()) +``` +which will tell you all the variables in the first (zeroth) record. 
+ +Let's say you loaded in a MAP file, and wanted to grab the cross polar-cap potentials for each record: +```python +import dmap +file = "20150302.n.map" +map_data, _ = dmap.read_map(file) + +cpcps=[rec['pot.drop'] for rec in map_data] +``` + +### I/O on a bz2 compressed file + +dmap will handle compressing and decompressing `.bz2` files seamlessly, detecting the compression automatically. E.g. +```python +import dmap +fitacf_file = "path/to/file.bz2" +data, _ = dmap.read_fitacf(fitacf_file) +dmap.write_fitacf(data, "temp.fitacf.bz2") +``` +will read in the compressed file, then also write out a new compressed file. Note that compression on the writing side +will only be done when writing to file, as the detection is done based on the file extension of the output file. + +### Generic I/O +dmap supports generic DMAP I/O, without verifying the field names and types. The file must still +be properly formatted as a DMAP file, but otherwise no checks are conducted. + +**NOTE:** When using the generic writing function `write_dmap`, scalar fields will possibly be resized; e.g., the `stid` +field may be stored as an 8-bit integer, as opposed to a 16-bit integer as usual. As such, reading with a specific method +(e.g. `read_fitacf`) on a file written using `write_dmap` will likely not pass the FITACF format checks. + +```python +import dmap +generic_file = "path/to/file" # can be iqdat, rawacf, fitacf, grid, map, snd, and optionally .bz2 compressed +data, _ = dmap.read_dmap(generic_file) +dmap.write_dmap(data, "temp.generic.fitacf") # fitacf as an example +data2, bad_byte = dmap.read_fitacf("temp.generic.fitacf") # This will fail due to different types for scalar fields +assert bad_byte == 0 # The first record should be corrupted, i.e. not be a valid FITACF record +assert len(data2) == 0 # No valid records encountered +``` + +### Handling corrupted data files +The self-describing data format of DMAP files makes it susceptible to corruption. 
The metadata fields which describe +how to interpret the following bytes are very important, and so any corruption will lead to the remainder of the file being +effectively useless. dmap is able to handle corruption in two ways. The keyword argument `mode` of the `read_rawacf`, etc. +functions allows you to choose how to handle corrupt records. + +In `"lax"` mode (the default), no error is raised if a corrupt file is read, and the byte where the corrupted records start is +returned along with the non-corrupted records. +In `"strict"` mode, the I/O functions will raise an error if a corrupted record is encountered. + +```python +import dmap + +corrupted_file = "path/to/file" +data, bad_byte = dmap.read_dmap(corrupted_file, mode="lax") +assert bad_byte > 0 + +good_file = "path/to/file" +data, bad_byte = dmap.read_dmap(good_file, mode="lax") +assert bad_byte is None +``` +In both uses of the above example, `data` will be a list of all records extracted from the file, but may be +considerably smaller than the file. + +```python +import dmap + +corrupted_file = "path/to/file" +try: + data = dmap.read_dmap(corrupted_file, mode="strict") + had_error = False +except: + had_error = True +assert had_error + +good_file = "path/to/file" +try: + data = dmap.read_dmap(good_file, mode="strict") + had_error = False +except: + had_error = True +assert had_error is False +``` + +### Stream I/O +`dmap` also can conduct read/write operations from/to Python `bytes` objects directly. These bytes must be formatted in +accordance with the DMAP format. Simply pass in a `bytes` object to any of the `read_[type]` functions instead of a path +and the input will be parsed. 
+ +While not the recommended way to read data from a DMAP file, the following example shows the use of these byte I/O functions: +```python +import dmap +file = "path/to/file.fitacf" +with open(file, 'rb') as f: # 'rb' specifies to open the binary (b) file as read-only (r) + raw_bytes = f.read() # reads the file in its entirety +data, _ = dmap.read_dmap(raw_bytes) +binary_data = dmap.write_fitacf(data) +assert binary_data == raw_bytes +``` +As a note, this binary data can be compressed ~2x typically using zlib, or with another compression utility. This is quite +useful if sending data over a network where speed and bandwidth must be considered. Note that the binary writing functions +don't compress automatically; an external package like `zlib` or `bzip2` must be used. + +### File "sniffing" +If you only want to inspect a file, without actually needing access to all of the data, you can use the `read_[type]` +functions in `"sniff"` mode. This will only read in the first record from a file, and works on both compressed and +non-compressed files. Note that this mode does not work with bytes objects directly. 
+ +```python +import dmap +path = "path/to/file" +first_rec = dmap.read_dmap(path, mode="sniff") +``` diff --git a/python/dmap/__init__.py b/python/dmap/__init__.py new file mode 100644 index 0000000..aab7730 --- /dev/null +++ b/python/dmap/__init__.py @@ -0,0 +1,33 @@ +__all__ = [ + "read_iqdat", + "read_rawacf", + "read_fitacf", + "read_grid", + "read_map", + "read_snd", + "read_dmap", + "write_iqdat", + "write_rawacf", + "write_fitacf", + "write_grid", + "write_map", + "write_snd", + "write_dmap", +] + +from ._wrapper import ( + read_iqdat, + read_rawacf, + read_fitacf, + read_grid, + read_map, + read_snd, + read_dmap, + write_iqdat, + write_rawacf, + write_fitacf, + write_grid, + write_map, + write_snd, + write_dmap, +) diff --git a/python/dmap/_wrapper.py b/python/dmap/_wrapper.py new file mode 100644 index 0000000..a4a7754 --- /dev/null +++ b/python/dmap/_wrapper.py @@ -0,0 +1,424 @@ +""" +Wrappers around the `dmap_rs` Python API. + +Each file type will have one function for calling any type of reading (strict, lax, bytes, sniff) or any type of writing +(regular, bytes). +""" + +from typing import Union, Optional +from . import dmap_rs + + +def read_dispatcher( + source: Union[str, bytes], fmt: str, mode: str +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in DMAP data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + fmt: str + DMAP format being read. One of `["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]`. + mode: str + Mode in which to read the data, either `strict`, `lax`, or `sniff`. In `strict` mode, any corruption + in the data will raise an error. In `lax` mode, all valid records will be returned in a tuple along with + the byte index of `source` where the corruption starts. 
In `sniff` mode, `source` must be a `str`, and + only the first record will be read. + + Returns + ------- + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `sniff`, returns `dict` of the first record. + """ + if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]: + raise ValueError( + f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']" + ) + + if mode not in ["strict", "lax", "sniff"]: + raise ValueError(f"invalid mode `{mode}`: expected `strict`, `lax`, or `sniff`") + + if mode == "sniff" and not isinstance(source, str): + raise TypeError( + f"invalid type for `source` {type(source)} in `sniff` mode: expected `str`" + ) + + if not isinstance(source, bytes) and not isinstance(source, str): + raise TypeError( + f"invalid type for `source` {type(source)}: expected `str` or `bytes`" + ) + + # Construct the darn-dmap function name dynamically based on parameters: + # fn_name = [sniff|read]_[fmt][_bytes][_lax] + # All possibilites for, e.g., a FITACF file: + # read_fitacf + # read_fitacf_bytes + # read_fitacf_lax + # read_fitacf_bytes_lax + # sniff_fitacf + fn_name = ( + f"{'sniff' if mode == 'sniff' else 'read'}" + f"_{fmt}" + f"{'_bytes' if isinstance(source, bytes) else ''}" + f"{'_lax' if mode == 'lax' else ''}" + ) + + return getattr(dmap_rs, fn_name)(source) + + +def write_dispatcher( + source: list[dict], fmt: str, outfile: Union[None, str] +) -> Union[None, bytes]: + """ + Writes DMAP data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of DMAP records as dictionaries. + fmt: str + DMAP format being read. One of `["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]`. 
+ outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]: + raise ValueError( + f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']" + ) + if outfile is None: + return getattr(dmap_rs, f"write_{fmt}_bytes")(source) + elif isinstance(outfile, str): + getattr(dmap_rs, f"write_{fmt}")(source, outfile) + else: + raise TypeError( + f"invalid type for `outfile` {type(outfile)}: expected `str` or `None`" + ) + + +def read_dmap( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in DMAP data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. 
+ """ + return read_dispatcher(source, "dmap", mode) + + +def read_iqdat( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in IQDAT data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "iqdat", mode) + + +def read_rawacf( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in RAWACF data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. 
+ In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "rawacf", mode) + + +def read_fitacf( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in FITACF data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "fitacf", mode) + + +def read_grid( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in GRID data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. 
If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "grid", mode) + + +def read_map( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in MAP data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. 
+ If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "map", mode) + + +def read_snd( + source: Union[str, bytes], mode: str = "lax" +) -> Union[dict, list[dict], tuple[list[dict], Optional[int]]]: + """ + Reads in SND data from `source`. + + Parameters + ---------- + source: Union[str, bytes] + Where to read data from. If input is of type `str`, this is interpreted as the path to a file. + If input is of type `bytes`, this is interpreted as the raw data itself. + mode: str + Mode in which to read the data, either "lax" (default), "strict", or "sniff". + In "lax" mode, all valid records will be returned in a tuple along with the byte index of `source` where the + corruption starts. + In "strict" mode, any corruption in the data will raise an error. + In "sniff" mode, `source` must be a path, and only the first record will be read. + + Returns + ------- + If `mode` is `lax`, returns `tuple[list[dict], Optional[int]]`, where the first element is the records which were parsed, + and the second is the byte index where `source` was no longer a valid record of type `fmt`. + If `mode` is `strict`, returns `list[dict]` which is the parsed records. + If `mode` is `sniff`, returns `dict`, which is the first record. + """ + return read_dispatcher(source, "snd", mode) + + +def write_dmap( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes DMAP data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of DMAP records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. 
+ """ + return write_dispatcher(source, "dmap", outfile) + + +def write_iqdat( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes IQDAT data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of IQDAT records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + return write_dispatcher(source, "iqdat", outfile) + + +def write_rawacf( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes RAWACF data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of RAWACF records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + return write_dispatcher(source, "rawacf", outfile) + + +def write_fitacf( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes FITACF data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of FITACF records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. 
+ """ + return write_dispatcher(source, "fitacf", outfile) + + +def write_grid( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes GRID data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of GRID records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + return write_dispatcher(source, "grid", outfile) + + +def write_map( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes MAP data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of MAP records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + return write_dispatcher(source, "map", outfile) + + +def write_snd( + source: list[dict], outfile: Union[None, str] = None +) -> Union[None, bytes]: + """ + Writes SND data from `source` to either a `bytes` object or to `outfile`. + + Parameters + ---------- + source: list[dict] + list of SND records as dictionaries. + outfile: Union[None, str] + If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path + and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be + compressed using bzip2. + """ + return write_dispatcher(source, "snd", outfile) diff --git a/src/compression.rs b/src/compression.rs index c7fd5ee..6ba48a8 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -1,8 +1,17 @@ +//! 
Utility functions for detecting compression on a stream. +//! +//! Currently only supports bz2 compression detection. + use std::io::{Chain, Cursor, Error, Read}; +type PartiallyReadStream = Chain, T>; + /// Detects bz2 compression on the input `stream`. Returns a reader /// which includes all data from `stream`. -pub(crate) fn detect_bz2(mut stream: T) -> Result<(bool, Chain, T>), Error> +/// +/// # Errors +/// See [`std::io::Read::read_exact`]. +pub(crate) fn detect_bz2(mut stream: T) -> Result<(bool, PartiallyReadStream), Error> where T: for<'a> Read, { diff --git a/src/formats/dmap.rs b/src/formats/dmap.rs index 535f3ce..c5b1f1f 100644 --- a/src/formats/dmap.rs +++ b/src/formats/dmap.rs @@ -1,4 +1,6 @@ -//! Defines the `DmapRecord` struct which implements `Record`, which can be used +//! The generic [DMAP file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/). +//! +//! Defines [`DmapRecord`] which implements [`Record`], which can be used //! for reading/writing DMAP files without checking that certain fields are or //! are not present, or have a given type. 
@@ -12,20 +14,16 @@ pub struct DmapRecord { pub data: IndexMap, } -impl DmapRecord { - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } -} - impl Record<'_> for DmapRecord { fn inner(self) -> IndexMap { self.data } - + fn get(&self, key: &str) -> Option<&DmapField> { + self.data.get(key) + } + fn keys(&self) -> Vec<&String> { + self.data.keys().collect() + } fn new(fields: &mut IndexMap) -> Result { Ok(DmapRecord { data: fields.to_owned(), @@ -71,3 +69,11 @@ impl TryFrom<&mut IndexMap> for DmapRecord { DmapRecord::new(value) } } + +impl TryFrom> for DmapRecord { + type Error = DmapError; + + fn try_from(mut value: IndexMap) -> Result { + DmapRecord::new(&mut value) + } +} diff --git a/src/formats/fitacf.rs b/src/formats/fitacf.rs index 628dca6..65110ea 100644 --- a/src/formats/fitacf.rs +++ b/src/formats/fitacf.rs @@ -1,3 +1,5 @@ +//! The [FitACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/fitacf/). + use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; @@ -46,9 +48,9 @@ static SCALAR_FIELDS: [(&str, Type); 49] = [ ("tfreq", Type::Short), ("mxpwr", Type::Int), ("lvmax", Type::Int), - ("combf", Type::String), ("fitacf.revision.major", Type::Int), ("fitacf.revision.minor", Type::Int), + ("combf", Type::String), ("noise.sky", Type::Float), ("noise.lag0", Type::Float), ("noise.vel", Type::Float), diff --git a/src/formats/grid.rs b/src/formats/grid.rs index f7b3757..4f7792f 100644 --- a/src/formats/grid.rs +++ b/src/formats/grid.rs @@ -1,3 +1,5 @@ +//! The [Grid file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/grid/). 
+ use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/iqdat.rs b/src/formats/iqdat.rs index 780aeef..78dcfa9 100644 --- a/src/formats/iqdat.rs +++ b/src/formats/iqdat.rs @@ -1,3 +1,5 @@ +//! The [IQDat file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/iqdat/). + use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/map.rs b/src/formats/map.rs index a2bd725..128d93c 100644 --- a/src/formats/map.rs +++ b/src/formats/map.rs @@ -1,3 +1,5 @@ +//! The [Map file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/map/). + use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 8bc4a01..d76b73c 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -1,22 +1,9 @@ //! The supported DMAP file formats. 
-/// The [FitACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/fitacf/) +pub mod dmap; pub mod fitacf; - -/// The [Grid file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/grid/) pub mod grid; - -/// The [IQDat file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/iqdat/) pub mod iqdat; - -/// The [Map file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/map/) pub mod map; - -/// The [RawACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/rawacf/) pub mod rawacf; - -/// The [SND file format](https://github.com/SuperDARN/rst/pull/315) pub mod snd; - -/// The generic [Dmap file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/dmap_data/) -pub mod dmap; diff --git a/src/formats/rawacf.rs b/src/formats/rawacf.rs index 40827a4..2d98f9c 100644 --- a/src/formats/rawacf.rs +++ b/src/formats/rawacf.rs @@ -1,3 +1,5 @@ +//! The [RawACF file format](https://radar-software-toolkit-rst.readthedocs.io/en/latest/references/general/rawacf/). + use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; @@ -46,9 +48,9 @@ static SCALAR_FIELDS: [(&str, Type); 47] = [ ("tfreq", Type::Short), ("mxpwr", Type::Int), ("lvmax", Type::Int), - ("combf", Type::String), ("rawacf.revision.major", Type::Int), ("rawacf.revision.minor", Type::Int), + ("combf", Type::String), ("thr", Type::Float), ]; diff --git a/src/formats/snd.rs b/src/formats/snd.rs index 2c59445..1ec1c32 100644 --- a/src/formats/snd.rs +++ b/src/formats/snd.rs @@ -1,3 +1,5 @@ +//! The [SND file format](https://github.com/SuperDARN/rst/pull/315). 
+ use crate::record::create_record_type; use crate::types::{Fields, Type}; use lazy_static::lazy_static; diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..bb09655 --- /dev/null +++ b/src/io.rs @@ -0,0 +1,36 @@ +//! Utility functions for file operations. + +use bzip2::{read::BzEncoder, Compression}; +use std::ffi::OsStr; +use std::fs::{File, OpenOptions}; +use std::io::{Read, Write}; +use std::path::Path; + +/// Write bytes to file. +/// +/// This function opens the file in `append` mode, creating it if it does not exist. If the extension of `outfile` is +/// `.bz2`, the bytes will be compressed using bzip2 before being written. +/// +/// # Errors +/// If opening the file in append mode is not possible (permissions, parent directory doesn't exist, etc.). See [`std::fs::OpenOptions::open`]. +/// +/// If an error is encountered when compressing the bytes. +/// +/// If an error is encountered when writing the bytes to the filesystem. See [`std::io::Write::write_all`]. +pub(crate) fn bytes_to_file>( + bytes: Vec, + outfile: P, +) -> Result<(), std::io::Error> { + let mut out_bytes: Vec = vec![]; + let compress_file: bool = + matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2")); + let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?; + if compress_file { + let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best()); + compressor.read_to_end(&mut out_bytes)?; + } else { + out_bytes = bytes; + } + + file.write_all(&out_bytes) +} diff --git a/src/lib.rs b/src/lib.rs index c41bcbd..128998a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,96 @@ -//! `dmap` is an I/O library for SuperDARN DMAP files. -//! This library has a Python API using pyo3 that supports -//! reading and writing whole files. +//! A library for SuperDARN DMAP file I/O. +//! +//! [![github]](https://github.com/SuperDARNCanada/dmap) [![crates-io]](https://crates.io/crates/darn-dmap) [![docs-rs]](crate) +//! +//!
[github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github +//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs +//! +//!
+//! +//! This library also has a Python API using pyo3. //! //! For more information about DMAP files, see [RST](https://radar-software-toolkit-rst.readthedocs.io/en/latest/) //! or [pyDARNio](https://pydarnio.readthedocs.io/en/latest/). +//! +//! The main feature of this crate is the [`Record`] trait, which defines a valid DMAP record and functions for +//! reading from and writing to byte streams. The SuperDARN file formats IQDAT, RAWACF, FITACF, GRID, MAP, and SND are +//! all supported with structs that implement [`Record`], namely: +//! +//! - [`IqdatRecord`] +//! - [`RawacfRecord`] +//! - [`FitacfRecord`] +//! - [`GridRecord`] +//! - [`MapRecord`] +//! - [`SndRecord`] +//! +//! Each struct has a list of required and optional fields that it uses to verify the integrity of the record. +//! Only fields listed in the required and optional lists are allowed, no required field can be missing, and +//! each field has an expected primitive type. Additionally, each format has groupings of vector fields which +//! must all share the same shape; e.g. `acfd` and `xcfd` in a RAWACF file. +//! +//! There is also a generic [`DmapRecord`] struct which has no knowledge of required or optional fields. When reading from +//! a byte stream, the parsed data will be identical when using both a specific format like [`RawacfRecord`] and the generic +//! [`DmapRecord`]; however, when writing to a byte stream, the output may differ. Since [`DmapRecord`] has no knowledge of +//! the expected primitive type for each field, it defaults to a type that fits the data. For example, the `stid` field may +//! be saved as an `i8` when using [`DmapRecord`] instead of an `i16` which [`RawacfRecord`] specifies it must be. +//! +//!
+//! Each type of record has a specific field ordering hard-coded by this library. This is the order in which fields are written to file, +//! and may not match the ordering of fields generated by RST. This also means that round-trip I/O (i.e. reading a file and +//! writing back out to a new file) is not guaranteed to generate an identical file; however, it is guaranteed that all the +//! information is the same, just not necessarily in the same order. +//!
+//! +//!
+//! +//! # Examples +//! +//! Convenience functions for reading from and writing to a file exist to simplify the most common use cases. +//! For example, [`Record::read_file`] reads all records from a file: +//! ``` +//! use dmap::*; +//! use std::path::PathBuf; +//! +//! # fn main() -> Result<(), DmapError> { +//! let path = PathBuf::from("tests/test_files/test.rawacf"); +//! let rawacf_data = RawacfRecord::read_file(&path)?; +//! let unchecked_data = DmapRecord::read_file(&path)?; +//! +//! assert_eq!(rawacf_data.len(), unchecked_data.len()); +//! assert_eq!(rawacf_data[0].get(&"stid".to_string()), unchecked_data[0].get(&"stid".to_string())); +//! +//! // Write the records to a file +//! let out_path = PathBuf::from("tests/test_files/output.rawacf"); +//! RawacfRecord::write_to_file(&rawacf_data, &out_path)?; +//! # std::fs::remove_file(out_path)?; +//! # Ok(()) +//! # } +//! ``` +//! You can read from anything that implements the `Read` trait using the functions exposed by the [`Record`] trait. +//! Detection and decompression of bz2 are also performed automatically. +//! ``` +//! use dmap::*; +//! use std::fs::File; +//! use itertools::izip; +//! +//! # fn main() -> Result<(), DmapError> { +//! let file = File::open("tests/test_files/test.rawacf.bz2")?; // `File` implements the `Read` trait +//! let rawacf_data = RawacfRecord::read_records(file)?; +//! +//! let uncompressed_data = RawacfRecord::read_file("tests/test_files/test.rawacf")?; +//! assert_eq!(rawacf_data.len(), uncompressed_data.len()); +//! for (left, right) in izip!(rawacf_data, uncompressed_data) { +//! assert_eq!(left, right) +//! } +//! # Ok(()) +//! # } +//!
``` -pub mod compression; +pub(crate) mod compression; pub mod error; pub mod formats; +pub(crate) mod io; pub mod record; pub mod types; @@ -21,112 +104,26 @@ pub use crate::formats::rawacf::RawacfRecord; pub use crate::formats::snd::SndRecord; pub use crate::record::Record; use crate::types::DmapField; -use bzip2::read::BzEncoder; -use bzip2::Compression; use indexmap::IndexMap; use paste::paste; use pyo3::prelude::*; use pyo3::types::PyBytes; -use rayon::iter::Either; -use rayon::prelude::*; -use std::ffi::OsStr; -use std::fmt::Debug; -use std::fs::{File, OpenOptions}; -use std::io::{Read, Write}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; -/// Write bytes to file. -/// -/// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is -/// `.bz2`, the bytes will be compressed using bzip2 before being written, and the file is instead -/// opened in `create_new` mode, meaning it will fail if a file already exists at the given path. -fn bytes_to_file(bytes: Vec, outfile: &PathBuf) -> Result<(), std::io::Error> { - let mut out_bytes: Vec = vec![]; - let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?; - match outfile.extension() { - Some(ext) if ext == OsStr::new("bz2") => { - let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best()); - compressor.read_to_end(&mut out_bytes)?; - } - _ => { - out_bytes = bytes; - } - } - file.write_all(&out_bytes) -} - -/// Writes a collection of `Record`s to `outfile`. -/// -/// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their -/// specific field checks. 
-pub fn write_records<'a>( - mut recs: Vec>, - outfile: &PathBuf, -) -> Result<(), DmapError> { - let mut bytes: Vec = vec![]; - let (errors, rec_bytes): (Vec<_>, Vec<_>) = - recs.par_iter_mut() - .enumerate() - .partition_map(|(i, rec)| match rec.to_bytes() { - Err(e) => Either::Left((i, e)), - Ok(y) => Either::Right(y), - }); - if !errors.is_empty() { - Err(DmapError::InvalidRecord(format!( - "Corrupted records: {errors:?}" - )))? - } - bytes.par_extend(rec_bytes.into_par_iter().flatten()); - bytes_to_file(bytes, outfile)?; - Ok(()) -} - -/// Attempts to convert `recs` to `T` then convert to bytes. -fn try_to_bytes Record<'a>>( - mut recs: Vec>, -) -> Result, DmapError> -where - for<'a> >>::Error: Send + Debug, -{ - let mut bytes: Vec = vec![]; - let (errors, rec_bytes): (Vec<_>, Vec<_>) = - recs.par_iter_mut() - .enumerate() - .partition_map(|(i, rec)| match T::try_from(rec) { - Err(e) => Either::Left((i, e)), - Ok(x) => match x.to_bytes() { - Err(e) => Either::Left((i, e)), - Ok(y) => Either::Right(y), - }, - }); - if !errors.is_empty() { - Err(DmapError::BadRecords( - errors.iter().map(|(i, _)| *i).collect(), - errors[0].1.to_string(), - ))? - } - bytes.par_extend(rec_bytes.into_par_iter().flatten()); - Ok(bytes) -} - -/// This macro generates two functions for writing to file. The first, `write_[type]`, takes in -/// records of type `[Type]Record`, while the second, `try_write_[type]`, takes in `Vec` -/// and attempts to coerce into `[Type]Record` then write to file. +/// This macro generates a function for attempting to convert `Vec` to `Vec<$type>` and write it to file. macro_rules! write_rust { ($type:ident) => { paste! { - #[doc = "Write `" $type:upper "` records to `outfile`." ] - pub fn [< write_ $type >](recs: Vec<[< $type:camel Record >]>, outfile: &PathBuf) -> Result<(), DmapError> { - write_records(recs, outfile) - } - - #[doc = "Attempts to convert `recs` to `" $type:camel Record "` then append to `outfile`." 
] - pub fn [< try_write_ $type >]( + #[doc = "Attempts to convert `recs` to `" $type:camel Record "` then append to `outfile`."] + #[doc = ""] + #[doc = "# Errors"] + #[doc = "if any of the `IndexMap`s are unable to be interpreted as a `" $type:camel Record "`, or there is an issue writing to file."] + pub fn [< try_write_ $type >]>( recs: Vec>, - outfile: &PathBuf, + outfile: P, ) -> Result<(), DmapError> { - let bytes = try_to_bytes::<[< $type:camel Record >]>(recs)?; - bytes_to_file(bytes, outfile).map_err(DmapError::from) + let bytes = [< $type:camel Record >]::try_into_bytes(recs)?; + crate::io::bytes_to_file(bytes, outfile).map_err(DmapError::from) } } } @@ -140,25 +137,6 @@ write_rust!(map); write_rust!(snd); write_rust!(dmap); -macro_rules! read_rust { - ($type:ident) => { - paste! { - #[doc = "Read in a `" $type:upper "` file" ] - pub fn [< read_ $type >](infile: PathBuf) -> Result]>, DmapError> { - [< $type:camel Record >]::read_file(&infile) - } - } - } -} - -read_rust!(iqdat); -read_rust!(rawacf); -read_rust!(fitacf); -read_rust!(grid); -read_rust!(map); -read_rust!(snd); -read_rust!(dmap); - /// Creates functions for reading DMAP files for the Python API. /// /// Generates two functions: `read_[type]` and `read_[type]_lax`, for strict and lax @@ -313,9 +291,9 @@ fn write_dmap_py(recs: Vec>, outfile: PathBuf) -> Py #[pyfunction] #[pyo3(name = "write_dmap_bytes")] #[pyo3(text_signature = "(recs: list[dict], /)")] -fn write_dmap_bytes_py(py: Python, recs: Vec>) -> PyResult { - let bytes = try_to_bytes::(recs).map_err(PyErr::from)?; - Ok(PyBytes::new_bound(py, &bytes).into()) +fn write_dmap_bytes_py(py: Python, recs: Vec>) -> PyResult> { + let bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?; + Ok(PyBytes::new(py, &bytes).into()) } /// Generates functions exposed to the Python API for writing specific file types. @@ -335,9 +313,9 @@ macro_rules! 
write_py { #[pyfunction] #[pyo3(name = $bytes_name)] #[pyo3(text_signature = "(recs: list[dict], /)")] - fn [< write_ $name _bytes_py >](py: Python, recs: Vec>) -> PyResult { - let bytes = try_to_bytes::<[< $name:camel Record >]>(recs).map_err(PyErr::from)?; - Ok(PyBytes::new_bound(py, &bytes).into()) + fn [< write_ $name _bytes_py >](py: Python, recs: Vec>) -> PyResult> { + let bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?; + Ok(PyBytes::new(py, &bytes).into()) } } } @@ -353,7 +331,7 @@ write_py!(snd, "write_snd", "write_snd_bytes"); /// Functions for SuperDARN DMAP file format I/O. #[pymodule] -fn dmap(m: &Bound<'_, PyModule>) -> PyResult<()> { +fn dmap_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { // Strict read functions m.add_function(wrap_pyfunction!(read_dmap_py, m)?)?; m.add_function(wrap_pyfunction!(read_iqdat_py, m)?)?; diff --git a/src/record.rs b/src/record.rs index cf01490..3896f75 100644 --- a/src/record.rs +++ b/src/record.rs @@ -1,23 +1,40 @@ -//! Defines the `Record` trait, which contains the shared behaviour that all -//! DMAP records must have. +//! Defines the [`Record`] trait, which contains the shared behaviour that all DMAP records must have. use crate::compression::detect_bz2; use crate::error::DmapError; +use crate::io; use crate::types::{parse_scalar, parse_vector, read_data, DmapField, DmapType, DmapVec, Fields}; use bzip2::read::BzDecoder; use indexmap::IndexMap; +use itertools::izip; +use rayon::iter::Either; use rayon::prelude::*; use std::fmt::Debug; use std::fs::File; use std::io::{Cursor, Read}; -use std::path::PathBuf; +use std::path::Path; +/// DMAP record template. +/// +/// This trait defines functionality for parsing bytes into records, converting records to bytes, +/// and reading from / writing to files. 
pub trait Record<'a>: - Debug + Send + TryFrom<&'a mut IndexMap, Error = DmapError> + Debug + Send + Sync + TryFrom, Error = DmapError> { + /// Creates a new object from the parsed scalars and vectors. + fn new(fields: &mut IndexMap) -> Result + where + Self: Sized; + /// Gets the underlying data of `self`. fn inner(self) -> IndexMap; + /// Returns the field with name `key`, if it exists in the record. + fn get(&self, key: &str) -> Option<&DmapField>; + + /// Returns the names of all fields stored in the record. + fn keys(&self) -> Vec<&String>; + /// Reads from `dmap_data` and parses into `Vec`. /// /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data. @@ -40,7 +57,7 @@ pub trait Record<'a>: .map_err(|_| DmapError::CorruptStream("Unable to read size of first record"))?; let rec_size = i32::from_le_bytes(buffer[4..8].try_into().unwrap()) as usize; // advance 4 bytes, skipping the "code" field - if rec_size <= 0 { + if rec_size == 0 { return Err(DmapError::InvalidRecord(format!( "Record 0 starting at byte 0 has non-positive size {} <= 0", rec_size @@ -83,7 +100,7 @@ pub trait Record<'a>: rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() if rec_end > buffer.len() { return Err(DmapError::InvalidRecord(format!("Record {} starting at byte {} has size greater than remaining length of buffer ({} > {})", slices.len(), rec_start, rec_size, buffer.len() - rec_start))); - } else if rec_size <= 0 { + } else if rec_size == 0 { return Err(DmapError::InvalidRecord(format!( "Record {} starting at byte {} has non-positive size {} <= 0", slices.len(), @@ -121,7 +138,7 @@ pub trait Record<'a>: } } } - if dmap_errors.len() > 0 { + if !dmap_errors.is_empty() { return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string())); } Ok(dmap_records) @@ -158,7 +175,7 @@ pub trait Record<'a>: rec_size = i32::from_le_bytes(buffer[rec_start + 4..rec_start + 8].try_into().unwrap()) as usize; // advance 4 bytes, 
skipping the "code" field rec_end = rec_start + rec_size; // error-checking the size is conducted in Self::parse_record() - if rec_end > buffer.len() || rec_size <= 0 { + if rec_end > buffer.len() || rec_size == 0 { bad_byte = Some(rec_start); break; // rec_start = buffer.len(); // break from loop @@ -168,6 +185,9 @@ pub trait Record<'a>: rec_start = rec_end; } } + if rec_start != buffer.len() { + bad_byte = Some(rec_start); + } let mut dmap_results: Vec> = vec![]; dmap_results.par_extend( slices @@ -187,7 +207,7 @@ pub trait Record<'a>: } /// Read a DMAP file of type `Self` - fn read_file(infile: &PathBuf) -> Result, DmapError> + fn read_file>(infile: P) -> Result, DmapError> where Self: Sized, Self: Send, @@ -200,7 +220,7 @@ pub trait Record<'a>: /// /// If the file is corrupted, it will return the leading uncorrupted records as well as the /// position corresponding to the start of the first corrupted record. - fn read_file_lax(infile: &PathBuf) -> Result<(Vec, Option), DmapError> + fn read_file_lax>(infile: P) -> Result<(Vec, Option), DmapError> where Self: Sized, Self: Send, @@ -210,7 +230,7 @@ pub trait Record<'a>: } /// Reads the first record of a DMAP file of type `Self`. - fn sniff_file(infile: &PathBuf) -> Result + fn sniff_file>(infile: P) -> Result where Self: Sized, Self: Send, @@ -298,11 +318,6 @@ pub trait Record<'a>: Self::new(&mut fields) } - /// Creates a new object from the parsed scalars and vectors. - fn new(fields: &mut IndexMap) -> Result - where - Self: Sized; - /// Checks the validity of an `IndexMap` as a representation of a DMAP record. /// /// Validity checks include ensuring that no unfamiliar entries exist, that all required @@ -418,10 +433,10 @@ pub trait Record<'a>: } /// Attempts to massage the entries of an `IndexMap` into the proper types for a DMAP record. 
- fn coerce>( + fn coerce( fields_dict: &mut IndexMap, fields_for_type: &Fields, - ) -> Result { + ) -> Result { let unsupported_keys: Vec<&String> = fields_dict .keys() .filter(|&k| !fields_for_type.all_fields.contains(&&**k)) @@ -501,7 +516,7 @@ pub trait Record<'a>: } } - T::new(fields_dict) + Self::new(fields_dict) } /// Attempts to copy `self` to a raw byte representation. @@ -522,6 +537,7 @@ pub trait Record<'a>: let mut num_scalars: i32 = 0; let mut num_vectors: i32 = 0; + // let scalar_fields = data.keys().filter(|k| ) for (field, _) in fields_for_type.scalars_required.iter() { match data.get(&field.to_string()) { Some(x @ DmapField::Scalar(_)) => { @@ -587,6 +603,175 @@ pub trait Record<'a>: Ok((num_scalars, num_vectors, data_bytes)) } + + /// Converts the entries of a `Record` into a raw byte representation, for debugging the conversion. + /// + /// If all is good, returns a vector containing tuples of: + /// * `String`: the name of the field (`"header"` denoting the record header) + /// * `usize`: where the serialized bytes of the field start in the record byte representation + /// * `Vec` the byte representation of the field. 
+    fn inspect_bytes(
+        &self,
+        fields_for_type: &Fields,
+    ) -> Result<Vec<(String, usize, Vec<u8>)>, DmapError> {
+        let mut data_bytes: Vec<Vec<u8>> = vec![];
+        let mut indices: Vec<usize> = vec![16]; // start at 16 to account for header
+        let mut fields: Vec<String> = vec![];
+
+        let (mut num_scalars, mut num_vectors) = (0, 0);
+
+        for (field, _) in fields_for_type.scalars_required.iter() {
+            fields.push(field.to_string());
+            match self.get(field) {
+                Some(x @ DmapField::Scalar(_)) => {
+                    let mut bytes = vec![];
+                    bytes.extend(field.as_bytes());
+                    bytes.extend([0]); // null-terminate string
+                    bytes.append(&mut x.as_bytes());
+                    indices.push(indices[indices.len() - 1] + bytes.len());
+                    data_bytes.push(bytes);
+                    num_scalars += 1;
+                }
+                Some(_) => Err(DmapError::InvalidScalar(format!(
+                    "Field {field} is a vector, expected scalar"
+                )))?,
+                None => Err(DmapError::InvalidRecord(format!(
+                    "Field {field} missing from record"
+                )))?,
+            }
+        }
+        for (field, _) in fields_for_type.scalars_optional.iter() {
+            if let Some(x) = self.get(field) {
+                match x {
+                    DmapField::Scalar(_) => {
+                        fields.push(field.to_string()); // name recorded only when the field is present, so `fields` stays aligned with `data_bytes`
+                        let mut bytes = vec![];
+                        bytes.extend(field.as_bytes());
+                        bytes.extend([0]); // null-terminate string
+                        bytes.append(&mut x.as_bytes());
+                        indices.push(indices[indices.len() - 1] + bytes.len());
+                        data_bytes.push(bytes);
+                        num_scalars += 1;
+                    }
+                    DmapField::Vector(_) => Err(DmapError::InvalidScalar(format!(
+                        "Field {field} is a vector, expected scalar"
+                    )))?,
+                }
+            }
+        }
+        for (field, _) in fields_for_type.vectors_required.iter() {
+            fields.push(field.to_string());
+            match self.get(field) {
+                Some(x @ DmapField::Vector(_)) => {
+                    let mut bytes = vec![];
+                    bytes.extend(field.as_bytes());
+                    bytes.extend([0]); // null-terminate string
+                    bytes.append(&mut x.as_bytes());
+                    indices.push(indices[indices.len() - 1] + bytes.len());
+                    data_bytes.push(bytes);
+                    num_vectors += 1;
+                }
+                Some(_) => Err(DmapError::InvalidVector(format!(
+                    "Field {field} is a scalar, expected vector"
+                )))?,
+                None => Err(DmapError::InvalidRecord(format!(
+                    "Field {field} missing from record"
+                )))?,
+            }
+        }
+        for (field, _) in fields_for_type.vectors_optional.iter() {
+            if let Some(x) = self.get(field) {
+                match x {
+                    DmapField::Vector(_) => {
+                        fields.push(field.to_string()); // name recorded only when the field is present, so `fields` stays aligned with `data_bytes`
+                        let mut bytes = vec![];
+                        bytes.extend(field.as_bytes());
+                        bytes.extend([0]); // null-terminate string
+                        bytes.append(&mut x.as_bytes());
+                        indices.push(indices[indices.len() - 1] + bytes.len()); // next start offset = previous start + this field's byte length
+                        data_bytes.push(bytes);
+                        num_vectors += 1;
+                    }
+                    DmapField::Scalar(_) => Err(DmapError::InvalidVector(format!(
+                        "Field {field} is a scalar, expected vector"
+                    )))?,
+                }
+            }
+        }
+
+        // Now build up the header
+        let num_bytes: usize = data_bytes.iter().map(|x| x.len()).sum();
+        let mut bytes: Vec<u8> = vec![];
+        bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter
+        bytes.extend((num_bytes as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors
+        bytes.extend(num_scalars.as_bytes());
+        bytes.extend(num_vectors.as_bytes());
+
+        // Accumulate all the results into one big `Vec`
+        let mut field_info: Vec<(String, usize, Vec<u8>)> = vec![("header".to_string(), 0, bytes)];
+        for (f, (s, b)) in izip!(
+            fields.into_iter(),
+            izip!(indices[..indices.len() - 1].iter(), data_bytes.into_iter())
+        ) {
+            field_info.push((f, *s, b));
+        }
+
+        Ok(field_info)
+    }
+
+    /// Creates the byte representation of a collection of [`Record`]s.
+    ///
+    /// Ordering of the members is preserved.
+    fn into_bytes(recs: &Vec<Self>) -> Result<Vec<u8>, DmapError> {
+        let mut bytes: Vec<u8> = vec![];
+        let (errors, rec_bytes): (Vec<_>, Vec<_>) =
+            recs.par_iter()
+                .enumerate()
+                .partition_map(|(i, rec)| match rec.to_bytes() {
+                    Err(e) => Either::Left((i, e)),
+                    Ok(y) => Either::Right(y),
+                });
+        if !errors.is_empty() {
+            Err(DmapError::InvalidRecord(format!(
+                "Corrupted records: {errors:?}"
+            )))?
+ } + bytes.par_extend(rec_bytes.into_par_iter().flatten()); + Ok(bytes) + } + + /// Attempts to convert `recs` to `Self` then convert to bytes. + fn try_into_bytes(recs: Vec>) -> Result, DmapError> { + let mut bytes: Vec = vec![]; + let (errors, rec_bytes): (Vec<_>, Vec<_>) = + recs.into_par_iter() + .enumerate() + .partition_map(|(i, rec)| match Self::try_from(rec) { + Err(e) => Either::Left((i, e)), + Ok(x) => match x.to_bytes() { + Err(e) => Either::Left((i, e)), + Ok(y) => Either::Right(y), + }, + }); + if !errors.is_empty() { + Err(DmapError::BadRecords( + errors.iter().map(|(i, _)| *i).collect(), + errors[0].1.to_string(), + ))? + } + bytes.par_extend(rec_bytes.into_par_iter().flatten()); + Ok(bytes) + } + + /// Writes a collection of `Record`s to `outfile`. + /// + /// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their + /// specific field checks. + fn write_to_file>(recs: &Vec, outfile: P) -> Result<(), DmapError> { + let bytes: Vec = Self::into_bytes(recs)?; + io::bytes_to_file(bytes, outfile)?; + Ok(()) + } } macro_rules! create_record_type { @@ -603,22 +788,16 @@ macro_rules! create_record_type { pub data: IndexMap, } - impl [< $format:camel Record >] { - /// Returns the field with name `key`, if it exists in the record. - pub fn get(&self, key: &String) -> Option<&DmapField> { - self.data.get(key) - } - - /// Returns the names of all fields stored in the record. - pub fn keys(&self) -> Vec<&String> { - self.data.keys().collect() - } - } - impl Record<'_> for [< $format:camel Record>] { fn inner(self) -> IndexMap { self.data } + fn get(&self, key: &str) -> Option<&DmapField> { + self.data.get(key) + } + fn keys(&self) -> Vec<&String> { + self.data.keys().collect() + } fn new(fields: &mut IndexMap) -> Result<[< $format:camel Record>], DmapError> { match Self::check_fields(fields, &$fields) { Ok(_) => {} @@ -647,7 +826,15 @@ macro_rules! 
create_record_type { type Error = DmapError; fn try_from(value: &mut IndexMap) -> Result { - Self::coerce::<[< $format:camel Record>]>(value, &$fields) + Self::coerce(value, &$fields) + } + } + + impl TryFrom> for [< $format:camel Record >] { + type Error = DmapError; + + fn try_from(mut value: IndexMap) -> Result { + Self::coerce(&mut value, &$fields) } } diff --git a/src/types.rs b/src/types.rs index 818a201..62635be 100644 --- a/src/types.rs +++ b/src/types.rs @@ -7,7 +7,7 @@ use numpy::PyArrayMethods; use paste::paste; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use pyo3::{Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python}; +use pyo3::{Bound, FromPyObject, PyAny, PyResult, Python}; use std::cmp::PartialEq; use std::fmt::{Display, Formatter}; use std::io::Cursor; @@ -15,7 +15,7 @@ use zerocopy::{AsBytes, ByteOrder, FromBytes, LittleEndian}; type Result = std::result::Result; -/// Defines the fields of a record and their `Type`. +/// Defines the fields of a record and their [`Type`]. pub struct Fields<'a> { /// The names of all fields of the record type pub all_fields: Vec<&'a str>, @@ -120,7 +120,7 @@ impl Type { } /// A scalar field in a DMAP record. 
-#[derive(Debug, Clone, PartialEq, FromPyObject)] +#[derive(Debug, Clone, PartialEq, FromPyObject, IntoPyObject)] #[repr(C)] pub enum DmapScalar { Char(i8), @@ -208,23 +208,23 @@ impl Display for DmapScalar { } } } -impl IntoPy for DmapScalar { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - Self::Char(x) => x.into_py(py), - Self::Short(x) => x.into_py(py), - Self::Int(x) => x.into_py(py), - Self::Long(x) => x.into_py(py), - Self::Uchar(x) => x.into_py(py), - Self::Ushort(x) => x.into_py(py), - Self::Uint(x) => x.into_py(py), - Self::Ulong(x) => x.into_py(py), - Self::Float(x) => x.into_py(py), - Self::Double(x) => x.into_py(py), - Self::String(x) => x.into_py(py), - } - } -} +// impl IntoPy for DmapScalar { +// fn into_py(self, py: Python<'_>) -> PyObject { +// match self { +// Self::Char(x) => x.into_py(py), +// Self::Short(x) => x.into_py(py), +// Self::Int(x) => x.into_py(py), +// Self::Long(x) => x.into_py(py), +// Self::Uchar(x) => x.into_py(py), +// Self::Ushort(x) => x.into_py(py), +// Self::Uint(x) => x.into_py(py), +// Self::Ulong(x) => x.into_py(py), +// Self::Float(x) => x.into_py(py), +// Self::Double(x) => x.into_py(py), +// Self::String(x) => x.into_py(py), +// } +// } +// } macro_rules! vec_to_bytes { ($bytes:ident, $x:ident) => {{ @@ -253,7 +253,8 @@ pub enum DmapVec { Double(ArrayD), } impl DmapVec { - /// Gets the corresponding `Type` of the vector. + /// Gets the corresponding [`Type`] of the vector. 
+ #[inline] pub(crate) fn get_type(&self) -> Type { match self { DmapVec::Char(_) => Type::Char, @@ -268,7 +269,8 @@ impl DmapVec { DmapVec::Double(_) => Type::Double, } } - /// Copies the data and metadata (dimensions, `Type` key) to raw bytes + /// Copies the data and metadata (dimensions, [`Type`] key) to raw bytes + #[inline] pub(crate) fn as_bytes(&self) -> Vec { let mut bytes: Vec = DmapType::as_bytes(&self.get_type().key()).to_vec(); match self { @@ -298,6 +300,7 @@ impl DmapVec { /// let arr = DmapVec::Uint(array![[0, 1, 2], [3, 4, 5]].into_dyn()); /// assert_eq!(arr.shape(), &[2, 3]); /// ``` + #[must_use] pub fn shape(&self) -> &[usize] { match self { DmapVec::Char(x) => x.shape(), @@ -313,20 +316,24 @@ impl DmapVec { } } } -impl IntoPy for DmapVec { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - DmapVec::Char(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Short(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Int(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Long(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Uchar(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Ushort(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Uint(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Ulong(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Float(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - DmapVec::Double(x) => PyObject::from(PyArray::from_owned_array_bound(py, x)), - } +impl<'py> IntoPyObject<'py> for DmapVec { + type Target = PyAny; + type Output = Bound<'py, Self::Target>; + type Error = std::convert::Infallible; + + fn into_pyobject(self, py: Python<'py>) -> std::result::Result { + Ok(match self { + DmapVec::Char(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Short(x) => PyArray::from_owned_array(py, x).into_any(), + 
DmapVec::Int(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Long(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Uchar(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Ushort(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Uint(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Ulong(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Float(x) => PyArray::from_owned_array(py, x).into_any(), + DmapVec::Double(x) => PyArray::from_owned_array(py, x).into_any(), + }) } } impl<'py> FromPyObject<'py> for DmapVec { @@ -357,13 +364,15 @@ impl<'py> FromPyObject<'py> for DmapVec { } } -/// Generates trait implementations for infallible conversion into DmapVec and fallible conversion +/// Generates trait implementations for infallible conversion into [`DmapVec`] and fallible conversion /// back. +/// /// Example: `vec_impls!(ArrayD, DmapVec::Char)` will generate `impl From> for /// DmapVec` and `impl TryFrom for ArrayD` code blocks. macro_rules! vec_impls { ($type:ty, $enum_var:path) => { impl From<$type> for DmapVec { + #[inline] fn from(value: $type) -> Self { $enum_var(value) } @@ -385,6 +394,7 @@ macro_rules! vec_impls { } impl From<$type> for DmapField { + #[inline] fn from(value: $type) -> Self { DmapField::Vector($enum_var(value)) } @@ -421,7 +431,7 @@ vec_impls!(ArrayD, DmapVec::Double); /// /// This is the type that is stored in a DMAP record, representing either a scalar or /// vector field. -#[derive(Debug, Clone, PartialEq, FromPyObject)] +#[derive(Debug, Clone, PartialEq, FromPyObject, IntoPyObject)] #[repr(C)] pub enum DmapField { Vector(DmapVec), @@ -429,6 +439,8 @@ pub enum DmapField { } impl DmapField { /// Converts the field and metadata (`Type` key and dimensions if applicable) to raw bytes. 
+ #[inline] + #[must_use] pub fn as_bytes(&self) -> Vec { match self { Self::Scalar(x) => x.as_bytes(), @@ -436,16 +448,16 @@ impl DmapField { } } } -impl IntoPy for DmapField { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - DmapField::Scalar(x) => x.into_py(py), - DmapField::Vector(x) => x.into_py(py), - } - } -} - -/// Macro for implementing conversion traits between primitives and `DmapField`, `DmapScalar` +// impl IntoPyObject for DmapField { +// fn into_py(self, py: Python<'_>) -> PyObject { +// match self { +// DmapField::Scalar(x) => x.into_py(py), +// DmapField::Vector(x) => x.into_py(py), +// } +// } +// } + +/// Macro for implementing conversion traits between primitives and [`DmapField`], [`DmapScalar`] /// types. /// /// Example: `scalar_impls(i8, DmapScalar::Char)` will implement: @@ -495,6 +507,9 @@ pub trait DmapType: std::fmt::Debug { /// Create a copy of the data as raw bytes. fn as_bytes(&self) -> Vec; /// Convert raw bytes to `Self` + /// + /// # Errors + /// If the bytes are not a valid DMAP record of type `Self`. fn from_bytes(bytes: &[u8]) -> Result where Self: Sized; @@ -502,19 +517,21 @@ pub trait DmapType: std::fmt::Debug { fn dmap_type() -> Type; } -/// Macro for implementing DmapType trait for primitive types. +/// Macro for implementing [`DmapType`] trait for primitive types. /// Example: `type_impls!(i8, Type::Char, 1)` macro_rules! type_impls { // This variant captures single-byte types ($type:ty, $enum_var:path, 1) => { impl DmapType for $type { - + #[inline] fn size() -> usize { 1 } + #[inline] fn as_bytes(&self) -> Vec { AsBytes::as_bytes(self).to_vec() } + #[inline] fn from_bytes(bytes: &[u8]) -> Result where Self: Sized, @@ -522,6 +539,7 @@ macro_rules! type_impls { Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) } + #[inline] fn dmap_type() -> Type { $enum_var } } }; @@ -529,15 +547,17 @@ macro_rules! type_impls { ($type:ty, $enum_var:path, $num_bytes:expr) => { paste! 
{ impl DmapType for $type { - + #[inline] fn size() -> usize { $num_bytes } + #[inline] fn as_bytes(&self) -> Vec { let mut bytes = [0; $num_bytes]; LittleEndian::[< write_ $type >](&mut bytes, *self); bytes.to_vec() } + #[inline] fn from_bytes(bytes: &[u8]) -> Result where Self: Sized, @@ -545,6 +565,7 @@ macro_rules! type_impls { Self::read_from(bytes).ok_or(DmapError::CorruptStream("Unable to interpret bytes")) } + #[inline] fn dmap_type() -> Type { $enum_var } } } @@ -564,19 +585,26 @@ type_impls!(f64, Type::Double, 8); // This implementation differs significantly from the others, so it doesn't use the macro impl DmapType for String { + #[inline] fn size() -> usize { 0 } + + #[inline] fn as_bytes(&self) -> Vec { let mut bytes = self.as_bytes().to_vec(); bytes.push(0); // null-terminate bytes } + + #[inline] fn from_bytes(bytes: &[u8]) -> Result { let data = String::from_utf8(bytes.to_owned()) .map_err(|_| DmapError::InvalidScalar("Cannot convert bytes to String".to_string()))?; Ok(data.trim_end_matches(char::from(0)).to_string()) } + + #[inline] fn dmap_type() -> Type { Type::String } @@ -806,7 +834,7 @@ impl TryFrom for f64 { DmapScalar::Ushort(x) => Ok(x as f64), DmapScalar::Uint(x) => Ok(x as f64), DmapScalar::Ulong(x) => Ok(x as f64), - DmapScalar::Float(x) => Ok(x as f64), + DmapScalar::Float(x) => Ok(f64::from(x)), DmapScalar::Double(x) => Ok(x), DmapScalar::String(x) => Err(DmapError::InvalidScalar(format!( "Unable to convert {x} to f64" @@ -826,14 +854,19 @@ impl TryFrom for String { } } -/// Verify that `name` exists in `fields` and is of the correct `Type`. +/// Verify that `name` exists in `fields` and is of the correct [`Type`]. +/// +/// # Errors +/// If `name` is not in `fields`. +/// +/// If `name` is in `fields`, but is not a [`DmapField::Scalar`] of `expected_type`. 
pub fn check_scalar( fields: &IndexMap, name: &str, - expected_type: Type, + expected_type: &Type, ) -> Result<()> { match fields.get(name) { - Some(DmapField::Scalar(data)) if data.get_type() == expected_type => Ok(()), + Some(DmapField::Scalar(data)) if data.get_type() == *expected_type => Ok(()), Some(DmapField::Scalar(data)) => Err(DmapError::InvalidScalar(format!( "{name} is of type {}, expected {}", data.get_type(), @@ -846,14 +879,17 @@ pub fn check_scalar( } } -/// If `name` is in `fields`, verify that it is of the correct `Type`. +/// If `name` is in `fields`, verify that it is of the correct [`Type`]. +/// +/// # Errors +/// If `name` is in `fields`, but is not a [`DmapField::Scalar`] of `expected_type`. pub fn check_scalar_opt( fields: &IndexMap, name: &str, - expected_type: Type, + expected_type: &Type, ) -> Result<()> { match fields.get(name) { - Some(DmapField::Scalar(data)) if data.get_type() == expected_type => Ok(()), + Some(DmapField::Scalar(data)) if data.get_type() == *expected_type => Ok(()), Some(DmapField::Scalar(data)) => Err(DmapError::InvalidScalar(format!( "{name} is of type {}, expected {}", data.get_type(), @@ -866,14 +902,19 @@ pub fn check_scalar_opt( } } -/// Verify that `name` exists in `fields` and is of the correct `Type`. +/// Verify that `name` exists in `fields` and is of the correct [`Type`]. +/// +/// # Errors +/// If `name` is not in `fields`. +/// +/// If `name` is in `fields`, but is not a [`DmapField::Vector`] of `expected_type`. pub fn check_vector( fields: &IndexMap, name: &str, - expected_type: Type, + expected_type: &Type, ) -> Result<()> { match fields.get(name) { - Some(DmapField::Vector(data)) if data.get_type() != expected_type => { + Some(DmapField::Vector(data)) if data.get_type() != *expected_type => { Err(DmapError::InvalidVector(format!( "{name} is of type {}, expected {}", data.get_type(), @@ -888,14 +929,17 @@ pub fn check_vector( } } -/// If `name` is in `fields`, verify that it is of the correct `Type`. 
+/// If `name` is in `fields`, verify that it is of the correct [`Type`]. +/// +/// # Errors +/// If `name` is in `fields`, but is not a [`DmapField::Vector`] of `expected_type`. pub fn check_vector_opt( fields: &IndexMap, name: &str, - expected_type: Type, + expected_type: &Type, ) -> Result<()> { match fields.get(name) { - Some(DmapField::Vector(data)) if data.get_type() != expected_type => { + Some(DmapField::Vector(data)) if data.get_type() != *expected_type => { Err(DmapError::InvalidVector(format!( "{name} is of type {}, expected {}", data.get_type(), @@ -915,8 +959,8 @@ pub fn check_vector_opt( /// 1. `name`: a null-terminated string /// 2. `type`: an i32 key, which maps to a data type (see [`Type`]) /// 3. `data`: the actual data as raw bytes. +#[inline] pub(crate) fn parse_scalar(cursor: &mut Cursor>) -> Result<(String, DmapField)> { - let _mode = 6; let (name, data_type) = parse_header(cursor)?; let data: DmapScalar = match data_type { Type::Char => DmapScalar::Char(read_data::(cursor)?), @@ -936,6 +980,7 @@ pub(crate) fn parse_scalar(cursor: &mut Cursor>) -> Result<(String, Dmap } /// Grabs the name and data type key from `cursor`. 
+#[inline] fn parse_header(cursor: &mut Cursor>) -> Result<(String, Type)> { let name = read_data::(cursor).map_err(|e| { DmapError::InvalidField(format!("Invalid name, byte {}: {e}", cursor.position())) @@ -963,7 +1008,6 @@ pub(crate) fn parse_vector( cursor: &mut Cursor>, record_size: i32, ) -> Result<(String, DmapField)> { - let _mode = 7; let start_position = cursor.position(); let (name, data_type) = parse_header(cursor)?; @@ -997,15 +1041,15 @@ pub(crate) fn parse_vector( cursor.position() - i32::size() as u64, ))); } - dimensions.push(dim as u32 as usize); + dimensions.push(usize::try_from(dim)?); total_elements *= dim; } dimensions = dimensions.into_iter().rev().collect(); // reverse the dimensions, stored in column-major order - if total_elements * data_type.size() as i32 > record_size { + if total_elements * i32::try_from(data_type.size())? > record_size { return Err(DmapError::InvalidVector(format!( "Vector `{name}` size starting at byte {} exceeds record size ({} > {record_size})", - cursor.position() - vector_dimension as u64 * i32::size() as u64, - total_elements * data_type.size() as i32, + cursor.position() - u64::try_from(vector_dimension)? * u64::try_from(i32::size())?, + total_elements * i32::try_from(data_type.size())?, ))); } @@ -1078,16 +1122,15 @@ pub(crate) fn parse_vector( total_elements, name ), - _ => { + Type::String => { return Err(DmapError::InvalidVector(format!( - "Invalid type {} for DMAP vector {}", - data_type, name + "Invalid type {data_type} for DMAP vector {name}" ))) } }; let num_bytes = cursor.position() - start_position; - if num_bytes > record_size as u64 { + if num_bytes > u64::try_from(record_size)? { return Err(DmapError::InvalidVector(format!( "Vector `{name}` occupies more bytes than record ({num_bytes} > {record_size})" ))); @@ -1106,8 +1149,9 @@ fn read_vector(cursor: &mut Cursor>, num_elements: i32) -> } /// Reads a singular value of type `T` starting from the `cursor` position. 
+#[inline] pub(crate) fn read_data(cursor: &mut Cursor>) -> Result { - let position = cursor.position() as usize; + let position = usize::try_from(cursor.position())?; let stream = cursor.get_mut(); if position > stream.len() { @@ -1472,42 +1516,42 @@ mod tests { use numpy::ndarray::array; let mut rec = IndexMap::::new(); - let res = check_scalar(&rec, "test", Type::Char); + let res = check_scalar(&rec, "test", &Type::Char); assert!(res.is_err()); - let res = check_scalar_opt(&rec, "test", Type::Char); + let res = check_scalar_opt(&rec, "test", &Type::Char); assert!(res.is_ok()); - let res = check_vector(&rec, "test", Type::Char); + let res = check_vector(&rec, "test", &Type::Char); assert!(res.is_err()); - let res = check_vector_opt(&rec, "test", Type::Char); + let res = check_vector_opt(&rec, "test", &Type::Char); assert!(res.is_ok()); let res = rec.insert("test".to_string(), DmapField::from(1i32)); assert!(res.is_none()); - let res = check_scalar(&rec, "test", Type::Int); + let res = check_scalar(&rec, "test", &Type::Int); assert!(res.is_ok()); - let res = check_scalar_opt(&rec, "test", Type::Char); + let res = check_scalar_opt(&rec, "test", &Type::Char); assert!(res.is_err()); - let res = check_scalar_opt(&rec, "test", Type::Int); + let res = check_scalar_opt(&rec, "test", &Type::Int); assert!(res.is_ok()); - let res = check_vector(&rec, "test", Type::Char); + let res = check_vector(&rec, "test", &Type::Char); assert!(res.is_err()); - let res = check_vector_opt(&rec, "test", Type::Char); + let res = check_vector_opt(&rec, "test", &Type::Char); assert!(res.is_err()); let test_vec = array![1.0f32, 2.0f32].into_dyn(); let res = rec.insert("test_vec".to_string(), test_vec.into()); assert!(res.is_none()); - let res = check_scalar(&rec, "test_vec", Type::Float); + let res = check_scalar(&rec, "test_vec", &Type::Float); assert!(res.is_err()); - let res = check_scalar_opt(&rec, "test_vec", Type::Float); + let res = check_scalar_opt(&rec, "test_vec", &Type::Float); 
assert!(res.is_err()); - let res = check_vector(&rec, "test_vec", Type::Float); + let res = check_vector(&rec, "test_vec", &Type::Float); assert!(res.is_ok()); - let res = check_vector(&rec, "test_vec", Type::Double); + let res = check_vector(&rec, "test_vec", &Type::Double); assert!(res.is_err()); - let res = check_vector_opt(&rec, "test_vec", Type::Float); + let res = check_vector_opt(&rec, "test_vec", &Type::Float); assert!(res.is_ok()); - let res = check_vector_opt(&rec, "test_vec", Type::Int); + let res = check_vector_opt(&rec, "test_vec", &Type::Int); assert!(res.is_err()); Ok(()) diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..411df63 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,283 @@ +""" +Integration tests for the Python API of darn-dmap. +""" + +import bz2 +import dmap +import numpy as np +import pytest +import os + + +# Path to this file +HERE = os.path.dirname(__file__) +FORMATS = ("iqdat", "rawacf", "fitacf", "grid", "map", "snd") +FILE_LENGTHS = (247688, 73528, 10780, 4612, 32668, 1659) + + +def compare_recs(data1, data2): + """Compare two `list[dict]`s, checking they are identical.""" + assert len(data1) == len(data2) + for rec1, rec2 in zip(data1, data2): + assert rec1.keys() == rec2.keys() + for k in rec1.keys(): + val1 = rec1[k] + val2 = rec2[k] + assert type(val1) is type(val2), k + if isinstance(val1, np.ndarray): + assert np.allclose(val1, val2) + elif isinstance(val1, float): + assert np.isclose(val1, val2) + else: + assert val1 == val2, k + return True + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_dmap(fmt): + data = dmap.read_dmap(f"{HERE}/test_files/test.{fmt}", mode="strict") + assert len(data) == 2 + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_dmap_lax(fmt): + data, bad_byte = dmap.read_dmap(f"{HERE}/test_files/test.{fmt}", mode="lax") + assert len(data) == 2 + assert bad_byte is None + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_dmap_bz2(fmt): + data = 
dmap.read_dmap(f"{HERE}/test_files/test.{fmt}.bz2", mode="strict") + assert len(data) == 2 + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_dmap_bz2_lax(fmt): + data, bad_byte = dmap.read_dmap(f"{HERE}/test_files/test.{fmt}.bz2", mode="lax") + assert len(data) == 2 + assert bad_byte is None + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_dmap_sniff(fmt): + data = dmap.read_dmap(f"{HERE}/test_files/test.{fmt}", mode="sniff") + assert isinstance(data, dict) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_sniff_against_specific(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + data = dmap.read_dmap(infile, mode="sniff") + data2 = getattr(dmap, f"read_{fmt}")(infile, mode="sniff") + assert compare_recs([data], [data2]) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_sniff_against_specific_strict(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict")[0] + data2 = getattr(dmap, f"read_{fmt}")(infile, mode="sniff") + assert compare_recs([data1], [data2]) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_file_vs_bytes_read(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + with open(infile, "rb") as f: + raw_bytes = f.read() + + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + data2 = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_reading_compressed_vs_not(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + data2 = getattr(dmap, f"read_{fmt}")(infile + ".bz2", mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_file_vs_bytes_read_bz2(fmt): + infile = f"{HERE}/test_files/test.{fmt}.bz2" + with open(infile, "rb") as f: + raw_bytes = f.read() + + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + data2 = getattr(dmap, f"read_{fmt}")(raw_bytes, 
mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt,bad_at", zip(FORMATS, FILE_LENGTHS)) +def test_corrupted(fmt, bad_at): + infile = f"{HERE}/test_files/test.{fmt}" + with open(infile, "rb") as f: + raw_bytes = f.read() + data1 = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + + corrupted_bytes = raw_bytes + b"this is not valid DMAP data" + with pytest.raises(ValueError): + _ = getattr(dmap, f"read_{fmt}")(corrupted_bytes, mode="strict") + data2, bad_byte = getattr(dmap, f"read_{fmt}")(corrupted_bytes, mode="lax") + assert bad_byte == bad_at + + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt,bad_at", zip(FORMATS, FILE_LENGTHS)) +def test_corrupted_bz2(fmt, bad_at): + infile = f"{HERE}/test_files/test.{fmt}" + with open(infile, "rb") as f: + raw_bytes = f.read() + data1 = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + + corrupted_bytes = bz2.compress(raw_bytes + b"this is not valid DMAP data") + with pytest.raises(ValueError): + _ = getattr(dmap, f"read_{fmt}")(corrupted_bytes, mode="strict") + data2, bad_byte = getattr(dmap, f"read_{fmt}")(corrupted_bytes, mode="lax") + assert bad_byte == bad_at + + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_roundtrip(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + raw_bytes = getattr(dmap, f"write_{fmt}")(data1) + data2 = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_roundtrip_bz2(fmt): + infile = f"{HERE}/test_files/test.{fmt}.bz2" + + data1 = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + raw_bytes = getattr(dmap, f"write_{fmt}")(data1) + data2 = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_roundtrip_dmap(fmt): + infile = 
f"{HERE}/test_files/test.{fmt}" + + data1 = dmap.read_dmap(infile, mode="strict") + raw_bytes = dmap.write_dmap(data1) + data2 = dmap.read_dmap(raw_bytes, mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_roundtrip_dmap_bz2(fmt): + infile = f"{HERE}/test_files/test.{fmt}.bz2" + + data1 = dmap.read_dmap(infile, mode="strict") + raw_bytes = dmap.write_dmap(data1) + data2 = dmap.read_dmap(raw_bytes, mode="strict") + assert compare_recs(data1, data2) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_extra_key_write(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + data[0]["test"] = 1.0 + with pytest.raises(ValueError): + _ = getattr(dmap, f"write_{fmt}")(data) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_missing_key_write(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + del data[0]["stid"] + with pytest.raises(ValueError): + _ = getattr(dmap, f"write_{fmt}")(data) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_key_wrong_type_write(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + if isinstance(data[0]["stid"], np.ndarray): + data[0]["stid"] = np.array(data[0]["stid"], dtype=np.float64) + else: + data[0]["stid"] = float(data[0]["stid"]) + with pytest.raises(ValueError): + _ = getattr(dmap, f"write_{fmt}")(data) + + +def test_extra_key_dmap(): + infile = f"{HERE}/test_files/test.rawacf" + + data = dmap.read_dmap(infile, mode="strict") + data[0]["test"] = 1.0 + _ = dmap.write_dmap(data) + + +def test_missing_key_dmap(): + infile = f"{HERE}/test_files/test.rawacf" + + data = dmap.read_dmap(infile, mode="strict") + del data[0]["stid"] + _ = dmap.write_dmap(data) + + +def test_key_wrong_type_dmap(): + infile = f"{HERE}/test_files/test.rawacf" + + data = dmap.read_dmap(infile, mode="strict") + if 
isinstance(data[0]["stid"], np.ndarray): + data[0]["stid"] = np.array(data[0]["stid"], dtype=np.float64) + else: + data[0]["stid"] = float(data[0]["stid"]) + _ = dmap.write_dmap(data) + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_extra_key_read(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = dmap.read_dmap(infile, mode="strict") + data[0]["test"] = 1.0 + raw_bytes = dmap.write_dmap(data) + + with pytest.raises(ValueError): + _ = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_missing_key_read(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + del data[0]["stid"] + raw_bytes = dmap.write_dmap(data) + + with pytest.raises(ValueError): + _ = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") + + +@pytest.mark.parametrize("fmt", FORMATS) +def test_key_wrong_type_read(fmt): + infile = f"{HERE}/test_files/test.{fmt}" + + data = getattr(dmap, f"read_{fmt}")(infile, mode="strict") + if isinstance(data[0]["stid"], np.ndarray): + data[0]["stid"] = np.array(data[0]["stid"], dtype=np.float64) + else: + data[0]["stid"] = float(data[0]["stid"]) + raw_bytes = dmap.write_dmap(data) + + with pytest.raises(ValueError): + _ = getattr(dmap, f"read_{fmt}")(raw_bytes, mode="strict") diff --git a/tests/tests.rs b/tests/tests.rs index 8266b66..ec3b857 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,12 +1,4 @@ -use dmap::formats::dmap::DmapRecord; -use dmap::formats::fitacf::FitacfRecord; -use dmap::formats::grid::GridRecord; -use dmap::formats::iqdat::IqdatRecord; -use dmap::formats::map::MapRecord; -use dmap::formats::rawacf::RawacfRecord; -use dmap::formats::snd::SndRecord; -use dmap::record::Record; -use dmap::{write_dmap, write_fitacf, write_grid, write_iqdat, write_map, write_rawacf, write_snd}; +use dmap::*; use itertools::izip; use paste::paste; use std::fs::{remove_file, File}; @@ -25,7 +17,7 @@ macro_rules! 
make_test { let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); - _ = [< write_ $record_type >](data.clone(), &tempfile).expect("Unable to write to file"); + _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile).expect("Unable to write to file"); let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { assert_eq!(read_rec, written_rec) @@ -43,7 +35,7 @@ macro_rules! make_test { let data = [< $record_type:camel Record >]::read_file(&filename).expect("Unable to read file"); - _ = [< write_ $record_type >](data.clone(), &tempfile).expect("Unable to write to file"); + _ = [< $record_type:camel Record >]::write_to_file(&data, &tempfile).expect("Unable to write to file"); let new_recs = [< $record_type:camel Record >]::read_file(&tempfile).expect("Cannot read tempfile"); for (ref read_rec, ref written_rec) in izip!(data.iter(), new_recs.iter()) { assert_eq!(read_rec, written_rec) @@ -83,7 +75,7 @@ macro_rules! make_test { tempfile.set_file_name(format!("tmp.{}.generic", stringify!($record_type))); let gen_data = DmapRecord::read_file(&filename).expect("Unable to read file"); - _ = write_dmap(gen_data.clone(), &tempfile).expect("Unable to write to file"); + _ = DmapRecord::write_to_file(&gen_data, &tempfile).expect("Unable to write to file"); let new_recs = DmapRecord::read_file(&tempfile).expect("Cannot read tempfile"); for (new_rec, ref_rec) in izip!(new_recs.iter(), gen_data.iter()) { assert_eq!(new_rec, ref_rec)