42 changes: 32 additions & 10 deletions refl1d/probe/data_loaders/load4.py
@@ -87,26 +87,46 @@ def get_key(orso_name, refl1d_name, refl1d_resolution_name):
(i for i, c in enumerate(columns) if getattr(c, "physical_quantity", None) == orso_name),
None,
)
resolution_index = None
Member:

get_key() does not need to be defined in the for loop. It picks up the enclosing scope regardless of where it is defined, so long as it is defined before it is called.

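As a side note on that scoping point, here is a minimal sketch (hypothetical names, not from load4) showing that a helper defined before a loop still resolves the enclosing function's locals at call time, so hoisting get_key() out of the for loop would not change behavior:

```python
def parse(entries):
    results = []

    def record(label):
        # `current` and `results` are resolved in the enclosing scope at call
        # time, not at definition time, so defining `record` before the loop
        # is fine even though `current` does not exist yet.
        results.append((label, current))

    for current in entries:
        record("value")
    return results

print(parse([1, 2, 3]))  # [('value', 1), ('value', 2), ('value', 3)]
```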
if column_index is not None:
# NOTE: this is based on column being second index (under debate in ORSO)
header_out[refl1d_name] = data[:, column_index]
cname = columns[column_index].name
- resolution_index = next(
-     (i for i, c in enumerate(columns) if getattr(c, "error_of", None) == cname),
-     None,
+ resolution_index, resolution_column = next(
+     ((i, c) for i, c in enumerate(columns) if getattr(c, "error_of", None) == cname),
+     (None, None),
)
if resolution_index is not None:
header_out[refl1d_resolution_name] = data[:, resolution_index]
- else:
-     v = getattr(settings, orso_name, None)
-     if hasattr(v, "magnitude"):
-         header_out[refl1d_name] = v.magnitude
-     if hasattr(v, "error"):
-         header_out[refl1d_resolution_name] = v.error.error_value
+     if resolution_column.value_is == "FWHM":
+         header_out[refl1d_resolution_name] = FWHM2sigma(header_out[refl1d_resolution_name])
+
+ # Fall back to instrument_settings if no column found
+ v = getattr(settings, orso_name, None)
+ if hasattr(v, "magnitude") and column_index is None:
+     # only set if not already set from column
+     header_out[refl1d_name] = v.magnitude
+ if hasattr(v, "error") and resolution_index is None:
+     header_out[refl1d_resolution_name] = v.error.error_value
+     if v.error.value_is == "FWHM":
+         header_out[refl1d_resolution_name] = float(FWHM2sigma(header_out[refl1d_resolution_name]))

get_key("incident_angle", "angle", "angular_resolution")
get_key("wavelength", "wavelength", "wavelength_resolution")

# convert error columns from FWHM to sigma if they are present
# and need conversion
if len(columns) >= 3:
# third column is always uncertainty on reflectivity
dR_col = columns[2]
if dR_col.value_is == "FWHM":
data[:, 2] = FWHM2sigma(data[:, 2])
Member:
This modifies the entry in place as returned by the orso/nexus reader. That's probably okay, but since you are copying the data anyway on line 130, you may as well move the copy to line 62 so that you are operating on your own copy of the data. You might also force the data type to be float64 when you create the copy.

I'm a little concerned that ΔR is potentially being scaled by FWHM2sigma. Is there any risk that the ΔR column is mislabeled as FWHM, e.g., because it is defaulting to FWHM when the value isn't specified? This would be happening in the orso/nexus readers.

Member Author:
Yes, there's a risk that the dR is not properly tagged as sigma or FWHM in the ORSO file. I don't really know how to mitigate that, since each "error" column or ErrorValue in the ORSO format independently describes whether it is sigma or FWHM. With that level of complexity, a single FWHM boolean flag like what we have in load4 is not helpful.

At least, in the implementation here, if the ORSO file is correctly described, it will correctly load, even if sigma and FWHM types are both present.

Member Author:
I guess we could separate the parse step from the step that fills in the Probe classes, and allow users to edit parts of the ORSO classes before loading them...

Member:
If the column is mislabeled in the stored file, that's on the facility. The user can correct this manually after loading the file. My concern was whether the orso or nexus loaders default the ΔR column to FWHM if it is not otherwise labelled in the file.

Member Author:
No, the value_is attribute does not default to "FWHM"; it actually defaults to None, which I guess we're interpreting to mean sigma here. The ORSO code indicates that not specifying FWHM implies that the value is sigma.

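For reference, the conversion under discussion is the Gaussian relation FWHM = 2*sqrt(2 ln 2)*sigma ≈ 2.3548*sigma. Below is a minimal sketch of the "None means sigma" reading described above; to_sigma is a hypothetical helper for illustration, not part of load4, and only the ORSO value_is attribute is assumed:

```python
import numpy as np

FWHM_FACTOR = 2.0 * np.sqrt(2.0 * np.log(2.0))  # ~2.3548 for a Gaussian

def to_sigma(values, value_is):
    """Normalize an error column to 1-sigma.

    A value_is of None is treated the same as "sigma", matching the
    interpretation above; only an explicit "FWHM" triggers a conversion.
    """
    values = np.asarray(values, dtype=np.float64)
    return values / FWHM_FACTOR if value_is == "FWHM" else values

print(to_sigma([2.3548], "FWHM"))  # ~[1.0]
print(to_sigma([1.0], None))       # [1.0], left unchanged
```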
if len(columns) >= 4:
# fourth column is always uncertainty on Q
dQ_col = columns[3]
if dQ_col.value_is == "FWHM":
data[:, 3] = FWHM2sigma(data[:, 3])

entries_out.append((header_out, np.array(data).T))
return entries_out

@@ -232,6 +252,8 @@ def load4(

if filename.endswith(".ort") or filename.endswith(".orb"):
entries = parse_orso(filename)
FWHM = False # ORSO files specify sigma vs. FWHM and will be converted to sigma
# TODO: ORSO also specifies resolution type (normal vs. uniform vs. triangular etc.)
else:
json_header_encoding = True # for .refl files, header values are json-encoded
entries = parse_multi(filename, keysep=keysep, sep=sep, comment=comment)
@@ -345,7 +367,7 @@ def fetch_key(key, override):
return override
elif key in header:
v = decoder(header[key])
- return np.array(v)[index] if isinstance(v, list) else v
+ return np.array(v)[index] if hasattr(v, "__getitem__") else v
else:
return None

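The isinstance(v, list) to hasattr(v, "__getitem__") change above means per-point header values stored as numpy arrays, and not just Python lists, are also sliced by the data-range index. A small self-contained illustration (the values and index here are made up):

```python
import numpy as np

index = slice(1, 3)  # stand-in for the data_range-derived index

for v in ([0.1, 0.2, 0.3, 0.4], np.array([0.1, 0.2, 0.3, 0.4]), 0.1):
    # isinstance(v, list) is True only for the Python list, while
    # hasattr(v, "__getitem__") is True for the list and the ndarray.
    picked = np.array(v)[index] if hasattr(v, "__getitem__") else v
    print(type(v).__name__, isinstance(v, list), picked)
```

(Strings also define __getitem__, so a string-valued header entry would take the indexing branch under this test.)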
108 changes: 106 additions & 2 deletions tests/refl1d/probe/data_loaders/test_load4.py
@@ -3,14 +3,18 @@

# third-party imports
import numpy as np
- from orsopy.fileio.orso import load_orso, save_nexus
+ from orsopy.fileio.orso import load_orso, save_nexus, Orso, OrsoDataset, Column, ErrorColumn
+ from orsopy.fileio.base import ErrorValue, Value
+ from orsopy.fileio.data_source import InstrumentSettings
import pytest

# refl1d imports
- from refl1d.probe.data_loaders.load4 import parse_orso
+ from refl1d import probe
+ from refl1d.probe.data_loaders.load4 import parse_orso, load4, sigma2FWHM, FWHM2sigma


OrsoFiles = namedtuple("OrsoFiles", ["ort", "orb"])
ArrayTOrsoFiles = namedtuple("ArrayTOrsoFiles", ["scalar_dT", "array_dT"])


@pytest.fixture
@@ -98,6 +102,67 @@ def orsofiles(tmp_path):
yield OrsoFiles(ort=ort_file_path, orb=orb_file_path)


@pytest.fixture
def theta_column_orso_files(tmp_path):
# Generate files with different T and dT formats

orso_obj = Orso.empty()
orso_obj.columns = [
Column(name="Qz", unit="1/angstrom", physical_quantity="normal momentum transfer"),
Column(name="R", unit="", physical_quantity="specular reflectivity"),
ErrorColumn(error_of="R", error_type="uncertainty", value_is="sigma"),
ErrorColumn(error_of="Qz", error_type="resolution", value_is="sigma"),
Column(name="theta", unit="degrees", physical_quantity="incident_angle"),
# ErrorColumn(error_of="theta", error_type="resolution", value_is="sigma"),
]
# Create test data
Qz = np.array([0.01, 0.02, 0.03, 0.04])
R = np.array([1.0, 0.8, 0.6, 0.4])
dR = np.array([0.1, 0.08, 0.06, 0.04])
dQ = np.array([0.001, 0.002, 0.003, 0.004])
T = np.array([0.005, 0.010, 0.015, 0.020]) # Array of angles
dT_array = np.array([0.001, 0.002, 0.003, 0.004]) # Array of angular resolutions
dT_scalar = 0.001 # Scalar angular resolution, FWHM, in degrees
dLoL_target = 1e-4 # expected output of dQdT2dLoL
T_rad = np.radians(T)

# Set up dQ for scalar dT
dT_scalar_rad = np.radians(dT_scalar)
dQoQ_scalar = np.sqrt(dT_scalar_rad**2 / np.tan(T_rad) ** 2 + dLoL_target**2)
dQ_scalar_FWHM = dQoQ_scalar * Qz
dQ_scalar_sigma = FWHM2sigma(dQ_scalar_FWHM)

# Set up dQ for array dT
dQoQ_array = np.sqrt((np.radians(dT_array) / np.tan(T_rad)) ** 2 + dLoL_target**2)
dQ_array_FWHM = dQoQ_array * Qz
dQ_array_sigma = FWHM2sigma(dQ_array_FWHM)

dT_errorvalue = ErrorValue(error_value=dT_scalar, value_is="FWHM", error_type="resolution", distribution="gaussian")
dL_errorvalue = ErrorValue(error_value=None, value_is="sigma", error_type="resolution", distribution="gaussian")
instrument_settings = InstrumentSettings(
incident_angle=Value(magnitude=float(np.mean(T)), error=dT_errorvalue, unit="deg"),
wavelength=Value(magnitude=None, error=dL_errorvalue, unit="angstrom"),
)
orso_obj.data_source.measurement.instrument_settings = instrument_settings

# create dataset with scalar dT
orso_dataset = OrsoDataset(orso_obj, np.vstack((Qz, R, dR, dQ_scalar_sigma, T)).T)
scalar_dT_filename = "test_t_array_dt_scalar.ort"
orso_dataset.save(tmp_path / scalar_dT_filename)

# add dT as column
orso_obj.columns.append(ErrorColumn(error_of="theta", error_type="resolution", value_is="FWHM"))
# create new dataset with dT as array
orso_dataset = OrsoDataset(orso_obj, np.vstack((Qz, R, dR, dQ_array_sigma, T, dT_array)).T)
array_dT_filename = "test_t_array_dt_array.ort"
orso_dataset.save(tmp_path / array_dT_filename)

yield ArrayTOrsoFiles(
scalar_dT=str(tmp_path / scalar_dT_filename),
array_dT=str(tmp_path / array_dT_filename),
)

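The dQ columns built in the fixture above follow from propagating Q = 4π sin(θ)/λ to first order, giving ΔQ/Q = sqrt((Δθ/tan θ)² + (Δλ/λ)²); this appears to be the relation that the dQdT2dLoL step mentioned in the fixture comment inverts to recover Δλ/λ = 1e-4. A quick standalone check of the scalar-dT numbers, assuming independent Gaussian contributions (dQoQ is a throwaway helper, not a refl1d function):

```python
import numpy as np

def dQoQ(T_deg, dT_deg, dLoL):
    # Relative Q resolution from angular and wavelength resolution for
    # Q = 4*pi*sin(T)/L, adding the two contributions in quadrature.
    T, dT = np.radians(T_deg), np.radians(dT_deg)
    return np.sqrt((dT / np.tan(T)) ** 2 + dLoL**2)

# Same numbers as the scalar-dT dataset: theta and dT in degrees (dT is FWHM).
T = np.array([0.005, 0.010, 0.015, 0.020])
Qz = np.array([0.01, 0.02, 0.03, 0.04])
dQ_FWHM = dQoQ(T, 0.001, 1e-4) * Qz
print(dQ_FWHM)  # matches the fixture's dQ_scalar_FWHM before the FWHM -> sigma step
```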

def test_parse_orso(orsofiles):
ortset = parse_orso(orsofiles.ort)

@@ -129,3 +194,42 @@ def test_parse_orso(orsofiles):
header_other, data_other = orbset[0]
assert header == header_other
assert np.allclose(data, data_other)


def test_T_dT_data_range(theta_column_orso_files):
"""Test that T and dT are correctly read from Orso files with different formats."""
# Load the probe from the .ort file with dT as scalar
scalar_dT_file = theta_column_orso_files.scalar_dT
probe_scalar_dT = load4(scalar_dT_file, radiation="neutron")
assert np.allclose(probe_scalar_dT.T, [0.005, 0.010, 0.015, 0.020])
assert np.allclose(probe_scalar_dT.dL / probe_scalar_dT.L, 1e-4) # dL/L should match target
assert np.allclose(probe_scalar_dT.dT, [0.001, 0.001, 0.001, 0.001]) # dT should be broadcasted
expected_L = 4 * np.pi * np.sin(np.radians(probe_scalar_dT.T)) / probe_scalar_dT.Q
assert np.allclose(probe_scalar_dT.L, expected_L)
assert len(probe_scalar_dT.dL) == len(probe_scalar_dT.Q)

# Load with data_range
probe_scalar_dT_range = load4(scalar_dT_file, radiation="neutron", data_range=(1, -1))
assert np.allclose(probe_scalar_dT_range.T, [0.010, 0.015])
assert np.allclose(probe_scalar_dT_range.dT, [0.001, 0.001]) # dT should be broadcasted
expected_L_scalar_range = 4 * np.pi * np.sin(np.radians(probe_scalar_dT_range.T)) / probe_scalar_dT_range.Q
assert np.allclose(probe_scalar_dT_range.L, expected_L_scalar_range)
assert len(probe_scalar_dT_range.dL) == len(probe_scalar_dT_range.Q)

# Load the probe from the .ort file with dT as array
array_dT_file = theta_column_orso_files.array_dT
probe_array_dT = load4(array_dT_file, radiation="neutron")
assert np.allclose(probe_array_dT.T, [0.005, 0.010, 0.015, 0.020])
assert np.allclose(probe_array_dT.dT, [0.001, 0.002, 0.003, 0.004]) # dT should match input array
assert np.allclose(probe_array_dT.dL / probe_array_dT.L, 1e-4) # dL/L should match target
expected_L_array = 4 * np.pi * np.sin(np.radians(probe_array_dT.T)) / probe_array_dT.Q
assert np.allclose(probe_array_dT.L, expected_L_array)
assert len(probe_array_dT.dL) == len(probe_array_dT.Q)

# Load with data range
probe_array_dT_range = load4(array_dT_file, radiation="neutron", data_range=(1, -1))
assert np.allclose(probe_array_dT_range.T, [0.010, 0.015])
assert np.allclose(probe_array_dT_range.dT, [0.002, 0.003]) # dT should match input array
expected_L_array_range = 4 * np.pi * np.sin(np.radians(probe_array_dT_range.T)) / probe_array_dT_range.Q
assert np.allclose(probe_array_dT_range.L, expected_L_array_range)  # L recomputed from T and Q
assert len(probe_array_dT_range.dL) == len(probe_array_dT_range.Q)