diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py
index a045a5b52..3e581bf30 100644
--- a/foqus_lib/framework/surrogate/keras_nn.py
+++ b/foqus_lib/framework/surrogate/keras_nn.py
@@ -52,6 +52,20 @@
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2,
+    PowerScaler,
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe,
+)
+
+# map_name_to_scaler (imported above) maps the human-readable name of each
+# scaling variant to an instance of the corresponding scaler class
 
 
 # custom class to define Keras NN layers
 @tf.keras.utils.register_keras_serializable()
@@ -293,6 +307,14 @@ def __init__(self, dat=None):
             desc="Name of output file for model, should have file extension: .keras",
             hint="Enter a custom file name if desired",
         )
+        # add a dropdown option for the scaling/normalization form
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=list(map_name_to_scaler.keys()),
+        )
 
     def run(self):
         """
@@ -316,6 +338,9 @@ def run(self):
         self.msgQueue.put(f"input data columns: {input_data.columns}")
         self.msgQueue.put(f"output data columns: {output_data.columns}")
 
+        # the scaling_function option selected by the user is read below and
+        # applied to both the input and output data
+
         # np.random.seed(46)
         # rn.seed(1342)
         # tf.random.set_seed(62)
@@ -341,22 +366,13 @@ def run(self):
 
         xdata = input_data
         zdata = output_data
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2"] - see the documentation for
-        # details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        self.msgQueue.put(f"using scaling function: {scaling_func_option}")
 
         # method to create model
         def create_model():
@@ -370,7 +386,7 @@ def create_model():
                 input_bounds=xdata_bounds,
                 output_bounds=zdata_bounds,
                 normalized=True,
-                normalization_form="Linear",
+                normalization_form=scaling_func_option,
             )
             outputs = layers(inputs)  # use network as function outputs = f(inputs)
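A minimal sketch of what the new option wiring does end to end, using only the `scaling.py` API added in this PR (the DataFrame values below are made up for illustration):

```python
import pandas as pd

from foqus_lib.framework.surrogate.scaling import map_name_to_scaler, scale_dataframe

# stand-in for the input DataFrame a FOQUS session would provide
xdata = pd.DataFrame({"T": [300.0, 350.0, 400.0], "P": [1.0, 2.0, 5.0]})

# any key works: "Linear", "Log", "Log2", "Power", "Power2"
scaler = map_name_to_scaler["Log"]
scaled, bounds = scale_dataframe(xdata, scaler)

print(bounds)  # {'T': (300.0, 400.0), 'P': (1.0, 5.0)} -- per-column (lo, hi)
print(scaled)  # each column mapped into [0, 1]
```

`scale_dataframe` refits the chosen scaler column by column, so each column is normalized independently and its original `(lo, hi)` pair is recorded for the surrogate's bounds.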
diff --git a/foqus_lib/framework/surrogate/pytorch_nn.py b/foqus_lib/framework/surrogate/pytorch_nn.py
index f1fb2f15f..1e7ae1fd0 100644
--- a/foqus_lib/framework/surrogate/pytorch_nn.py
+++ b/foqus_lib/framework/surrogate/pytorch_nn.py
@@ -50,6 +50,16 @@
 # from foqus_lib.framework.graph.graph import Graph
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2,
+    PowerScaler,
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe,
+)
 
 # custom class to define Keras NN layers
 np.random.seed(46)
@@ -284,6 +294,13 @@ def __init__(self, dat=None):
             desc="Name of output file for model, should have file extension: .pt",
             hint="Enter a custom file name if desired",
         )
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=list(map_name_to_scaler.keys()),
+        )
 
     def run(self):
         """
@@ -326,22 +343,14 @@ def run(self):
         zlabels = list(output_data.columns)
         xdata = input_data
         zdata = output_data
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form, pass as custom string and parse with SymPy
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
-        # documentation for details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        self.msgQueue.put(f"using scaling function: {scaling_func_option}")
 
         model_data = np.concatenate(
             (xdata, zdata), axis=1
@@ -353,8 +362,11 @@ def run(self):
 
         # raise exception here after BPC position
         # create model
-        x_train = torch.from_numpy(xdata).float().to(device)
-        z_train = torch.from_numpy(zdata).float().to(device)
+
+        # convert the scaled DataFrames to numpy arrays first; torch.from_numpy
+        # raises TypeError: expected np.ndarray (got DataFrame) otherwise
+        x_train = torch.from_numpy(xdata.to_numpy()).float().to(device)
+        z_train = torch.from_numpy(zdata.to_numpy()).float().to(device)
 
         # print type at this point
         # can also print inside create_model
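The `.to_numpy()` conversion above can be reproduced outside FOQUS; a small standalone sketch with illustrative data and the documented pandas/torch APIs:

```python
import pandas as pd
import torch

# stand-in for the scaled DataFrame returned by scale_dataframe
df = pd.DataFrame({"a": [0.0, 0.5, 1.0]})

# torch.from_numpy(df) would raise TypeError: expected np.ndarray (got DataFrame),
# so convert to a numpy array first, exactly as the patched code does
x_train = torch.from_numpy(df.to_numpy()).float()
print(x_train.dtype, x_train.shape)  # torch.float32 torch.Size([3, 1])
```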
ValueError("Array must contain non-identical values") + if not check_under_or_overflow(array_in): + raise ValueError("Array contains under/overflow values for dtype") + + +def check_under_or_overflow(arr): + if np.issubdtype(arr.dtype, np.integer): + info = np.iinfo(arr.dtype) + elif np.issubdtype(arr.dtype, np.floating): + info = np.finfo(arr.dtype) + else: + raise ValueError("Unsupported data type") + max_value = info.max + min_value = info.min + return np.all(arr < max_value) & np.all(arr > min_value) + + +def scale_linear(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + if (hi - lo) == 0: + result = 0 + else: + result = (array_in - lo) / (hi - lo) + return result + + +def scale_log(array_in, lo=None, hi=None): + # need to account for log domain + epsilon = 1e-8 + if np.any(array_in < epsilon): + raise ValueError(f"All values must be greater than {epsilon}") + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = (np.log10(array_in) - np.log10(lo)) / (np.log10(hi) - np.log10(lo)) + return result + + +def scale_log2(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = np.log10(9 * (array_in - lo) / (hi - lo) + 1) + return result + + +def scale_power(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = (np.power(10, array_in) - np.power(10, lo)) / ( + np.power(10, hi) - np.power(10, lo) + ) + return result + + +def scale_power2(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = 1 / 9 * (np.power(10, (array_in - lo) / (hi - lo)) - 1) + return result + + +def unscale_linear(array_in, lo, hi): + result = array_in * (hi - lo) / 1.0 + lo + return result + + +def unscale_log(array_in, lo, hi): + result = lo * np.power(hi / lo, array_in) + return result + + +def unscale_log2(array_in, lo=None, hi=None): + result = (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo + return result + + +def unscale_power(array_in, lo, hi): + result = np.log10( + (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + np.power(10, lo) + ) + return result + + +def unscale_power2(array_in, lo, hi): + result = np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo + return result + + +class BaseScaler: + """BaseScaler is the base class for the scaler classes defined + below. 
diff --git a/foqus_lib/framework/surrogate/scikit_nn.py b/foqus_lib/framework/surrogate/scikit_nn.py
index 85736ad3f..46f7c08ab 100644
--- a/foqus_lib/framework/surrogate/scikit_nn.py
+++ b/foqus_lib/framework/surrogate/scikit_nn.py
@@ -52,6 +52,17 @@
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2,
+    PowerScaler,
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe,
+)
+
 
 def validate_training_data(xdata: np.ndarray, zdata: np.ndarray):
     number_columns_in_xdata = xdata.shape[1]
@@ -250,6 +261,14 @@ def __init__(self, dat=None):
             hint="Enter a custom file name if desired",
         )
 
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=list(map_name_to_scaler.keys()),
+        )
+
     def run(self):
         """
@@ -300,22 +319,13 @@ def run(self):
 
         xdata = input_data
         zdata = output_data
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form, pass as custom string and parse with SymPy
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
-        # documentation for details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        self.msgQueue.put(f"using scaling function: {scaling_func_option}")
 
         model_data = np.concatenate(
             (xdata, zdata), axis=1
diff --git a/foqus_lib/framework/surrogate/tests/__init__.py b/foqus_lib/framework/surrogate/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/foqus_lib/framework/surrogate/tests/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py
new file mode 100644
index 000000000..10ffa6922
--- /dev/null
+++ b/foqus_lib/framework/surrogate/tests/test_scaling.py
@@ -0,0 +1,248 @@
+import numpy as np
+import pytest
+from foqus_lib.framework.surrogate.scaling import (
+    scale_linear,
+    unscale_linear,
+    scale_log,
+    unscale_log,
+    scale_log2,
+    unscale_log2,
+    scale_power,
+    unscale_power,
+    scale_power2,
+    unscale_power2,
+    validate_for_scaling,
+    map_name_to_scaler,
+)
+
+from hypothesis.extra import numpy as hypothesis_np
+from hypothesis import given
+from contextlib import nullcontext as does_not_raise
+
+POSITIVE_VALS_ONLY = {scale_log}
+
+
+def test_scale_linear():
+    # Test case 1: Basic scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_linear(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0])
+
+    # Test case 2: Custom range scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_linear(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0])
+
+    # Test case 3: Scaling with negative values
+    input_array = np.array([-5, 0, 5])
+    scaled_array = scale_linear(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.5, 1.0])
+
+
+def test_unscale_linear():
+    # Test case 1: Basic unscaling
+    input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    # Test case 2: Custom range unscaling
+    input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+    # Test case 3: Unscaling with negative values
+    input_array = np.array([0.0, 0.5, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=-5, hi=5)
+    assert np.allclose(unscaled_array, [-5, 0, 5])
+
+    # Test case 4: Unscaling with repeated values
+    input_array = np.array([0.0, 0.0, 0.0, 0.0])
+    unscaled_array = unscale_linear(input_array, lo=0, hi=5)
+    assert np.allclose(unscaled_array, [0, 0, 0, 0])
+
+
+def test_scale_log():
+    # Test case 1: Basic log scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_log(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+
+    # Test case 2: Custom range log scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_log(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+
+
+def test_scale_log2():
+    # Test case 1: Basic log2 scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_log2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+
+    # Test case 2: Custom range log2 scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_log2(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+
+
+def test_scale_power():
+    # Test case 1: Basic power scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_power(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(
+        scaled_array,
+        [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00],
+    )
+
+    # Test case 2: Custom range power scaling
+    input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0])
+    scaled_array = scale_power(input_array)
+    print(scaled_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.50113735, 0.63092044, 0.99769983, 1.0])
+
+
+def test_scale_power2():
+    # Test case 1: Basic power scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_power2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.08647549, 0.24025307, 0.51371258, 1.0])
+
+    # Test case 2: Custom range power scaling
+    input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0])
+    scaled_array = scale_power2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.82377238, 0.87916771, 0.99936058, 1.0])
+
+
+def test_unscale_log():
+    input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+    unscaled_array = unscale_log(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+    unscaled_array = unscale_log(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+
+def test_unscale_log2():
+    input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+    unscaled_array = unscale_log2(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+    unscaled_array = unscale_log2(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+
+def test_unscale_power():
+    input_array = np.array(
+        [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00]
+    )
+    unscaled_array = unscale_power(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.50113735, 0.63092044, 0.99769983, 1.0])
+    unscaled_array = unscale_power(input_array, lo=1.0, hi=5.0)
+    assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0])
+
+
+def test_unscale_power2():
+    input_array = np.array([0.0, 0.08647549, 0.24025307, 0.51371258, 1.0])
+    unscaled_array = unscale_power2(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.82377238, 0.87916771, 0.99936058, 1.0])
+    unscaled_array = unscale_power2(input_array, lo=1.0, hi=5.0)
+    assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0])
+
+
+@given(
+    x=hypothesis_np.arrays(
+        np.float64,
+        hypothesis_np.array_shapes(),
+        # TODO: see if these bounds can be relaxed
+        # larger values cause failures in scale_power
+        elements={"min_value": -5, "max_value": 5},
+    )
+)
+@pytest.mark.parametrize(
+    "scale,unscale",
+    [
+        (scale_linear, unscale_linear),
+        (scale_log, unscale_log),
+        (scale_log2, unscale_log2),
+        (scale_power, unscale_power),
+        (scale_power2, unscale_power2),
+    ],
+)
+def test_roundtrip(x, scale, unscale):
+    lo = np.min(x)
+    hi = np.max(x)
+    if not passes_validation(x, lo, hi):
+        expected_failure = pytest.raises(ValueError)
+    elif lo < 1e-08 and scale in POSITIVE_VALS_ONLY:
+        expected_failure = pytest.raises(
+            ValueError, match="All values must be greater than 1e-08"
+        )
+    else:
+        expected_failure = does_not_raise()
+    with expected_failure:
+        scaled = scale(x, lo=lo, hi=hi)
+        unscaled = unscale(scaled, lo=lo, hi=hi)
+        assert np.allclose(x, unscaled)
+
+
+@pytest.mark.parametrize(
+    "variant",
+    [
+        "Linear",
+        "Log",
+        "Log2",
+        "Power",
+        "Power2",
+    ],
+)
+def test_use_scaler_objects(variant):
+    input_array = np.array([1, 3, 5, 6, 8, 9, 10])
+    scaler_instance = map_name_to_scaler[variant]
+
+    result_arr = scaler_instance.fit_transform(input_array)
+
+    print(result_arr)
+    assert np.all(result_arr >= 0)
+    assert np.all(result_arr <= 1)
+
+
+def passes_validation(array_in, lo, hi):
+    try:
+        validate_for_scaling(array_in, lo, hi)
+    except Exception:
+        return False
+    else:
+        return True
+
+
+# Run the tests
+if __name__ == "__main__":
+    pytest.main()
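One possible reading of the TODO in `test_roundtrip` (my illustration, not part of the PR): `scale_power` evaluates `10**x`, which leaves float64 range quickly, so the roundtrip property only holds on a narrow band of inputs:

```python
import numpy as np

from foqus_lib.framework.surrogate.scaling import scale_power

# 10**x underflows to 0.0 for x below roughly -324, so both the numerator
# and the denominator of scale_power become zero and the result is nan
with np.errstate(invalid="ignore"):
    print(scale_power(np.array([-400.0, -390.0])))  # [nan nan]
```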
diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index f83feb640..4a3188d99 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -17,6 +17,7 @@
 from foqus_lib.gui.main.mainWindow import mainWindow
 from foqus_lib.gui.surrogate.surrogateFrame import surrogateFrame
+from PyQt5.QtWidgets import QComboBox
 
 pytestmark = pytest.mark.gui
@@ -56,6 +57,16 @@ class TestFrame:
             # "ACOSSO",
         ],
     )
+    @pytest.mark.parametrize(
+        "scaling_variant",
+        [
+            '"Linear"',
+            '"Log"',
+            '"Log2"',
+            '"Power"',
+            '"Power2"',
+        ],
+    )
     def test_run_surrogate(
         self,
         qtbot,
@@ -63,6 +74,7 @@ def test_run_surrogate(
         main_window: mainWindow,
         name: str,
         required_import: str,
+        scaling_variant: str,
     ):
         qtbot.focused = frame
         pytest.importorskip(required_import, reason=f"{required_import} not available")
@@ -81,6 +93,10 @@ def test_run_surrogate(
             qtbot.click(button="Select All")
         with qtbot.focusing_on(group_box="Output Variables"):
             qtbot.click(button="Select All")
+        qtbot.select_tab("Method Settings")
+        with qtbot.focusing_on(table=any):
+            qtbot.select_row("scaling_function")
+            qtbot.using(column="Value").set_option(scaling_variant)
         qtbot.select_tab("Execution")
         run_button, stop_button = qtbot.locate(button=any, index=[0, 1])
         run_button.click()
diff --git a/pytest_qt_extras.py b/pytest_qt_extras.py
index caf758bf2..4115c1f00 100644
--- a/pytest_qt_extras.py
+++ b/pytest_qt_extras.py
@@ -589,6 +589,10 @@ def run(cls, table, hint: TableRowSpec):
             raise InvalidMatchError(
                 f"row index {hint} out of range: (count: {count})"
             )
+        elif isinstance(hint, str):
+            matching_items = table.findItems(hint, QtCore.Qt.MatchExactly)
+            InvalidMatchError.check(matching_items, expected=1)
+            idx = int(matching_items[0].row())
         elif hint is None:
             if count == 1:
                 idx = 0
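The new string branch relies on `QTableWidget.findItems` with an exact-match flag; a simplified sketch of the lookup it performs, on a bare QTableWidget outside the qtbot wrapper:

```python
from PyQt5 import QtCore
from PyQt5.QtWidgets import QApplication, QTableWidget, QTableWidgetItem

app = QApplication([])

# stand-in for the Method Settings table; first column holds the option names
table = QTableWidget(2, 1)
table.setItem(0, 0, QTableWidgetItem("output_file"))
table.setItem(1, 0, QTableWidgetItem("scaling_function"))

# the row hint "scaling_function" must resolve to exactly one item,
# whose row index is then selected
matches = table.findItems("scaling_function", QtCore.Qt.MatchExactly)
assert len(matches) == 1
print(matches[0].row())  # 1
```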