From bcf75931fdbf801a4ee8f1d58c1c76484b82f011 Mon Sep 17 00:00:00 2001 From: franflame Date: Fri, 1 Mar 2024 14:24:54 -0800 Subject: [PATCH 01/12] Add first implementation of scaling functions --- .../surrogate/scale_test/test_scaling.py | 232 ++++++++++++++++++ foqus_lib/framework/surrogate/scaling.py | 187 ++++++++++++++ 2 files changed, 419 insertions(+) create mode 100644 foqus_lib/framework/surrogate/scale_test/test_scaling.py create mode 100644 foqus_lib/framework/surrogate/scaling.py diff --git a/foqus_lib/framework/surrogate/scale_test/test_scaling.py b/foqus_lib/framework/surrogate/scale_test/test_scaling.py new file mode 100644 index 000000000..09a4f0596 --- /dev/null +++ b/foqus_lib/framework/surrogate/scale_test/test_scaling.py @@ -0,0 +1,232 @@ +import numpy as np +import pytest +from foqus_lib.framework.surrogate.scaling import ( + scale_linear, + unscale_linear, + scale_log, + unscale_log, + scale_log2, + unscale_log2, + scale_power, + unscale_power, + scale_power2, + unscale_power2, + validate_for_scaling, +) + +from hypothesis.extra.numpy import arrays as arrays_strat, array_shapes +from hypothesis import given, example, assume +from contextlib import contextmanager + +POSITIVE_VALS_ONLY = {scale_log} + + +@contextmanager +def does_not_raise(): + yield + + +def test_scale_linear(): + # Test case 1: Basic scaling + input_array = np.array([1, 2, 3, 4, 5]) + scaled_array = scale_linear(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0]) + + # Test case 2: Custom range scaling + input_array = np.array([10, 20, 30, 40, 50]) + scaled_array = scale_linear(input_array, lo=10, hi=50) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0]) + + # Test case 3: Scaling with negative values + input_array = np.array([-5, 0, 5]) + scaled_array = scale_linear(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.5, 1.0]) + + # Test case 4: Scaling with repeated values + input_array = np.array([2, 2, 2, 2]) + scaled_array = scale_linear(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.0, 0.0, 0.0]) + + +def test_unscale_linear(): + # Test case 1: Basic unscaling + input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) + unscaled_array = unscale_linear(input_array, lo=1, hi=5) + assert np.allclose(unscaled_array, [1, 2, 3, 4, 5]) + + # Test case 2: Custom range unscaling + input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) + unscaled_array = unscale_linear(input_array, lo=10, hi=50) + assert np.allclose(unscaled_array, [10, 20, 30, 40, 50]) + + # Test case 3: Unscaling with negative values + input_array = np.array([0.0, 0.5, 1.0]) + unscaled_array = unscale_linear(input_array, lo=-5, hi=5) + assert np.allclose(unscaled_array, [-5, 0, 5]) + + # Test case 4: Unscaling with repeated values + input_array = np.array([0.0, 0.0, 0.0, 0.0]) + unscaled_array = unscale_linear(input_array, lo=0, hi=5) + assert np.allclose(unscaled_array, [0, 0, 0, 0]) + + +def test_scale_log(): + # Test case 1: Basic log scaling + input_array = np.array([1, 2, 3, 4, 5]) + scaled_array = scale_log(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0]) + + # Test case 2: Custom range log scaling + input_array = np.array([10, 20, 30, 40, 50]) + scaled_array = scale_log(input_array, lo=10, hi=50) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0]) + + +def test_scale_log2(): + # Test case 1: Basic log2 scaling + input_array = np.array([1, 2, 3, 4, 5]) + scaled_array = scale_log2(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0]) + + # Test case 2: Custom range log2 scaling + input_array = np.array([10, 20, 30, 40, 50]) + scaled_array = scale_log2(input_array, lo=10, hi=50) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0]) + + +def test_scale_power(): + # Test case 1: Basic power scaling + input_array = np.array([1, 2, 3, 4, 5]) + scaled_array = scale_power(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose( + scaled_array, + [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00], + ) + + # Test case 2: Custom range power scaling + input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0]) + scaled_array = scale_power(input_array) + print(scaled_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.50113735, 0.63092044, 0.99769983, 1.0]) + + +def test_scale_power2(): + # Test case 1: Basic power scaling + input_array = np.array([1, 2, 3, 4, 5]) + scaled_array = scale_power2(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.08647549, 0.24025307, 0.51371258, 1.0]) + + # Test case 2: Custom range power scaling + input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0]) + scaled_array = scale_power2(input_array) + assert np.all(scaled_array >= 0) + assert np.all(scaled_array <= 1) + assert np.allclose(scaled_array, [0.0, 0.82377238, 0.87916771, 0.99936058, 1.0]) + + +# @pytest.mark.xfail(reason="function formula is wrong", strict=True) +def test_unscale_log(): + input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0]) + unscaled_array = unscale_log(input_array, lo=1, hi=5) + assert np.allclose(unscaled_array, [1, 2, 3, 4, 5]) + + input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0]) + unscaled_array = unscale_log(input_array, lo=10, hi=50) + assert np.allclose(unscaled_array, [10, 20, 30, 40, 50]) + + +def test_unscale_log2(): + input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0]) + unscaled_array = unscale_log2(input_array, lo=1, hi=5) + assert np.allclose(unscaled_array, [1, 2, 3, 4, 5]) + + input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0]) + unscaled_array = unscale_log2(input_array, lo=10, hi=50) + assert np.allclose(unscaled_array, [10, 20, 30, 40, 50]) + + +def test_unscale_power(): + input_array = np.array( + [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00] + ) + unscaled_array = unscale_power(input_array, lo=1, hi=5) + assert np.allclose(unscaled_array, [1, 2, 3, 4, 5]) + + input_array = np.array([0.0, 0.50113735, 0.63092044, 0.99769983, 1.0]) + unscaled_array = unscale_power(input_array, lo=1.0, hi=5.0) + assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0]) + + +def test_unscale_power2(): + input_array = np.array([0.0, 0.08647549, 0.24025307, 0.51371258, 1.0]) + unscaled_array = unscale_power2(input_array, lo=1, hi=5) + assert np.allclose(unscaled_array, [1, 2, 3, 4, 5]) + + input_array = np.array([0.0, 0.82377238, 0.87916771, 0.99936058, 1.0]) + unscaled_array = unscale_power2(input_array, lo=1.0, hi=5.0) + assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0]) + + +# fill in with more cases, parameters, functions +@pytest.mark.parametrize("x", [np.array([1, 2, 3, 4, 5]), np.array([0, 7, 9, 10, 12])]) +# @given(x=arrays_strat(np.float32, array_shapes())) +@pytest.mark.parametrize( + "scale,unscale", + [ + (scale_linear, unscale_linear), + (scale_log, unscale_log), + (scale_log2, unscale_log2), + (scale_power, unscale_power), + (scale_power2, unscale_power2), + ], +) +def test_roundtrip(x, scale, unscale): + + lo = np.min(x) + hi = np.max(x) + if not passes_validation(x, lo, hi): + expected_failure = pytest.raises(ValueError) + elif lo <= 0 and scale in POSITIVE_VALS_ONLY: + expected_failure = pytest.raises(ValueError, match="All values must be > 0.*") + else: + expected_failure = does_not_raise() + with expected_failure: + scaled = scale(x, lo=lo, hi=hi) + unscaled = unscale(scaled, lo=lo, hi=hi) + assert np.allclose(x, unscaled) + + +def passes_validation(array_in, lo, hi): + try: + validate_for_scaling(array_in, lo, hi) + except Exception: + return False + else: + return True + + +# Run the tests +if __name__ == "__main__": + pytest.main() diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py new file mode 100644 index 000000000..535cbfebf --- /dev/null +++ b/foqus_lib/framework/surrogate/scaling.py @@ -0,0 +1,187 @@ +import copy +import json +import logging +import math +from collections import OrderedDict + +import numpy as np + + +def validate_for_scaling(array_in, lo, hi) -> None: + if not np.all(np.isfinite(array_in)): + raise ValueError("Input data cannot contain NaN or inf values") + if array_in.ndim != 1: + raise ValueError("Only 1D arrays supported") + if array_in.size < 2: + raise ValueError("Array must have at least 2 values") + if lo == hi: + raise ValueError("Array must contain non-identical values") + if not check_under_or_overflow(array_in): + raise ValueError("Array contains under/overflow values for dtype") + +def check_under_or_overflow(arr): + if np.issubdtype(arr.dtype, np.integer): + info = np.iinfo(arr.dtype) + elif np.issubdtype(arr.dtype, np.floating): + info = np.finfo(arr.dtype) + else: + raise ValueError("Unsupported data type") + max_value = info.max + min_value = info.min + return np.all(arr < max_value) & np.all(arr > min_value) + + +def scale_linear(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + if (hi - lo) == 0: + result = 0 + else: + result = (array_in - lo) / (hi - lo) + return result + +def scale_log(array_in, lo=None, hi=None): +# need to account for log domain + if np.any(array_in <= 0): + raise ValueError("All values must be > 0 to use scale_log") + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = ((np.log10(array_in) - np.log10(lo)) + / (np.log10(hi) - np.log10(lo))) + return result + +def scale_log2(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = np.log10(9 * (array_in - lo) / (hi - lo) + 1) + return result +# fix expected values in test + +def scale_power(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = (np.power(10, array_in) - np.power(10, lo)) / (np.power(10, hi) - np.power(10, lo)) + return result + +def scale_power2(array_in, lo=None, hi=None): + if lo is None: + lo = np.min(array_in) + if hi is None: + hi = np.max(array_in) + validate_for_scaling(array_in, lo, hi) + result = (1/9 * + (np.power(10, (array_in - lo) / (hi - lo)) - 1) + ) + return result + +def unscale_linear(array_in, lo, hi): + result = array_in * (hi - lo) / 1.0 + lo + return result + +def unscale_log(array_in, lo, hi): + result = lo * np.power(hi / lo, array_in) + + # result = ((np.log10(array_in) - np.log10(lo)) + # / (np.log10(hi) - np.log10(lo))) +# out = math.pow(lo * (hi / lo), (array_in / 10.0)) +# out = ( +# 10 +# * (math.log10(array_in) - math.log10(lo)) +# / (math.log10(hi) - math.log10(lo)) +# ) + return result + +def unscale_log2(array_in, lo=None, hi=None): + result = (np.power(10, array_in / 1.0) - 1) * ( + hi - lo + ) / 9.0 + lo + # out = (math.pow(10, array_in / 10.0) - 1) * ( + # hi - lo + # ) / 9.0 + lo + + return result + +def unscale_power(array_in, lo, hi): + # check if lo and hi were provided + # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo)) + # + np.power(10, lo)) + result = np.log10( + (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + + np.power(10, lo) + ) + return result + +def unscale_power2(array_in, lo, hi): + result = ( + np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo + ) + return result + +class BaseScaler: + + def fit(self, X: np.ndarray): + self.lo_ = np.min(X) + self.hi_ = np.max(X) + return self + + def fit_transform(self, X: np.ndarray) -> np.ndarray: + return ( + self + .fit(X) + .transform(X) + ) + + def transform(self, X: np.ndarray) -> np.ndarray: + raise NotImplementedError + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + raise NotImplementedError + + +class LinearScaler(BaseScaler): + def transform(self, X: np.ndarray) -> np.ndarray: + return scale_linear(X, self.lo_, self.hi_) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + return unscale_linear(X, self.lo_, self.hi_) + +class LogScaler(BaseScaler): + def transform(self, X: np.ndarray) -> np.ndarray: + return scale_log(X, self.lo_, self.hi_) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + return unscale_log(X, self.lo_, self.hi_) + +class LogScaler2(BaseScaler): + def transform(self, X: np.ndarray) -> np.ndarray: + return scale_log2(X, self.lo_, self.hi_) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + return unscale_log2(X, self.lo_, self.hi_) + +class PowerScaler(BaseScaler): + def transform(self, X: np.ndarray) -> np.ndarray: + return scale_power(X, self.lo_, self.hi_) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + return unscale_power(X, self.lo_, self.hi_) + +class PowerScaler2(BaseScaler): + def transform(self, X: np.ndarray) -> np.ndarray: + return scale_power2(X, self.lo_, self.hi_) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + return unscale_power2(X, self.lo_, self.hi_) + \ No newline at end of file From 3e4ff22eb6c477e02da259fdeb652d0ff5e903d4 Mon Sep 17 00:00:00 2001 From: franflame Date: Fri, 8 Mar 2024 17:12:35 -0800 Subject: [PATCH 02/12] Implementing scaling.py functions in plugins --- foqus_lib/framework/surrogate/keras_nn.py | 50 ++++++++++++++------- foqus_lib/framework/surrogate/pytorch_nn.py | 50 +++++++++++++-------- foqus_lib/framework/surrogate/scaling.py | 28 +++++++++++- foqus_lib/framework/surrogate/scikit_nn.py | 41 ++++++++++------- 4 files changed, 117 insertions(+), 52 deletions(-) diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py index a045a5b52..bae1e6f5f 100644 --- a/foqus_lib/framework/surrogate/keras_nn.py +++ b/foqus_lib/framework/surrogate/keras_nn.py @@ -41,6 +41,7 @@ from pathlib import Path from tokenize import String +from typing import Tuple import numpy as np import pandas as pd import tensorflow as tf # pylint: disable=import-error @@ -52,6 +53,19 @@ from foqus_lib.framework.surrogate.surrogate import surrogate from foqus_lib.framework.uq.SurrogateParser import SurrogateParser +from foqus_lib.framework.surrogate.scaling import ( + BaseScaler, + LinearScaler, + LogScaler, + LogScaler2, + PowerScaler, + PowerScaler2, + map_name_to_scaler, + scale_dataframe +) + +# mapping between the human-readable name for the scaling variant +# and an instance of the corresponding scaler class # custom class to define Keras NN layers @tf.keras.utils.register_keras_serializable() @@ -293,6 +307,14 @@ def __init__(self, dat=None): desc="Name of output file for model, should have file extension: .keras", hint="Enter a custom file name if desired", ) + # add option for normalization_form, make dropdown option + self.options.add( + name="scaling_function", + default="Linear", + dtype=str, + desc="Scaling/normalization function for input data", + validValues=list(map_name_to_scaler.keys()), + ) def run(self): """ @@ -316,6 +338,9 @@ def run(self): self.msgQueue.put(f"input data columns: {input_data.columns}") self.msgQueue.put(f"output data columns: {output_data.columns}") + # extract scaling function option, apply it to the input data + # get scaler object + # np.random.seed(46) # rn.seed(1342) # tf.random.set_seed(62) @@ -341,22 +366,13 @@ def run(self): xdata = input_data zdata = output_data - xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds - zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds - - # normalize data using Linear form - # users can normalize with any allowed form # manually, and then pass the - # appropriate flag to FOQUS from the allowed list: - # ["Linear", "Log", "Power", "Log 2", "Power 2"] - see the documentation for - # details on the scaling formulations - xmax, xmin = xdata.max(axis=0), xdata.min(axis=0) - zmax, zmin = zdata.max(axis=0), zdata.min(axis=0) - xdata, zdata = np.array(xdata), np.array(zdata) - for i in range(len(xdata)): - for j in range(len(xlabels)): - xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j]) - for j in range(len(zlabels)): - zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j]) + scaling_func_option = self.options["scaling_function"].value + + scaler_instance = map_name_to_scaler[scaling_func_option] + xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance) + zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance) + + print(f"using scaling function: {scaling_func_option}") # method to create model def create_model(): @@ -370,7 +386,7 @@ def create_model(): input_bounds=xdata_bounds, output_bounds=zdata_bounds, normalized=True, - normalization_form="Linear", + normalization_form=scaling_func_option, ) outputs = layers(inputs) # use network as function outputs = f(inputs) diff --git a/foqus_lib/framework/surrogate/pytorch_nn.py b/foqus_lib/framework/surrogate/pytorch_nn.py index f1fb2f15f..be6edc385 100644 --- a/foqus_lib/framework/surrogate/pytorch_nn.py +++ b/foqus_lib/framework/surrogate/pytorch_nn.py @@ -50,6 +50,16 @@ # from foqus_lib.framework.graph.graph import Graph from foqus_lib.framework.surrogate.surrogate import surrogate from foqus_lib.framework.uq.SurrogateParser import SurrogateParser +from foqus_lib.framework.surrogate.scaling import ( + BaseScaler, + LinearScaler, + LogScaler, + LogScaler2, + PowerScaler, + PowerScaler2, + map_name_to_scaler, + scale_dataframe +) # custom class to define Keras NN layers np.random.seed(46) @@ -284,6 +294,13 @@ def __init__(self, dat=None): desc="Name of output file for model, should have file extension: .pt", hint="Enter a custom file name if desired", ) + self.options.add( + name="scaling_function", + default="Linear", + dtype=str, + desc="Scaling/normalization function for input data", + validValues=["Linear", "Log", "Log2", "Power", "Power2"], + ) def run(self): """ @@ -326,22 +343,16 @@ def run(self): zlabels = list(output_data.columns) xdata = input_data zdata = output_data - xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds - zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds - - # normalize data using Linear form, pass as custom string and parse with SymPy - # users can normalize with any allowed form # manually, and then pass the - # appropriate flag to FOQUS from the allowed list: - # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the - # documentation for details on the scaling formulations - xmax, xmin = xdata.max(axis=0), xdata.min(axis=0) - zmax, zmin = zdata.max(axis=0), zdata.min(axis=0) - xdata, zdata = np.array(xdata), np.array(zdata) - for i in range(len(xdata)): - for j in range(len(xlabels)): - xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j]) - for j in range(len(zlabels)): - zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j]) + # xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds + # zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds + + scaling_func_option = self.options["scaling_function"].value + + scaler_instance = map_name_to_scaler[scaling_func_option] + xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance) + zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance) + + print(f"using scaling function: {scaling_func_option}") model_data = np.concatenate( (xdata, zdata), axis=1 @@ -353,8 +364,11 @@ def run(self): # raise exception here after BPC position # create model - x_train = torch.from_numpy(xdata).float().to(device) - z_train = torch.from_numpy(zdata).float().to(device) + + # need to convert xdata to a numpy array for the below to work + # otherwise causes TypeError: expected np.ndarray (got DataFrame) + x_train = torch.from_numpy(xdata.to_numpy()).float().to(device) + z_train = torch.from_numpy(zdata.to_numpy()).float().to(device) # print type at this point # can also print inside create_model diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py index 535cbfebf..e43aa53f2 100644 --- a/foqus_lib/framework/surrogate/scaling.py +++ b/foqus_lib/framework/surrogate/scaling.py @@ -5,6 +5,8 @@ from collections import OrderedDict import numpy as np +import pandas as pd +from typing import Tuple def validate_for_scaling(array_in, lo, hi) -> None: @@ -130,6 +132,10 @@ def unscale_power2(array_in, lo, hi): return result class BaseScaler: + # def __init__(self, data_array: np.ndarray): + # self.data = data_array + # self.lo_ = np.min(data_array) + # self.hi_ = np.max(data_array) def fit(self, X: np.ndarray): self.lo_ = np.min(X) @@ -184,4 +190,24 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_power2(X, self.lo_, self.hi_) - \ No newline at end of file + +map_name_to_scaler = { + "Linear": LinearScaler(), + "Log": LogScaler(), + "Log2": LogScaler2(), + "Power": PowerScaler(), + "Power2": PowerScaler2(), + #... +} + +def scale_dataframe(df: pd.DataFrame, scaler: BaseScaler) -> Tuple[pd.DataFrame, dict]: + scaled_df = pd.DataFrame(np.nan, columns=df.columns, index=df.index) + bounds = {} + + for col_name in df: + unscaled_col_data = df[col_name] + scaled_col_data = scaler.fit_transform(unscaled_col_data) + bounds[col_name] = scaler.lo_, scaler.hi_ + scaled_df.loc[:, col_name] = scaled_col_data + + return scaled_df, bounds diff --git a/foqus_lib/framework/surrogate/scikit_nn.py b/foqus_lib/framework/surrogate/scikit_nn.py index 85736ad3f..a7127206a 100644 --- a/foqus_lib/framework/surrogate/scikit_nn.py +++ b/foqus_lib/framework/surrogate/scikit_nn.py @@ -52,6 +52,16 @@ from foqus_lib.framework.surrogate.surrogate import surrogate from foqus_lib.framework.uq.SurrogateParser import SurrogateParser +from foqus_lib.framework.surrogate.scaling import ( + BaseScaler, + LinearScaler, + LogScaler, + LogScaler2, + PowerScaler, + PowerScaler2, + map_name_to_scaler, + scale_dataframe +) def validate_training_data(xdata: np.ndarray, zdata: np.ndarray): number_columns_in_xdata = xdata.shape[1] @@ -250,6 +260,14 @@ def __init__(self, dat=None): hint="Enter a custom file name if desired", ) + self.options.add( + name="scaling_function", + default="Linear", + dtype=str, + desc="Scaling/normalization function for input data", + validValues=["Linear", "Log", "Log2", "Power", "Power2"], + ) + def run(self): """ This function overloads the Thread class function, @@ -300,22 +318,13 @@ def run(self): xdata = input_data zdata = output_data - xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata} # x bounds - zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata} # z bounds - - # normalize data using Linear form, pass as custom string and parse with SymPy - # users can normalize with any allowed form # manually, and then pass the - # appropriate flag to FOQUS from the allowed list: - # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the - # documentation for details on the scaling formulations - xmax, xmin = xdata.max(axis=0), xdata.min(axis=0) - zmax, zmin = zdata.max(axis=0), zdata.min(axis=0) - xdata, zdata = np.array(xdata), np.array(zdata) - for i in range(len(xdata)): - for j in range(len(xlabels)): - xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j]) - for j in range(len(zlabels)): - zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j]) + scaling_func_option = self.options["scaling_function"].value + + scaler_instance = map_name_to_scaler[scaling_func_option] + xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance) + zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance) + + print(f"using scaling function: {scaling_func_option}") model_data = np.concatenate( (xdata, zdata), axis=1 From bc3a02006bece8a42d3e1195a87ff2a93ff0a86d Mon Sep 17 00:00:00 2001 From: franflame Date: Fri, 8 Mar 2024 17:14:01 -0800 Subject: [PATCH 03/12] Formatting --- foqus_lib/framework/surrogate/keras_nn.py | 6 +- foqus_lib/framework/surrogate/pytorch_nn.py | 6 +- foqus_lib/framework/surrogate/scaling.py | 81 ++++++++++++--------- foqus_lib/framework/surrogate/scikit_nn.py | 7 +- 4 files changed, 55 insertions(+), 45 deletions(-) diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py index bae1e6f5f..5bccaa808 100644 --- a/foqus_lib/framework/surrogate/keras_nn.py +++ b/foqus_lib/framework/surrogate/keras_nn.py @@ -57,11 +57,11 @@ BaseScaler, LinearScaler, LogScaler, - LogScaler2, - PowerScaler, + LogScaler2, + PowerScaler, PowerScaler2, map_name_to_scaler, - scale_dataframe + scale_dataframe, ) # mapping between the human-readable name for the scaling variant diff --git a/foqus_lib/framework/surrogate/pytorch_nn.py b/foqus_lib/framework/surrogate/pytorch_nn.py index be6edc385..1e7ae1fd0 100644 --- a/foqus_lib/framework/surrogate/pytorch_nn.py +++ b/foqus_lib/framework/surrogate/pytorch_nn.py @@ -54,11 +54,11 @@ BaseScaler, LinearScaler, LogScaler, - LogScaler2, - PowerScaler, + LogScaler2, + PowerScaler, PowerScaler2, map_name_to_scaler, - scale_dataframe + scale_dataframe, ) # custom class to define Keras NN layers diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py index e43aa53f2..fa0b6a9be 100644 --- a/foqus_lib/framework/surrogate/scaling.py +++ b/foqus_lib/framework/surrogate/scaling.py @@ -19,7 +19,8 @@ def validate_for_scaling(array_in, lo, hi) -> None: if lo == hi: raise ValueError("Array must contain non-identical values") if not check_under_or_overflow(array_in): - raise ValueError("Array contains under/overflow values for dtype") + raise ValueError("Array contains under/overflow values for dtype") + def check_under_or_overflow(arr): if np.issubdtype(arr.dtype, np.integer): @@ -28,7 +29,7 @@ def check_under_or_overflow(arr): info = np.finfo(arr.dtype) else: raise ValueError("Unsupported data type") - max_value = info.max + max_value = info.max min_value = info.min return np.all(arr < max_value) & np.all(arr > min_value) @@ -45,8 +46,9 @@ def scale_linear(array_in, lo=None, hi=None): result = (array_in - lo) / (hi - lo) return result + def scale_log(array_in, lo=None, hi=None): -# need to account for log domain + # need to account for log domain if np.any(array_in <= 0): raise ValueError("All values must be > 0 to use scale_log") if lo is None: @@ -54,10 +56,10 @@ def scale_log(array_in, lo=None, hi=None): if hi is None: hi = np.max(array_in) validate_for_scaling(array_in, lo, hi) - result = ((np.log10(array_in) - np.log10(lo)) - / (np.log10(hi) - np.log10(lo))) + result = (np.log10(array_in) - np.log10(lo)) / (np.log10(hi) - np.log10(lo)) return result + def scale_log2(array_in, lo=None, hi=None): if lo is None: lo = np.min(array_in) @@ -66,71 +68,76 @@ def scale_log2(array_in, lo=None, hi=None): validate_for_scaling(array_in, lo, hi) result = np.log10(9 * (array_in - lo) / (hi - lo) + 1) return result + + # fix expected values in test + def scale_power(array_in, lo=None, hi=None): if lo is None: lo = np.min(array_in) if hi is None: hi = np.max(array_in) validate_for_scaling(array_in, lo, hi) - result = (np.power(10, array_in) - np.power(10, lo)) / (np.power(10, hi) - np.power(10, lo)) + result = (np.power(10, array_in) - np.power(10, lo)) / ( + np.power(10, hi) - np.power(10, lo) + ) return result + def scale_power2(array_in, lo=None, hi=None): if lo is None: lo = np.min(array_in) if hi is None: hi = np.max(array_in) validate_for_scaling(array_in, lo, hi) - result = (1/9 * - (np.power(10, (array_in - lo) / (hi - lo)) - 1) - ) + result = 1 / 9 * (np.power(10, (array_in - lo) / (hi - lo)) - 1) return result + def unscale_linear(array_in, lo, hi): result = array_in * (hi - lo) / 1.0 + lo return result + def unscale_log(array_in, lo, hi): result = lo * np.power(hi / lo, array_in) # result = ((np.log10(array_in) - np.log10(lo)) # / (np.log10(hi) - np.log10(lo))) -# out = math.pow(lo * (hi / lo), (array_in / 10.0)) -# out = ( -# 10 -# * (math.log10(array_in) - math.log10(lo)) -# / (math.log10(hi) - math.log10(lo)) -# ) + # out = math.pow(lo * (hi / lo), (array_in / 10.0)) + # out = ( + # 10 + # * (math.log10(array_in) - math.log10(lo)) + # / (math.log10(hi) - math.log10(lo)) + # ) return result + def unscale_log2(array_in, lo=None, hi=None): - result = (np.power(10, array_in / 1.0) - 1) * ( - hi - lo - ) / 9.0 + lo - # out = (math.pow(10, array_in / 10.0) - 1) * ( - # hi - lo - # ) / 9.0 + lo - + result = (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo + # out = (math.pow(10, array_in / 10.0) - 1) * ( + # hi - lo + # ) / 9.0 + lo + return result + def unscale_power(array_in, lo, hi): - # check if lo and hi were provided + # check if lo and hi were provided # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo)) # + np.power(10, lo)) result = np.log10( - (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) - + np.power(10, lo) - ) + (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + np.power(10, lo) + ) return result + def unscale_power2(array_in, lo, hi): - result = ( - np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo - ) + result = np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo return result + class BaseScaler: # def __init__(self, data_array: np.ndarray): # self.data = data_array @@ -143,11 +150,7 @@ def fit(self, X: np.ndarray): return self def fit_transform(self, X: np.ndarray) -> np.ndarray: - return ( - self - .fit(X) - .transform(X) - ) + return self.fit(X).transform(X) def transform(self, X: np.ndarray) -> np.ndarray: raise NotImplementedError @@ -162,7 +165,8 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_linear(X, self.lo_, self.hi_) - + + class LogScaler(BaseScaler): def transform(self, X: np.ndarray) -> np.ndarray: return scale_log(X, self.lo_, self.hi_) @@ -170,6 +174,7 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_log(X, self.lo_, self.hi_) + class LogScaler2(BaseScaler): def transform(self, X: np.ndarray) -> np.ndarray: return scale_log2(X, self.lo_, self.hi_) @@ -177,6 +182,7 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_log2(X, self.lo_, self.hi_) + class PowerScaler(BaseScaler): def transform(self, X: np.ndarray) -> np.ndarray: return scale_power(X, self.lo_, self.hi_) @@ -184,6 +190,7 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_power(X, self.lo_, self.hi_) + class PowerScaler2(BaseScaler): def transform(self, X: np.ndarray) -> np.ndarray: return scale_power2(X, self.lo_, self.hi_) @@ -191,15 +198,17 @@ def transform(self, X: np.ndarray) -> np.ndarray: def inverse_transform(self, X: np.ndarray) -> np.ndarray: return unscale_power2(X, self.lo_, self.hi_) + map_name_to_scaler = { "Linear": LinearScaler(), "Log": LogScaler(), "Log2": LogScaler2(), "Power": PowerScaler(), "Power2": PowerScaler2(), - #... + # ... } + def scale_dataframe(df: pd.DataFrame, scaler: BaseScaler) -> Tuple[pd.DataFrame, dict]: scaled_df = pd.DataFrame(np.nan, columns=df.columns, index=df.index) bounds = {} diff --git a/foqus_lib/framework/surrogate/scikit_nn.py b/foqus_lib/framework/surrogate/scikit_nn.py index a7127206a..46f7c08ab 100644 --- a/foqus_lib/framework/surrogate/scikit_nn.py +++ b/foqus_lib/framework/surrogate/scikit_nn.py @@ -56,13 +56,14 @@ BaseScaler, LinearScaler, LogScaler, - LogScaler2, - PowerScaler, + LogScaler2, + PowerScaler, PowerScaler2, map_name_to_scaler, - scale_dataframe + scale_dataframe, ) + def validate_training_data(xdata: np.ndarray, zdata: np.ndarray): number_columns_in_xdata = xdata.shape[1] number_columns_in_zdata = zdata.shape[1] From 90500dcff6cc43be6863bdf5c84eea096a7e4667 Mon Sep 17 00:00:00 2001 From: franflame Date: Mon, 25 Mar 2024 14:45:42 -0700 Subject: [PATCH 04/12] Remove commented code and add documentation for BaseScaler --- foqus_lib/framework/surrogate/scaling.py | 27 +++--------------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py index fa0b6a9be..16301bb6a 100644 --- a/foqus_lib/framework/surrogate/scaling.py +++ b/foqus_lib/framework/surrogate/scaling.py @@ -70,9 +70,6 @@ def scale_log2(array_in, lo=None, hi=None): return result -# fix expected values in test - - def scale_power(array_in, lo=None, hi=None): if lo is None: lo = np.min(array_in) @@ -102,31 +99,15 @@ def unscale_linear(array_in, lo, hi): def unscale_log(array_in, lo, hi): result = lo * np.power(hi / lo, array_in) - - # result = ((np.log10(array_in) - np.log10(lo)) - # / (np.log10(hi) - np.log10(lo))) - # out = math.pow(lo * (hi / lo), (array_in / 10.0)) - # out = ( - # 10 - # * (math.log10(array_in) - math.log10(lo)) - # / (math.log10(hi) - math.log10(lo)) - # ) return result def unscale_log2(array_in, lo=None, hi=None): result = (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo - # out = (math.pow(10, array_in / 10.0) - 1) * ( - # hi - lo - # ) / 9.0 + lo - return result def unscale_power(array_in, lo, hi): - # check if lo and hi were provided - # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo)) - # + np.power(10, lo)) result = np.log10( (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + np.power(10, lo) ) @@ -139,10 +120,9 @@ def unscale_power2(array_in, lo, hi): class BaseScaler: - # def __init__(self, data_array: np.ndarray): - # self.data = data_array - # self.lo_ = np.min(data_array) - # self.hi_ = np.max(data_array) + """BaseScaler is the base class for the scaler classes defined + below. It exposes the transformer interface from scikit-learn, + and is not supposed to be instantiated directly.""" def fit(self, X: np.ndarray): self.lo_ = np.min(X) @@ -205,7 +185,6 @@ def inverse_transform(self, X: np.ndarray) -> np.ndarray: "Log2": LogScaler2(), "Power": PowerScaler(), "Power2": PowerScaler2(), - # ... } From 490d9f246cba27377d2bcd112d779263488f64d6 Mon Sep 17 00:00:00 2001 From: franflame Date: Mon, 25 Mar 2024 15:41:39 -0700 Subject: [PATCH 05/12] Passing hypothesis tests for scaling.py --- foqus_lib/framework/surrogate/scaling.py | 2 +- .../framework/surrogate/tests/__init__.py | 0 .../{scale_test => tests}/test_scaling.py | 85 ++++++++++++++++--- 3 files changed, 75 insertions(+), 12 deletions(-) create mode 100644 foqus_lib/framework/surrogate/tests/__init__.py rename foqus_lib/framework/surrogate/{scale_test => tests}/test_scaling.py (79%) diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py index 16301bb6a..3670230f1 100644 --- a/foqus_lib/framework/surrogate/scaling.py +++ b/foqus_lib/framework/surrogate/scaling.py @@ -16,7 +16,7 @@ def validate_for_scaling(array_in, lo, hi) -> None: raise ValueError("Only 1D arrays supported") if array_in.size < 2: raise ValueError("Array must have at least 2 values") - if lo == hi: + if np.allclose(lo, hi): raise ValueError("Array must contain non-identical values") if not check_under_or_overflow(array_in): raise ValueError("Array contains under/overflow values for dtype") diff --git a/foqus_lib/framework/surrogate/tests/__init__.py b/foqus_lib/framework/surrogate/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/foqus_lib/framework/surrogate/scale_test/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py similarity index 79% rename from foqus_lib/framework/surrogate/scale_test/test_scaling.py rename to foqus_lib/framework/surrogate/tests/test_scaling.py index 09a4f0596..b07adccf5 100644 --- a/foqus_lib/framework/surrogate/scale_test/test_scaling.py +++ b/foqus_lib/framework/surrogate/tests/test_scaling.py @@ -12,9 +12,16 @@ scale_power2, unscale_power2, validate_for_scaling, + map_name_to_scaler, + BaseScaler, + LinearScaler, + LogScaler, + LogScaler2, + PowerScaler, + PowerScaler2, ) -from hypothesis.extra.numpy import arrays as arrays_strat, array_shapes +from hypothesis.extra import numpy as hypothesis_np from hypothesis import given, example, assume from contextlib import contextmanager @@ -48,13 +55,6 @@ def test_scale_linear(): assert np.all(scaled_array <= 1) assert np.allclose(scaled_array, [0.0, 0.5, 1.0]) - # Test case 4: Scaling with repeated values - input_array = np.array([2, 2, 2, 2]) - scaled_array = scale_linear(input_array) - assert np.all(scaled_array >= 0) - assert np.all(scaled_array <= 1) - assert np.allclose(scaled_array, [0.0, 0.0, 0.0, 0.0]) - def test_unscale_linear(): # Test case 1: Basic unscaling @@ -189,9 +189,15 @@ def test_unscale_power2(): assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0]) -# fill in with more cases, parameters, functions -@pytest.mark.parametrize("x", [np.array([1, 2, 3, 4, 5]), np.array([0, 7, 9, 10, 12])]) -# @given(x=arrays_strat(np.float32, array_shapes())) +@given( + x=hypothesis_np.arrays( + np.float64, + hypothesis_np.array_shapes(), + # TODO: see if these bounds can be relaxed + # larger values cause failures in scale_power + elements={"min_value": -5, "max_value": 5}, + ) +) @pytest.mark.parametrize( "scale,unscale", [ @@ -218,6 +224,63 @@ def test_roundtrip(x, scale, unscale): assert np.allclose(x, unscaled) +# parametrize with list of scalar objects +def test_object_testing(): + array_one = np.array([1, 3, 5, 6, 8, 9, 10]) + + scaler_variant_1 = "Linear" + + # actual test content + scaler_instance_1 = map_name_to_scaler[scaler_variant_1] + + linear_arr_one = scaler_instance_1.fit_transform(array_one) + + print(linear_arr_one) + assert np.all(linear_arr_one >= 0) + assert np.all(linear_arr_one <= 1) + # actual test content + + scaler_variant_2 = "Log" + + scaler_instance_2 = map_name_to_scaler[scaler_variant_2] + + linear_arr_two = scaler_instance_2.fit_transform(array_one) + + print(linear_arr_two) + assert np.all(linear_arr_two >= 0) + assert np.all(linear_arr_two <= 1) + + scaler_variant_3 = "Power" + + scaler_instance_3 = map_name_to_scaler[scaler_variant_3] + + linear_arr_three = scaler_instance_3.fit_transform(array_one) + + print(linear_arr_three) + assert np.all(linear_arr_one >= 0) + assert np.all(linear_arr_one <= 1) + + scaler_variant_four = "Log2" + + scaler_instance_four = map_name_to_scaler[scaler_variant_four] + + linear_arr_four = scaler_instance_four.fit_transform(array_one) + + print(linear_arr_four) + assert np.all(linear_arr_four >= 0) + assert np.all(linear_arr_four <= 1) + + scaler_variant_five = "Power2" + + scaler_instance_five = map_name_to_scaler[scaler_variant_five] + + linear_arr_five = scaler_instance_five.fit_transform(array_one) + + print(linear_arr_five) + assert np.all(linear_arr_one >= 0) + assert np.all(linear_arr_one <= 1) + + def passes_validation(array_in, lo, hi): try: validate_for_scaling(array_in, lo, hi) From 730046efa8cab386bd9cb8aabfb8fa7096bfc877 Mon Sep 17 00:00:00 2001 From: franflame Date: Tue, 26 Mar 2024 16:37:03 -0700 Subject: [PATCH 06/12] Parametrize and revise scaler object test function --- .../framework/surrogate/tests/test_scaling.py | 70 +++++-------------- 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/foqus_lib/framework/surrogate/tests/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py index b07adccf5..3c039f862 100644 --- a/foqus_lib/framework/surrogate/tests/test_scaling.py +++ b/foqus_lib/framework/surrogate/tests/test_scaling.py @@ -224,61 +224,25 @@ def test_roundtrip(x, scale, unscale): assert np.allclose(x, unscaled) -# parametrize with list of scalar objects -def test_object_testing(): - array_one = np.array([1, 3, 5, 6, 8, 9, 10]) - - scaler_variant_1 = "Linear" - - # actual test content - scaler_instance_1 = map_name_to_scaler[scaler_variant_1] - - linear_arr_one = scaler_instance_1.fit_transform(array_one) - - print(linear_arr_one) - assert np.all(linear_arr_one >= 0) - assert np.all(linear_arr_one <= 1) - # actual test content - - scaler_variant_2 = "Log" - - scaler_instance_2 = map_name_to_scaler[scaler_variant_2] - - linear_arr_two = scaler_instance_2.fit_transform(array_one) - - print(linear_arr_two) - assert np.all(linear_arr_two >= 0) - assert np.all(linear_arr_two <= 1) - - scaler_variant_3 = "Power" - - scaler_instance_3 = map_name_to_scaler[scaler_variant_3] - - linear_arr_three = scaler_instance_3.fit_transform(array_one) - - print(linear_arr_three) - assert np.all(linear_arr_one >= 0) - assert np.all(linear_arr_one <= 1) - - scaler_variant_four = "Log2" - - scaler_instance_four = map_name_to_scaler[scaler_variant_four] - - linear_arr_four = scaler_instance_four.fit_transform(array_one) - - print(linear_arr_four) - assert np.all(linear_arr_four >= 0) - assert np.all(linear_arr_four <= 1) - - scaler_variant_five = "Power2" - - scaler_instance_five = map_name_to_scaler[scaler_variant_five] +@pytest.mark.parametrize( + "variant", + [ + "Linear", + "Log", + "Log2", + "Power", + "Power2", + ], +) +def test_use_scaler_objects(variant): + input_array = np.array([1, 3, 5, 6, 8, 9, 10]) + scaler_instance = map_name_to_scaler[variant] - linear_arr_five = scaler_instance_five.fit_transform(array_one) + result_arr = scaler_instance.fit_transform(input_array) - print(linear_arr_five) - assert np.all(linear_arr_one >= 0) - assert np.all(linear_arr_one <= 1) + print(result_arr) + assert np.all(result_arr >= 0) + assert np.all(result_arr <= 1) def passes_validation(array_in, lo, hi): From ff571ed0466cce2754f9e743196a07d84dc9897b Mon Sep 17 00:00:00 2001 From: franflame Date: Fri, 29 Mar 2024 18:09:40 -0700 Subject: [PATCH 07/12] Handle error case for very small values in scale_log --- foqus_lib/framework/surrogate/scaling.py | 5 +++-- foqus_lib/framework/surrogate/tests/test_scaling.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py index 3670230f1..ae79d7a88 100644 --- a/foqus_lib/framework/surrogate/scaling.py +++ b/foqus_lib/framework/surrogate/scaling.py @@ -49,8 +49,9 @@ def scale_linear(array_in, lo=None, hi=None): def scale_log(array_in, lo=None, hi=None): # need to account for log domain - if np.any(array_in <= 0): - raise ValueError("All values must be > 0 to use scale_log") + epsilon = 1e-8 + if np.any(array_in < epsilon): + raise ValueError(f"All values must be greater than {epsilon}") if lo is None: lo = np.min(array_in) if hi is None: diff --git a/foqus_lib/framework/surrogate/tests/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py index 3c039f862..10ffa6922 100644 --- a/foqus_lib/framework/surrogate/tests/test_scaling.py +++ b/foqus_lib/framework/surrogate/tests/test_scaling.py @@ -214,8 +214,10 @@ def test_roundtrip(x, scale, unscale): hi = np.max(x) if not passes_validation(x, lo, hi): expected_failure = pytest.raises(ValueError) - elif lo <= 0 and scale in POSITIVE_VALS_ONLY: - expected_failure = pytest.raises(ValueError, match="All values must be > 0.*") + elif lo < 1e-08 and scale in POSITIVE_VALS_ONLY: + expected_failure = pytest.raises( + ValueError, match="All values must be greater than 1e-08" + ) else: expected_failure = does_not_raise() with expected_failure: From 193d457531ed71d0b4179d4115b4e221d57b36ac Mon Sep 17 00:00:00 2001 From: franflame Date: Tue, 9 Apr 2024 16:18:23 -0700 Subject: [PATCH 08/12] Try adding GUI test for scaling option select --- foqus_lib/gui/tests/test_surrogate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py index 21cccda2c..913857443 100644 --- a/foqus_lib/gui/tests/test_surrogate.py +++ b/foqus_lib/gui/tests/test_surrogate.py @@ -17,6 +17,7 @@ from foqus_lib.gui.main.mainWindow import mainWindow from foqus_lib.gui.surrogate.surrogateFrame import surrogateFrame +from PyQt5.QtWidgets import QComboBox pytestmark = pytest.mark.gui @@ -56,6 +57,7 @@ class TestFrame: # "ACOSSO", ], ) + def test_run_surrogate( self, qtbot, @@ -81,6 +83,10 @@ def test_run_surrogate( qtbot.click(button="Select All") with qtbot.focusing_on(group_box="Output Variables"): qtbot.click(button="Select All") + qtbot.select_tab("Method Settings") + with qtbot.focusing_on(table=any): + qtbot.select_row(12) + qtbot.using(column="Value").set_option("Linear") qtbot.select_tab("Execution") run_button, stop_button = qtbot.locate(button=any, index=[0, 1]) run_button.click() From 859ac94eb8bf1f826b1e5d4105e70eca5dc1e1be Mon Sep 17 00:00:00 2001 From: Ludovico Bianchi Date: Tue, 9 Apr 2024 19:31:33 -0500 Subject: [PATCH 09/12] Add support for text hints in table row search --- foqus_lib/gui/tests/test_surrogate.py | 2 +- pytest_qt_extras.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py index 913857443..1b56b4905 100644 --- a/foqus_lib/gui/tests/test_surrogate.py +++ b/foqus_lib/gui/tests/test_surrogate.py @@ -85,7 +85,7 @@ def test_run_surrogate( qtbot.click(button="Select All") qtbot.select_tab("Method Settings") with qtbot.focusing_on(table=any): - qtbot.select_row(12) + qtbot.select_row("scaling_function") qtbot.using(column="Value").set_option("Linear") qtbot.select_tab("Execution") run_button, stop_button = qtbot.locate(button=any, index=[0, 1]) diff --git a/pytest_qt_extras.py b/pytest_qt_extras.py index caf758bf2..4115c1f00 100644 --- a/pytest_qt_extras.py +++ b/pytest_qt_extras.py @@ -589,6 +589,10 @@ def run(cls, table, hint: TableRowSpec): raise InvalidMatchError( f"row index {hint} out of range: (count: {count})" ) + elif isinstance(hint, str): + matching_items = table.findItems(hint, QtCore.Qt.MatchExactly) + InvalidMatchError.check(matching_items, expected=1) + idx = int(matching_items[0].row()) elif hint is None: if count == 1: idx = 0 From 719375183977dc46fc453c5826b405347f292e1f Mon Sep 17 00:00:00 2001 From: Ludovico Bianchi Date: Tue, 9 Apr 2024 19:44:31 -0500 Subject: [PATCH 10/12] Add quotes to search hint since they are present in the widget --- foqus_lib/gui/tests/test_surrogate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py index 1b56b4905..73b5dfc58 100644 --- a/foqus_lib/gui/tests/test_surrogate.py +++ b/foqus_lib/gui/tests/test_surrogate.py @@ -86,7 +86,7 @@ def test_run_surrogate( qtbot.select_tab("Method Settings") with qtbot.focusing_on(table=any): qtbot.select_row("scaling_function") - qtbot.using(column="Value").set_option("Linear") + qtbot.using(column="Value").set_option('"Linear"') qtbot.select_tab("Execution") run_button, stop_button = qtbot.locate(button=any, index=[0, 1]) run_button.click() From 45ca80f34162b42973314cc384e41f1b06f99ad4 Mon Sep 17 00:00:00 2001 From: Ludovico Bianchi Date: Tue, 9 Apr 2024 19:56:19 -0500 Subject: [PATCH 11/12] Format with Black --- foqus_lib/framework/surrogate/keras_nn.py | 1 + foqus_lib/gui/tests/test_surrogate.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py index 5bccaa808..3e581bf30 100644 --- a/foqus_lib/framework/surrogate/keras_nn.py +++ b/foqus_lib/framework/surrogate/keras_nn.py @@ -67,6 +67,7 @@ # mapping between the human-readable name for the scaling variant # and an instance of the corresponding scaler class + # custom class to define Keras NN layers @tf.keras.utils.register_keras_serializable() class keras_nn(tf.keras.layers.Layer): diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py index 73b5dfc58..6a2d7f535 100644 --- a/foqus_lib/gui/tests/test_surrogate.py +++ b/foqus_lib/gui/tests/test_surrogate.py @@ -57,7 +57,6 @@ class TestFrame: # "ACOSSO", ], ) - def test_run_surrogate( self, qtbot, From 21f2b429b4ad3f9146b8367f32ae76d68dc29831 Mon Sep 17 00:00:00 2001 From: franflame Date: Fri, 12 Apr 2024 17:17:52 -0700 Subject: [PATCH 12/12] Add scaling variants to surrogate GUI test --- foqus_lib/gui/tests/test_surrogate.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py index 6a2d7f535..91725b41e 100644 --- a/foqus_lib/gui/tests/test_surrogate.py +++ b/foqus_lib/gui/tests/test_surrogate.py @@ -57,6 +57,16 @@ class TestFrame: # "ACOSSO", ], ) + @pytest.mark.parametrize( + "scaling_variant", + [ + '"Linear"', + '"Log"', + '"Log2"', + '"Power"', + '"Power2"', + ], + ) def test_run_surrogate( self, qtbot, @@ -64,6 +74,7 @@ def test_run_surrogate( main_window: mainWindow, name: str, required_import: str, + scaling_variant: str, ): qtbot.focused = frame pytest.importorskip(required_import, reason=f"{required_import} not available") @@ -85,7 +96,7 @@ def test_run_surrogate( qtbot.select_tab("Method Settings") with qtbot.focusing_on(table=any): qtbot.select_row("scaling_function") - qtbot.using(column="Value").set_option('"Linear"') + qtbot.using(column="Value").set_option(scaling_variant) qtbot.select_tab("Execution") run_button, stop_button = qtbot.locate(button=any, index=[0, 1]) run_button.click()