From bcf75931fdbf801a4ee8f1d58c1c76484b82f011 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Fri, 1 Mar 2024 14:24:54 -0800
Subject: [PATCH 01/12] Add first implementation of scaling functions

---
 .../surrogate/scale_test/test_scaling.py      | 232 ++++++++++++++++++
 foqus_lib/framework/surrogate/scaling.py      | 187 ++++++++++++++
 2 files changed, 419 insertions(+)
 create mode 100644 foqus_lib/framework/surrogate/scale_test/test_scaling.py
 create mode 100644 foqus_lib/framework/surrogate/scaling.py

diff --git a/foqus_lib/framework/surrogate/scale_test/test_scaling.py b/foqus_lib/framework/surrogate/scale_test/test_scaling.py
new file mode 100644
index 000000000..09a4f0596
--- /dev/null
+++ b/foqus_lib/framework/surrogate/scale_test/test_scaling.py
@@ -0,0 +1,232 @@
+import numpy as np
+import pytest
+from foqus_lib.framework.surrogate.scaling import (
+    scale_linear,
+    unscale_linear,
+    scale_log,
+    unscale_log,
+    scale_log2,
+    unscale_log2,
+    scale_power,
+    unscale_power,
+    scale_power2,
+    unscale_power2,
+    validate_for_scaling,
+)
+
+from hypothesis.extra.numpy import arrays as arrays_strat, array_shapes
+from hypothesis import given, example, assume
+from contextlib import contextmanager
+
+POSITIVE_VALS_ONLY = {scale_log}
+
+
+@contextmanager
+def does_not_raise():
+    yield
+
+
+def test_scale_linear():
+    """Check scale_linear output values and its rejection of constant input."""
+    # Test case 1: Basic scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_linear(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0])
+
+    # Test case 2: Custom range scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_linear(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.25, 0.5, 0.75, 1.0])
+
+    # Test case 3: Scaling with negative values
+    input_array = np.array([-5, 0, 5])
+    scaled_array = scale_linear(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.5, 1.0])
+
+    # Test case 4: Repeated values make lo == hi, which validate_for_scaling
+    # rejects, so scale_linear must raise instead of returning zeros
+    input_array = np.array([2, 2, 2, 2])
+    with pytest.raises(ValueError, match="non-identical values"):
+        scale_linear(input_array)
+
+
+def test_unscale_linear():
+    # Test case 1: Basic unscaling
+    input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    # Test case 2: Custom range unscaling
+    input_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+    # Test case 3: Unscaling with negative values
+    input_array = np.array([0.0, 0.5, 1.0])
+    unscaled_array = unscale_linear(input_array, lo=-5, hi=5)
+    assert np.allclose(unscaled_array, [-5, 0, 5])
+
+    # Test case 4: Unscaling with repeated values
+    input_array = np.array([0.0, 0.0, 0.0, 0.0])
+    unscaled_array = unscale_linear(input_array, lo=0, hi=5)
+    assert np.allclose(unscaled_array, [0, 0, 0, 0])
+
+
+def test_scale_log():
+    # Test case 1: Basic log scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_log(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+
+    # Test case 2: Custom range log scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_log(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+
+
+def test_scale_log2():
+    # Test case 1: Basic log2 scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_log2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+
+    # Test case 2: Custom range log2 scaling
+    input_array = np.array([10, 20, 30, 40, 50])
+    scaled_array = scale_log2(input_array, lo=10, hi=50)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+
+
+def test_scale_power():
+    """Check scale_power against precomputed values for int and float input."""
+    # Test case 1: Basic power scaling
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_power(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(
+        scaled_array,
+        [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00],
+    )
+
+    # Test case 2: Float values clustered near the maximum, default lo/hi
+    input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0])
+    scaled_array = scale_power(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.50113735, 0.63092044, 0.99769983, 1.0])
+
+
+def test_scale_power2():
+    # Test case 1: Basic power2 scaling, lo/hi default to the array min/max
+    input_array = np.array([1, 2, 3, 4, 5])
+    scaled_array = scale_power2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.08647549, 0.24025307, 0.51371258, 1.0])
+
+    # Test case 2: Float values clustered near the maximum, default lo/hi
+    input_array = np.array([1.0, 4.7, 4.8, 4.999, 5.0])
+    scaled_array = scale_power2(input_array)
+    assert np.all(scaled_array >= 0)
+    assert np.all(scaled_array <= 1)
+    assert np.allclose(scaled_array, [0.0, 0.82377238, 0.87916771, 0.99936058, 1.0])
+
+
+# @pytest.mark.xfail(reason="function formula is wrong", strict=True)
+def test_unscale_log():
+    input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+    unscaled_array = unscale_log(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.43067656, 0.68260619, 0.86135312, 1.0])
+    unscaled_array = unscale_log(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+
+def test_unscale_log2():
+    input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+    unscaled_array = unscale_log2(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.51188336, 0.74036269, 0.8893017, 1.0])
+    unscaled_array = unscale_log2(input_array, lo=10, hi=50)
+    assert np.allclose(unscaled_array, [10, 20, 30, 40, 50])
+
+
+def test_unscale_power():
+    input_array = np.array(
+        [0.00000000e00, 9.00090009e-04, 9.90099010e-03, 9.99099910e-02, 1.00000000e00]
+    )
+    unscaled_array = unscale_power(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.50113735, 0.63092044, 0.99769983, 1.0])
+    unscaled_array = unscale_power(input_array, lo=1.0, hi=5.0)
+    assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0])
+
+
+def test_unscale_power2():
+    input_array = np.array([0.0, 0.08647549, 0.24025307, 0.51371258, 1.0])
+    unscaled_array = unscale_power2(input_array, lo=1, hi=5)
+    assert np.allclose(unscaled_array, [1, 2, 3, 4, 5])
+
+    input_array = np.array([0.0, 0.82377238, 0.87916771, 0.99936058, 1.0])
+    unscaled_array = unscale_power2(input_array, lo=1.0, hi=5.0)
+    assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0])
+
+
+# TODO: fill in with more input arrays, lo/hi parameters and scaler functions
+@pytest.mark.parametrize("x", [np.array([1, 2, 3, 4, 5]), np.array([0, 7, 9, 10, 12])])
+# @given(x=arrays_strat(np.float32, array_shapes()))
+@pytest.mark.parametrize(
+    "scale,unscale",
+    [
+        (scale_linear, unscale_linear),
+        (scale_log, unscale_log),
+        (scale_log2, unscale_log2),
+        (scale_power, unscale_power),
+        (scale_power2, unscale_power2),
+    ],
+)
+def test_roundtrip(x, scale, unscale):
+    """unscale(scale(x)) must reproduce x; rejected input must raise ValueError."""
+    lo = np.min(x)
+    hi = np.max(x)
+    if not passes_validation(x, lo, hi):
+        expected_failure = pytest.raises(ValueError)
+    elif lo <= 0 and scale in POSITIVE_VALS_ONLY:
+        expected_failure = pytest.raises(ValueError, match="All values must be > 0.*")
+    else:
+        expected_failure = does_not_raise()
+    with expected_failure:
+        scaled = scale(x, lo=lo, hi=hi)
+        unscaled = unscale(scaled, lo=lo, hi=hi)
+        assert np.allclose(x, unscaled)
+
+
+def passes_validation(array_in, lo, hi):
+    try:
+        validate_for_scaling(array_in, lo, hi)
+    except Exception:
+        return False
+    else:
+        return True
+
+
+# Run the tests
+if __name__ == "__main__":
+    pytest.main()
diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
new file mode 100644
index 000000000..535cbfebf
--- /dev/null
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -0,0 +1,187 @@
+import copy
+import json
+import logging
+import math
+from collections import OrderedDict
+
+import numpy as np
+
+
+def validate_for_scaling(array_in, lo, hi) -> None:
+    if not np.all(np.isfinite(array_in)):
+        raise ValueError("Input data cannot contain NaN or inf values")
+    if array_in.ndim != 1:
+        raise ValueError("Only 1D arrays supported")
+    if array_in.size < 2:
+        raise ValueError("Array must have at least 2 values")
+    if lo == hi:
+        raise ValueError("Array must contain non-identical values")
+    if not check_under_or_overflow(array_in):
+        raise ValueError("Array contains under/overflow values for dtype")        
+
+def check_under_or_overflow(arr):
+    if np.issubdtype(arr.dtype, np.integer):
+        info = np.iinfo(arr.dtype)
+    elif np.issubdtype(arr.dtype, np.floating):
+        info = np.finfo(arr.dtype)
+    else:
+        raise ValueError("Unsupported data type")
+    max_value = info.max 
+    min_value = info.min
+    return np.all(arr < max_value) & np.all(arr > min_value)
+
+
+def scale_linear(array_in, lo=None, hi=None):
+    if lo is None:
+        lo = np.min(array_in)
+    if hi is None:
+        hi = np.max(array_in)
+    validate_for_scaling(array_in, lo, hi)
+    if (hi - lo) == 0:
+        result = 0
+    else:
+        result = (array_in - lo) / (hi - lo)
+    return result
+
+def scale_log(array_in, lo=None, hi=None):
+# need to account for log domain 
+    if np.any(array_in <= 0):
+        raise ValueError("All values must be > 0 to use scale_log")
+    if lo is None:
+        lo = np.min(array_in)
+    if hi is None:
+        hi = np.max(array_in)
+    validate_for_scaling(array_in, lo, hi)
+    result = ((np.log10(array_in) - np.log10(lo))
+                 / (np.log10(hi) - np.log10(lo)))
+    return result
+
+def scale_log2(array_in, lo=None, hi=None):
+    if lo is None:
+        lo = np.min(array_in)
+    if hi is None:
+        hi = np.max(array_in)
+    validate_for_scaling(array_in, lo, hi)
+    result = np.log10(9 * (array_in - lo) / (hi - lo) + 1)
+    return result
+# fix expected values in test
+
+def scale_power(array_in, lo=None, hi=None):
+    if lo is None:
+        lo = np.min(array_in)
+    if hi is None:
+        hi = np.max(array_in)
+    validate_for_scaling(array_in, lo, hi)
+    result = (np.power(10, array_in) - np.power(10, lo)) / (np.power(10, hi) - np.power(10, lo))
+    return result
+
+def scale_power2(array_in, lo=None, hi=None):
+    if lo is None:
+        lo = np.min(array_in)
+    if hi is None:
+        hi = np.max(array_in)
+    validate_for_scaling(array_in, lo, hi)
+    result = (1/9 *
+                (np.power(10, (array_in - lo) / (hi - lo)) - 1)
+                )
+    return result
+
+def unscale_linear(array_in, lo, hi):
+    result = array_in * (hi - lo) / 1.0 + lo
+    return result
+
+def unscale_log(array_in, lo, hi):
+    result = lo * np.power(hi / lo, array_in)
+
+    # result = ((np.log10(array_in) - np.log10(lo))
+    #              / (np.log10(hi) - np.log10(lo)))
+# out = math.pow(lo * (hi / lo), (array_in / 10.0))
+#                 out = (
+#                     10
+#                     * (math.log10(array_in) - math.log10(lo))
+#                     / (math.log10(hi) - math.log10(lo))
+#                 )
+    return result
+
+def unscale_log2(array_in, lo=None, hi=None):
+    result = (np.power(10, array_in / 1.0) - 1) * (
+                        hi - lo
+                    ) / 9.0 + lo
+                # out = (math.pow(10, array_in / 10.0) - 1) * (
+                #     hi - lo
+                # ) / 9.0 + lo
+                
+    return result
+
+def unscale_power(array_in, lo, hi):
+    # check if lo and hi were provided 
+    # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo))
+    #                 + np.power(10, lo))
+    result = np.log10(
+                    (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo))
+                    + np.power(10, lo)
+                )
+    return result
+
+def unscale_power2(array_in, lo, hi):
+    result = (
+                    np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo
+                )
+    return result
+
+class BaseScaler:
+
+    def fit(self, X: np.ndarray):
+        self.lo_ = np.min(X)
+        self.hi_ = np.max(X)
+        return self
+
+    def fit_transform(self, X: np.ndarray) -> np.ndarray:
+        return (
+            self
+            .fit(X)
+            .transform(X)
+        )
+
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        raise NotImplementedError
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        raise NotImplementedError
+
+
+class LinearScaler(BaseScaler):
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        return scale_linear(X, self.lo_, self.hi_)
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        return unscale_linear(X, self.lo_, self.hi_)
+    
+class LogScaler(BaseScaler):
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        return scale_log(X, self.lo_, self.hi_)
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        return unscale_log(X, self.lo_, self.hi_)
+
+class LogScaler2(BaseScaler):
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        return scale_log2(X, self.lo_, self.hi_)
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        return unscale_log2(X, self.lo_, self.hi_)
+
+class PowerScaler(BaseScaler):
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        return scale_power(X, self.lo_, self.hi_)
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        return unscale_power(X, self.lo_, self.hi_)
+
+class PowerScaler2(BaseScaler):
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        return scale_power2(X, self.lo_, self.hi_)
+
+    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
+        return unscale_power2(X, self.lo_, self.hi_)
+    
\ No newline at end of file

From 3e4ff22eb6c477e02da259fdeb652d0ff5e903d4 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Fri, 8 Mar 2024 17:12:35 -0800
Subject: [PATCH 02/12] Implementing scaling.py functions in plugins

---
 foqus_lib/framework/surrogate/keras_nn.py   | 50 ++++++++++++++-------
 foqus_lib/framework/surrogate/pytorch_nn.py | 50 +++++++++++++--------
 foqus_lib/framework/surrogate/scaling.py    | 28 +++++++++++-
 foqus_lib/framework/surrogate/scikit_nn.py  | 41 ++++++++++-------
 4 files changed, 117 insertions(+), 52 deletions(-)

diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py
index a045a5b52..bae1e6f5f 100644
--- a/foqus_lib/framework/surrogate/keras_nn.py
+++ b/foqus_lib/framework/surrogate/keras_nn.py
@@ -41,6 +41,7 @@
 from pathlib import Path
 from tokenize import String
 
+from typing import Tuple
 import numpy as np
 import pandas as pd
 import tensorflow as tf  # pylint: disable=import-error
@@ -52,6 +53,19 @@
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
 
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2, 
+    PowerScaler, 
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe
+)
+
+# mapping between the human-readable name for the scaling variant
+# and an instance of the corresponding scaler class
 
 # custom class to define Keras NN layers
 @tf.keras.utils.register_keras_serializable()
@@ -293,6 +307,14 @@ def __init__(self, dat=None):
             desc="Name of output file for model, should have file extension: .keras",
             hint="Enter a custom file name if desired",
         )
+        # add option for normalization_form, make dropdown option
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=list(map_name_to_scaler.keys()),
+        )
 
     def run(self):
         """
@@ -316,6 +338,9 @@ def run(self):
         self.msgQueue.put(f"input data columns: {input_data.columns}")
         self.msgQueue.put(f"output data columns: {output_data.columns}")
 
+        # extract scaling function option, apply it to the input data
+        # get scaler object
+
         # np.random.seed(46)
         # rn.seed(1342)
         # tf.random.set_seed(62)
@@ -341,22 +366,13 @@ def run(self):
         xdata = input_data
         zdata = output_data
 
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2"] - see the documentation for
-        # details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        print(f"using scaling function: {scaling_func_option}")
 
         # method to create model
         def create_model():
@@ -370,7 +386,7 @@ def create_model():
                 input_bounds=xdata_bounds,
                 output_bounds=zdata_bounds,
                 normalized=True,
-                normalization_form="Linear",
+                normalization_form=scaling_func_option,
             )
 
             outputs = layers(inputs)  # use network as function outputs = f(inputs)
diff --git a/foqus_lib/framework/surrogate/pytorch_nn.py b/foqus_lib/framework/surrogate/pytorch_nn.py
index f1fb2f15f..be6edc385 100644
--- a/foqus_lib/framework/surrogate/pytorch_nn.py
+++ b/foqus_lib/framework/surrogate/pytorch_nn.py
@@ -50,6 +50,16 @@
 # from foqus_lib.framework.graph.graph import Graph
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2, 
+    PowerScaler, 
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe
+)
 
 # custom class to define Keras NN layers
 np.random.seed(46)
@@ -284,6 +294,13 @@ def __init__(self, dat=None):
             desc="Name of output file for model, should have file extension: .pt",
             hint="Enter a custom file name if desired",
         )
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=["Linear", "Log", "Log2", "Power", "Power2"],
+        )
 
     def run(self):
         """
@@ -326,22 +343,16 @@ def run(self):
         zlabels = list(output_data.columns)
         xdata = input_data
         zdata = output_data
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form, pass as custom string and parse with SymPy
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
-        # documentation for details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+        # xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
+        # zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
+
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        print(f"using scaling function: {scaling_func_option}")
 
         model_data = np.concatenate(
             (xdata, zdata), axis=1
@@ -353,8 +364,11 @@ def run(self):
 
         # raise exception here after BPC position
         # create model
-        x_train = torch.from_numpy(xdata).float().to(device)
-        z_train = torch.from_numpy(zdata).float().to(device)
+
+        # need to convert xdata to a numpy array for the below to work
+        # otherwise causes TypeError: expected np.ndarray (got DataFrame)
+        x_train = torch.from_numpy(xdata.to_numpy()).float().to(device)
+        z_train = torch.from_numpy(zdata.to_numpy()).float().to(device)
 
         # print type at this point
         # can also print inside create_model
diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
index 535cbfebf..e43aa53f2 100644
--- a/foqus_lib/framework/surrogate/scaling.py
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -5,6 +5,8 @@
 from collections import OrderedDict
 
 import numpy as np
+import pandas as pd
+from typing import Tuple
 
 
 def validate_for_scaling(array_in, lo, hi) -> None:
@@ -130,6 +132,10 @@ def unscale_power2(array_in, lo, hi):
     return result
 
 class BaseScaler:
+    # def __init__(self, data_array: np.ndarray):
+    #     self.data = data_array
+    #     self.lo_ = np.min(data_array)
+    #     self.hi_ = np.max(data_array)
 
     def fit(self, X: np.ndarray):
         self.lo_ = np.min(X)
@@ -184,4 +190,24 @@ def transform(self, X: np.ndarray) -> np.ndarray:
 
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_power2(X, self.lo_, self.hi_)
-    
\ No newline at end of file
+
+map_name_to_scaler = {
+    "Linear": LinearScaler(),
+    "Log": LogScaler(),
+    "Log2": LogScaler2(),
+    "Power": PowerScaler(),
+    "Power2": PowerScaler2(),
+    #...
+}
+
+def scale_dataframe(df: pd.DataFrame, scaler: BaseScaler) -> Tuple[pd.DataFrame, dict]:
+    scaled_df = pd.DataFrame(np.nan, columns=df.columns, index=df.index)
+    bounds = {}
+
+    for col_name in df:
+        unscaled_col_data = df[col_name]
+        scaled_col_data = scaler.fit_transform(unscaled_col_data)
+        bounds[col_name] = scaler.lo_, scaler.hi_
+        scaled_df.loc[:, col_name] = scaled_col_data
+
+    return scaled_df, bounds
diff --git a/foqus_lib/framework/surrogate/scikit_nn.py b/foqus_lib/framework/surrogate/scikit_nn.py
index 85736ad3f..a7127206a 100644
--- a/foqus_lib/framework/surrogate/scikit_nn.py
+++ b/foqus_lib/framework/surrogate/scikit_nn.py
@@ -52,6 +52,16 @@
 from foqus_lib.framework.surrogate.surrogate import surrogate
 from foqus_lib.framework.uq.SurrogateParser import SurrogateParser
 
+from foqus_lib.framework.surrogate.scaling import (
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2, 
+    PowerScaler, 
+    PowerScaler2,
+    map_name_to_scaler,
+    scale_dataframe
+)
 
 def validate_training_data(xdata: np.ndarray, zdata: np.ndarray):
     number_columns_in_xdata = xdata.shape[1]
@@ -250,6 +260,14 @@ def __init__(self, dat=None):
             hint="Enter a custom file name if desired",
         )
 
+        self.options.add(
+            name="scaling_function",
+            default="Linear",
+            dtype=str,
+            desc="Scaling/normalization function for input data",
+            validValues=["Linear", "Log", "Log2", "Power", "Power2"],
+        )
+
     def run(self):
         """
         This function overloads the Thread class function,
@@ -300,22 +318,13 @@ def run(self):
         xdata = input_data
         zdata = output_data
 
-        xdata_bounds = {i: (xdata[i].min(), xdata[i].max()) for i in xdata}  # x bounds
-        zdata_bounds = {j: (zdata[j].min(), zdata[j].max()) for j in zdata}  # z bounds
-
-        # normalize data using Linear form, pass as custom string and parse with SymPy
-        # users can normalize with any allowed form # manually, and then pass the
-        # appropriate flag to FOQUS from the allowed list:
-        # ["Linear", "Log", "Power", "Log 2", "Power 2", "Custom] - see the
-        # documentation for details on the scaling formulations
-        xmax, xmin = xdata.max(axis=0), xdata.min(axis=0)
-        zmax, zmin = zdata.max(axis=0), zdata.min(axis=0)
-        xdata, zdata = np.array(xdata), np.array(zdata)
-        for i in range(len(xdata)):
-            for j in range(len(xlabels)):
-                xdata[i, j] = (xdata[i, j] - xmin[j]) / (xmax[j] - xmin[j])
-            for j in range(len(zlabels)):
-                zdata[i, j] = (zdata[i, j] - zmin[j]) / (zmax[j] - zmin[j])
+        scaling_func_option = self.options["scaling_function"].value
+
+        scaler_instance = map_name_to_scaler[scaling_func_option]
+        xdata, xdata_bounds = scale_dataframe(xdata, scaler_instance)
+        zdata, zdata_bounds = scale_dataframe(zdata, scaler_instance)
+
+        print(f"using scaling function: {scaling_func_option}")
 
         model_data = np.concatenate(
             (xdata, zdata), axis=1

From bc3a02006bece8a42d3e1195a87ff2a93ff0a86d Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Fri, 8 Mar 2024 17:14:01 -0800
Subject: [PATCH 03/12] Formatting

---
 foqus_lib/framework/surrogate/keras_nn.py   |  6 +-
 foqus_lib/framework/surrogate/pytorch_nn.py |  6 +-
 foqus_lib/framework/surrogate/scaling.py    | 81 ++++++++++++---------
 foqus_lib/framework/surrogate/scikit_nn.py  |  7 +-
 4 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py
index bae1e6f5f..5bccaa808 100644
--- a/foqus_lib/framework/surrogate/keras_nn.py
+++ b/foqus_lib/framework/surrogate/keras_nn.py
@@ -57,11 +57,11 @@
     BaseScaler,
     LinearScaler,
     LogScaler,
-    LogScaler2, 
-    PowerScaler, 
+    LogScaler2,
+    PowerScaler,
     PowerScaler2,
     map_name_to_scaler,
-    scale_dataframe
+    scale_dataframe,
 )
 
 # mapping between the human-readable name for the scaling variant
diff --git a/foqus_lib/framework/surrogate/pytorch_nn.py b/foqus_lib/framework/surrogate/pytorch_nn.py
index be6edc385..1e7ae1fd0 100644
--- a/foqus_lib/framework/surrogate/pytorch_nn.py
+++ b/foqus_lib/framework/surrogate/pytorch_nn.py
@@ -54,11 +54,11 @@
     BaseScaler,
     LinearScaler,
     LogScaler,
-    LogScaler2, 
-    PowerScaler, 
+    LogScaler2,
+    PowerScaler,
     PowerScaler2,
     map_name_to_scaler,
-    scale_dataframe
+    scale_dataframe,
 )
 
 # custom class to define Keras NN layers
diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
index e43aa53f2..fa0b6a9be 100644
--- a/foqus_lib/framework/surrogate/scaling.py
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -19,7 +19,8 @@ def validate_for_scaling(array_in, lo, hi) -> None:
     if lo == hi:
         raise ValueError("Array must contain non-identical values")
     if not check_under_or_overflow(array_in):
-        raise ValueError("Array contains under/overflow values for dtype")        
+        raise ValueError("Array contains under/overflow values for dtype")
+
 
 def check_under_or_overflow(arr):
     if np.issubdtype(arr.dtype, np.integer):
@@ -28,7 +29,7 @@ def check_under_or_overflow(arr):
         info = np.finfo(arr.dtype)
     else:
         raise ValueError("Unsupported data type")
-    max_value = info.max 
+    max_value = info.max
     min_value = info.min
     return np.all(arr < max_value) & np.all(arr > min_value)
 
@@ -45,8 +46,9 @@ def scale_linear(array_in, lo=None, hi=None):
         result = (array_in - lo) / (hi - lo)
     return result
 
+
 def scale_log(array_in, lo=None, hi=None):
-# need to account for log domain 
+    # need to account for log domain
     if np.any(array_in <= 0):
         raise ValueError("All values must be > 0 to use scale_log")
     if lo is None:
@@ -54,10 +56,10 @@ def scale_log(array_in, lo=None, hi=None):
     if hi is None:
         hi = np.max(array_in)
     validate_for_scaling(array_in, lo, hi)
-    result = ((np.log10(array_in) - np.log10(lo))
-                 / (np.log10(hi) - np.log10(lo)))
+    result = (np.log10(array_in) - np.log10(lo)) / (np.log10(hi) - np.log10(lo))
     return result
 
+
 def scale_log2(array_in, lo=None, hi=None):
     if lo is None:
         lo = np.min(array_in)
@@ -66,71 +68,76 @@ def scale_log2(array_in, lo=None, hi=None):
     validate_for_scaling(array_in, lo, hi)
     result = np.log10(9 * (array_in - lo) / (hi - lo) + 1)
     return result
+
+
 # fix expected values in test
 
+
 def scale_power(array_in, lo=None, hi=None):
     if lo is None:
         lo = np.min(array_in)
     if hi is None:
         hi = np.max(array_in)
     validate_for_scaling(array_in, lo, hi)
-    result = (np.power(10, array_in) - np.power(10, lo)) / (np.power(10, hi) - np.power(10, lo))
+    result = (np.power(10, array_in) - np.power(10, lo)) / (
+        np.power(10, hi) - np.power(10, lo)
+    )
     return result
 
+
 def scale_power2(array_in, lo=None, hi=None):
     if lo is None:
         lo = np.min(array_in)
     if hi is None:
         hi = np.max(array_in)
     validate_for_scaling(array_in, lo, hi)
-    result = (1/9 *
-                (np.power(10, (array_in - lo) / (hi - lo)) - 1)
-                )
+    result = 1 / 9 * (np.power(10, (array_in - lo) / (hi - lo)) - 1)
     return result
 
+
 def unscale_linear(array_in, lo, hi):
     result = array_in * (hi - lo) / 1.0 + lo
     return result
 
+
 def unscale_log(array_in, lo, hi):
     result = lo * np.power(hi / lo, array_in)
 
     # result = ((np.log10(array_in) - np.log10(lo))
     #              / (np.log10(hi) - np.log10(lo)))
-# out = math.pow(lo * (hi / lo), (array_in / 10.0))
-#                 out = (
-#                     10
-#                     * (math.log10(array_in) - math.log10(lo))
-#                     / (math.log10(hi) - math.log10(lo))
-#                 )
+    # out = math.pow(lo * (hi / lo), (array_in / 10.0))
+    #                 out = (
+    #                     10
+    #                     * (math.log10(array_in) - math.log10(lo))
+    #                     / (math.log10(hi) - math.log10(lo))
+    #                 )
     return result
 
+
 def unscale_log2(array_in, lo=None, hi=None):
-    result = (np.power(10, array_in / 1.0) - 1) * (
-                        hi - lo
-                    ) / 9.0 + lo
-                # out = (math.pow(10, array_in / 10.0) - 1) * (
-                #     hi - lo
-                # ) / 9.0 + lo
-                
+    result = (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo
+    # out = (math.pow(10, array_in / 10.0) - 1) * (
+    #     hi - lo
+    # ) / 9.0 + lo
+
     return result
 
+
 def unscale_power(array_in, lo, hi):
-    # check if lo and hi were provided 
+    # check if lo and hi were provided
     # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo))
     #                 + np.power(10, lo))
     result = np.log10(
-                    (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo))
-                    + np.power(10, lo)
-                )
+        (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + np.power(10, lo)
+    )
     return result
 
+
 def unscale_power2(array_in, lo, hi):
-    result = (
-                    np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo
-                )
+    result = np.log10(9.0 * array_in / 1.0 + 1) * (hi - lo) + lo
     return result
 
+
 class BaseScaler:
     # def __init__(self, data_array: np.ndarray):
     #     self.data = data_array
@@ -143,11 +150,7 @@ def fit(self, X: np.ndarray):
         return self
 
     def fit_transform(self, X: np.ndarray) -> np.ndarray:
-        return (
-            self
-            .fit(X)
-            .transform(X)
-        )
+        return self.fit(X).transform(X)
 
     def transform(self, X: np.ndarray) -> np.ndarray:
         raise NotImplementedError
@@ -162,7 +165,8 @@ def transform(self, X: np.ndarray) -> np.ndarray:
 
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_linear(X, self.lo_, self.hi_)
-    
+
+
 class LogScaler(BaseScaler):
     def transform(self, X: np.ndarray) -> np.ndarray:
         return scale_log(X, self.lo_, self.hi_)
@@ -170,6 +174,7 @@ def transform(self, X: np.ndarray) -> np.ndarray:
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_log(X, self.lo_, self.hi_)
 
+
 class LogScaler2(BaseScaler):
     def transform(self, X: np.ndarray) -> np.ndarray:
         return scale_log2(X, self.lo_, self.hi_)
@@ -177,6 +182,7 @@ def transform(self, X: np.ndarray) -> np.ndarray:
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_log2(X, self.lo_, self.hi_)
 
+
 class PowerScaler(BaseScaler):
     def transform(self, X: np.ndarray) -> np.ndarray:
         return scale_power(X, self.lo_, self.hi_)
@@ -184,6 +190,7 @@ def transform(self, X: np.ndarray) -> np.ndarray:
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_power(X, self.lo_, self.hi_)
 
+
 class PowerScaler2(BaseScaler):
     def transform(self, X: np.ndarray) -> np.ndarray:
         return scale_power2(X, self.lo_, self.hi_)
@@ -191,15 +198,17 @@ def transform(self, X: np.ndarray) -> np.ndarray:
     def inverse_transform(self, X: np.ndarray) -> np.ndarray:
         return unscale_power2(X, self.lo_, self.hi_)
 
+
 map_name_to_scaler = {
     "Linear": LinearScaler(),
     "Log": LogScaler(),
     "Log2": LogScaler2(),
     "Power": PowerScaler(),
     "Power2": PowerScaler2(),
-    #...
+    # ...
 }
 
+
 def scale_dataframe(df: pd.DataFrame, scaler: BaseScaler) -> Tuple[pd.DataFrame, dict]:
     scaled_df = pd.DataFrame(np.nan, columns=df.columns, index=df.index)
     bounds = {}
diff --git a/foqus_lib/framework/surrogate/scikit_nn.py b/foqus_lib/framework/surrogate/scikit_nn.py
index a7127206a..46f7c08ab 100644
--- a/foqus_lib/framework/surrogate/scikit_nn.py
+++ b/foqus_lib/framework/surrogate/scikit_nn.py
@@ -56,13 +56,14 @@
     BaseScaler,
     LinearScaler,
     LogScaler,
-    LogScaler2, 
-    PowerScaler, 
+    LogScaler2,
+    PowerScaler,
     PowerScaler2,
     map_name_to_scaler,
-    scale_dataframe
+    scale_dataframe,
 )
 
+
 def validate_training_data(xdata: np.ndarray, zdata: np.ndarray):
     number_columns_in_xdata = xdata.shape[1]
     number_columns_in_zdata = zdata.shape[1]

From 90500dcff6cc43be6863bdf5c84eea096a7e4667 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Mon, 25 Mar 2024 14:45:42 -0700
Subject: [PATCH 04/12] Remove commented code and add documentation for
 BaseScaler

---
 foqus_lib/framework/surrogate/scaling.py | 27 +++---------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
index fa0b6a9be..16301bb6a 100644
--- a/foqus_lib/framework/surrogate/scaling.py
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -70,9 +70,6 @@ def scale_log2(array_in, lo=None, hi=None):
     return result
 
 
-# fix expected values in test
-
-
 def scale_power(array_in, lo=None, hi=None):
     if lo is None:
         lo = np.min(array_in)
@@ -102,31 +99,15 @@ def unscale_linear(array_in, lo, hi):
 
 def unscale_log(array_in, lo, hi):
     result = lo * np.power(hi / lo, array_in)
-
-    # result = ((np.log10(array_in) - np.log10(lo))
-    #              / (np.log10(hi) - np.log10(lo)))
-    # out = math.pow(lo * (hi / lo), (array_in / 10.0))
-    #                 out = (
-    #                     10
-    #                     * (math.log10(array_in) - math.log10(lo))
-    #                     / (math.log10(hi) - math.log10(lo))
-    #                 )
     return result
 
 
 def unscale_log2(array_in, lo=None, hi=None):
     result = (np.power(10, array_in / 1.0) - 1) * (hi - lo) / 9.0 + lo
-    # out = (math.pow(10, array_in / 10.0) - 1) * (
-    #     hi - lo
-    # ) / 9.0 + lo
-
     return result
 
 
 def unscale_power(array_in, lo, hi):
-    # check if lo and hi were provided
-    # result = np.log10((array_in / 10.0) * (np.power(10, hi) - np.power(10, lo))
-    #                 + np.power(10, lo))
     result = np.log10(
         (array_in / 1.0) * (np.power(10, hi) - np.power(10, lo)) + np.power(10, lo)
     )
@@ -139,10 +120,9 @@ def unscale_power2(array_in, lo, hi):
 
 
 class BaseScaler:
-    # def __init__(self, data_array: np.ndarray):
-    #     self.data = data_array
-    #     self.lo_ = np.min(data_array)
-    #     self.hi_ = np.max(data_array)
+    """BaseScaler is the base class for the scaler classes defined
+    below. It exposes the transformer interface from scikit-learn,
+    and is not supposed to be instantiated directly."""
 
     def fit(self, X: np.ndarray):
         self.lo_ = np.min(X)
@@ -205,7 +185,6 @@ def inverse_transform(self, X: np.ndarray) -> np.ndarray:
     "Log2": LogScaler2(),
     "Power": PowerScaler(),
     "Power2": PowerScaler2(),
-    # ...
 }
 
 

From 490d9f246cba27377d2bcd112d779263488f64d6 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Mon, 25 Mar 2024 15:41:39 -0700
Subject: [PATCH 05/12] Passing hypothesis tests for scaling.py

---
 foqus_lib/framework/surrogate/scaling.py      |  2 +-
 .../framework/surrogate/tests/__init__.py     |  0
 .../{scale_test => tests}/test_scaling.py     | 85 ++++++++++++++++---
 3 files changed, 75 insertions(+), 12 deletions(-)
 create mode 100644 foqus_lib/framework/surrogate/tests/__init__.py
 rename foqus_lib/framework/surrogate/{scale_test => tests}/test_scaling.py (79%)

diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
index 16301bb6a..3670230f1 100644
--- a/foqus_lib/framework/surrogate/scaling.py
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -16,7 +16,7 @@ def validate_for_scaling(array_in, lo, hi) -> None:
         raise ValueError("Only 1D arrays supported")
     if array_in.size < 2:
         raise ValueError("Array must have at least 2 values")
-    if lo == hi:
+    if np.allclose(lo, hi):
         raise ValueError("Array must contain non-identical values")
     if not check_under_or_overflow(array_in):
         raise ValueError("Array contains under/overflow values for dtype")
diff --git a/foqus_lib/framework/surrogate/tests/__init__.py b/foqus_lib/framework/surrogate/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/foqus_lib/framework/surrogate/scale_test/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py
similarity index 79%
rename from foqus_lib/framework/surrogate/scale_test/test_scaling.py
rename to foqus_lib/framework/surrogate/tests/test_scaling.py
index 09a4f0596..b07adccf5 100644
--- a/foqus_lib/framework/surrogate/scale_test/test_scaling.py
+++ b/foqus_lib/framework/surrogate/tests/test_scaling.py
@@ -12,9 +12,16 @@
     scale_power2,
     unscale_power2,
     validate_for_scaling,
+    map_name_to_scaler,
+    BaseScaler,
+    LinearScaler,
+    LogScaler,
+    LogScaler2,
+    PowerScaler,
+    PowerScaler2,
 )
 
-from hypothesis.extra.numpy import arrays as arrays_strat, array_shapes
+from hypothesis.extra import numpy as hypothesis_np
 from hypothesis import given, example, assume
 from contextlib import contextmanager
 
@@ -48,13 +55,6 @@ def test_scale_linear():
     assert np.all(scaled_array <= 1)
     assert np.allclose(scaled_array, [0.0, 0.5, 1.0])
 
-    # Test case 4: Scaling with repeated values
-    input_array = np.array([2, 2, 2, 2])
-    scaled_array = scale_linear(input_array)
-    assert np.all(scaled_array >= 0)
-    assert np.all(scaled_array <= 1)
-    assert np.allclose(scaled_array, [0.0, 0.0, 0.0, 0.0])
-
 
 def test_unscale_linear():
     # Test case 1: Basic unscaling
@@ -189,9 +189,15 @@ def test_unscale_power2():
     assert np.allclose(unscaled_array, [1.0, 4.7, 4.8, 4.999, 5.0])
 
 
-# fill in with more cases, parameters, functions
-@pytest.mark.parametrize("x", [np.array([1, 2, 3, 4, 5]), np.array([0, 7, 9, 10, 12])])
-# @given(x=arrays_strat(np.float32, array_shapes()))
+@given(
+    x=hypothesis_np.arrays(
+        np.float64,
+        hypothesis_np.array_shapes(),
+        # TODO: see if these bounds can be relaxed
+        # larger values cause failures in scale_power
+        elements={"min_value": -5, "max_value": 5},
+    )
+)
 @pytest.mark.parametrize(
     "scale,unscale",
     [
@@ -218,6 +224,63 @@ def test_roundtrip(x, scale, unscale):
         assert np.allclose(x, unscaled)
 
 
+# parametrize with list of scalar objects
+def test_object_testing():
+    array_one = np.array([1, 3, 5, 6, 8, 9, 10])
+
+    scaler_variant_1 = "Linear"
+
+    # actual test content
+    scaler_instance_1 = map_name_to_scaler[scaler_variant_1]
+
+    linear_arr_one = scaler_instance_1.fit_transform(array_one)
+
+    print(linear_arr_one)
+    assert np.all(linear_arr_one >= 0)
+    assert np.all(linear_arr_one <= 1)
+    # actual test content
+
+    scaler_variant_2 = "Log"
+
+    scaler_instance_2 = map_name_to_scaler[scaler_variant_2]
+
+    linear_arr_two = scaler_instance_2.fit_transform(array_one)
+
+    print(linear_arr_two)
+    assert np.all(linear_arr_two >= 0)
+    assert np.all(linear_arr_two <= 1)
+
+    scaler_variant_3 = "Power"
+
+    scaler_instance_3 = map_name_to_scaler[scaler_variant_3]
+
+    linear_arr_three = scaler_instance_3.fit_transform(array_one)
+
+    print(linear_arr_three)
+    assert np.all(linear_arr_one >= 0)
+    assert np.all(linear_arr_one <= 1)
+
+    scaler_variant_four = "Log2"
+
+    scaler_instance_four = map_name_to_scaler[scaler_variant_four]
+
+    linear_arr_four = scaler_instance_four.fit_transform(array_one)
+
+    print(linear_arr_four)
+    assert np.all(linear_arr_four >= 0)
+    assert np.all(linear_arr_four <= 1)
+
+    scaler_variant_five = "Power2"
+
+    scaler_instance_five = map_name_to_scaler[scaler_variant_five]
+
+    linear_arr_five = scaler_instance_five.fit_transform(array_one)
+
+    print(linear_arr_five)
+    assert np.all(linear_arr_one >= 0)
+    assert np.all(linear_arr_one <= 1)
+
+
 def passes_validation(array_in, lo, hi):
     try:
         validate_for_scaling(array_in, lo, hi)

From 730046efa8cab386bd9cb8aabfb8fa7096bfc877 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Tue, 26 Mar 2024 16:37:03 -0700
Subject: [PATCH 06/12] Parametrize and revise scaler object test function

---
 .../framework/surrogate/tests/test_scaling.py | 70 +++++--------------
 1 file changed, 17 insertions(+), 53 deletions(-)

diff --git a/foqus_lib/framework/surrogate/tests/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py
index b07adccf5..3c039f862 100644
--- a/foqus_lib/framework/surrogate/tests/test_scaling.py
+++ b/foqus_lib/framework/surrogate/tests/test_scaling.py
@@ -224,61 +224,25 @@ def test_roundtrip(x, scale, unscale):
         assert np.allclose(x, unscaled)
 
 
-# parametrize with list of scalar objects
-def test_object_testing():
-    array_one = np.array([1, 3, 5, 6, 8, 9, 10])
-
-    scaler_variant_1 = "Linear"
-
-    # actual test content
-    scaler_instance_1 = map_name_to_scaler[scaler_variant_1]
-
-    linear_arr_one = scaler_instance_1.fit_transform(array_one)
-
-    print(linear_arr_one)
-    assert np.all(linear_arr_one >= 0)
-    assert np.all(linear_arr_one <= 1)
-    # actual test content
-
-    scaler_variant_2 = "Log"
-
-    scaler_instance_2 = map_name_to_scaler[scaler_variant_2]
-
-    linear_arr_two = scaler_instance_2.fit_transform(array_one)
-
-    print(linear_arr_two)
-    assert np.all(linear_arr_two >= 0)
-    assert np.all(linear_arr_two <= 1)
-
-    scaler_variant_3 = "Power"
-
-    scaler_instance_3 = map_name_to_scaler[scaler_variant_3]
-
-    linear_arr_three = scaler_instance_3.fit_transform(array_one)
-
-    print(linear_arr_three)
-    assert np.all(linear_arr_one >= 0)
-    assert np.all(linear_arr_one <= 1)
-
-    scaler_variant_four = "Log2"
-
-    scaler_instance_four = map_name_to_scaler[scaler_variant_four]
-
-    linear_arr_four = scaler_instance_four.fit_transform(array_one)
-
-    print(linear_arr_four)
-    assert np.all(linear_arr_four >= 0)
-    assert np.all(linear_arr_four <= 1)
-
-    scaler_variant_five = "Power2"
-
-    scaler_instance_five = map_name_to_scaler[scaler_variant_five]
+@pytest.mark.parametrize(
+    "variant",
+    [
+        "Linear",
+        "Log",
+        "Log2",
+        "Power",
+        "Power2",
+    ],
+)
+def test_use_scaler_objects(variant):
+    input_array = np.array([1, 3, 5, 6, 8, 9, 10])
+    scaler_instance = map_name_to_scaler[variant]
 
-    linear_arr_five = scaler_instance_five.fit_transform(array_one)
+    result_arr = scaler_instance.fit_transform(input_array)
 
-    print(linear_arr_five)
-    assert np.all(linear_arr_one >= 0)
-    assert np.all(linear_arr_one <= 1)
+    print(result_arr)
+    assert np.all(result_arr >= 0)
+    assert np.all(result_arr <= 1)
 
 
 def passes_validation(array_in, lo, hi):

From ff571ed0466cce2754f9e743196a07d84dc9897b Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Fri, 29 Mar 2024 18:09:40 -0700
Subject: [PATCH 07/12] Handle error case for very small values in scale_log

---
 foqus_lib/framework/surrogate/scaling.py            | 5 +++--
 foqus_lib/framework/surrogate/tests/test_scaling.py | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/foqus_lib/framework/surrogate/scaling.py b/foqus_lib/framework/surrogate/scaling.py
index 3670230f1..ae79d7a88 100644
--- a/foqus_lib/framework/surrogate/scaling.py
+++ b/foqus_lib/framework/surrogate/scaling.py
@@ -49,8 +49,9 @@ def scale_linear(array_in, lo=None, hi=None):
 
 def scale_log(array_in, lo=None, hi=None):
     # need to account for log domain
-    if np.any(array_in <= 0):
-        raise ValueError("All values must be > 0 to use scale_log")
+    epsilon = 1e-8
+    if np.any(array_in < epsilon):
+        raise ValueError(f"All values must be greater than {epsilon}")
     if lo is None:
         lo = np.min(array_in)
     if hi is None:
diff --git a/foqus_lib/framework/surrogate/tests/test_scaling.py b/foqus_lib/framework/surrogate/tests/test_scaling.py
index 3c039f862..10ffa6922 100644
--- a/foqus_lib/framework/surrogate/tests/test_scaling.py
+++ b/foqus_lib/framework/surrogate/tests/test_scaling.py
@@ -214,8 +214,10 @@ def test_roundtrip(x, scale, unscale):
     hi = np.max(x)
     if not passes_validation(x, lo, hi):
         expected_failure = pytest.raises(ValueError)
-    elif lo <= 0 and scale in POSITIVE_VALS_ONLY:
-        expected_failure = pytest.raises(ValueError, match="All values must be > 0.*")
+    elif lo < 1e-08 and scale in POSITIVE_VALS_ONLY:
+        expected_failure = pytest.raises(
+            ValueError, match="All values must be greater than 1e-08"
+        )
     else:
         expected_failure = does_not_raise()
     with expected_failure:

From 193d457531ed71d0b4179d4115b4e221d57b36ac Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Tue, 9 Apr 2024 16:18:23 -0700
Subject: [PATCH 08/12] Try adding GUI test for scaling option select

---
 foqus_lib/gui/tests/test_surrogate.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index 21cccda2c..913857443 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -17,6 +17,7 @@
 
 from foqus_lib.gui.main.mainWindow import mainWindow
 from foqus_lib.gui.surrogate.surrogateFrame import surrogateFrame
+from PyQt5.QtWidgets import QComboBox
 
 pytestmark = pytest.mark.gui
 
@@ -56,6 +57,7 @@ class TestFrame:
             # "ACOSSO",
         ],
     )
+    
     def test_run_surrogate(
         self,
         qtbot,
@@ -81,6 +83,10 @@ def test_run_surrogate(
             qtbot.click(button="Select All")
         with qtbot.focusing_on(group_box="Output Variables"):
             qtbot.click(button="Select All")
+        qtbot.select_tab("Method Settings")
+        with qtbot.focusing_on(table=any):
+            qtbot.select_row(12)
+            qtbot.using(column="Value").set_option("Linear")
         qtbot.select_tab("Execution")
         run_button, stop_button = qtbot.locate(button=any, index=[0, 1])
         run_button.click()

From 859ac94eb8bf1f826b1e5d4105e70eca5dc1e1be Mon Sep 17 00:00:00 2001
From: Ludovico Bianchi <lbianchi@lbl.gov>
Date: Tue, 9 Apr 2024 19:31:33 -0500
Subject: [PATCH 09/12] Add support for text hints in table row search

---
 foqus_lib/gui/tests/test_surrogate.py | 2 +-
 pytest_qt_extras.py                   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index 913857443..1b56b4905 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -85,7 +85,7 @@ def test_run_surrogate(
             qtbot.click(button="Select All")
         qtbot.select_tab("Method Settings")
         with qtbot.focusing_on(table=any):
-            qtbot.select_row(12)
+            qtbot.select_row("scaling_function")
             qtbot.using(column="Value").set_option("Linear")
         qtbot.select_tab("Execution")
         run_button, stop_button = qtbot.locate(button=any, index=[0, 1])
diff --git a/pytest_qt_extras.py b/pytest_qt_extras.py
index caf758bf2..4115c1f00 100644
--- a/pytest_qt_extras.py
+++ b/pytest_qt_extras.py
@@ -589,6 +589,10 @@ def run(cls, table, hint: TableRowSpec):
                 raise InvalidMatchError(
                     f"row index {hint} out of range: (count: {count})"
                 )
+        elif isinstance(hint, str):
+            matching_items = table.findItems(hint, QtCore.Qt.MatchExactly)
+            InvalidMatchError.check(matching_items, expected=1)
+            idx = int(matching_items[0].row())
         elif hint is None:
             if count == 1:
                 idx = 0

From 719375183977dc46fc453c5826b405347f292e1f Mon Sep 17 00:00:00 2001
From: Ludovico Bianchi <lbianchi@lbl.gov>
Date: Tue, 9 Apr 2024 19:44:31 -0500
Subject: [PATCH 10/12] Add quotes to search hint since they are present in the
 widget

---
 foqus_lib/gui/tests/test_surrogate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index 1b56b4905..73b5dfc58 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -86,7 +86,7 @@ def test_run_surrogate(
         qtbot.select_tab("Method Settings")
         with qtbot.focusing_on(table=any):
             qtbot.select_row("scaling_function")
-            qtbot.using(column="Value").set_option("Linear")
+            qtbot.using(column="Value").set_option('"Linear"')
         qtbot.select_tab("Execution")
         run_button, stop_button = qtbot.locate(button=any, index=[0, 1])
         run_button.click()

From 45ca80f34162b42973314cc384e41f1b06f99ad4 Mon Sep 17 00:00:00 2001
From: Ludovico Bianchi <lbianchi@lbl.gov>
Date: Tue, 9 Apr 2024 19:56:19 -0500
Subject: [PATCH 11/12] Format with Black

---
 foqus_lib/framework/surrogate/keras_nn.py | 1 +
 foqus_lib/gui/tests/test_surrogate.py     | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/foqus_lib/framework/surrogate/keras_nn.py b/foqus_lib/framework/surrogate/keras_nn.py
index 5bccaa808..3e581bf30 100644
--- a/foqus_lib/framework/surrogate/keras_nn.py
+++ b/foqus_lib/framework/surrogate/keras_nn.py
@@ -67,6 +67,7 @@
 # mapping between the human-readable name for the scaling variant
 # and an instance of the corresponding scaler class
 
+
 # custom class to define Keras NN layers
 @tf.keras.utils.register_keras_serializable()
 class keras_nn(tf.keras.layers.Layer):
diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index 73b5dfc58..6a2d7f535 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -57,7 +57,6 @@ class TestFrame:
             # "ACOSSO",
         ],
     )
-    
     def test_run_surrogate(
         self,
         qtbot,

From 21f2b429b4ad3f9146b8367f32ae76d68dc29831 Mon Sep 17 00:00:00 2001
From: franflame <isaacfc749@gmail.com>
Date: Fri, 12 Apr 2024 17:17:52 -0700
Subject: [PATCH 12/12] Add scaling variants to surrogate GUI test

---
 foqus_lib/gui/tests/test_surrogate.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/foqus_lib/gui/tests/test_surrogate.py b/foqus_lib/gui/tests/test_surrogate.py
index 6a2d7f535..91725b41e 100644
--- a/foqus_lib/gui/tests/test_surrogate.py
+++ b/foqus_lib/gui/tests/test_surrogate.py
@@ -57,6 +57,16 @@ class TestFrame:
             # "ACOSSO",
         ],
     )
+    @pytest.mark.parametrize(
+        "scaling_variant",
+        [
+            '"Linear"',
+            '"Log"',
+            '"Log2"',
+            '"Power"',
+            '"Power2"',
+        ],
+    )
     def test_run_surrogate(
         self,
         qtbot,
@@ -64,6 +74,7 @@ def test_run_surrogate(
         main_window: mainWindow,
         name: str,
         required_import: str,
+        scaling_variant: str,
     ):
         qtbot.focused = frame
         pytest.importorskip(required_import, reason=f"{required_import} not available")
@@ -85,7 +96,7 @@ def test_run_surrogate(
         qtbot.select_tab("Method Settings")
         with qtbot.focusing_on(table=any):
             qtbot.select_row("scaling_function")
-            qtbot.using(column="Value").set_option('"Linear"')
+            qtbot.using(column="Value").set_option(scaling_variant)
         qtbot.select_tab("Execution")
         run_button, stop_button = qtbot.locate(button=any, index=[0, 1])
         run_button.click()