edgarWolf · edgarWolf · Jun 10, 2025 · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.0.11
+0.0.12
diff --git a/docs/data.md b/docs/data.md
@@ -66,4 +66,4 @@ coefficients, latent_information, curves = sample_curves(dataset["example"], mea
 ```
 By specifying a value for `measurement_scale` some gaussian noise with the specified scale is applied
 on each value for every curve. By default, $5\%$ of the mean of the curves is used. If you want to
-omit the scale, set it to `0.0` explictly.
+omit the scale, set it to `0.0` explicitly.
diff --git a/driftbench/benchmarks/data.py b/driftbench/benchmarks/data.py
@@ -5,15 +5,29 @@
 
 
 class Dataset:
+    """
+    Represents a container class for a dataset specification for benchmarking purposes.
+    """
+
     def __init__(self, name, spec, f=None, w0=None, n_variations=5):
+        """
+        Args:
+            name (str): The name of the dataset specification.
+            spec (dict): The yaml-specification of the dataset.
+            f (Callable): The function to fit the curves.
+            w0 (list[float]): The initial guess.
-            w0 (list[float]): The initial guess.
+            w0 (np.ndarray): The initial value.
-            w0 (list[float]): The initial guess.
+            w0 (np.ndarray): The initial value for the internal parameters.
-            w0 (list[float]): The initial guess.
+            w0 (np.ndarray): The initial value.
-            w0 (list[float]): The initial guess.
+            w0 (np.ndarray): The initial value for the internal parameters.
+            n_variations (int): The number of variations sampled for each dataset.
+            Each dataset is sampled `n_variations` times, each time with a
+            different random seed.
+        """
         self.spec = spec
         self.name = name
         self.n_variations = n_variations
         self.w0 = w0
         self.f = f
 
-        drift_bounds = self.spec['drifts'].get_individual_drift_bounds()
-        self.Y = transform_drift_segments_into_binary(drift_bounds, self.spec['N'])
+        drift_bounds = self.spec["drifts"].get_individual_drift_bounds()
+        self.Y = transform_drift_segments_into_binary(drift_bounds, self.spec["N"])
 
     def _generate(self, random_state):
         _, _, curves = sample_curves(

diff --git a/driftbench/data_generation/sample.py b/driftbench/data_generation/sample.py
@@ -12,6 +12,28 @@ def sample_curves(
     measurement_scale=None,
     callback=None,
 ):
+    """
+    Samples synthetic curves given a dataset specification.
+    Args:
+        dataset_specification (dict): A dataset specification which contains
+        all information to synthesize curves in yaml-format.
+        Each dataset is encoded with a name and needs a latent information provided.
+        The function `f` to fit as well as the initial guess `w0` can also be provided.
+        f (Callable): The function to fit the curves. Use this parameter if no function is specified
+        in `dataset_specification`.
+        w0 (list[float]): The initial guess for the optimization problem used to synthesize curves.
+        Use this parameter if no initial guess is specified in `dataset_specification`.
+        random_state (int): The random state for reproducibility.
+        measurement_scale (float): The scale for the noise applied on the evaluated curves. If not
+        set, 5% of the mean of the curves is used. Set to 0.0 if you want to omit
+        this noise.
+    Returns:
+        tuple[np.ndarray, list[LatentInformation], np.ndarray]: A tuple containing
+            - the coefficients `w` for each sampled curve.
+            - the latent information for each sampled curve.
+            - the evaluated sampled curves.
+
+    """
     dimensions = dataset_specification["dimensions"]
     drifts = dataset_specification.get("drifts")
     x_scale = dataset_specification.get("x_scale", 0.02)

diff --git a/driftbench/data_generation/visualize.py b/driftbench/data_generation/visualize.py
@@ -2,7 +2,9 @@
 import matplotlib.pyplot as plt
 
 
-def plot_curve_with_latent_information(coefficients, p, latent_information, title=None, ax=None, y_lim=None):
+def plot_curve_with_latent_information(
+    coefficients, p, latent_information, title=None, ax=None, y_lim=None
+):
     """
     Plots the reconstructed wave with the given coefficients and a polynomial with the ground truth
     defined by the latent information.
@@ -29,20 +31,21 @@ def plot_curve_with_latent_information(coefficients, p, latent_information, titl
 
     # Plot the given x-values
     for xx in latent_information.x0:
-        ax.axvline(xx, linestyle='dashed', color='black')
+        ax.axvline(xx, linestyle="dashed", color="black")
 
     # Plot slope according to first derivative
     for slope, x_slope in zip(latent_information.y1, latent_information.x1):
-        xxs = [x for x in range(int(x_slope - 1), int(x_slope + 3.))]
+        xxs = [x for x in range(int(x_slope - 1), int(x_slope + 3.0))]
         dx_vals = np.array(
-            [(slope * x) - (slope * x_slope - p(coefficients, x_slope)) for x in xxs])
+            [(slope * x) - (slope * x_slope - p(coefficients, x_slope)) for x in xxs]
+        )
         ax.scatter(x_slope, p(coefficients, x_slope), alpha=0.4, color="green")
         ax.plot(xxs, dx_vals, c="green")
 
     # Plot curvature
     for x_curvature, curvature in zip(latent_information.x2, latent_information.y2):
         label = "convex" if curvature > 0.0 else "concave"
-        ax.axvline(x_curvature, linestyle='dashed', color='purple', label=label)
+        ax.axvline(x_curvature, linestyle="dashed", color="purple", label=label)
 
     # Mark the corresponding y-values
     for yy, xx in zip(latent_information.y0, latent_information.x0):
@@ -56,6 +59,17 @@ def plot_curve_with_latent_information(coefficients, p, latent_information, titl
 
 
 def plot_curves(curves, xs, title=None, cmap="coolwarm", ylim=None):
+    """
+    Plots curves with a given cmap, where the color mapping is applied over the temporal axis.
+    Args:
+        curves (np.ndarray): The curves array, of shape (N, m), where N curves consist of m
+        timesteps.
+        xs (list[float]): The x-values for the curve, must be of length m.
+        title (str): The title of the plot.
+        cmap (str): The colormap for the color mapping over the temporal axis.
+        ylim (list[float]): The y-limit for the plot.
+
+    """
     fig, ax = plt.subplots()
     cmap_obj = plt.get_cmap(name=cmap)
     cycler = plt.cycler("color", cmap_obj(np.linspace(0, 1, curves.shape[0])))