Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.11
0.0.12
2 changes: 1 addition & 1 deletion docs/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ coefficients, latent_information, curves = sample_curves(dataset["example"], mea
```
By specifying a value for `measurement_scale` some gaussian noise with the specified scale is applied
on each value for every curve. By default, $5\%$ of the mean of the curves is used. If you want to
omit the scale, set it to `0.0` explictly.
omit the scale, set it to `0.0` explicitly.
18 changes: 16 additions & 2 deletions driftbench/benchmarks/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,29 @@


class Dataset:
"""
Represents a container class for a dataset specification for benchmarking purposes.
"""

def __init__(self, name, spec, f=None, w0=None, n_variations=5):
"""
Args:
name (str): The name of the dataset specification.
spec (dict): The yaml-specification of the dataset.
f (Callable): The function to fit the curves.
w0 (list[float]): The initial guess.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
w0 (list[float]): The inital guess.
w0 (np.ndarray): The initial value.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe something like:

Suggested change
w0 (list[float]): The inital guess.
w0 (np.ndarray): The initial value for the internal parameters.

?

n_variations (int): The number of times each dataset is sampled.
Each dataset is sampled `n_variations` times, each time with a
different random seed.
"""
self.spec = spec
self.name = name
self.n_variations = n_variations
self.w0 = w0
self.f = f

drift_bounds = self.spec['drifts'].get_individual_drift_bounds()
self.Y = transform_drift_segments_into_binary(drift_bounds, self.spec['N'])
drift_bounds = self.spec["drifts"].get_individual_drift_bounds()
self.Y = transform_drift_segments_into_binary(drift_bounds, self.spec["N"])

def _generate(self, random_state):
_, _, curves = sample_curves(
Expand Down
22 changes: 22 additions & 0 deletions driftbench/data_generation/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,28 @@ def sample_curves(
measurement_scale=None,
callback=None,
):
"""
Samples synthetic curves given a dataset specification.
Args:
dataset_specification (dict): A dataset specification which contains
all information to synthesize curves in yaml-format.
Each dataset is encoded with a name and requires latent information to be provided.
The function `f` to fit as well as the initial guess `w0` can also be provided.
f (Callable): The function to fit the curves. Use this parameter if no function is specified
in `dataset_specification`.
w0 (list[float]): The initial guess for the optimization problem used to synthesize curves.
Use this parameter if no initial guess is specified in `dataset_specification`.
random_state (int): The random state for reproducibility.
measurement_scale (float): The scale for the noise applied on the evaluated curves. If not
set, 5% of the mean of the curves is used. Set to 0.0 if you want to omit
this noise.
Returns:
tuple[np.ndarray, list[LatentInformation], np.ndarray]: A tuple containing
- the coefficients `w` for each sampled curve.
- the latent information for each sampled curve.
- the evaluated sampled curves.

"""
dimensions = dataset_specification["dimensions"]
drifts = dataset_specification.get("drifts")
x_scale = dataset_specification.get("x_scale", 0.02)
Expand Down
24 changes: 19 additions & 5 deletions driftbench/data_generation/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import matplotlib.pyplot as plt


def plot_curve_with_latent_information(coefficients, p, latent_information, title=None, ax=None, y_lim=None):
def plot_curve_with_latent_information(
coefficients, p, latent_information, title=None, ax=None, y_lim=None
):
"""
Plots the reconstructed wave with the given coefficients and a polynomial with the ground truth
defined by the latent information.
Expand All @@ -29,20 +31,21 @@ def plot_curve_with_latent_information(coefficients, p, latent_information, titl

# Plot the given x-values
for xx in latent_information.x0:
ax.axvline(xx, linestyle='dashed', color='black')
ax.axvline(xx, linestyle="dashed", color="black")

# Plot slope according to first derivative
for slope, x_slope in zip(latent_information.y1, latent_information.x1):
xxs = [x for x in range(int(x_slope - 1), int(x_slope + 3.))]
xxs = [x for x in range(int(x_slope - 1), int(x_slope + 3.0))]
dx_vals = np.array(
[(slope * x) - (slope * x_slope - p(coefficients, x_slope)) for x in xxs])
[(slope * x) - (slope * x_slope - p(coefficients, x_slope)) for x in xxs]
)
ax.scatter(x_slope, p(coefficients, x_slope), alpha=0.4, color="green")
ax.plot(xxs, dx_vals, c="green")

# Plot curvature
for x_curvature, curvature in zip(latent_information.x2, latent_information.y2):
label = "convex" if curvature > 0.0 else "concave"
ax.axvline(x_curvature, linestyle='dashed', color='purple', label=label)
ax.axvline(x_curvature, linestyle="dashed", color="purple", label=label)

# Mark the corresponding y-values
for yy, xx in zip(latent_information.y0, latent_information.x0):
Expand All @@ -56,6 +59,17 @@ def plot_curve_with_latent_information(coefficients, p, latent_information, titl


def plot_curves(curves, xs, title=None, cmap="coolwarm", ylim=None):
"""
Plots curves with a given cmap, where the color mapping is applied over the temporal axis.
Args:
curves (np.ndarray): The curves array, of shape (N, m), where each of the N curves consists of m
timesteps.
xs (list[float]): The x-values for the curve, must be of length m.
title (str): The title of the plot.
cmap (str): The colormap for the color mapping over the temporal axis.
ylim (list[float]): The y-limit for the plot.

"""
fig, ax = plt.subplots()
cmap_obj = plt.get_cmap(name=cmap)
cycler = plt.cycler("color", cmap_obj(np.linspace(0, 1, curves.shape[0])))
Expand Down