diff --git a/sql/HISTORY.md b/sql/HISTORY.md index 06d722f7..11f1cff5 100644 --- a/sql/HISTORY.md +++ b/sql/HISTORY.md @@ -1,3 +1,7 @@ +# SmartNoise SQL v1.0.7 Release Notes + +* Fix odometer accounting bug (thanks @tudorcebere!) + # SmartNoise SQL v1.0.6 Release Notes * Allow newer versions of Graphviz diff --git a/sql/VERSION b/sql/VERSION index ece61c60..f9cbc01a 100644 --- a/sql/VERSION +++ b/sql/VERSION @@ -1 +1 @@ -1.0.6 \ No newline at end of file +1.0.7 \ No newline at end of file diff --git a/sql/pyproject.toml b/sql/pyproject.toml index 713460d1..81fa5058 100644 --- a/sql/pyproject.toml +++ b/sql/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "smartnoise-sql" -version = "1.0.6" +version = "1.0.7" description = "Differentially Private SQL Queries" authors = ["SmartNoise Team "] license = "MIT" @@ -10,7 +10,7 @@ repository = "https://github.com/opendp/smartnoise-sdk" readme = "README.md" [tool.poetry.dependencies] -python = ">=3.8,<3.14" +python = ">=3.9,<3.14" opendp = ">=0.8.0,<0.13.0" antlr4-python3-runtime = "4.9.3" PyYAML = "^6.0.1" diff --git a/sql/snsql/sql/odometer.py b/sql/snsql/sql/odometer.py index c2db2f06..7929dc3c 100644 --- a/sql/snsql/sql/odometer.py +++ b/sql/snsql/sql/odometer.py @@ -29,7 +29,7 @@ def spent(self): basic = self.k * epsilon optimal_left_side = ((np.exp(epsilon) - 1) * epsilon * self.k)/(np.exp(epsilon) + 1) optimal_a = optimal_left_side + epsilon * np.sqrt(2 * self.k * np.log(epsilon + (np.sqrt(self.k*epsilon*epsilon)/tol))) - optimal_b = optimal_left_side + epsilon * np.sqrt(2 * self.k * (1/tol)) + optimal_b = optimal_left_side + epsilon * np.sqrt(2 * self.k * np.log(1 / tol)) delta = 1 - (1 - delta) ** self.k delta = delta * (1 - delta) + self.tol return tuple([min(basic, optimal_a, optimal_b), delta]) diff --git a/sql/tests/odometer/test_odometer.py b/sql/tests/odometer/test_odometer.py index 56f8b38f..43ddbe24 100644 --- a/sql/tests/odometer/test_odometer.py +++ b/sql/tests/odometer/test_odometer.py @@ -108,7 +108,7 
@@ def test_odo_hom(self): for _ in range(300): odo.spend() eps, delt = odo.spent - assert(np.isclose(eps, 8.4917)) + assert(np.isclose(eps, 8.2519)) assert(np.isclose(delt, 0.19256)) def test_odo_het(self): privacy = Privacy(epsilon=0.1, delta = 1/(1000)) diff --git a/synth/HISTORY.md b/synth/HISTORY.md index a0ddfa20..84598eee 100644 --- a/synth/HISTORY.md +++ b/synth/HISTORY.md @@ -1,3 +1,7 @@ +# SmartNoise Synth v1.0.6 Release Notes + +* Fix privacy leak in covariance (thanks, @tudorcebere!) + # SmartNoise Synth v1.0.5 Release Notes * Upgrade to OpenDP v0.12.0 diff --git a/synth/VERSION b/synth/VERSION index 1464c521..ece61c60 100644 --- a/synth/VERSION +++ b/synth/VERSION @@ -1 +1 @@ -1.0.5 \ No newline at end of file +1.0.6 \ No newline at end of file diff --git a/synth/pyproject.toml b/synth/pyproject.toml index eb1a1b90..eaf7f002 100644 --- a/synth/pyproject.toml +++ b/synth/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "smartnoise-synth" -version = "1.0.5" +version = "1.0.6" description = "Differentially Private Synthetic Data" authors = ["SmartNoise Team "] license = "MIT" @@ -14,7 +14,7 @@ python = ">=3.9,<3.13" opacus = "^0.14.0" torch = {version = ">=2.2.0", optional = true} pac-synth = "^0.0.8" -smartnoise-sql = "^1.0.5" +smartnoise-sql = ">=1.0.7" Faker = ">=17.0.0" [tool.poetry.dev-dependencies] diff --git a/synth/snsynth/models/dp_covariance.py b/synth/snsynth/models/dp_covariance.py index 0fa516a3..a11a39c4 100644 --- a/synth/snsynth/models/dp_covariance.py +++ b/synth/snsynth/models/dp_covariance.py @@ -112,10 +112,10 @@ def dp_noise(n, noise_scale): u = np.random.uniform(size=n) return q_lap_iter(u, b=noise_scale) - true_val = covar(data.values.T, self.intercept) - scale = self.sens / self.epsilon - val = np.array(true_val) + dp_noise(n=len(true_val), noise_scale=scale) - return list(val) + true_val = covar(new_data.values.T, self.intercept) + scale = self.sens / self.epsilon + val = np.array(true_val) + dp_noise(n=len(true_val), 
noise_scale=scale) + return list(val) # TODO: this implementation only works for one dependent variable right now def get_linear_regression(self, data, x_names, y_name, intercept=False): diff --git a/synth/snsynth/models/linear_regression.py b/synth/snsynth/models/linear_regression.py index 4f7d3d6b..db69fde9 100644 --- a/synth/snsynth/models/linear_regression.py +++ b/synth/snsynth/models/linear_regression.py @@ -1,6 +1,9 @@ import pandas as pd from sklearn.base import RegressorMixin, MultiOutputMixin -from sklearn.linear_model.base import LinearModel +try: + from sklearn.linear_model.base import LinearModel +except ImportError: + from sklearn.linear_model._base import LinearModel from .dp_covariance import DPcovariance diff --git a/synth/tests/test_dp_covariance.py b/synth/tests/test_dp_covariance.py new file mode 100644 index 00000000..93791136 --- /dev/null +++ b/synth/tests/test_dp_covariance.py @@ -0,0 +1,22 @@ +import numpy as np +import pandas as pd + +from snsynth.models.dp_covariance import DPcovariance + + +def test_release_uses_clipped_data(monkeypatch): + # Make Laplace noise exactly zero so output equals the internal covariance. + monkeypatch.setattr(np.random, "uniform", lambda size: np.full(size, 0.5)) + + data = pd.DataFrame({"x": [2.0, 0.0], "y": [2.0, 0.0]}) + bounds = pd.DataFrame({"x": [0.0, 1.0], "y": [0.0, 1.0]}) + + model = DPcovariance(n=2, cols=["x", "y"], rng=bounds, global_eps=1.0) + release = np.array(model.release(data)) + + # Expected covariance of clipped data [[1,1],[0,0]]. + clipped = np.array([[1.0, 1.0], [0.0, 0.0]]).T + cov = np.cov(clipped) + expected = cov[np.tril_indices(cov.shape[0])] + + np.testing.assert_allclose(release, expected)