diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..23981a1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.lock linguist-generated -diff diff --git a/.github/workflows/push_pypi.yml b/.github/workflows/push_pypi.yml new file mode 100644 index 0000000..1eb7ead --- /dev/null +++ b/.github/workflows/push_pypi.yml @@ -0,0 +1,33 @@ +name: Publish to PyPI.org + +on: + pull_request: + branches: [main] + release: + types: [published] + push: + tags: + - v* + +jobs: + pypi: + runs-on: ubuntu-slim + steps: + - uses: actions/checkout@v6 + + - uses: astral-sh/setup-uv@v7 + with: + python-version: 3.13 + + - name: Build Packages + run: | + uv build --wheel + + - name: Sanity Check + run: | + uv run --isolated --no-project --with dist/diffprivlib* python -c 'import diffprivlib_logger' + + - name: Publish Packages + if: github.event_name == 'release' || github.ref_type == 'tag' + run: | + uv publish --check-url https://pypi.org/simple --token ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 11520e5..0b09f23 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,44 +1,30 @@ name: Smoke Test on: - push: - branches: - - main pull_request: + branches: [main] + push: + branches: [main] + jobs: python-test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-slim + strategy: matrix: - python-version: [3.11] + python-version: [ 3.11, 3.12, 3.13, 3.14 ] + steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v6 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: astral-sh/setup-uv@v7 with: python-version: ${{ matrix.python-version }} - - name: Get pip cache dir - id: pip-cache - run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - - - name: Cache pip dependencies - uses: actions/cache@v2 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-${{ hashFiles('python/setup.cfg') }} - 
restore-keys: ${{ runner.os }}-pip- - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install pytest - - - name: Install package - run: python -m pip install -e . + - name: Install the project + run: uv sync --locked --all-extras --dev - - name: Test - run: cd tests && pytest -v \ No newline at end of file + - name: Run tests + run: uv run pytest -v tests diff --git a/pyproject.toml b/pyproject.toml index 60263ea..eeaae68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,45 @@ -[tool.black] -line-length = 79 -include = '\.pyi?$' +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "diffprivlib-logger" +version = "0.0.4" +description="A logger wrapper for DiffPrivLib" +readme = "README.md" +requires-python = ">=3.10, <4" +authors = [ + { name = "Data Science Competence Center", email = "dscc@bfs.admin.ch" }, + { name = "Swiss Federal Statistical Office" }, +] +license = "MIT" +keywords = [ + "diffprivlib", + "logger", + "serialiser", + "deserialiser", +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dependencies = [ + "scikit-learn>=0.24", + "numpy >= 2.0.0", + "scikit-learn >= 1.4.0", + "scipy >= 1.13.0", + "joblib >= 0.16.0", + "setuptools >= 49.0.0", +] + +[project.urls] +Homepage = "https://github.com/dscc-admin/lomas/" + +[project.optional-dependencies] +test = [ + "pytest>=8.3", +] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index a1b8ecd..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -diffprivlib>=0.6.0 -scikit-learn>=0.24.2 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 22c6d04..0000000 --- a/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -import 
pathlib - -from setuptools import find_packages, setup - -here = pathlib.Path(__file__).parent.resolve() - -this_directory = pathlib.Path(__file__).parent -long_description = (this_directory / "README.md").read_text() - -setup( - name="diffprivlib_logger", - packages=find_packages(), - version="0.0.3", - description="A logger wrapper for DiffPrivLib", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/dscc-admin/lomas/", - author="Data Science Competence Center, Swiss Federal Statistical Office", - author_email="dscc@bfs.admin.ch", - license="MIT", - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.11", - ], - keywords=["diffprivliv", "logger", "serialiser", "deserialiser"], - python_requires=">=3.10, <4", - install_requires=[ - "diffprivlib>=0.6.0", - ], -) diff --git a/src/diffprivlib/__init__.py b/src/diffprivlib/__init__.py new file mode 100644 index 0000000..7ada7b4 --- /dev/null +++ b/src/diffprivlib/__init__.py @@ -0,0 +1,32 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Differential Privacy Library for Python +======================================= + +The IBM Differential Privacy Library is a library for writing, executing and experimenting with differential privacy. +The Library includes basic differential privacy mechanisms, the building blocks of differential privacy; tools for +basic data analysis with differential privacy; and machine learning models that satisfy differential privacy. + +""" +from diffprivlib import mechanisms +from diffprivlib import models +from diffprivlib import tools +from diffprivlib.accountant import BudgetAccountant + +__version__ = '0.6.7' diff --git a/src/diffprivlib/accountant.py b/src/diffprivlib/accountant.py new file mode 100644 index 0000000..d9ec66f --- /dev/null +++ b/src/diffprivlib/accountant.py @@ -0,0 +1,469 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2020 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class BudgetAccountant:
    """Privacy budget accountant for differential privacy.

    This class creates a privacy budget accountant to track privacy spend across queries and other data accesses. Once
    initialised, the BudgetAccountant stores each privacy spend and iteratively updates the total budget spend, raising
    an error when the budget ceiling (if specified) is exceeded. The accountant can be initialised without any maximum
    budget, to enable users to track the total privacy spend of their actions without hindrance.

    Diffprivlib functions can make use of a BudgetAccountant in three different ways (see examples for more details):

    - Passed as an ``accountant`` parameter to the function (e.g., ``mean(..., accountant=acc)``)
    - Set as the default using the ``set_default()`` method (all subsequent diffprivlib functions will use the
      accountant by default)
    - As a context manager using a ``with`` statement (the accountant is used for that block of code)

    Implements the accountant rules as given in [KOV17]_.

    Parameters
    ----------
    epsilon : float, default: infinity
        Epsilon budget ceiling of the accountant.

    delta : float, default: 1.0
        Delta budget ceiling of the accountant.

    slack : float, default: 0.0
        Slack allowed in delta spend. Greater slack may reduce the overall epsilon spend.

    spent_budget : list of tuples of the form (epsilon, delta), optional
        List of tuples of pre-existing budget spends. Allows for a new accountant to be initialised with spends
        extracted from a previous instance.

    Attributes
    ----------
    epsilon : float
        Epsilon budget ceiling of the accountant.

    delta : float
        Delta budget ceiling of the accountant.

    slack : float
        The accountant's slack. Can be modified at runtime, subject to the privacy budget not being exceeded.

    spent_budget : list of tuples of the form (epsilon, delta)
        The list of privacy spends recorded by the accountant. Can be used in the initialisation of a new accountant.

    Examples
    --------

    A ``BudgetAccountant`` is typically passed to diffprivlib functions as an ``accountant`` parameter. If ``epsilon``
    and ``delta`` are not set, the accountant has an infinite budget by default, allowing you to track privacy spend
    without imposing a hard limit. By allowing a ``slack`` in the budget calculation, the overall epsilon privacy spend
    can be reduced (at the cost of extra delta spend).

    >>> import diffprivlib as dp
    >>> from numpy.random import random
    >>> X = random(100)
    >>> acc = dp.BudgetAccountant(epsilon=1.5, delta=0)
    >>> dp.tools.mean(X, bounds=(0, 1), accountant=acc)
    0.4547006207923884
    >>> acc.total()
    (epsilon=1.0, delta=0)
    >>> dp.tools.std(X, bounds=(0, 1), epsilon=0.25, accountant=acc)
    0.2630216611181259
    >>> acc.total()
    (epsilon=1.25, delta=0)

    >>> acc2 = dp.BudgetAccountant() # infinite budget
    >>> first_half = dp.tools.mean(X[:50], epsilon=0.25, bounds=(0, 1), accountant=acc2)
    >>> last_half = dp.tools.mean(X[50:], epsilon=0.25, bounds=(0, 1), accountant=acc2)
    >>> acc2.total()
    (epsilon=0.5, delta=0)
    >>> acc2.remaining()
    (epsilon=inf, delta=1.0)

    >>> acc3 = dp.BudgetAccountant(slack=1e-3)
    >>> for i in range(20):
    ...     dp.tools.mean(X, epsilon=0.05, bounds=(0, 1), accountant=acc3)
    >>> acc3.total() # Slack has reduced the epsilon spend by almost 25%
    (epsilon=0.7613352285668463, delta=0.001)

    Using ``set_default()``, an accountant is used by default in all diffprivlib functions in that script. Accountants
    also act as context managers, allowing for use in a ``with`` statement. Passing an accountant as a parameter
    overrides all other methods.

    >>> acc4 = dp.BudgetAccountant()
    >>> acc4.set_default()
    BudgetAccountant()
    >>> Y = random((100, 2)) - 0.5
    >>> clf = dp.models.PCA(1, centered=True, data_norm=1.4)
    >>> clf.fit(Y)
    PCA(accountant=BudgetAccountant(spent_budget=[(1.0, 0)]), centered=True, copy=True, data_norm=1.4, epsilon=1.0,
    n_components=1, random_state=None, bounds=None, whiten=False)
    >>> acc4.total()
    (epsilon=1.0, delta=0)

    >>> with dp.BudgetAccountant() as acc5:
    ...     dp.tools.mean(Y, bounds=(0, 1), epsilon=1/3)
    >>> acc5.total()
    (epsilon=0.3333333333333333, delta=0)

    References
    ----------
    .. [KOV17] Kairouz, Peter, Sewoong Oh, and Pramod Viswanath. "The composition theorem for differential privacy."
        IEEE Transactions on Information Theory 63.6 (2017): 4037-4049.

    """
    # Class-wide default accountant, managed by set_default(), pop_default() and load_default().
    _default = None

    def __init__(self, epsilon=float("inf"), delta=1.0, slack=0.0, spent_budget=None):
        check_epsilon_delta(epsilon, delta)
        self.__epsilon = epsilon
        # Smallest non-zero epsilon accepted by check() when the ceiling is finite.
        self.__min_epsilon = 0 if epsilon == float("inf") else epsilon * 1e-14
        self.__delta = delta
        self.__spent_budget = []
        # Assigned through the property setter, which validates the slack against the ceiling.
        self.slack = slack

        if spent_budget is not None:
            if not isinstance(spent_budget, list):
                raise TypeError("spent_budget must be a list")

            # Replay every pre-existing spend so that each one is checked against the ceiling.
            for _epsilon, _delta in spent_budget:
                self.spend(_epsilon, _delta)

    def __repr__(self, n_budget_max=5):
        """Return a constructor-like string, listing only non-default parameters.

        At most `n_budget_max` spends are printed; any further spends are abbreviated to ``...``.
        """
        params = []
        if self.epsilon != float("inf"):
            params.append(f"epsilon={self.epsilon}")

        if self.delta != 1:
            params.append(f"delta={self.delta}")

        if self.slack > 0:
            params.append(f"slack={self.slack}")

        if self.spent_budget:
            if len(self.spent_budget) > n_budget_max:
                params.append("spent_budget=" + str(self.spent_budget[:n_budget_max] + ["..."]).replace("'", ""))
            else:
                params.append("spent_budget=" + str(self.spent_budget))

        return "BudgetAccountant(" + ", ".join(params) + ")"

    def __enter__(self):
        """Make this accountant the default for the duration of the ``with`` block.

        The default that was active on entry is remembered and reinstated by ``__exit__``. Saved defaults are kept on
        a per-instance stack, so the same accountant can safely be entered again inside its own ``with`` block.
        """
        previous = self.pop_default()
        self.__dict__.setdefault("_saved_defaults", []).append(previous)
        # Kept for backward compatibility: the default that was active before the innermost active block.
        self.old_default = previous
        self.set_default()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove the current default and reinstate the one that was active when the block was entered."""
        self.pop_default()

        saved = self.__dict__.get("_saved_defaults") or []
        # Without a matching __enter__ this falls back to `old_default` and raises AttributeError, as before.
        previous = saved.pop() if saved else self.old_default

        if previous is not None:
            previous.set_default()

        if saved:
            # Still inside an enclosing block of this accountant: expose that block's saved default.
            self.old_default = saved[-1]
        else:
            del self.old_default

    def __len__(self):
        """Return the number of budget spends recorded so far."""
        return len(self.spent_budget)

    @property
    def slack(self):
        """Slack parameter for composition.
        """
        return self.__slack

    @slack.setter
    def slack(self, slack):
        if not 0 <= slack <= self.delta:
            raise ValueError(f"Slack must be between 0 and delta ({self.delta}), inclusive. Got {slack}.")

        # Changing the slack changes the composed total, so re-check the ceiling before accepting it.
        epsilon_spent, delta_spent = self.total(slack=slack)

        if self.epsilon < epsilon_spent or self.delta < delta_spent:
            raise BudgetError(f"Privacy budget will be exceeded by changing slack to {slack}.")

        self.__slack = slack

    @property
    def spent_budget(self):
        """List of tuples of the form (epsilon, delta) of spent privacy budget.
        """
        # A copy is returned so callers cannot mutate the accountant's record.
        return self.__spent_budget.copy()

    @property
    def epsilon(self):
        """Epsilon privacy ceiling of the accountant.
        """
        return self.__epsilon

    @property
    def delta(self):
        """Delta privacy ceiling of the accountant.
        """
        return self.__delta

    def total(self, spent_budget=None, slack=None):
        """Returns the total current privacy spend.

        `spent_budget` and `slack` can be specified as parameters, otherwise the class values will be used.

        Parameters
        ----------
        spent_budget : list of tuples of the form (epsilon, delta), optional
            List of tuples of budget spends. If not provided, the accountant's spends will be used.

        slack : float, optional
            Slack in delta for composition. If not provided, the accountant's slack will be used.

        Returns
        -------
        epsilon : float
            Total epsilon spend.

        delta : float
            Total delta spend.

        Raises
        ------
        ValueError
            If `slack` is given and is not between 0 and the accountant's delta ceiling.

        """
        if spent_budget is None:
            spent_budget = self.spent_budget
        else:
            for epsilon, delta in spent_budget:
                check_epsilon_delta(epsilon, delta)

        if slack is None:
            slack = self.slack
        elif not 0 <= slack <= self.delta:
            raise ValueError(f"Slack must be between 0 and delta ({self.delta}), inclusive. Got {slack}.")

        epsilon_sum, epsilon_exp_sum, epsilon_sq_sum = 0, 0, 0

        for epsilon, _ in spent_budget:
            epsilon_sum += epsilon
            epsilon_exp_sum += (1 - np.exp(-epsilon)) * epsilon / (1 + np.exp(-epsilon))
            epsilon_sq_sum += epsilon ** 2

        total_epsilon_naive = epsilon_sum
        total_delta = self.__total_delta_safe(spent_budget, slack)

        # Without slack only the plain sum of epsilons is available (the other bounds divide by slack).
        if slack == 0:
            return Budget(total_epsilon_naive, total_delta)

        total_epsilon_drv = epsilon_exp_sum + np.sqrt(2 * epsilon_sq_sum * np.log(1 / slack))
        total_epsilon_kov = epsilon_exp_sum + np.sqrt(2 * epsilon_sq_sum *
                                                      np.log(np.exp(1) + np.sqrt(epsilon_sq_sum) / slack))

        # Report the tightest of the three composition bounds.
        return Budget(min(total_epsilon_naive, total_epsilon_drv, total_epsilon_kov), total_delta)

    def check(self, epsilon, delta):
        """Checks if the provided (epsilon,delta) can be spent without exceeding the accountant's budget ceiling.

        Parameters
        ----------
        epsilon : float
            Epsilon budget spend to check.

        delta : float
            Delta budget spend to check.

        Returns
        -------
        bool
            True if the budget can be spent, otherwise a :class:`.BudgetError` is raised.

        Raises
        ------
        ValueError
            If `epsilon` is non-zero but smaller than the accountant's minimum permitted epsilon.

        BudgetError
            If the specified budget spend will result in the budget ceiling being exceeded.

        """
        check_epsilon_delta(epsilon, delta)
        # An accountant with no ceiling accepts any valid spend.
        if self.epsilon == float("inf") and self.delta == 1:
            return True

        if 0 < epsilon < self.__min_epsilon:
            raise ValueError(f"Epsilon must be at least {self.__min_epsilon} if non-zero, got {epsilon}.")

        spent_budget = self.spent_budget + [(epsilon, delta)]

        if Budget(self.epsilon, self.delta) >= self.total(spent_budget=spent_budget):
            return True

        raise BudgetError(f"Privacy spend of ({epsilon},{delta}) not permissible; will exceed remaining privacy budget."
                          f" Use {self.__class__.__name__}.{self.remaining.__name__}() to check remaining budget.")

    def remaining(self, k=1):
        """Calculates the budget that remains to be spent.

        Calculates the privacy budget that can be spent on `k` queries. Spending this budget on `k` queries will
        match the budget ceiling, assuming no floating point errors.

        Parameters
        ----------
        k : int, default: 1
            The number of queries for which to calculate the remaining budget.

        Returns
        -------
        epsilon : float
            Total epsilon spend remaining for `k` queries.

        delta : float
            Total delta spend remaining for `k` queries.

        Raises
        ------
        TypeError
            If `k` is not integer-valued.

        ValueError
            If `k` is less than 1.

        """
        if not isinstance(k, Integral):
            raise TypeError(f"k must be integer-valued, got {type(k)}.")
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}.")

        _, spent_delta = self.total()
        delta = 1 - ((1 - self.delta) / (1 - spent_delta)) ** (1 / k) if spent_delta < 1.0 else 1.0
        # delta = 1 - np.exp((np.log(1 - self.delta) - np.log(1 - spent_delta)) / k)

        # Bisect on the per-query epsilon until the interval stops shrinking.
        lower = 0
        upper = self.epsilon
        old_interval_size = (upper - lower) * 2

        while old_interval_size > upper - lower:
            old_interval_size = upper - lower
            mid = (upper + lower) / 2

            spent_budget = self.spent_budget + [(mid, 0)] * k
            x_0, _ = self.total(spent_budget=spent_budget)

            # Both bounds move when x_0 equals the ceiling, collapsing the interval onto `mid`.
            if x_0 >= self.epsilon:
                upper = mid
            if x_0 <= self.epsilon:
                lower = mid

        epsilon = (upper + lower) / 2

        return Budget(epsilon, delta)

    def spend(self, epsilon, delta):
        """Spend the given privacy budget.

        Instructs the accountant to spend the given epsilon and delta privacy budget, while ensuring the target budget
        is not exceeded.

        Parameters
        ----------
        epsilon : float
            Epsilon privacy budget to spend.

        delta : float
            Delta privacy budget to spend.

        Returns
        -------
        self : BudgetAccountant

        """
        self.check(epsilon, delta)
        self.__spent_budget.append((epsilon, delta))
        return self

    @staticmethod
    def __total_delta_safe(spent_budget, slack):
        """
        Calculate total delta spend of `spent_budget`, with special consideration for floating point arithmetic.
        Should yield greater precision, especially for a large number of budget spends with very small delta.

        Parameters
        ----------
        spent_budget: list of tuples of the form (epsilon, delta)
            List of budget spends, for which the total delta spend is to be calculated.

        slack: float
            Delta slack parameter for composition of spends.

        Returns
        -------
        float
            Total delta spend.

        """
        delta_spend = [slack]
        for _, delta in spent_budget:
            delta_spend.append(delta)
        # The deltas are accumulated in ascending order.
        delta_spend.sort()

        # (1 - a) * (1 - b) = 1 - (a + b - a * b)
        prod = 0
        for delta in delta_spend:
            prod += delta - prod * delta

        return prod

    @staticmethod
    def load_default(accountant):
        """Loads the default privacy budget accountant if none is supplied, otherwise checks that the supplied
        accountant is a BudgetAccountant class.

        An accountant can be set as the default using the `set_default()` method. If no default has been set, a default
        is created.

        Parameters
        ----------
        accountant : BudgetAccountant or None
            The supplied budget accountant. If None, the default accountant is returned.

        Returns
        -------
        default : BudgetAccountant
            Returns a working BudgetAccountant, either the supplied `accountant` or the existing default.

        Raises
        ------
        TypeError
            If `accountant` is neither None nor a BudgetAccountant.

        """
        if accountant is None:
            if BudgetAccountant._default is None:
                BudgetAccountant._default = BudgetAccountant()

            return BudgetAccountant._default

        if not isinstance(accountant, BudgetAccountant):
            raise TypeError(f"Accountant must be of type BudgetAccountant, got {type(accountant)}")

        return accountant

    def set_default(self):
        """Sets the current accountant to be the default when running functions and queries with diffprivlib.

        Returns
        -------
        self : BudgetAccountant

        """
        BudgetAccountant._default = self
        return self

    @staticmethod
    def pop_default():
        """Pops the default BudgetAccountant from the class and returns it to the user.

        Returns
        -------
        default : BudgetAccountant
            Returns the existing default BudgetAccountant.

        """
        default = BudgetAccountant._default
        BudgetAccountant._default = None
        return default
+""" +from diffprivlib.mechanisms.base import DPMachine, DPMechanism, TruncationAndFoldingMixin + +from diffprivlib.mechanisms.binary import Binary +from diffprivlib.mechanisms.bingham import Bingham +from diffprivlib.mechanisms.exponential import Exponential, ExponentialCategorical, ExponentialHierarchical, \ + PermuteAndFlip +from diffprivlib.mechanisms.gaussian import Gaussian, GaussianAnalytic, GaussianDiscrete +from diffprivlib.mechanisms.geometric import Geometric, GeometricFolded, GeometricTruncated +from diffprivlib.mechanisms.laplace import Laplace, LaplaceBoundedDomain, LaplaceBoundedNoise, LaplaceFolded,\ + LaplaceTruncated +from diffprivlib.mechanisms.snapping import Snapping +from diffprivlib.mechanisms.staircase import Staircase +from diffprivlib.mechanisms.uniform import Uniform +from diffprivlib.mechanisms.vector import Vector diff --git a/src/diffprivlib/mechanisms/base.py b/src/diffprivlib/mechanisms/base.py new file mode 100644 index 0000000..2554cb5 --- /dev/null +++ b/src/diffprivlib/mechanisms/base.py @@ -0,0 +1,269 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class DPMachine(abc.ABC):
    """
    Parent class for :class:`.DPMechanism` and :class:`.DPTransformer`, providing and specifying basic functionality.

    """
    @abc.abstractmethod
    def randomise(self, value):
        """Randomise `value` with the mechanism.

        Parameters
        ----------
        value : int or float or str or method
            The value to be randomised.

        Returns
        -------
        int or float or str or method
            The randomised value, same type as `value`.

        """

    def copy(self):
        """Produces a copy of the class.

        Returns
        -------
        self : class
            Returns the copy.

        """
        # Shallow copy, via ``copy.copy``.
        return copy(self)


class DPMechanism(DPMachine, abc.ABC):
    r"""Abstract base class for all mechanisms. Inherits from :class:`.DPMachine`.

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞].

    delta : float
        Privacy parameter :math:`\delta` for the mechanism. Must be in [0, 1]. Cannot be simultaneously zero with
        ``epsilon``.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    """
    def __init__(self, *, epsilon, delta, random_state=None):
        self.epsilon, self.delta = self._check_epsilon_delta(epsilon, delta)
        self.random_state = random_state

        self._rng = check_random_state(random_state, True)

    def __repr__(self):
        """Return ``module.ClassName(arg=value, ...)`` built from the keyword-only constructor arguments."""
        # Every keyword-only constructor argument is expected to be stored as an attribute of the same name.
        attrs = inspect.getfullargspec(self.__class__).kwonlyargs
        attr_output = [f"{attr}={getattr(self, attr)!r}" for attr in attrs]

        return f"{self.__module__}.{self.__class__.__name__}({', '.join(attr_output)})"

    @abc.abstractmethod
    def randomise(self, value):
        """Randomise `value` with the mechanism.

        Parameters
        ----------
        value : int or float or str or method
            The value to be randomised.

        Returns
        -------
        int or float or str or method
            The randomised value, same type as `value`.

        """

    def bias(self, value):
        """Returns the bias of the mechanism at a given `value`.

        Parameters
        ----------
        value : int or float
            The value at which the bias of the mechanism is sought.

        Returns
        -------
        bias : float or None
            The bias of the mechanism at `value` if defined, `None` otherwise.

        """
        raise NotImplementedError

    def variance(self, value):
        """Returns the variance of the mechanism at a given `value`.

        Parameters
        ----------
        value : int or float
            The value at which the variance of the mechanism is sought.

        Returns
        -------
        variance : float or None
            The variance of the mechanism at `value` if defined, `None` otherwise.

        """
        raise NotImplementedError

    def mse(self, value):
        """Returns the mean squared error (MSE) of the mechanism at a given `value`.

        Computed as ``variance(value) + bias(value) ** 2``, so both of those methods must be implemented and return
        numbers for this method to succeed.

        Parameters
        ----------
        value : int or float
            The value at which the MSE of the mechanism is sought.

        Returns
        -------
        mse : float
            The MSE of the mechanism at `value`.

        """
        return self.variance(value) + (self.bias(value)) ** 2

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """Validate `epsilon` and `delta` and return them as a tuple of floats.

        Raises
        ------
        TypeError
            If either value is not a real number.

        ValueError
            If `epsilon` is negative, `delta` is outside [0, 1], or both are zero.

        """
        if not isinstance(epsilon, Real) or not isinstance(delta, Real):
            raise TypeError("Epsilon and delta must be numeric")

        if epsilon < 0:
            raise ValueError("Epsilon must be non-negative")

        if not 0 <= delta <= 1:
            raise ValueError("Delta must be in [0, 1]")

        if epsilon + delta == 0:
            raise ValueError("Epsilon and Delta cannot both be zero")

        return float(epsilon), float(delta)

    def _check_all(self, value):
        """Re-validate the mechanism's parameters; `value` itself is not inspected here."""
        del value
        self._check_epsilon_delta(self.epsilon, self.delta)

        return True


class TruncationAndFoldingMixin:  # pylint: disable=too-few-public-methods
    """Mixin for truncating or folding the outputs of a mechanism. Must be instantiated with a :class:`.DPMechanism`.

    Parameters
    ----------
    lower : float
        The lower bound of the mechanism.

    upper : float
        The upper bound of the mechanism.

    """
    # Number of reflections `_fold` performs with exact step-by-step arithmetic before switching to the closed form.
    _MAX_EXACT_REFLECTIONS = 1000

    def __init__(self, *, lower, upper):
        if not isinstance(self, DPMechanism):
            raise TypeError("TruncationAndFoldingMixin must be implemented alongside a :class:`.DPMechanism`")

        self.lower, self.upper = self._check_bounds(lower, upper)

    @classmethod
    def _check_bounds(cls, lower, upper):
        """Performs a check on the bounds provided for the mechanism."""
        if not isinstance(lower, Real) or not isinstance(upper, Real):
            raise TypeError("Bounds must be numeric")

        # Equal bounds are accepted: the domain is then a single point.
        if lower > upper:
            raise ValueError("Lower bound must not be greater than upper bound")

        return lower, upper

    def _check_all(self, value):
        """Checks that all parameters of the mechanism have been initialised correctly"""
        del value
        self._check_bounds(self.lower, self.upper)

        return True

    def _truncate(self, value):
        """Clamp `value` to the interval [lower, upper]."""
        if value > self.upper:
            return self.upper
        if value < self.lower:
            return self.lower

        return value

    def _fold(self, value):
        """Fold `value` into [lower, upper] by repeatedly reflecting it about the bound it lies beyond.

        The first reflections are carried out one at a time with the same arithmetic as a direct reflection
        (``2 * bound - value``). Values that are still out of range after that (many interval widths away, or any
        out-of-range value when ``lower == upper``) are folded in closed form, so the method always terminates.

        Raises
        ------
        ValueError
            If `value` is infinite (or overflows to infinity while being reflected).

        """
        lower, upper = self.lower, self.upper

        for _ in range(self._MAX_EXACT_REFLECTIONS):
            if value < lower:
                value = 2 * lower - value
            elif value > upper:
                value = 2 * upper - value
            else:
                # In range (or NaN, for which both comparisons are false): nothing left to fold.
                return value

        if value in (float("inf"), float("-inf")):
            raise ValueError("Cannot fold a non-finite value into the mechanism's bounds")

        width = upper - lower
        if width == 0:
            # Degenerate interval: the single admissible point is the only possible output.
            return lower

        # Folding is periodic with period 2 * width: reduce modulo the period, then reflect at most once.
        period = 2 * width
        offset = (value - lower) % period
        if offset > width:
            offset = period - offset

        # Clamp to absorb rounding error in `lower + offset`.
        return min(max(lower + offset, lower), upper)


def bernoulli_neg_exp(gamma, random_state=None):
    """Sample from Bernoulli(exp(-gamma)).

    Adapted from "The Discrete Gaussian for Differential Privacy", Canonne, Kamath, Steinke, 2020.
    https://arxiv.org/pdf/2004.00010v2.pdf

    Parameters
    ----------
    gamma : float
        Parameter to sample from Bernoulli(exp(-gamma)). Must be non-negative.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    Returns
    -------
    One sample from the Bernoulli(exp(-gamma)) distribution.

    Raises
    ------
    ValueError
        If `gamma` is negative.

    """
    if gamma < 0:
        raise ValueError(f"Gamma must be non-negative, got {gamma}.")

    rng = check_random_state(random_state, True)

    # Peel off whole units of gamma: each needs an independent Bernoulli(exp(-1)) success.
    while gamma > 1:
        gamma -= 1
        if not bernoulli_neg_exp(1, rng):
            return 0

    counter = 1

    # Remaining gamma is now at most 1; the parity of the stopping index gives the sample.
    while rng.random() <= gamma / counter:
        counter += 1

    return counter % 2
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +The binary mechanism for differential privacy. + +""" +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism +from diffprivlib.utils import copy_docstring + + +class Binary(DPMechanism): + r"""The classic binary mechanism in differential privacy. + + Given a binary input value, the mechanism randomly decides to flip to the other binary value or not, in order to + satisfy differential privacy. + + Paper link: https://arxiv.org/pdf/1612.05568.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞]. + + value0 : str + 0th binary label. + + value1 : str + 1st binary label. Cannot be the same as ``value0``. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + Notes + ----- + * The binary attributes, known as `labels`, must be specified as strings. If non-string labels are required (e.g. + integer-valued labels), a :class:`.DPTransformer` can be used (e.g. :class:`.IntToString`). + + """ + def __init__(self, *, epsilon, value0, value1, random_state=None): + super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state) + self.value0, self.value1 = self._check_labels(value0, value1) + + @classmethod + def _check_labels(cls, value0, value1): + if not isinstance(value0, str) or not isinstance(value1, str): + raise TypeError("Binary labels must be strings. 
Use a DPTransformer (e.g. transformers.IntToString) for " + "non-string labels") + + if len(value0) * len(value1) == 0: + raise ValueError("Binary labels must be non-empty strings") + + if value0 == value1: + raise ValueError("Binary labels must not match") + + return value0, value1 + + def _check_all(self, value): + super()._check_all(value) + self._check_labels(self.value0, self.value1) + + if not isinstance(value, str): + raise TypeError("Value to be randomised must be a string") + + if value not in [self.value0, self.value1]: + raise ValueError(f"Value to be randomised is not in the domain {{\"{self.value0}\", \"{self.value1}\"}}, " + f"got \"{value}\".") + + return True + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError + + def randomise(self, value): + """Randomise `value` with the mechanism. + + Parameters + ---------- + value : str + The value to be randomised. + + Returns + ------- + str + The randomised value. 
+ + """ + self._check_all(value) + + indicator = 0 if value == self.value0 else 1 + + unif_rv = self._rng.random() * (np.exp(self.epsilon) + 1) + + if unif_rv > np.exp(self.epsilon) + self.delta: + indicator = 1 - indicator + + return self.value1 if indicator else self.value0 diff --git a/src/diffprivlib/mechanisms/bingham.py b/src/diffprivlib/mechanisms/bingham.py new file mode 100644 index 0000000..8103502 --- /dev/null +++ b/src/diffprivlib/mechanisms/bingham.py @@ -0,0 +1,152 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +The Bingham mechanism in differential privacy, for estimating the first eigenvector of a covariance matrix. +""" +import secrets +from numbers import Real + +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism +from diffprivlib.utils import copy_docstring + + +class Bingham(DPMechanism): + r""" + The Bingham mechanism in differential privacy. 
+ + Used to estimate the first eigenvector (associated with the largest eigenvalue) of a covariance matrix. + + Paper link: http://eprints.whiterose.ac.uk/123206/7/simbingham8.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float, default: 1 + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, sensitivity=1.0, random_state=None): + super().__init__(epsilon=epsilon, delta=0, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + + if isinstance(self._rng, secrets.SystemRandom): + self._rng = np.random.default_rng() + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if not delta == 0: + raise ValueError("Delta must be zero") + + return super()._check_epsilon_delta(epsilon, delta) + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Real): + raise TypeError("Sensitivity must be numeric") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return float(sensitivity) + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + + if not isinstance(value, np.ndarray): + raise TypeError(f"Value to be randomised must be a numpy array, got {type(value)}") + if value.ndim != 2: + raise ValueError(f"Array must be 2-dimensional, got {value.ndim} dimensions") + if value.shape[0] != value.shape[1]: + raise ValueError(f"Array must be square, got {value.shape[0]} x {value.shape[1]}") + if not np.allclose(value, value.T): + raise ValueError("Array must be symmetric, supplied array is not.") + + return True + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise 
NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError + + def randomise(self, value): + """Randomise `value` with the mechanism. + + Parameters + ---------- + value : numpy array + The data to be randomised. + + Returns + ------- + numpy array + The randomised eigenvector. + + """ + self._check_all(value) + + eigvals, eigvecs = np.linalg.eigh(value) + dims = value.shape[0] + + if dims == 1: + return np.ones((1, 1)) + if self.sensitivity / self.epsilon == 0: + return eigvecs[:, eigvals.argmax()] + + value_translated = self.epsilon * (eigvals.max() * np.eye(dims) - value) / 4 / self.sensitivity + translated_eigvals = np.linalg.eigvalsh(value_translated) + + left, right, mid = 1, dims, (1 + dims) / 2 + old_interval_size = (right - left) * 2 + + while right - left < old_interval_size: + old_interval_size = right - left + + mid = (right + left) / 2 + f_mid = np.array([1 / (mid + 2 * eig) for eig in translated_eigvals]).sum() + + if f_mid <= 1: + right = mid + + if f_mid >= 1: + left = mid + + b_const = mid + omega = np.eye(dims) + 2 * value_translated / b_const + omega_inv = np.linalg.inv(omega) + norm_const = np.exp(-(dims - b_const) / 2) * ((dims / b_const) ** (dims / 2)) + + while True: + rnd_vec = self._rng.multivariate_normal(np.zeros(dims), omega_inv / 4, size=4).sum(axis=0) + unit_vec = rnd_vec / np.linalg.norm(rnd_vec) + prob = np.exp(-unit_vec.dot(value_translated).dot(unit_vec)) / norm_const\ + / ((unit_vec.dot(omega).dot(unit_vec)) ** (dims / 2)) + + if self._rng.random() <= prob: + return unit_vec diff --git a/src/diffprivlib/mechanisms/exponential.py b/src/diffprivlib/mechanisms/exponential.py new file mode 100644 index 0000000..139114c --- /dev/null +++ b/src/diffprivlib/mechanisms/exponential.py @@ -0,0 +1,570 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# 
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Implementation of the standard exponential mechanism, and its derivative, the hierarchical mechanism. +""" +from numbers import Real + +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism, bernoulli_neg_exp +from diffprivlib.mechanisms.binary import Binary +from diffprivlib.utils import copy_docstring + + +class Exponential(DPMechanism): + r""" + The exponential mechanism for achieving differential privacy on candidate selection, as first proposed by McSherry + and Talwar. + + The exponential mechanism achieves differential privacy by randomly choosing a candidate subject to candidate + utility scores, with greater probability given to higher-utility candidates. + + Paper link: https://www.cs.drexel.edu/~greenie/privacy/mdviadp.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float + The sensitivity in utility values to a change in a datapoint in the underlying dataset. 
+ + utility : list + A list of non-negative utility values for each candidate. + + monotonic : bool, default: False + Specifies if the utility function is monotonic, i.e. that adding an individual to the underlying dataset can + only increase the values in `utility`. + + candidates : list, optional + An optional list of candidate labels. If omitted, the zero-indexed list [0, 1, ..., n] is used. + + measure : list, optional + An optional list of measures for each candidate. If omitted, a uniform measure is used. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, sensitivity, utility, monotonic=False, candidates=None, measure=None, + random_state=None): + super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + self.utility, self.candidates, self.measure = self._check_utility_candidates_measure(utility, candidates, + measure) + self.monotonic = bool(monotonic) + self._probabilities = self._find_probabilities(self.epsilon, self.sensitivity, self.utility, self.monotonic, + self.measure) + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if not delta == 0: + raise ValueError("Delta must be zero") + + return super()._check_epsilon_delta(epsilon, delta) + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Real): + raise TypeError("Sensitivity must be numeric") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return float(sensitivity) + + @classmethod + def _check_utility_candidates_measure(cls, utility, candidates, measure): + if not isinstance(utility, list): + raise TypeError(f"Utility must be a list, got a {utility}.") + + if not all(isinstance(u, Real) for u in utility): + raise TypeError("Utility must be a list of 
real-valued numbers.") + + if len(utility) < 1: + raise ValueError("Utility must have at least one element.") + + if np.isinf(utility).any(): + raise ValueError("Utility must be a list of finite numbers.") + + if candidates is not None: + if not isinstance(candidates, list): + raise TypeError(f"Candidates must be a list, got a {type(candidates)}.") + + if len(candidates) != len(utility): + raise ValueError("List of candidates must be the same length as the list of utility values.") + + if measure is not None: + if not isinstance(measure, list): + raise TypeError(f"Measure must be a list, got a {type(measure)}.") + + if not all(isinstance(m, Real) for m in measure): + raise TypeError("Measure must be a list of real-valued numbers.") + + if np.isinf(measure).any(): + raise ValueError("Measure must be a list of finite numbers.") + + if len(measure) != len(utility): + raise ValueError("List of measures must be the same length as the list of utility values.") + + return utility, candidates, measure + + @classmethod + def _find_probabilities(cls, epsilon, sensitivity, utility, monotonic, measure): + scale = epsilon / sensitivity / (2 - monotonic) if sensitivity / epsilon > 0 else float("inf") + + # Set max utility to 0 to avoid overflow on high utility; will be normalised out before returning + utility = np.array(utility) - max(utility) + + if np.isinf(scale): + probabilities = np.isclose(utility, 0).astype(float) + else: + probabilities = np.exp(scale * utility) + + probabilities *= np.array(measure) if measure else 1 + probabilities /= probabilities.sum() + + return np.cumsum(probabilities) + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + self._check_utility_candidates_measure(self.utility, self.candidates, self.measure) + + if value is not None: + raise ValueError(f"Value to be randomised must be None. 
Got: {value}.") + + return True + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError + + def randomise(self, value=None): + """Select a candidate with differential privacy. + + Parameters + ---------- + value : None + Ignored. + + Returns + ------- + int or other + The randomised candidate. + + """ + self._check_all(value) + + rand = self._rng.random() + + if np.any(rand <= self._probabilities): + idx = np.argmax(rand <= self._probabilities) + elif np.isclose(rand, self._probabilities[-1]): + idx = len(self._probabilities) - 1 + else: + raise RuntimeError("Can't find a candidate to return. " + f"Debugging info: Rand: {rand}, Probabilities: {self._probabilities}") + + return self.candidates[idx] if self.candidates else idx + + +class PermuteAndFlip(Exponential): + r""" + The permute and flip mechanism for achieving differential privacy on candidate selection, as first proposed by + McKenna and Sheldon. + + The permute and flip mechanism is an alternative to the exponential mechanism, and achieves differential privacy by + randomly choosing a candidate subject to candidate utility scores, with greater probability given to higher-utility + candidates. + + Paper link: https://arxiv.org/pdf/2010.12603.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float + The sensitivity in utility values to a change in a datapoint in the underlying dataset. + + utility : list + A list of non-negative utility values for each candidate. + + monotonic : bool, default: False + Specifies if the utility function is monotonic, i.e. that adding an individual to the underlying dataset can + only increase the values in `utility`. + + candidates : list, optional + An optional list of candidate labels. If omitted, the zero-indexed list [0, 1, ..., n] is used. 
+ + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, sensitivity, utility, monotonic=False, candidates=None, random_state=None): + super().__init__(epsilon=epsilon, sensitivity=sensitivity, utility=utility, monotonic=monotonic, + candidates=candidates, measure=None, random_state=random_state) + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError + + @classmethod + def _find_probabilities(cls, epsilon, sensitivity, utility, monotonic, measure): + scale = epsilon / sensitivity / (2 - monotonic) if sensitivity / epsilon > 0 else float("inf") + + utility = np.array(utility) + utility -= max(utility) + + if np.isinf(scale): + log_probabilities = np.ones_like(utility) * (-float("inf")) + log_probabilities[utility == 0] = 0 + else: + log_probabilities = scale * utility + + return log_probabilities + + def randomise(self, value=None): + """Select a candidate with differential privacy. + + Parameters + ---------- + value : None + Ignored. + + Returns + ------- + int or other + The randomised candidate. + + """ + self._check_all(value) + + candidate_ids = list(range(len(self.utility))) + + while candidate_ids: + idx = candidate_ids[int(self._rng.random() * len(candidate_ids))] + candidate_ids.remove(idx) + + if bernoulli_neg_exp(-self._probabilities[idx], self._rng): + return self.candidates[idx] if self.candidates else idx + + raise RuntimeError(f"No value to return. Probabilities: {self._probabilities}.") + + +class ExponentialCategorical(DPMechanism): + r""" + The exponential mechanism for achieving differential privacy on categorical inputs, as first proposed by McSherry + and Talwar. 
+ + The exponential mechanism achieves differential privacy by randomly choosing an output value for a given input + value, with greater probability given to values 'closer' to the input, as measured by a given utility function. + + Paper link: https://www.cs.drexel.edu/~greenie/privacy/mdviadp.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + utility_list : list of tuples + The utility list of the mechanism. Must be specified as a list of tuples, of the form ("value1", "value2", + utility), where each `value` is a string and `utility` is a strictly positive float. A `utility` must be + specified for every pair of values given in the `utility_list`. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, utility_list, random_state=None): + super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state) + + self._balanced_tree = False + self._utility_values, self._sensitivity, self._domain_values = self._build_utility(utility_list) + self._check_utility_full(self._domain_values) + self._normalising_constant = self._build_normalising_constant() + + def _build_utility(self, utility_list): + if not isinstance(utility_list, list): + raise TypeError("Utility must be given in a list") + + self._normalising_constant = None + + utility_values = {} + domain_values = [] + sensitivity = 0 + + for _utility_sub_list in utility_list: + value1, value2, utility_value = _utility_sub_list + + if not isinstance(value1, str) or not isinstance(value2, str): + raise TypeError("Utility keys must be strings") + if not isinstance(utility_value, Real): + raise TypeError("Utility value must be a number") + if utility_value < 0.0: + raise ValueError("Utility values must be non-negative") + + sensitivity = max(sensitivity, 
utility_value) + if value1 not in domain_values: + domain_values.append(value1) + if value2 not in domain_values: + domain_values.append(value2) + + if value1 == value2: + continue + if value1 < value2: + utility_values[(value1, value2)] = utility_value + else: + utility_values[(value2, value1)] = utility_value + + self._utility_values = utility_values + self._sensitivity = sensitivity + self._domain_values = domain_values + + return utility_values, sensitivity, domain_values + + def _check_utility_full(self, domain_values): + missing = [] + + for val1 in domain_values: + for val2 in domain_values: + if val1 >= val2: + continue + + if (val1, val2) not in self._utility_values: + missing.append((val1, val2)) + + if missing: + raise ValueError(f"Utility values missing: {missing}") + + return True + + @property + def utility_list(self): + """Gets the utility list of the mechanism, in the same form as accepted by `.set_utility_list`. + + Returns + ------- + utility_list : list of tuples (str, str, float), or None + Returns a list of tuples of the form ("value1", "value2", utility), or `None` if the utility has not yet + been set. 
+ + """ + utility_list = [] + + for _key, _utility in self._utility_values.items(): + value1, value2 = _key + utility_list.append((value1, value2, _utility)) + + return utility_list + + def _build_normalising_constant(self, re_eval=False): + balanced_tree = True + first_constant_value = None + normalising_constant = {} + + for _base_leaf in self._domain_values: + constant_value = 0.0 + + for _target_leaf in self._domain_values: + constant_value += self._get_prob(_base_leaf, _target_leaf) + + normalising_constant[_base_leaf] = constant_value + + if first_constant_value is None: + first_constant_value = constant_value + elif not np.isclose(constant_value, first_constant_value): + balanced_tree = False + + # If the tree is balanced, we can eliminate the doubling factor + if balanced_tree and not re_eval: + self._balanced_tree = True + return self._build_normalising_constant(True) + + return normalising_constant + + def _get_utility(self, value1, value2): + if value1 == value2: + return 0 + + if value1 > value2: + return self._get_utility(value1=value2, value2=value1) + + return self._utility_values[(value1, value2)] + + def _get_prob(self, value1, value2): + if value1 == value2: + return 1.0 + + balancing_factor = 1 if self._balanced_tree else 2 + return np.exp(- self.epsilon * self._get_utility(value1, value2) / balancing_factor / self._sensitivity) + + def _check_all(self, value): + super()._check_all(value) + + if not isinstance(value, str): + raise TypeError("Value to be randomised must be a string") + + if value not in self._domain_values: + raise ValueError(f"Value \"{value}\" not in domain") + + return True + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if not delta == 0: + raise ValueError("Delta must be zero") + + return super()._check_epsilon_delta(epsilon, delta) + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise 
NotImplementedError + + @copy_docstring(Binary.randomise) + def randomise(self, value): + self._check_all(value) + + unif_rv = self._rng.random() * self._normalising_constant[value] + cum_prob = 0 + _target_value = None + + for _target_value in self._normalising_constant.keys(): + cum_prob += self._get_prob(value, _target_value) + + if unif_rv <= cum_prob: + return _target_value + + return _target_value + + +class ExponentialHierarchical(ExponentialCategorical): + r""" + Adaptation of the exponential mechanism to hierarchical data. Simplifies the process of specifying utility values, + as the values can be inferred from the hierarchy. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + hierarchy : nested list of str + The hierarchy as specified as a nested list of string. Each string must be a leaf node, and each leaf node + must lie at the same depth in the hierarchy. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. 
+ + Examples + -------- + Example hierarchies: + + >>> flat_hierarchy = ["A", "B", "C", "D", "E"] + >>> nested_hierarchy = [["A"], ["B"], ["C"], ["D", "E"]] + + """ + def __init__(self, *, epsilon, hierarchy, random_state=None): + self.hierarchy = hierarchy + utility_list = self._build_utility_list(self._build_hierarchy(hierarchy)) + super().__init__(epsilon=epsilon, utility_list=utility_list, random_state=random_state) + self._list_hierarchy = None + + def _build_hierarchy(self, nested_list, parent_node=None): + if not isinstance(nested_list, list): + raise TypeError("Hierarchy must be a list") + + if parent_node is None: + parent_node = [] + + hierarchy = {} + + for _i, _value in enumerate(nested_list): + if isinstance(_value, str): + hierarchy[_value] = parent_node + [_i] + elif not isinstance(_value, list): + raise TypeError("All leaves of the hierarchy must be a string " + + "(see node " + str(parent_node + [_i]) + ")") + else: + hierarchy.update(self._build_hierarchy(_value, parent_node + [_i])) + + self._check_hierarchy_height(hierarchy) + + return hierarchy + + @staticmethod + def _check_hierarchy_height(hierarchy): + hierarchy_height = None + for _value, _hierarchy_locator in hierarchy.items(): + if hierarchy_height is None: + hierarchy_height = len(_hierarchy_locator) + elif len(_hierarchy_locator) != hierarchy_height: + raise ValueError( + f"Leaves of the hierarchy must all be at the same level (node {str(_hierarchy_locator)} is at " + f"level {len(_hierarchy_locator)} instead of hierarchy height {hierarchy_height})" + ) + + @staticmethod + def _build_utility_list(hierarchy): + if not isinstance(hierarchy, dict): + raise TypeError("Hierarchy for _build_utility_list must be a dict") + + utility_list = [] + hierarchy_height = None + + for _root_value, _root_hierarchy_locator in hierarchy.items(): + if hierarchy_height is None: + hierarchy_height = len(_root_hierarchy_locator) + + for _target_value, _target_hierarchy_locator in hierarchy.items(): + if 
_root_value >= _target_value: + continue + + i = 0 + while (i < len(_root_hierarchy_locator) and + _root_hierarchy_locator[i] == _target_hierarchy_locator[i]): + i += 1 + + utility_list.append([_root_value, _target_value, hierarchy_height - i]) + + return utility_list + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError diff --git a/src/diffprivlib/mechanisms/gaussian.py b/src/diffprivlib/mechanisms/gaussian.py new file mode 100644 index 0000000..c57c7c0 --- /dev/null +++ b/src/diffprivlib/mechanisms/gaussian.py @@ -0,0 +1,362 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +The classic Gaussian mechanism in differential privacy, and its derivatives. 
+""" +from math import erf +from numbers import Real, Integral + +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism, bernoulli_neg_exp +from diffprivlib.mechanisms.geometric import Geometric +from diffprivlib.mechanisms.laplace import Laplace +from diffprivlib.utils import copy_docstring + + +class Gaussian(DPMechanism): + r"""The Gaussian mechanism in differential privacy. + + First proposed by Dwork and Roth in "The algorithmic foundations of differential privacy" [DR14]_. Samples from the + Gaussian distribution are generated using two samples from `random.normalvariate` as detailed in [HB21b]_, to + prevent against reconstruction attacks due to limited floating point precision. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, 1]. For ``epsilon > 1``, use + :class:`.GaussianAnalytic`. + + delta : float + Privacy parameter :math:`\delta` for the mechanism. Must be in (0, 1]. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + References + ---------- + .. [DR14] Dwork, Cynthia, and Aaron Roth. "The algorithmic foundations of differential privacy." Found. Trends + Theor. Comput. Sci. 9, no. 3-4 (2014): 211-407. + + .. [HB21b] Holohan, Naoise, and Stefano Braghin. "Secure Random Sampling in Differential Privacy." arXiv preprint + arXiv:2107.10138 (2021). 
+ + """ + def __init__(self, *, epsilon, delta, sensitivity, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + self._scale = np.sqrt(2 * np.log(1.25 / self.delta)) * self.sensitivity / self.epsilon + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if epsilon == 0 or delta == 0: + raise ValueError("Neither Epsilon nor Delta can be zero") + + if isinstance(epsilon, Real) and epsilon > 1.0: + raise ValueError("Epsilon cannot be greater than 1. If required, use GaussianAnalytic instead.") + + return super()._check_epsilon_delta(epsilon, delta) + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Real): + raise TypeError("Sensitivity must be numeric") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return float(sensitivity) + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + + if not isinstance(value, Real): + raise TypeError("Value to be randomised must be a number") + + return True + + @copy_docstring(Laplace.bias) + def bias(self, value): + return 0.0 + + @copy_docstring(Laplace.variance) + def variance(self, value): + self._check_all(0) + + return self._scale ** 2 + + @copy_docstring(Laplace.randomise) + def randomise(self, value): + self._check_all(value) + + try: + standard_normal = (self._rng.normalvariate(0, 1) + self._rng.normalvariate(0, 1)) / np.sqrt(2) + except AttributeError: # random_state is a np.random.RandomState + standard_normal = (self._rng.standard_normal() + self._rng.standard_normal()) / np.sqrt(2) + + return value + standard_normal * self._scale + + +class GaussianAnalytic(Gaussian): + r"""The analytic Gaussian mechanism in differential privacy. + + As first proposed by Balle and Wang in "Improving the Gaussian Mechanism for Differential Privacy: Analytical + Calibration and Optimal Denoising". 
+ + Paper link: https://arxiv.org/pdf/1805.06530.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + delta : float + Privacy parameter :math:`\delta` for the mechanism. Must be in (0, 1]. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, delta, sensitivity, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, sensitivity=sensitivity, random_state=random_state) + self._scale = self._find_scale() + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if epsilon == 0 or delta == 0: + raise ValueError("Neither Epsilon nor Delta can be zero") + + return DPMechanism._check_epsilon_delta(epsilon, delta) # pylint: disable=protected-access + + def _check_all(self, value): + super()._check_all(value) + + return True + + def _find_scale(self): + if self.sensitivity / self.epsilon == 0: + return 0.0 + + epsilon = self.epsilon + delta = self.delta + + def phi(val): + return (1 + erf(val / np.sqrt(2))) / 2 + + def b_plus(val): + return phi(np.sqrt(epsilon * val)) - np.exp(epsilon) * phi(- np.sqrt(epsilon * (val + 2))) - delta + + def b_minus(val): + return phi(- np.sqrt(epsilon * val)) - np.exp(epsilon) * phi(- np.sqrt(epsilon * (val + 2))) - delta + + delta_0 = b_plus(0) + + if delta_0 < 0: + target_func = b_plus + else: + target_func = b_minus + + # Find the starting interval by doubling the initial size until the target_func sign changes, as suggested + # in the paper + left = 0 + right = 1 + + while target_func(left) * target_func(right) > 0: + left = right + right *= 2 + + # Binary search code copied from mechanisms.LaplaceBoundedDomain + old_interval_size = (right - left) * 2 + + while old_interval_size 
> right - left: + old_interval_size = right - left + middle = (right + left) / 2 + + if target_func(middle) * target_func(left) <= 0: + right = middle + if target_func(middle) * target_func(right) <= 0: + left = middle + + alpha = np.sqrt(1 + (left + right) / 4) + (-1 if delta_0 < 0 else 1) * np.sqrt((left + right) / 4) + + return alpha * self.sensitivity / np.sqrt(2 * self.epsilon) + + +class GaussianDiscrete(DPMechanism): + r"""The Discrete Gaussian mechanism in differential privacy. + + As proposed by Canonne, Kamath and Steinke, re-purposed for approximate :math:`(\epsilon,\delta)`-differential + privacy. + + Paper link: https://arxiv.org/pdf/2004.00010.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + delta : float + Privacy parameter :math:`\delta` for the mechanism. Must be in (0, 1]. + + sensitivity : int, default: 1 + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. 
+ + """ + def __init__(self, *, epsilon, delta, sensitivity=1, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + self._scale = self._find_scale() + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if epsilon == 0 or delta == 0: + raise ValueError("Neither Epsilon nor Delta can be zero") + + return super()._check_epsilon_delta(epsilon, delta) + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Integral): + raise TypeError("Sensitivity must be an integer") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return sensitivity + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + + if not isinstance(value, Integral): + raise TypeError("Value to be randomised must be an integer") + + return True + + @copy_docstring(Laplace.bias) + def bias(self, value): + return 0.0 + + @copy_docstring(Laplace.variance) + def variance(self, value): + raise NotImplementedError + + @copy_docstring(Geometric.randomise) + def randomise(self, value): + self._check_all(value) + + if self._scale == 0: + return value + + tau = 1 / (1 + np.floor(self._scale)) + sigma2 = self._scale ** 2 + + while True: + geom_x = 0 + while bernoulli_neg_exp(tau, self._rng): + geom_x += 1 + + bern_b = self._rng.random() < 0.5 + if bern_b and not geom_x: + continue + + lap_y = int((1 - 2 * bern_b) * geom_x) + bern_c = bernoulli_neg_exp((abs(lap_y) - tau * sigma2) ** 2 / 2 / sigma2, self._rng) + if bern_c: + return value + lap_y + + def _find_scale(self): + """Determine the scale of the mechanism's distribution given epsilon and delta. + """ + if self.sensitivity / self.epsilon == 0: + return 0 + + def objective(sigma, epsilon_, delta_, sensitivity_): + """Function for which we are seeking its root. 
""" + idx_0 = int(np.floor(epsilon_ * sigma ** 2 / sensitivity_ - sensitivity_ / 2)) + idx_1 = int(np.floor(epsilon_ * sigma ** 2 / sensitivity_ + sensitivity_ / 2)) + idx = 1 + + lhs, rhs, denom = float(idx_0 < 0), 0, 1 + _term, diff = 1, 1 + + while _term > 0 and diff > 0: + _term = np.exp(-idx ** 2 / 2 / sigma ** 2) + + if idx > idx_0: + lhs += _term + + if idx_0 < -idx: + lhs += _term + + if idx > idx_1: + diff = -rhs + rhs += _term + diff += rhs + + denom += 2 * _term + idx += 1 + if idx > 1e6: + raise ValueError("Infinite sum not converging, aborting. Try changing the epsilon and/or delta.") + + return (lhs - np.exp(epsilon_) * rhs) / denom - delta_ + + epsilon = self.epsilon + delta = self.delta + sensitivity = self.sensitivity + + # Begin by locating the root within an interval [2**i, 2**(i+1)] + guess_0 = 1 + f_0 = objective(guess_0, epsilon, delta, sensitivity) + pwr = 1 if f_0 > 0 else -1 + guess_1 = 2 ** pwr + f_1 = objective(guess_1, epsilon, delta, sensitivity) + + while f_0 * f_1 > 0: + guess_0 *= 2 ** pwr + guess_1 *= 2 ** pwr + + f_0 = f_1 + f_1 = objective(guess_1, epsilon, delta, sensitivity) + + # Find the root (sigma) using the bisection method + while not np.isclose(guess_0, guess_1, atol=1e-12, rtol=1e-6): + guess_mid = (guess_0 + guess_1) / 2 + f_mid = objective(guess_mid, epsilon, delta, sensitivity) + + if f_mid * f_0 <= 0: + f_1 = f_mid + guess_1 = guess_mid + if f_mid * f_1 <= 0: + f_0 = f_mid + guess_0 = guess_mid + + return (guess_0 + guess_1) / 2 diff --git a/src/diffprivlib/mechanisms/geometric.py b/src/diffprivlib/mechanisms/geometric.py new file mode 100644 index 0000000..27e87c6 --- /dev/null +++ b/src/diffprivlib/mechanisms/geometric.py @@ -0,0 +1,233 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without 
limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +The classic geometric mechanism for differential privacy, and its derivatives. +""" +from numbers import Integral + +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism, TruncationAndFoldingMixin +from diffprivlib.utils import copy_docstring + + +class Geometric(DPMechanism): + r""" + The classic geometric mechanism for differential privacy, as first proposed by Ghosh, Roughgarden and Sundararajan. + Extended to allow for non-unity sensitivity. + + Paper link: https://arxiv.org/pdf/0811.2841.pdf + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float, default: 1 + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. 
+ + """ + def __init__(self, *, epsilon, sensitivity=1, random_state=None): + super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + self._scale = - self.epsilon / self.sensitivity if self.sensitivity > 0 else - float("inf") + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Integral): + raise TypeError("Sensitivity must be an integer") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return sensitivity + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + + if not isinstance(value, Integral): + raise TypeError("Value to be randomised must be an integer") + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if not delta == 0: + raise ValueError("Delta must be zero") + + return super()._check_epsilon_delta(epsilon, delta) + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + return 0.0 + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + self._check_all(value) + + leading_factor = (1 - np.exp(self._scale)) / (1 + np.exp(self._scale)) + geom_series = np.exp(self._scale) / (1 - np.exp(self._scale)) + + return 2 * leading_factor * (geom_series + 3 * (geom_series ** 2) + 2 * (geom_series ** 3)) + + def randomise(self, value): + """Randomise `value` with the mechanism. + + Parameters + ---------- + value : int + The value to be randomised. + + Returns + ------- + int + The randomised value. 
+ + """ + self._check_all(value) + + # Need to account for overlap of 0-value between distributions of different sign + unif_rv = self._rng.random() - 0.5 + unif_rv *= 1 + np.exp(self._scale) + sgn = -1 if unif_rv < 0 else 1 + + # Use formula for geometric distribution, with ratio of exp(-epsilon/sensitivity) + return int(np.round(value + sgn * np.floor(np.log(sgn * unif_rv) / self._scale))) + + +class GeometricTruncated(Geometric, TruncationAndFoldingMixin): + r""" + The truncated geometric mechanism, where values that fall outside a pre-described range are mapped back to the + closest point within the range. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float, default: 1 + The sensitivity of the mechanism. Must be in [0, ∞). + + lower : int + The lower bound of the mechanism. + + upper : int + The upper bound of the mechanism. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. 
+ + """ + def __init__(self, *, epsilon, sensitivity=1, lower, upper, random_state=None): + super().__init__(epsilon=epsilon, sensitivity=sensitivity, random_state=random_state) + TruncationAndFoldingMixin.__init__(self, lower=lower, upper=upper) + + @classmethod + def _check_bounds(cls, lower, upper): + if not isinstance(lower, Integral) and abs(lower) != float("inf"): + raise TypeError(f"Lower bound must be integer-valued, got {lower}") + if not isinstance(upper, Integral) and abs(upper) != float("inf"): + raise TypeError(f"Upper bound must be integer-valued, got {upper}") + + return super()._check_bounds(lower, upper) + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.bias) + def variance(self, value): + raise NotImplementedError + + def _check_all(self, value): + super()._check_all(value) + TruncationAndFoldingMixin._check_all(self, value) + + return True + + @copy_docstring(Geometric.randomise) + def randomise(self, value): + self._check_all(value) + + noisy_value = super().randomise(value) + return int(np.round(self._truncate(noisy_value))) + + +class GeometricFolded(Geometric, TruncationAndFoldingMixin): + r""" + The folded geometric mechanism, where values outside a pre-described range are folded back toward the domain around + the closest point within the domain. + Half-integer bounds are permitted. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + sensitivity : float, default: 1 + The sensitivity of the mechanism. Must be in [0, ∞). + + lower : int or float + The lower bound of the mechanism. Must be integer or half-integer -valued. + + upper : int or float + The upper bound of the mechanism. Must be integer or half-integer -valued. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. 
To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, sensitivity=1, lower, upper, random_state=None): + super().__init__(epsilon=epsilon, sensitivity=sensitivity, random_state=random_state) + TruncationAndFoldingMixin.__init__(self, lower=lower, upper=upper) + + @classmethod + def _check_bounds(cls, lower, upper): + if not np.isclose(2 * lower, np.round(2 * lower)) or not np.isclose(2 * upper, np.round(2 * upper)): + raise ValueError("Bounds must be integer or half-integer floats") + + return super()._check_bounds(lower, upper) + + def _fold(self, value): + return super()._fold(int(np.round(value))) + + @copy_docstring(DPMechanism.bias) + def bias(self, value): + raise NotImplementedError + + @copy_docstring(DPMechanism.bias) + def variance(self, value): + raise NotImplementedError + + def _check_all(self, value): + super()._check_all(value) + TruncationAndFoldingMixin._check_all(self, value) + + return True + + @copy_docstring(Geometric.randomise) + def randomise(self, value): + self._check_all(value) + + noisy_value = super().randomise(value) + return int(np.round(self._fold(noisy_value))) diff --git a/src/diffprivlib/mechanisms/laplace.py b/src/diffprivlib/mechanisms/laplace.py new file mode 100644 index 0000000..9d6f9fa --- /dev/null +++ b/src/diffprivlib/mechanisms/laplace.py @@ -0,0 +1,499 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included 
in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +The classic Laplace mechanism in differential privacy, and its derivatives. +""" +from numbers import Real + +import numpy as np + +from diffprivlib.mechanisms.base import DPMechanism, TruncationAndFoldingMixin +from diffprivlib.utils import copy_docstring + + +class Laplace(DPMechanism): + r""" + The classical Laplace mechanism in differential privacy. + + First proposed by Dwork, McSherry, Nissim and Smith [DMNS16]_, with support for (relaxed) + :math:`(\epsilon,\delta)`-differential privacy [HLM15]_. + + Samples from the Laplace distribution are generated using 4 uniform variates, as detailed in [HB21]_, to prevent + against reconstruction attacks due to limited floating point precision. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞]. + + delta : float, default: 0.0 + Privacy parameter :math:`\delta` for the mechanism. Must be in [0, 1]. Cannot be simultaneously zero with + ``epsilon``. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + References + ---------- + .. [DMNS16] Dwork, Cynthia, Frank McSherry, Kobbi Nissim, and Adam Smith. "Calibrating noise to sensitivity in + private data analysis." 
Journal of Privacy and Confidentiality 7, no. 3 (2016): 17-51. + + .. [HLM15] Holohan, Naoise, Douglas J. Leith, and Oliver Mason. "Differential privacy in metric spaces: Numerical, + categorical and functional data under the one roof." Information Sciences 305 (2015): 256-268. + + .. [HB21] Holohan, Naoise, and Stefano Braghin. "Secure Random Sampling in Differential Privacy." arXiv preprint + arXiv:2107.10138 (2021). + + """ + def __init__(self, *, epsilon, delta=0.0, sensitivity, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, random_state=random_state) + self.sensitivity = self._check_sensitivity(sensitivity) + self._scale = None + + @classmethod + def _check_sensitivity(cls, sensitivity): + if not isinstance(sensitivity, Real): + raise TypeError("Sensitivity must be numeric") + + if sensitivity < 0: + raise ValueError("Sensitivity must be non-negative") + + return float(sensitivity) + + def _check_all(self, value): + super()._check_all(value) + self._check_sensitivity(self.sensitivity) + + if not isinstance(value, Real): + raise TypeError("Value to be randomised must be a number") + + return True + + def bias(self, value): + """Returns the bias of the mechanism at a given `value`. + + Parameters + ---------- + value : int or float + The value at which the bias of the mechanism is sought. + + Returns + ------- + bias : float or None + The bias of the mechanism at `value`. + + """ + return 0.0 + + def variance(self, value): + """Returns the variance of the mechanism at a given `value`. + + Parameters + ---------- + value : float + The value at which the variance of the mechanism is sought. + + Returns + ------- + bias : float + The variance of the mechanism at `value`. 
+ + """ + self._check_all(0) + + return 2 * (self.sensitivity / (self.epsilon - np.log(1 - self.delta))) ** 2 + + @staticmethod + def _laplace_sampler(unif1, unif2, unif3, unif4): + return np.log(1 - unif1) * np.cos(np.pi * unif2) + np.log(1 - unif3) * np.cos(np.pi * unif4) + + def randomise(self, value): + """Randomise `value` with the mechanism. + + Parameters + ---------- + value : float + The value to be randomised. + + Returns + ------- + float + The randomised value. + + """ + self._check_all(value) + + scale = self.sensitivity / (self.epsilon - np.log(1 - self.delta)) + standard_laplace = self._laplace_sampler(self._rng.random(), self._rng.random(), self._rng.random(), + self._rng.random()) + + return value - scale * standard_laplace + + +class LaplaceTruncated(Laplace, TruncationAndFoldingMixin): + r""" + The truncated Laplace mechanism, where values outside a pre-described domain are mapped to the closest point + within the domain. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞]. + + delta : float, default: 0.0 + Privacy parameter :math:`\delta` for the mechanism. Must be in [0, 1]. Cannot be simultaneously zero with + ``epsilon``. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + lower : float + The lower bound of the mechanism. + + upper : float + The upper bound of the mechanism. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. 
+ + """ + def __init__(self, *, epsilon, delta=0.0, sensitivity, lower, upper, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, sensitivity=sensitivity, random_state=random_state) + TruncationAndFoldingMixin.__init__(self, lower=lower, upper=upper) + + @copy_docstring(Laplace.bias) + def bias(self, value): + self._check_all(value) + + shape = self.sensitivity / self.epsilon + + return shape / 2 * (np.exp((self.lower - value) / shape) - np.exp((value - self.upper) / shape)) + + @copy_docstring(Laplace.variance) + def variance(self, value): + self._check_all(value) + + shape = self.sensitivity / self.epsilon + + variance = value ** 2 + shape * (self.lower * np.exp((self.lower - value) / shape) + - self.upper * np.exp((value - self.upper) / shape)) + variance += (shape ** 2) * (2 - np.exp((self.lower - value) / shape) + - np.exp((value - self.upper) / shape)) + + variance -= (self.bias(value) + value) ** 2 + + return variance + + def _check_all(self, value): + Laplace._check_all(self, value) + TruncationAndFoldingMixin._check_all(self, value) + + return True + + @copy_docstring(Laplace.randomise) + def randomise(self, value): + self._check_all(value) + + noisy_value = super().randomise(value) + return self._truncate(noisy_value) + + +class LaplaceFolded(Laplace, TruncationAndFoldingMixin): + r""" + The folded Laplace mechanism, where values outside a pre-described domain are folded around the domain until they + fall within. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞]. + + delta : float, default: 0.0 + Privacy parameter :math:`\delta` for the mechanism. Must be in [0, 1]. Cannot be simultaneously zero with + ``epsilon``. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + lower : float + The lower bound of the mechanism. + + upper : float + The upper bound of the mechanism. 
+ + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + """ + def __init__(self, *, epsilon, delta=0.0, sensitivity, lower, upper, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, sensitivity=sensitivity, random_state=random_state) + TruncationAndFoldingMixin.__init__(self, lower=lower, upper=upper) + + @copy_docstring(Laplace.bias) + def bias(self, value): + self._check_all(value) + + shape = self.sensitivity / self.epsilon + + bias = shape * (np.exp((self.lower + self.upper - 2 * value) / shape) - 1) + bias /= np.exp((self.lower - value) / shape) + np.exp((self.upper - value) / shape) + + return bias + + @copy_docstring(DPMechanism.variance) + def variance(self, value): + raise NotImplementedError + + def _check_all(self, value): + super()._check_all(value) + TruncationAndFoldingMixin._check_all(self, value) + + return True + + @copy_docstring(Laplace.randomise) + def randomise(self, value): + self._check_all(value) + + noisy_value = super().randomise(value) + return self._fold(noisy_value) + + +class LaplaceBoundedDomain(LaplaceTruncated): + r""" + The bounded Laplace mechanism on a bounded domain. The mechanism draws values directly from the domain using + rejection sampling, without any post-processing [HABM20]_. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in [0, ∞]. + + delta : float, default: 0.0 + Privacy parameter :math:`\delta` for the mechanism. Must be in [0, 1]. Cannot be simultaneously zero with + ``epsilon``. + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + lower : float + The lower bound of the mechanism. + + upper : float + The upper bound of the mechanism. + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. 
To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + References + ---------- + .. [HABM20] Holohan, Naoise, Spiros Antonatos, Stefano Braghin, and Pól Mac Aonghusa. "The Bounded Laplace Mechanism + in Differential Privacy." Journal of Privacy and Confidentiality 10, no. 1 (2020). + + """ + def _find_scale(self): + eps = self.epsilon + delta = self.delta + diam = self.upper - self.lower + delta_q = self.sensitivity + + def _delta_c(shape): + if shape == 0: + return 2.0 + return (2 - np.exp(- delta_q / shape) - np.exp(- (diam - delta_q) / shape)) / (1 - np.exp(- diam / shape)) + + def _f(shape): + return delta_q / (eps - np.log(_delta_c(shape)) - np.log(1 - delta)) + + left = delta_q / (eps - np.log(1 - delta)) + right = _f(left) + old_interval_size = (right - left) * 2 + + while old_interval_size > right - left: + old_interval_size = right - left + middle = (right + left) / 2 + + if _f(middle) >= middle: + left = middle + if _f(middle) <= middle: + right = middle + + return (right + left) / 2 + + def effective_epsilon(self): + r"""Gets the effective epsilon of the mechanism, only for strict :math:`\epsilon`-differential privacy. Returns + ``None`` if :math:`\delta` is non-zero. + + Returns + ------- + float + The effective :math:`\epsilon` parameter of the mechanism. Returns ``None`` if `delta` is non-zero. 
+ + """ + if self._scale is None: + self._scale = self._find_scale() + + if self.delta > 0.0: + return None + + return self.sensitivity / self._scale + + @copy_docstring(Laplace.bias) + def bias(self, value): + self._check_all(value) + + if self._scale is None: + self._scale = self._find_scale() + + bias = (self._scale - self.lower + value) / 2 * np.exp((self.lower - value) / self._scale) \ + - (self._scale + self.upper - value) / 2 * np.exp((value - self.upper) / self._scale) + bias /= 1 - np.exp((self.lower - value) / self._scale) / 2 \ + - np.exp((value - self.upper) / self._scale) / 2 + + return bias + + @copy_docstring(Laplace.variance) + def variance(self, value): + self._check_all(value) + + if self._scale is None: + self._scale = self._find_scale() + + variance = value**2 + variance -= (np.exp((self.lower - value) / self._scale) * (self.lower ** 2) + + np.exp((value - self.upper) / self._scale) * (self.upper ** 2)) / 2 + variance += self._scale * (self.lower * np.exp((self.lower - value) / self._scale) + - self.upper * np.exp((value - self.upper) / self._scale)) + variance += (self._scale ** 2) * (2 - np.exp((self.lower - value) / self._scale) + - np.exp((value - self.upper) / self._scale)) + variance /= 1 - (np.exp(-(value - self.lower) / self._scale) + + np.exp(-(self.upper - value) / self._scale)) / 2 + + variance -= (self.bias(value) + value) ** 2 + + return variance + + @copy_docstring(Laplace.randomise) + def randomise(self, value): + self._check_all(value) + + if self._scale is None: + self._scale = self._find_scale() + + value = max(min(value, self.upper), self.lower) + if np.isnan(value): + return float("nan") + + samples = 1 + + while True: + try: + unif = self._rng.random(4 * samples) + except TypeError: # rng is secrets.SystemRandom + unif = [self._rng.random() for _ in range(4 * samples)] + noisy = value + self._scale * self._laplace_sampler(*np.array(unif).reshape(4, -1)) + + if ((noisy >= self.lower) & (noisy <= self.upper)).any(): + idx = 
np.argmax((noisy >= self.lower) & (noisy <= self.upper)) + return noisy[idx] + samples = min(100000, samples * 2) + + +class LaplaceBoundedNoise(Laplace): + r""" + The Laplace mechanism with bounded noise, only applicable for approximate differential privacy (delta > 0) + [GDGK18]_. + + Epsilon must be strictly positive, `epsilon` > 0. `delta` must be strictly in the interval (0, 0.5). + - For zero `epsilon`, use :class:`.Uniform`. + - For zero `delta`, use :class:`.Laplace`. + + Parameters + ---------- + epsilon : float + Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞]. + + delta : float + Privacy parameter :math:`\delta` for the mechanism. Must be in (0, 0.5). + + sensitivity : float + The sensitivity of the mechanism. Must be in [0, ∞). + + random_state : int or RandomState, optional + Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + References + ---------- + .. [GDGK18] Geng, Quan, Wei Ding, Ruiqi Guo, and Sanjiv Kumar. "Truncated Laplacian Mechanism for Approximate + Differential Privacy." arXiv preprint arXiv:1810.00877v1 (2018). + + """ + def __init__(self, *, epsilon, delta, sensitivity, random_state=None): + super().__init__(epsilon=epsilon, delta=delta, sensitivity=sensitivity, random_state=random_state) + self._noise_bound = None + + @classmethod + def _check_epsilon_delta(cls, epsilon, delta): + if epsilon == 0: + raise ValueError("Epsilon must be strictly positive. For zero epsilon, use :class:`.Uniform`.") + + if isinstance(delta, Real) and not 0 < delta < 0.5: + raise ValueError("Delta must be strictly in the interval (0,0.5). 
class Snapping(LaplaceTruncated):
    r"""
    The Snapping mechanism for differential privacy.

    First proposed by Ilya Mironov [Mir12]_.

    It eliminates a vulnerability stemming from the representation of reals as floating-point numbers in implementations
    of the classic Laplace mechanism and its variants which use the inverse CDF of the Laplace distribution to sample
    it. It causes a high degree of reduction in the granularity of the output.

    For the most faithful implementation of the mechanism, the ``crlibm`` package should be installed.

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism. Must be strictly greater than :math:`2 \eta` (and may be
        ∞), where :math:`\eta` is the machine epsilon of the floating point type.

    sensitivity : float
        The sensitivity of the mechanism. Must be in [0, ∞).

    lower : float
        The lower bound of the mechanism.

    upper : float
        The upper bound of the mechanism.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    References
    ----------
    .. [Mir12] Mironov, Ilya. "On significance of the least significant bits for differential privacy." Proceedings of
        the 2012 ACM conference on Computer and communications security (2012).

    """
    def __init__(self, *, epsilon, sensitivity, lower, upper, random_state=None):
        super().__init__(epsilon=epsilon, sensitivity=sensitivity, delta=0.0, lower=lower, upper=upper,
                         random_state=random_state)
        # Half-width of [lower, upper] expressed in units of the sensitivity; cached once at construction.
        self._bound = self._scale_bound()

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """
        Validate ``epsilon`` and ``delta`` with the parent class, then additionally reject any ``epsilon`` that does
        not exceed twice ``np.finfo(float).epsneg``, since :meth:`effective_epsilon` would otherwise be zero or
        negative.

        Returns
        -------
        tuple
            The validated ``(epsilon, delta)`` pair.

        Raises
        ------
        ValueError
            If ``epsilon`` is less than or equal to twice the machine epsilon.

        """
        epsilon, delta = super()._check_epsilon_delta(epsilon, delta)

        machine_epsilon = np.finfo(float).epsneg
        if epsilon <= 2 * machine_epsilon:
            raise ValueError("Epsilon must be at least as large as twice the machine epsilon for the floating point "
                             "type, as the effective epsilon must be non-negative")

        return epsilon, delta

    def _scale_bound(self):
        """
        Scales the lower and upper bounds to be proportionate to sensitivity 1, and symmetrical about 0.
        For sensitivity 0, only centres the bound, as scaling up and down is not defined.

        Returns
        -------
        float
            A symmetric bound around 0 scaled to sensitivity 1

        """
        if self.sensitivity == 0:
            return (self.upper - self.lower) / 2.0
        return (self.upper - self.lower) / 2.0 / self.sensitivity

    def _truncate(self, value):
        """
        Clamp a value that is already centred and scaled (see :meth:`_scale_and_offset_value`) to the symmetric
        interval ``[-self._bound, self._bound]``.

        This must not be applied to raw, unscaled values: the interval is centred on 0, not on the midpoint of
        ``[lower, upper]``.

        """
        if value > self._bound:
            return self._bound
        if value < -self._bound:
            return -self._bound

        return value

    def bias(self, value):
        """Not implemented for the Snapping mechanism."""
        raise NotImplementedError

    def variance(self, value):
        """Not implemented for the Snapping mechanism."""
        raise NotImplementedError

    def effective_epsilon(self):
        r"""
        Returns the effective value used in the Snapping mechanism to give the required :math:`\epsilon`-DP, based on
        the bounds and the machine epsilon.
        Based on section 5.2 of [Mir12]_.

        Returns
        -------
        float
            The effective value of :math:`\epsilon`

        """
        machine_epsilon = np.finfo(float).epsneg
        return (self.epsilon - 2 * machine_epsilon) / (1 + 12 * self._bound * machine_epsilon)

    def _scale_and_offset_value(self, value):
        """
        Centre value around 0 with symmetric bound and scale to sensitivity 1

        Parameters
        ----------
        value : float
            value to be scaled

        Returns
        -------
        float
            value offset to be centered on 0 and scaled to sensitivity 1

        """
        value_scaled = value / self.sensitivity
        return value_scaled - self._bound - (self.lower / self.sensitivity)

    def _reverse_scale_and_offset_value(self, value):
        """
        Inverse of :meth:`_scale_and_offset_value`: map a centred, sensitivity-1 value back to the original domain.

        Parameters
        ----------
        value : float
            Centred and scaled value.

        Returns
        -------
        float
            Value in the original (unscaled, un-centred) domain.

        """
        return (value + self._bound) * self.sensitivity + self.lower

    @staticmethod
    def _get_nearest_power_of_2(x):
        """
        Return ``x`` unchanged when its IEEE-754 mantissa bits are all zero (i.e. it is already a power of two, or
        zero); otherwise return the next power of two above it, obtained by incrementing the exponent field and
        clearing the mantissa.

        Parameters
        ----------
        x : float
            Value to round up. Intended for non-negative finite inputs.

        Returns
        -------
        float
            Smallest power of two that is greater than or equal to ``x``.

        """
        def float_to_bits(d):
            # Reinterpret the 8 bytes of a double as a signed 64-bit integer.
            s = struct.pack('>d', d)
            return struct.unpack('>q', s)[0]

        def bits_to_float(b):
            # Reinterpret a signed 64-bit integer as the double with the same bit pattern.
            s = struct.pack('>q', b)
            return struct.unpack('>d', s)[0]

        bits = float_to_bits(x)
        mantissa_size = np.finfo(float).nmant
        if bits % (1 << mantissa_size) == 0:
            return x
        return bits_to_float(((bits >> mantissa_size) + 1) << mantissa_size)

    def _round_to_nearest_power_of_2(self, value, lambda_):
        """ Performs the rounding step from [Mir12]_ with ties resolved towards +∞

        Parameters
        ----------
        value : float
            Value to be rounded

        lambda_ : float
            Rounding granularity; ``value`` is rounded to the nearest multiple of ``lambda_``.

        Returns
        -------
        float
            Rounded value

        """
        if self.epsilon == float('inf'):  # infinitely small rounding
            return value
        remainder = value % lambda_
        if remainder > lambda_ / 2:
            return value - remainder + lambda_
        if remainder == lambda_ / 2:
            return value + remainder
        return value - remainder

    def _uniform_sampler(self):
        """
        Uniformly sample the full domain of floating-point numbers between (0, 1), rather than only multiples of 2^-53.
        A uniform distribution over D ∩ (0, 1) can be generated by independently sampling an exponent
        from the geometric distribution with parameter .5 and a significand by drawing a uniform string from
        {0, 1}^52 [Mir12]_

        Based on code recipe in Python standard library documentation [Py21]_.

        Returns
        -------
        float
            A value sampled from float in (0, 1) with probability proportional to the size of the infinite-precision
            real interval each float represents

        References
        ----------
        .. [Py21] The Python Standard Library. "random — Generate pseudo-random numbers", 2021
            https://docs.python.org/3/library/random.html#recipes

        """
        mantissa_size = np.finfo(float).nmant
        mantissa = 1 << mantissa_size | self._getrandbits(mantissa_size)
        exponent = -(mantissa_size + 1)
        x = 0
        # Each all-zero 32-bit draw lowers the exponent by 32; the first non-zero draw fixes the final exponent.
        while not x:
            x = self._getrandbits(32)
            exponent += x.bit_length() - 32
        return np.ldexp(mantissa, exponent)

    def _getrandbits(self, bits):
        """
        Draw a non-negative integer with ``bits`` random bits from the mechanism's random number generator, using
        ``getrandbits`` when the generator offers it and ``randint(0, 2 ** bits)`` otherwise.

        """
        try:
            return self._rng.getrandbits(bits)
        except AttributeError:
            return self._rng.randint(0, 2 ** bits)

    @staticmethod
    def _laplace_sampler(unif_bit, unif):
        r"""
        Laplace inverse CDF random sampling implementation which uses full domain uniform sampling and exact log
        implementation from crlibm (if installed), as mentioned in [Mir12]_.
        Outputs a random value scaled according to privacy budget and sensitivity 1, as bounds and input are scaled to
        sensitivity 1 before Laplacian noise is added.

        Parameters
        ----------
        unif_bit : int
            Random bit (0 or 1) selecting the sign of the sample.

        unif : float
            Uniform sample in (0, 1) whose logarithm gives the magnitude of the sample.

        Returns
        -------
        float
            Random value from Laplace distribution scaled according to :math:`\epsilon`

        """
        laplace = (-1) ** unif_bit * log_rn(unif)
        return laplace

    def randomise(self, value):
        """Randomise `value` with the mechanism.

        Parameters
        ----------
        value : float
            The value to be randomised.

        Returns
        -------
        float
            The randomised value.

        """
        self._check_all(value)
        if self.sensitivity == 0:
            # No noise is required, so only clamp to the mechanism's bounds. The clamp has to happen in the original
            # domain: ``self._truncate`` works on the centred interval [-bound, bound] and would, for example, turn
            # 15 into 5 when lower=10 and upper=20.
            return min(max(value, self.lower), self.upper)

        value_scaled_offset = self._scale_and_offset_value(value)
        value_clamped = self._truncate(value_scaled_offset)

        scale = 1.0 / self.effective_epsilon()  # everything is already scaled to sensitivity 1
        lambda_ = self._get_nearest_power_of_2(scale)
        laplace = scale * self._laplace_sampler(self._getrandbits(1), self._uniform_sampler())
        value_rounded = self._round_to_nearest_power_of_2(value_clamped + laplace, lambda_)
        return self._reverse_scale_and_offset_value(self._truncate(value_rounded))
class Staircase(Laplace):
    r"""
    Differentially private staircase mechanism.

    An optimisation of the classical Laplace mechanism (:class:`.Laplace`) whose noise is a "geometric mixture of
    uniform random variables".
    Paper link: https://arxiv.org/pdf/1212.1186.pdf

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞].

    sensitivity : float
        The sensitivity of the mechanism. Must be in [0, ∞).

    gamma : float, default: 1 / (1 + exp(epsilon/2))
        Value of the tuning parameter gamma for the mechanism. Must be in [0, 1].

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    """
    def __init__(self, *, epsilon, sensitivity, gamma=None, random_state=None):
        super().__init__(epsilon=epsilon, delta=0, sensitivity=sensitivity, random_state=random_state)
        self.gamma = self._check_gamma(gamma, epsilon=self.epsilon)

        # randomise() calls ``geometric`` on the generator, so a ``secrets.SystemRandom`` is swapped for a numpy
        # Generator here.
        rng_is_system_random = isinstance(self._rng, secrets.SystemRandom)
        if rng_is_system_random:
            self._rng = np.random.default_rng()

    @classmethod
    def _check_gamma(cls, gamma, epsilon=None):
        """Return ``gamma`` as a float after validation, deriving the default from ``epsilon`` when none is given.

        Raises ``TypeError`` for a non-numeric gamma and ``ValueError`` for a gamma outside [0, 1].
        """
        needs_default = gamma is None and epsilon is not None
        if needs_default:
            gamma = 1 / (1 + np.exp(epsilon / 2))

        if isinstance(gamma, Real):
            if 0.0 <= gamma <= 1.0:
                return float(gamma)
            raise ValueError("Gamma must be in [0,1]")

        raise TypeError("Gamma must be numeric")

    @copy_docstring(Laplace._check_all)
    def _check_all(self, value):
        # Parent checks first, then re-validate gamma in case the attribute was changed after construction.
        super()._check_all(value)
        self._check_gamma(self.gamma)

        return True

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """Reject any non-zero ``delta``, then defer to the parent class for the remaining checks."""
        if delta != 0:
            raise ValueError("Delta must be zero")

        return super()._check_epsilon_delta(epsilon, delta)

    @copy_docstring(Laplace.bias)
    def bias(self, value):
        return 0.0

    @copy_docstring(Laplace.variance)
    def variance(self, value):
        raise NotImplementedError

    @copy_docstring(Laplace.randomise)
    def randomise(self, value):
        self._check_all(value)

        rng = self._rng
        gamma = self.gamma

        # The four draws below are made in a fixed order so that seeded runs stay reproducible.
        sign = -1 if rng.random() < 0.5 else 1
        geometric_rv = rng.geometric(1 - np.exp(- self.epsilon)) - 1
        unif_rv = rng.random()
        first_segment_prob = gamma / (gamma + (1 - gamma) * np.exp(- self.epsilon))
        binary_rv = 0 if rng.random() < first_segment_prob else 1

        # Magnitude of the noise for each of the two segments of a stair; ``binary_rv`` picks one of them.
        first_segment = (geometric_rv + gamma * unif_rv) * self.sensitivity
        second_segment = (geometric_rv + gamma + (1 - gamma) * unif_rv) * self.sensitivity

        return value + sign * ((1 - binary_rv) * first_segment + binary_rv * second_segment)
(C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Transform wrappers for differential privacy mechanisms to extend their use to alternative data types. + +Notes +----- +The naming convention for new transforms is to describe the `pre-transform` action, i.e. the action performed on the +data to be ingested by the mechanism. For transforms without a `pre-transform`, the `post-transform` action should be +described. 
class DPTransformer(DPMachine):
    """
    Base class for DP transformers: thin wrappers that let a DP mechanism be used with data types and structures
    outside its native scope.

    A :class:`.DPTransformer` wraps a :class:`.DPMachine` (either another :class:`.DPTransformer`, or a
    :class:`.DPMechanism`). Transformers can therefore be chained, but the chain must terminate with a
    :class:`.DPMechanism`.

    """
    def __init__(self, parent):
        if isinstance(parent, DPMachine):
            self.parent = parent
            return

        raise TypeError("Data transformer must take a DPMachine as input")

    def pre_transform(self, value):
        """Identity pre-transform: the input is handed to the wrapped machine unchanged.

        Parameters
        ----------
        value : float or string
            Input value to be transformed.

        Returns
        -------
        float or string
            The input value, unmodified.

        """
        return value

    def post_transform(self, value):
        """Identity post-transform: the wrapped machine's output is returned unchanged.

        Parameters
        ----------
        value : float or string
            Mechanism output to be transformed.

        Returns
        -------
        float or string
            The output value, unmodified.

        """
        return value

    def randomise(self, value):
        """
        Randomise the given value using the wrapped :class:`.DPMachine`.

        The value is passed through :meth:`pre_transform`, randomised by ``self.parent``, and the result is passed
        through :meth:`post_transform`.

        Parameters
        ----------
        value : float or string
            Value to be randomised.

        Returns
        -------
        float or string
            Randomised value, same type as `value`.

        """
        return self.post_transform(self.parent.randomise(self.pre_transform(value)))
class IntToString(DPTransformer):
    """
    IntToString DP transformer, for using integer-valued data with string-valued mechanisms.

    Useful when using integer-valued data with :class:`.Binary` or :class:`.Exponential`.
    """
    def pre_transform(self, value):
        """Convert the input to a string before it is handed to the mechanism.

        Parameters
        ----------
        value : int
            Input value to be transformed; converted with ``str``.

        Returns
        -------
        string
            Transformed input value

        """
        as_text = str(value)
        return as_text

    def post_transform(self, value):
        """Convert the mechanism's output back to an integer.

        Parameters
        ----------
        value : string
            Mechanism output to be transformed; converted with ``int``.

        Returns
        -------
        int
            Transformed output value.

        """
        as_integer = int(value)
        return as_integer
class RoundedInteger(DPTransformer):
    """
    Rounded integer transform. Rounds the (float) output of the given mechanism to the nearest integer.
    """
    def post_transform(self, value):
        """Round the (float) output of the mechanism and return it as an ``int``.

        Parameters
        ----------
        value : float
            Mechanism output to be transformed.

        Returns
        -------
        int
            Transformed output value.

        """
        rounded = round(value)
        return int(rounded)
class StringToInt(DPTransformer):
    """
    StringToInt DP transformer, for using string-valued data with integer-valued mechanisms.

    Useful when using ordered, string-valued data with :class:`.Geometric`.
    """

    def pre_transform(self, value):
        """Convert the input to an integer before it is handed to the mechanism.

        Parameters
        ----------
        value : string
            Input value to be transformed; converted with ``int``.

        Returns
        -------
        int
            Transformed input value

        """
        as_integer = int(value)
        return as_integer

    def post_transform(self, value):
        """Convert the mechanism's output back to a string.

        Parameters
        ----------
        value : int
            Mechanism output to be transformed; converted with ``str``.

        Returns
        -------
        string
            Transformed output value.

        """
        as_text = str(value)
        return as_text
class Uniform(DPMechanism):
    r"""
    Uniform mechanism for differential privacy.

    This emerges as a special case of the :class:`.LaplaceBoundedNoise` mechanism when epsilon = 0.
    Paper link: https://arxiv.org/pdf/1810.00877.pdf

    Parameters
    ----------
    delta : float
        Privacy parameter :math:`\delta` for the mechanism. Must be in (0, 0.5].

    sensitivity : float
        The sensitivity of the mechanism. Must be in [0, ∞).

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    """
    def __init__(self, *, delta, sensitivity, random_state=None):
        # Epsilon is fixed at zero for this mechanism; only delta is configurable.
        super().__init__(epsilon=0.0, delta=delta, random_state=random_state)
        self.sensitivity = self._check_sensitivity(sensitivity)

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """Require ``epsilon == 0`` and ``0 < delta <= 0.5`` before deferring to the parent class's checks."""
        if epsilon != 0:
            raise ValueError("Epsilon must be strictly zero.")

        delta_in_range = 0 < delta <= 0.5
        if not delta_in_range:
            raise ValueError("Delta must be in the half-open interval (0, 0.5]")

        return super()._check_epsilon_delta(epsilon, delta)

    @classmethod
    def _check_sensitivity(cls, sensitivity):
        """Return ``sensitivity`` as a float; raise ``TypeError`` if non-numeric, ``ValueError`` if negative."""
        if isinstance(sensitivity, Real):
            if sensitivity < 0:
                raise ValueError("Sensitivity must be non-negative")
            return float(sensitivity)

        raise TypeError("Sensitivity must be numeric")

    @copy_docstring(Laplace.bias)
    def bias(self, value):
        return 0.0

    @copy_docstring(Laplace.variance)
    def variance(self, value):
        self._check_all(value)

        # Variance of a uniform distribution of total width sensitivity / delta.
        width = self.sensitivity / self.delta
        return width ** 2 / 12

    def _check_all(self, value):
        """Run the parent's checks, re-validate the sensitivity and require ``value`` to be a real number."""
        super()._check_all(value)
        self._check_sensitivity(self.sensitivity)

        if isinstance(value, Real):
            return True

        raise TypeError("Value to be randomised must be a number")

    @copy_docstring(Laplace.randomise)
    def randomise(self, value):
        self._check_all(value)

        # Draw from [-1, 1), then stretch to half the noise width, sensitivity / delta / 2.
        centred_draw = 2 * self._rng.random() - 1
        noise = centred_draw * (self.sensitivity / self.delta / 2)

        return value + noise
class Vector(DPMechanism):
    r"""
    The vector mechanism in differential privacy.

    The vector mechanism is used when perturbing convex objective functions.
    Full paper: http://www.jmlr.org/papers/volume12/chaudhuri11a/chaudhuri11a.pdf

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism. Must be in (0, ∞].

    function_sensitivity : float
        The function sensitivity of the mechanism. Must be in [0, ∞).

    data_sensitivity : float, default: 1.0
        The data sensitivity of the mechanism. Must be in [0, ∞).

    dimension : int
        Function input dimension. This dimension relates to the size of the input vector of the function being
        considered by the mechanism. This corresponds to the size of the random vector produced by the mechanism. Must
        be in [1, ∞).

    alpha : float, default: 0.01
        Regularisation parameter. Must be in (0, ∞).

    n : int, default: 1
        Size of the training dataset, required to calibrate the influence of the random vector in the objective.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    """
    def __init__(self, *, epsilon, function_sensitivity, data_sensitivity=1.0, dimension, alpha=0.01, n=1,
                 random_state=None):
        super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state)
        self.function_sensitivity, self.data_sensitivity = self._check_sensitivity(function_sensitivity,
                                                                                   data_sensitivity)
        self.dimension = self._check_dimension(dimension)
        self.alpha = self._check_alpha(alpha)
        self.n = int(n)

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """Reject any non-zero ``delta``, then defer to the parent class for the remaining checks."""
        if not delta == 0:
            raise ValueError("Delta must be zero")

        return super()._check_epsilon_delta(epsilon, delta)

    @classmethod
    def _check_alpha(cls, alpha):
        """Return ``alpha`` unchanged; raise ``TypeError`` if non-numeric, ``ValueError`` if not strictly positive."""
        if not isinstance(alpha, Real):
            raise TypeError("Alpha must be numeric")

        if alpha <= 0:
            raise ValueError("Alpha must be strictly positive")

        return alpha

    @classmethod
    def _check_dimension(cls, vector_dim):
        """Return the dimension as an ``int``; raise ``TypeError`` if it is not integer-valued, ``ValueError`` if it
        is smaller than 1."""
        if not isinstance(vector_dim, Real) or not np.isclose(vector_dim, int(vector_dim)):
            raise TypeError("d must be integer-valued")
        if int(vector_dim) < 1:
            raise ValueError("d must be strictly positive")

        return int(vector_dim)

    @classmethod
    def _check_sensitivity(cls, function_sensitivity, data_sensitivity):
        """Return both sensitivities unchanged; raise ``TypeError`` if either is non-numeric, ``ValueError`` if either
        is negative."""
        if not isinstance(function_sensitivity, Real) or not isinstance(data_sensitivity, Real):
            raise TypeError("Sensitivities must be numeric")

        if function_sensitivity < 0 or data_sensitivity < 0:
            raise ValueError("Sensitivities must be non-negative")

        return function_sensitivity, data_sensitivity

    def _check_all(self, value):
        """Re-validate every parameter of the mechanism and require ``value`` to be callable."""
        super()._check_all(value)
        self._check_alpha(self.alpha)
        self._check_sensitivity(self.function_sensitivity, self.data_sensitivity)
        self._check_dimension(self.dimension)

        if self.n < 1:
            raise ValueError(f"n must be strictly positive, got {self.n}")

        if not callable(value):
            raise TypeError("Value to be randomised must be a function")

        return True

    @copy_docstring(DPMechanism.bias)
    def bias(self, value):
        raise NotImplementedError

    @copy_docstring(DPMechanism.variance)
    def variance(self, value):
        raise NotImplementedError

    def randomise(self, value):
        """Randomise `value` with the mechanism.

        If `value` is a method of two outputs, they are taken as `f` and `fprime` (i.e., its gradient), and both are
        perturbed accordingly.

        The objects returned by `value` are never modified in place: the perturbed objective and gradient are always
        new objects, so a function that returns a cached array or a plain list as its gradient is handled correctly.

        Parameters
        ----------
        value : method
            The function to be randomised.

        Returns
        -------
        method
            The randomised method.

        """
        self._check_all(value)

        # Budget left for the noise after accounting for the regularisation term.
        epsilon_p = self.epsilon - 2 * np.log(1 + self.function_sensitivity * self.data_sensitivity / self.alpha)
        delta = 0

        if epsilon_p <= 0:
            # Not enough budget: add extra quadratic regularisation (``delta``) and spend half of epsilon on noise.
            delta = (self.function_sensitivity * self.data_sensitivity / np.expm1(self.epsilon / 4)
                     - self.alpha) / self.n
            epsilon_p = self.epsilon / 2

        scale = self.data_sensitivity * 2 / epsilon_p

        try:
            normed_noisy_vector = self._rng.standard_normal((self.dimension, 4)).sum(axis=1) / 2
            noisy_norm = self._rng.gamma(self.dimension / 4, scale, 4).sum()
        except AttributeError:  # rng is secrets.SystemRandom
            normed_noisy_vector = np.reshape([self._rng.normalvariate(0, 1) for _ in range(self.dimension * 4)],
                                             (-1, 4)).sum(axis=1) / 2
            noisy_norm = sum(self._rng.gammavariate(self.dimension / 4, scale) for _ in range(4)) if scale > 0 else 0.0

        # Keep the random direction, but rescale it so that its length is the gamma-distributed norm.
        norm = np.linalg.norm(normed_noisy_vector, 2)
        normed_noisy_vector = normed_noisy_vector / norm * noisy_norm

        def output_func(*args):
            # The first positional argument is the point at which the objective is evaluated. It is converted to an
            # array for the arithmetic below, while ``value`` still receives the caller's original arguments.
            input_vec = np.asarray(args[0])

            func = value(*args)

            if isinstance(func, tuple):
                func, grad = func
            else:
                grad = None

            # Rebind rather than use ``+=`` so that an array returned by ``value`` is never mutated.
            func = func + np.dot(normed_noisy_vector, input_vec) / self.n
            func = func + 0.5 * delta * np.dot(input_vec, input_vec)

            if grad is None:
                return func

            # ``grad += ...`` would modify the caller's object in place, and for a list gradient would extend the
            # list instead of adding element-wise. Build a fresh array instead.
            grad = np.asarray(grad) + (normed_noisy_vector / self.n + delta * input_vec)

            return func, grad

        return output_func
+""" +Machine learning models with differential privacy +""" +from diffprivlib.models.naive_bayes import GaussianNB +from diffprivlib.models.k_means import KMeans +from diffprivlib.models.linear_regression import LinearRegression +from diffprivlib.models.logistic_regression import LogisticRegression +from diffprivlib.models.pca import PCA +from diffprivlib.models.standard_scaler import StandardScaler +from diffprivlib.models.forest import RandomForestClassifier, DecisionTreeClassifier diff --git a/src/diffprivlib/models/forest.py b/src/diffprivlib/models/forest.py new file mode 100644 index 0000000..682b419 --- /dev/null +++ b/src/diffprivlib/models/forest.py @@ -0,0 +1,615 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2021 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class RandomForestClassifier(skRandomForestClassifier, DiffprivlibMixin):  # pylint: disable=too-many-ancestors
    r"""Random Forest Classifier with differential privacy.

    This class implements Differentially Private Random Decision Forests using [1].
    :math:`\epsilon`-Differential privacy is achieved by constructing decision trees via random splitting criterion and
    applying the :class:`.PermuteAndFlip` Mechanism to determine a noisy label.

    Parameters
    ----------
    n_estimators : int, default: 10
        The number of trees in the forest.

    epsilon : float, default: 1.0
        Privacy parameter :math:`\epsilon`.

    bounds : tuple, optional
        Bounds of the data, provided as a tuple of the form (min, max).  `min` and `max` can either be scalars, covering
        the min/max of the entire data, or vectors with one entry per feature.  If not provided, the bounds are computed
        on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`.

    classes : array-like of shape (n_classes,), optional
        Array of classes to be trained on. If not provided, the classes will be read from the data when ``.fit()`` is
        first called, resulting in a :class:`.PrivacyLeakWarning`.

    n_jobs : int, default: 1
        Number of jobs used when fitting the trees in parallel.

    verbose : int, default: 0
        Set to any positive number for verbosity.

    random_state : int or RandomState, optional
        Controls both the randomness of the shuffling of the samples used when building trees (if ``shuffle=True``) and
        training of the differentially-private :class:`.DecisionTreeClassifier` to construct the forest.  To obtain a
        deterministic behaviour during randomisation, ``random_state`` has to be fixed to an integer.

    accountant : BudgetAccountant, optional
        Accountant to keep track of privacy budget.

    max_depth : int, default: 5
        The maximum depth of the tree.  The depth translates to an exponential increase in memory usage.

    warm_start : bool, default=False
        When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble,
        otherwise, just fit a whole new forest.

    shuffle : bool, default=False
        When set to ``True``, shuffles the datapoints to be trained on trees at random.  In diffprivlib, each datapoint
        is used to train exactly one tree. When set to ``False``, datapoints are chosen in-order to their tree in
        sequence.

    Attributes
    ----------
    estimator_ : DecisionTreeClassifier
        The child estimator template used to create the collection of fitted sub-estimators.

    estimators_ : list of DecisionTreeClassifier
        The collection of fitted sub-estimators.

    classes_ : ndarray of shape (n_classes,) or a list of such arrays
        The classes labels.

    n_classes_ : int or list
        The number of classes.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings.

    n_outputs_ : int
        The number of outputs when ``fit`` is performed.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from diffprivlib.models import RandomForestClassifier
    >>> X, y = make_classification(n_samples=1000, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = RandomForestClassifier(n_estimators=100, random_state=0)
    >>> clf.fit(X, y)
    >>> print(clf.predict([[0, 0, 0, 0]]))
    [1]

    References
    ----------
    [1] Sam Fletcher, Md Zahidul Islam. "Differentially Private Random Decision Forests using Smooth Sensitivity"
    https://arxiv.org/abs/1606.03572

    """

    _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints(
        skRandomForestClassifier, "n_estimators", "n_jobs", "verbose", "random_state", "warm_start")

    def __init__(self, n_estimators=10, *, epsilon=1.0, bounds=None, classes=None, n_jobs=1, verbose=0, accountant=None,
                 random_state=None, max_depth=5, warm_start=False, shuffle=False, **unused_args):
        super().__init__(
            n_estimators=n_estimators,
            criterion=None,
            bootstrap=False,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start)
        self.epsilon = epsilon
        self.bounds = bounds
        self.classes = classes
        self.max_depth = max_depth
        self.shuffle = shuffle
        self.accountant = BudgetAccountant.load_default(accountant)
        self.estimator = DecisionTreeClassifier()
        # Names of the forest attributes that are copied onto every tree when it is created.
        self.estimator_params = ("max_depth", "epsilon", "bounds", "classes")

        self._warn_unused_args(unused_args)

    def fit(self, X, y, sample_weight=None):
        """
        Build a forest of trees from the training set (X, y).

        The samples are split between the trees so that each sample is used to train exactly one tree.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted to ``dtype=np.float32``.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in regression).

        sample_weight : ignored
            Ignored by diffprivlib. Present for consistency with sklearn API.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If ``warm_start`` is set and ``n_estimators`` is smaller than the number of trees already fitted.
        """
        self._validate_params()
        self.accountant.check(self.epsilon, 0)

        if sample_weight is not None:
            self._warn_unused_args("sample_weight")

        # Validate or convert input data
        X, y = validate_data(self, X, y, multi_output=False, dtype=DTYPE)

        if self.bounds is None:
            warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
            self.bounds = (np.min(X, axis=0), np.max(X, axis=0))
        self.bounds = self._check_bounds(self.bounds, shape=X.shape[1])
        X = self._clip_to_bounds(X, self.bounds)

        y = np.atleast_1d(y)
        if y.ndim == 2 and y.shape[1] == 1:
            warnings.warn("A column-vector y was passed when a 1d array was expected. Please change the shape of y to "
                          "(n_samples,), for example using ravel().", DataConversionWarning, stacklevel=2)

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity against vs [:, np.newaxis] that does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        if self.classes is None:
            warnings.warn("Classes have not been specified and will be calculated on the data provided. This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify the prediction classes for model.", PrivacyLeakWarning)
            self.classes = np.unique(y)
        self.classes_ = np.ravel(self.classes)
        self.n_classes_ = len(self.classes_)

        # Replace every label by its position in `classes_`; from here on `y` holds indices, not labels.
        y = np.searchsorted(self.classes_, y)

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        # Check parameters
        self._validate_estimator()

        random_state = check_random_state(self.random_state)

        if not self.warm_start or not hasattr(self, "estimators_"):
            # Free allocated memory, if any
            self.estimators_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError(f"n_estimators={self.n_estimators} must be larger or equal to len(estimators_)="
                             f"{len(self.estimators_)} when warm_start==True")
        if n_more_estimators == 0:
            warnings.warn("Warm-start fitting without increasing n_estimators does not fit new trees.")
            return self

        if self.warm_start and len(self.estimators_) > 0:
            # We draw from the random state to get the random state we
            # would have got if we hadn't used a warm_start.
            random_state.randint(MAX_INT, size=len(self.estimators_))

        trees = [
            self._make_estimator(append=False, random_state=random_state)
            for _ in range(n_more_estimators)
        ]

        # The trees are trained on the index-encoded `y`, but `estimator_params` hands them the raw class labels.
        # Give each tree the encoded range instead, so that its per-leaf class counts compare indices with indices.
        # For labels that are already 0..n_classes-1 this is a no-op.
        encoded_classes = np.arange(self.n_classes_)
        for tree in trees:
            tree.set_params(classes=encoded_classes)

        # Split samples between trees as evenly as possible (randomly if shuffle==True)
        n_samples = X.shape[0]
        tree_idxs = random_state.permutation(n_samples) if self.shuffle else np.arange(n_samples)
        tree_idxs = (tree_idxs // (n_samples / n_more_estimators)).astype(int)

        # Parallel loop: we prefer the threading backend as the Cython code
        # for fitting the trees is internally releasing the Python GIL
        # making threading more efficient than multiprocessing in
        # that case. However, for joblib 0.12+ we respect any
        # parallel_backend contexts set at a higher level,
        # since correctness does not rely on using threads.
        trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer="threads")(
            delayed(_parallel_build_trees)(
                tree=t,
                bootstrap=False,
                X=X[tree_idxs == i],
                y=y[tree_idxs == i],
                sample_weight=None,
                tree_idx=i,
                n_trees=len(trees),
                verbose=self.verbose,
            )
            for i, t in enumerate(trees)
        )

        # Collect newly grown trees
        self.estimators_.extend(trees)

        self.accountant.spend(self.epsilon, 0)

        return self


class DecisionTreeClassifier(skDecisionTreeClassifier, DiffprivlibMixin):
    r"""Decision Tree Classifier with differential privacy.

    This class implements the base differentially private decision tree classifier
    for the Random Forest classifier algorithm. Not meant to be used separately.

    Parameters
    ----------
    max_depth : int, default: 5
        The maximum depth of the tree.

    epsilon : float, default: 1.0
        Privacy parameter :math:`\epsilon`.

    bounds : tuple, optional
        Bounds of the data, provided as a tuple of the form (min, max).  `min` and `max` can either be scalars, covering
        the min/max of the entire data, or vectors with one entry per feature.  If not provided, the bounds are computed
        on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`.

    classes : array-like of shape (n_classes,), optional
        Array of class labels. If not provided, the classes will be read from the data when ``.fit()`` is first called,
        resulting in a :class:`.PrivacyLeakWarning`.

    random_state : int or RandomState, optional
        Controls the randomness of the estimator.  At each split, the feature to split on is chosen randomly, as is the
        threshold at which to split.  The classification label at each leaf is then randomised, subject to differential
        privacy constraints. To obtain a deterministic behaviour during randomisation, ``random_state`` has to be fixed
        to an integer.

    accountant : BudgetAccountant, optional
        Accountant to keep track of privacy budget.

    Attributes
    ----------
    n_features_in_: int
        The number of features when fit is performed.

    n_classes_: int
        The number of classes.

    classes_: array of shape (n_classes, )
        The class labels.

    """

    _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints(
        skDecisionTreeClassifier, "max_depth", "random_state")

    def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_state=None, accountant=None,
                 criterion=None, **unused_args):
        # Only `max_depth` and `random_state` are forwarded to sklearn; every other sklearn option is passed as None.
        super().__init__(
            criterion=None,
            splitter=None,
            max_depth=max_depth,
            min_samples_split=None,
            min_samples_leaf=None,
            min_weight_fraction_leaf=None,
            max_features=None,
            random_state=random_state,
            max_leaf_nodes=None,
            min_impurity_decrease=None
        )
        self.epsilon = epsilon
        self.bounds = bounds
        self.classes = classes
        self.accountant = BudgetAccountant.load_default(accountant)

        # `criterion` is accepted in the signature but never used, so a non-None value is reported as unused.
        if criterion is not None:
            unused_args['criterion'] = criterion

        self._warn_unused_args(unused_args)

    def fit(self, X, y, sample_weight=None, check_input=True):
        """Build a differentially-private decision tree classifier from the training set (X, y).

        The tree structure is built from random features and thresholds, then the leaf labels are fitted on the
        data and the result is loaded into a :class:`sklearn.tree._tree.Tree`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples. Internally, it will be converted to ``dtype=np.float32``.

        y : array-like of shape (n_samples,)
            The target values (class labels) as integers or strings.

        sample_weight : ignored
            Ignored by diffprivlib. Present for consistency with sklearn API.

        check_input : bool, default=True
            Allow to bypass several input checking. Don't use this parameter unless you know what you do.

        Returns
        -------
        self : DecisionTreeClassifier
            Fitted estimator.

        """
        self._validate_params()
        random_state = check_random_state(self.random_state)

        self.accountant.check(self.epsilon, 0)

        if sample_weight is not None:
            self._warn_unused_args("sample_weight")

        if check_input:
            X, y = validate_data(self, X, y, multi_output=False)
        self.n_outputs_ = 1

        if self.bounds is None:
            warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
            self.bounds = (np.min(X, axis=0), np.max(X, axis=0))
        self.bounds = self._check_bounds(self.bounds, shape=X.shape[1])
        X = self._clip_to_bounds(X, self.bounds)

        if self.classes is None:
            warnings.warn("Classes have not been specified and will be calculated on the data provided. This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify the prediction classes for model.", PrivacyLeakWarning)
            self.classes = np.unique(y)
        self.classes_ = np.ravel(self.classes)
        self.n_classes_ = len(self.classes_)
        self.n_features_in_ = X.shape[1]

        # Build and fit the _FittingTree
        fitting_tree = _FittingTree(self.max_depth, self.n_features_in_, self.classes_, self.epsilon, self.bounds,
                                    random_state)
        fitting_tree.build()
        fitting_tree.fit(X, y)

        # Load params from _FittingTree into sklearn.Tree
        d = fitting_tree.__getstate__()
        tree = Tree(self.n_features_in_, np.array([self.n_classes_]), self.n_outputs_)
        tree.__setstate__(d)
        self.tree_ = tree

        self.accountant.spend(self.epsilon, 0)

        return self

    def _fit(self, X, y, sample_weight=None, check_input=True, missing_values_in_feature_mask=None):
        # Forwards to `fit`; `missing_values_in_feature_mask` is accepted but not used.
        self.fit(X, y, sample_weight=sample_weight, check_input=check_input)

        return self

    @property
    def n_features_(self):
        # Alias for `n_features_in_`.
        return self.n_features_in_

    def _more_tags(self):
        # No additional estimator tags are declared.
        return {}
class _FittingTree(DiffprivlibMixin):
    r"""Array-based representation of a binary decision tree, trained with differential privacy.

    This tree mimics the architecture of the corresponding Tree from sklearn.tree.tree_, but without many methods given
    in Tree. The purpose of _FittingTree is to fit the parameters of the model, and have those parameters passed to
    Tree (using _FittingTree.__getstate__() and Tree.__setstate__()), to be used for prediction.

    Parameters
    ----------
    max_depth : int
        The maximum depth of the tree.

    n_features : int
        The number of features of the training dataset.

    classes : array-like of shape (n_classes,)
        The classes of the training dataset.

    epsilon : float
        Privacy parameter :math:`\epsilon`.

    bounds : tuple
        Bounds of the data, provided as a tuple of the form (min, max).

    random_state : RandomState
        Controls the randomness of the building and training process: the feature to split at each node, the threshold
        to split at and the randomisation of the label at each leaf.

    """
    _TREE_LEAF = -1
    _TREE_UNDEFINED = -2
    StackNode = namedtuple("StackNode", ["parent", "is_left", "depth", "bounds"])

    def __init__(self, max_depth, n_features, classes, epsilon, bounds, random_state):
        # The tree starts empty; `build()` fills `nodes` and `fit()` creates `values_`.
        self.nodes = []
        self.node_count = 0

        self.max_depth = max_depth
        self.n_features = n_features
        self.classes = classes
        self.epsilon = epsilon
        self.bounds = bounds
        self.random_state = random_state

    def __getstate__(self):
        """Get state of _FittingTree to feed into __setstate__ of sklearn.Tree"""
        node_records = np.array([tuple(node) for node in self.nodes], dtype=NODE_DTYPE)

        return {
            "max_depth": self.max_depth,
            "node_count": self.node_count,
            "nodes": node_records,
            "values": self.values_,
        }

    def build(self):
        """Build the decision tree using random feature selection and random thresholding."""
        pending = [self.StackNode(parent=self._TREE_UNDEFINED, is_left=False, depth=0, bounds=self.bounds)]

        while pending:
            entry = pending.pop()
            node_id = self.node_count
            lower, upper = self._check_bounds(entry.bounds, shape=self.n_features)

            # Link the new node into its parent (the root has no parent)
            if entry.parent != self._TREE_UNDEFINED:
                side = "left_child" if entry.is_left else "right_child"
                setattr(self.nodes[entry.parent], side, node_id)

            # At maximum depth the node becomes a leaf and nothing further is pushed
            if entry.depth >= self.max_depth:
                leaf = _Node(node_id, self._TREE_UNDEFINED, self._TREE_UNDEFINED)
                leaf.left_child = self._TREE_LEAF
                leaf.right_child = self._TREE_LEAF

                self.nodes.append(leaf)
                self.node_count += 1
                continue

            # Decision node: draw the feature first, then a threshold inside that feature's current bounds
            feature = self.random_state.randint(self.n_features)
            threshold = self.random_state.uniform(lower[feature], upper[feature])

            left_upper = upper.copy()
            left_upper[feature] = threshold
            right_lower = lower.copy()
            right_lower[feature] = threshold

            self.nodes.append(_Node(node_id, feature, threshold))
            self.node_count += 1

            child_depth = entry.depth + 1
            pending.append(self.StackNode(parent=node_id, is_left=True, depth=child_depth,
                                          bounds=(lower, left_upper)))
            pending.append(self.StackNode(parent=node_id, is_left=False, depth=child_depth,
                                          bounds=(right_lower, upper)))

        return self

    def fit(self, X, y):
        """Fit the tree to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        """
        if not self.nodes:
            raise ValueError("Fitting Tree must be built before calling fit().")

        sample_leaves = self.apply(X)
        n_classes = len(self.classes)
        values = np.zeros(shape=(self.node_count, 1, n_classes))

        # Leaves that received samples: choose the label with PermuteAndFlip over the per-class counts
        for leaf_id in np.unique(sample_leaves):
            labels_in_leaf = y[sample_leaves == leaf_id]
            class_counts = [np.sum(labels_in_leaf == label) for label in self.classes]

            mechanism = PermuteAndFlip(epsilon=self.epsilon, sensitivity=1, monotonic=True, utility=class_counts,
                                       random_state=self.random_state)
            values[leaf_id, 0, mechanism.randomise()] = 1

        # Leaves that received no samples: choose the label uniformly at random
        for node in self.nodes:
            is_leaf = node.left_child == self._TREE_LEAF

            if is_leaf and not values[node.node_id].sum():
                values[node.node_id, 0, self.random_state.randint(n_classes)] = 1

        self.values_ = values

        return self

    def apply(self, X):
        """Finds the terminal region (=leaf node) for each sample in X."""
        n_samples = X.shape[0]
        leaf_ids = np.zeros((n_samples,), dtype=int)

        for i in range(n_samples):
            current = self.nodes[0]

            # Descend until a leaf is reached; values equal to the threshold go left
            while current.left_child != self._TREE_LEAF:
                goes_left = X[i, current.feature] <= current.threshold
                current = self.nodes[current.left_child if goes_left else current.right_child]

            leaf_ids[i] = current.node_id

        return leaf_ids


class _Node:
    """Base storage structure for the nodes in a _FittingTree object."""
    def __init__(self, node_id, feature, threshold):
        self.node_id = node_id
        self.feature = feature
        self.threshold = threshold

        # Both children start at -1 until they are linked to a real node id
        self.left_child = -1
        self.right_child = -1

    def __iter__(self):
        """Defines parameters needed to populate NODE_DTYPE for Tree.__setstate__ using tuple(_Node)."""
        yield from (
            self.left_child,
            self.right_child,
            self.feature,
            self.threshold,
            0.0,  # Impurity
            0,  # n_node_samples
            0.0,  # weighted_n_node_samples
        )

        # remove branch when scikit-learn v1.3 is min requirement
        if len(NODE_DTYPE) > 7:
            yield False
class KMeans(sk_cluster.KMeans, DiffprivlibMixin):
    r"""K-Means clustering with differential privacy.

    Implements the DPLloyd approach presented in [SCL16]_, leveraging the :class:`sklearn.cluster.KMeans` class for full
    integration with Scikit Learn.

    Parameters
    ----------
    n_clusters : int, default: 8
        The number of clusters to form as well as the number of centroids to generate.

    epsilon : float, default: 1.0
        Privacy parameter :math:`\epsilon`.

    bounds : tuple, optional
        Bounds of the data, provided as a tuple of the form (min, max).  `min` and `max` can either be scalars, covering
        the min/max of the entire data, or vectors with one entry per feature.  If not provided, the bounds are computed
        on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`.

    random_state : int or RandomState, optional
        Controls the randomness of the model.  To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    accountant : BudgetAccountant, optional
        Accountant to keep track of privacy budget.

    Attributes
    ----------
    cluster_centers_ : array, [n_clusters, n_features]
        Coordinates of cluster centers. If the algorithm stops before fully converging, these will not be consistent
        with ``labels_``.

    labels_ : array, [n_samples]
        Index of the closest cluster center for each point.

    inertia_ : float
        Sum of squared distances of samples to their closest cluster center.

    n_iter_ : int
        Number of iterations run.

    References
    ----------
    .. [SCL16] Su, Dong, Jianneng Cao, Ninghui Li, Elisa Bertino, and Hongxia Jin. "Differentially private k-means
        clustering." In Proceedings of the sixth ACM conference on data and application security and privacy, pp. 26-37.
        ACM, 2016.

    """

    _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints(
        sk_cluster.KMeans, "n_clusters", "random_state")

    def __init__(self, n_clusters=8, *, epsilon=1.0, bounds=None, random_state=None, accountant=None, **unused_args):
        super().__init__(n_clusters=n_clusters, random_state=random_state)

        self.epsilon = epsilon
        self.bounds = bounds
        self.accountant = BudgetAccountant.load_default(accountant)

        self._warn_unused_args(unused_args)

        # Fitted state; populated by `fit()` and `_init_centers()`.
        self.cluster_centers_ = None
        self.bounds_processed = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = None
        self._n_threads = 1

    def fit(self, X, y=None, sample_weight=None):
        """Computes k-means clustering with differential privacy.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training instances to cluster.

        y : Ignored
            not used, present here for API consistency by convention.

        sample_weight : ignored
            Ignored by diffprivlib. Present for consistency with sklearn API.

        Returns
        -------
        self : class

        Raises
        ------
        ValueError
            If `X` contains fewer samples than ``n_clusters``.

        """
        self._validate_params()
        self.accountant.check(self.epsilon, 0)

        if sample_weight is not None:
            self._warn_unused_args("sample_weight")

        del y

        random_state = check_random_state(self.random_state)

        X = validate_data(self, X, accept_sparse=False, dtype=[np.float64, np.float32])
        n_samples, n_dims = X.shape

        if n_samples < self.n_clusters:
            raise ValueError(f"n_samples={n_samples} should be >= n_clusters={self.n_clusters}")

        iters = self._calc_iters(n_dims, n_samples)

        if self.bounds is None:
            warnings.warn("Bounds have not been specified and will be calculated on the data provided.  This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify `bounds` for each dimension.", PrivacyLeakWarning)
            self.bounds = (np.min(X, axis=0), np.max(X, axis=0))

        self.bounds = self._check_bounds(self.bounds, n_dims, min_separation=1e-5)
        X = self._clip_to_bounds(X, self.bounds)

        centers = self._init_centers(n_dims, random_state=random_state)
        labels = None
        distances = None

        # Run _update_centers first to ensure consistency of `labels` and `centers`, since convergence unlikely.
        # The extra first pass (index -1) only assigns labels, so the centers are updated exactly `iters` times.
        for _ in range(-1, iters):
            if labels is not None:
                centers = self._update_centers(X, centers=centers, labels=labels, dims=n_dims, total_iters=iters,
                                               random_state=random_state)

            distances, labels = self._distances_labels(X, centers)

        self.cluster_centers_ = centers
        self.labels_ = labels
        self.inertia_ = distances[np.arange(len(labels)), labels].sum()
        self.n_iter_ = iters

        self.accountant.spend(self.epsilon, 0)

        return self

    def _init_centers(self, dims, random_state):
        """Draw initial cluster centers at random inside the bounds, keeping them apart from each other.

        Centers are sampled so that each is at least ``cluster_proximity`` away from the bounds and at least
        ``2 * cluster_proximity`` away from every center already accepted.  If 100 consecutive draws are rejected,
        the required proximity is halved and the search restarts.

        Returns the array of centers, shape (n_clusters, dims), or ``None`` if the proximity reaches zero before
        all centers could be placed.
        """
        if self.bounds_processed is None:
            bounds_processed = np.zeros(shape=(dims, 2))

            # Column 0 holds the width of each dimension, column 1 its lower bound
            for dim in range(dims):
                lower = self.bounds[0][dim]
                upper = self.bounds[1][dim]

                bounds_processed[dim, :] = [upper - lower, lower]

            self.bounds_processed = bounds_processed

        cluster_proximity = np.min(self.bounds_processed[:, 0]) / 2.0

        while cluster_proximity > 0:
            centers = np.zeros(shape=(self.n_clusters, dims))
            cluster, retry = 0, 0

            while retry < 100:
                if cluster >= self.n_clusters:
                    break

                temp_center = random_state.random(dims) * (self.bounds_processed[:, 0] - 2 * cluster_proximity) + \
                    self.bounds_processed[:, 1] + cluster_proximity

                if cluster == 0:
                    centers[0, :] = temp_center
                    cluster += 1
                    continue

                min_distance = ((centers[:cluster, :] - temp_center) ** 2).sum(axis=1).min()

                if np.sqrt(min_distance) >= 2 * cluster_proximity:
                    centers[cluster, :] = temp_center
                    cluster += 1
                    retry = 0
                else:
                    retry += 1

            if cluster >= self.n_clusters:
                return centers

            cluster_proximity /= 2.0

        return None

    def _distances_labels(self, X, centers):
        """Return the squared distance of every sample to every center, and the index of each sample's nearest one."""
        distances = np.zeros((X.shape[0], self.n_clusters))

        for cluster in range(self.n_clusters):
            distances[:, cluster] = ((X - centers[cluster, :]) ** 2).sum(axis=1)

        labels = np.argmin(distances, axis=1)
        return distances, labels

    def _update_centers(self, X, centers, labels, dims, total_iters, random_state):
        """Updates the centers of the KMeans algorithm for the current iteration, while satisfying differential
        privacy.

        Differential privacy is satisfied by adding (integer-valued, using :class:`.GeometricFolded`) random noise to
        the count of nearest neighbours to the previous cluster centers, and adding (real-valued, using
        :class:`.LaplaceBoundedDomain`) random noise to the sum of values per dimension.

        `centers` is modified in place and returned.  Clusters with no assigned samples keep their previous center.

        """
        epsilon_0, epsilon_i = self._split_epsilon(dims, total_iters)
        geometric_mech = GeometricFolded(epsilon=epsilon_0, sensitivity=1, lower=0.5, upper=float("inf"),
                                         random_state=random_state)

        for cluster in range(self.n_clusters):
            in_cluster = labels == cluster

            if not in_cluster.any():
                continue

            cluster_count = np.count_nonzero(in_cluster)
            noisy_count = geometric_mech.randomise(cluster_count)

            cluster_sum = np.sum(X[in_cluster], axis=0)
            noisy_sum = np.zeros_like(cluster_sum)

            for i in range(dims):
                # The noisy sum is kept within what `noisy_count` points inside the bounds could add up to
                laplace_mech = LaplaceBoundedDomain(epsilon=epsilon_i,
                                                    sensitivity=self.bounds[1][i] - self.bounds[0][i],
                                                    lower=noisy_count * self.bounds[0][i],
                                                    upper=noisy_count * self.bounds[1][i], random_state=random_state)
                noisy_sum[i] = laplace_mech.randomise(cluster_sum[i])

            centers[cluster, :] = noisy_sum / noisy_count

        return centers

    def _split_epsilon(self, dims, total_iters, rho=0.225):
        """Split epsilon between sum perturbation and count perturbation, as proposed by Su et al.

        One iteration uses `epsilon_0` once for the cluster count and `epsilon_i` once per dimension for the sums,
        so the values returned satisfy ``epsilon_0 + dims * epsilon_i == self.epsilon / total_iters``.

        Parameters
        ----------
        dims : int
            Number of dimensions to split `epsilon` across.

        total_iters : int
            Total number of iterations to split `epsilon` across.

        rho : float, default: 0.225
            Coordinate normalisation factor.

        Returns
        -------
        epsilon_0 : float
            The epsilon value for satisfying differential privacy on the count of a cluster.

        epsilon_i : float
            The epsilon value for satisfying differential privacy on each dimension of the center of a cluster.

        """
        # Relative weights of the two budgets, before normalisation
        epsilon_i = 1
        epsilon_0 = np.cbrt(4 * dims * rho ** 2)

        normaliser = self.epsilon / total_iters / (epsilon_i * dims + epsilon_0)

        # The order must match the documented return and the `epsilon_0, epsilon_i = ...` unpacking in
        # `_update_centers`; returning them swapped would give the count the per-dimension budget and vice versa.
        return epsilon_0 * normaliser, epsilon_i * normaliser

    def _calc_iters(self, n_dims, n_samples, rho=0.225):
        """Calculate the number of iterations to allow for the KMeans algorithm.

        The result is ``self.epsilon / epsilon_m`` clamped to the range [2, 7] and truncated to an integer.
        """

        epsilon_m = np.sqrt(500 * (self.n_clusters ** 3) / (n_samples ** 2) *
                            (n_dims + np.cbrt(4 * n_dims * (rho ** 2))) ** 3)

        iters = max(min(self.epsilon / epsilon_m, 7), 2)

        return int(iters)
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# New BSD License +# +# Copyright (c) 2007–2019 The scikit-learn developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# c. Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or +# promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +""" +Linear Regression with differential privacy +""" +import warnings + +import numpy as np +import sklearn.linear_model as sk_lr +from scipy.optimize import minimize +from sklearn.utils import check_array +from sklearn.utils.validation import FLOAT_DTYPES + +# TODO: remove when sklearn 1.6 a min req +try: + from sklearn.utils.validation import validate_data +except ImportError: + from sklearn.base import BaseEstimator + validate_data = BaseEstimator._validate_data + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import Laplace, LaplaceFolded +from diffprivlib.tools import mean +from diffprivlib.utils import warn_unused_args, PrivacyLeakWarning, check_random_state +from diffprivlib.validation import check_bounds, clip_to_bounds, DiffprivlibMixin + + +# noinspection PyPep8Naming +def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True, + random_state=None, **unused_args): + warn_unused_args(unused_args) + + random_state = check_random_state(random_state) + + if check_input: + X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES) + elif copy: + X = X.copy(order='K') + + y = np.asarray(y, dtype=X.dtype) + X_scale = np.ones(X.shape[1], dtype=X.dtype) + + if fit_intercept: + bounds_X = check_bounds(bounds_X, X.shape[1]) + bounds_y = check_bounds(bounds_y, y.shape[1] if y.ndim > 1 else 1) + + X = clip_to_bounds(X, bounds_X) + y = clip_to_bounds(y, 
def _construct_regression_obj(X, y, bounds_X, bounds_y, epsilon, alpha, random_state):
    """Build noisy least-squares objective functions, one per target.

    The coefficients of the polynomial objective (``sum(y**2)``, ``X^T y`` and ``X^T X``) are computed from the
    data and each one is randomised independently before the objective is assembled. Squared terms are randomised
    with :class:`.LaplaceFolded` (lower bound 0), cross terms with :class:`.Laplace`.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Feature matrix.

    y : ndarray, shape (n_samples,) or (n_samples, n_targets)
        Targets. A 1-D array is treated as a single target column.

    bounds_X : tuple
        ``(lower, upper)``, each indexable by feature. Used to compute the sensitivity of the coefficients.

    bounds_y : tuple
        ``(lower, upper)``, each indexable by target. Used to compute the sensitivity of the coefficients.

    epsilon : float
        Privacy budget to divide between the randomised coefficients.

    alpha : float
        Weight of the ``sum(omega ** 2)`` penalty added to every objective.

    random_state : RandomState
        Passed to every mechanism that is created.

    Returns
    -------
    output : tuple of callable
        One function per target. Each takes a coefficient vector ``omega`` and returns ``(value, gradient)`` of the
        noisy objective.

    noisy_coefs : tuple of ndarray
        The randomised 0th-, 1st- and 2nd-degree coefficients, of shapes ``(n_targets,)``,
        ``(n_features, n_targets)`` and ``(n_features, n_features)``.

    """
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    n_features = X.shape[1]
    n_targets = y.shape[1]

    # NOTE(review): the denominator counts a single 0th-degree coefficient, but `n_targets` of them are randomised
    # below. Confirm this is the intended budget split for multi-target fits.
    local_epsilon = epsilon / (1 + n_targets * n_features + n_features * (n_features + 1) / 2)
    coefs = ((y ** 2).sum(axis=0), np.einsum('ij,ik->jk', X, y), np.einsum('ij,ik', X, X))

    # Only the aggregated coefficients are needed from here on
    del X, y

    def get_max_sensitivity(y_lower, y_upper, x_lower, x_upper):
        # Range of the product of two bounded values: the extremes are attained at the corners of the box
        corners = [y_lower * x_lower, y_lower * x_upper, y_upper * x_lower, y_upper * x_upper]
        return np.max(corners) - np.min(corners)

    # Randomise 0th-degree monomial coefficients
    mono_coef_0 = np.zeros(n_targets)

    for i in range(n_targets):
        sensitivity = np.abs([bounds_y[0][i], bounds_y[1][i]]).max() ** 2
        mech = LaplaceFolded(epsilon=local_epsilon, sensitivity=sensitivity, lower=0, upper=float("inf"),
                             random_state=random_state)
        mono_coef_0[i] = mech.randomise(coefs[0][i])

    # Randomise 1st-degree monomial coefficients
    mono_coef_1 = np.zeros((n_features, n_targets))

    for i in range(n_targets):
        for j in range(n_features):
            sensitivity = get_max_sensitivity(bounds_y[0][i], bounds_y[1][i], bounds_X[0][j], bounds_X[1][j])
            mech = Laplace(epsilon=local_epsilon, sensitivity=sensitivity, random_state=random_state)
            mono_coef_1[j, i] = mech.randomise(coefs[1][j, i])

    # Randomise 2nd-degree monomial coefficients
    mono_coef_2 = np.zeros((n_features, n_features))

    for i in range(n_features):
        # A squared feature is bounded by the larger of |lower| and |upper|, so BOTH bounds must be considered
        # (mirrors the 0th-degree case above). Using the lower bound twice under-estimates the sensitivity
        # whenever |upper| > |lower|, which adds too little noise.
        sensitivity = np.max(np.abs([bounds_X[0][i], bounds_X[1][i]])) ** 2
        mech = LaplaceFolded(epsilon=local_epsilon, sensitivity=sensitivity, lower=0, upper=float("inf"),
                             random_state=random_state)
        mono_coef_2[i, i] = mech.randomise(coefs[2][i, i])

        for j in range(i + 1, n_features):
            sensitivity = get_max_sensitivity(bounds_X[0][i], bounds_X[1][i], bounds_X[0][j], bounds_X[1][j])
            mech = Laplace(epsilon=local_epsilon, sensitivity=sensitivity, random_state=random_state)
            mono_coef_2[i, j] = mech.randomise(coefs[2][i, j])
            mono_coef_2[j, i] = mono_coef_2[i, j]  # Enforce symmetry

    del coefs
    noisy_coefs = (mono_coef_0, mono_coef_1, mono_coef_2)

    def obj(idx):
        # Bind `idx` through a factory so each returned closure refers to its own target
        def inner_obj(omega):
            func = noisy_coefs[0][idx]
            func -= 2 * np.dot(noisy_coefs[1][:, idx], omega)
            func += np.multiply(noisy_coefs[2], np.tensordot(omega, omega, axes=0)).sum()
            func += alpha * (omega ** 2).sum()

            grad = - 2 * noisy_coefs[1][:, idx] + 2 * np.matmul(noisy_coefs[2], omega) + 2 * omega * alpha

            return func, grad

        return inner_obj

    output = tuple(obj(i) for i in range(n_targets))

    return output, noisy_coefs
`min` and `max` can either be scalars, covering + the min/max of the entire data, or vectors with one entry per feature. If not provided, the bounds are computed + on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`. + + bounds_y : tuple + Same as `bounds_X`, but for the training label set `y`. + + fit_intercept : bool, default: True + Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations + (i.e. data is expected to be centered). + + copy_X : bool, default: True + If True, X will be copied; else, it may be overwritten. + + random_state : int or RandomState, optional + Controls the randomness of the model. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Attributes + ---------- + coef_ : array of shape (n_features, ) or (n_targets, n_features) + Estimated coefficients for the linear regression problem. If multiple targets are passed during the fit (y 2D), + this is a 2D array of shape (n_targets, n_features), while if only one target is passed, this is a 1D array of + length n_features. + + intercept_ : float or array of shape of (n_targets,) + Independent term in the linear model. Set to 0.0 if `fit_intercept = False`. + + References + ---------- + .. [ZZX12] Zhang, Jun, Zhenjie Zhang, Xiaokui Xiao, Yin Yang, and Marianne Winslett. "Functional mechanism: + regression analysis under differential privacy." arXiv preprint arXiv:1208.0219 (2012). 
+ + """ + + _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints( + sk_lr.LinearRegression, "fit_intercept", "copy_X") + + def __init__(self, *, epsilon=1.0, bounds_X=None, bounds_y=None, fit_intercept=True, copy_X=True, random_state=None, + accountant=None, **unused_args): + super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, n_jobs=None) + + self.epsilon = epsilon + self.bounds_X = bounds_X + self.bounds_y = bounds_y + self.random_state = random_state + self.accountant = BudgetAccountant.load_default(accountant) + + self._warn_unused_args(unused_args) + + def fit(self, X, y, sample_weight=None): + """ + Fit linear model. + + Parameters + ---------- + X : array-like or sparse matrix, shape (n_samples, n_features) + Training data + + y : array_like, shape (n_samples, n_targets) + Target values. Will be cast to X's dtype if necessary + + sample_weight : ignored + Ignored by diffprivlib. Present for consistency with sklearn API. + + Returns + ------- + self : returns an instance of self. + + """ + self._validate_params() + self.accountant.check(self.epsilon, 0) + + if sample_weight is not None: + self._warn_unused_args("sample_weight") + + random_state = check_random_state(self.random_state) + + X, y = validate_data(self, X, y, accept_sparse=False, y_numeric=True, multi_output=True) + + if self.bounds_X is None or self.bounds_y is None: + warnings.warn( + "Bounds parameters haven't been specified, so falling back to determining bounds from the " + "data.\n" + "This will result in additional privacy leakage. 
To ensure differential privacy with no " + "additional privacy loss, specify `bounds_X` and `bounds_y`.", + PrivacyLeakWarning) + + if self.bounds_X is None: + self.bounds_X = (np.min(X, axis=0), np.max(X, axis=0)) + if self.bounds_y is None: + self.bounds_y = (np.min(y, axis=0), np.max(y, axis=0)) + + # pylint: disable=no-member + self.bounds_X = self._check_bounds(self.bounds_X, X.shape[1]) + self.bounds_y = self._check_bounds(self.bounds_y, y.shape[1] if y.ndim > 1 else 1) + + n_features = X.shape[1] + n_targets = y.shape[1] if y.ndim > 1 else 1 + epsilon_intercept_scale = 1 / (n_features + 1) if self.fit_intercept else 0 + + X, y, X_offset, y_offset, X_scale = self._preprocess_data( + X, y, fit_intercept=self.fit_intercept, bounds_X=self.bounds_X, bounds_y=self.bounds_y, + epsilon=self.epsilon * epsilon_intercept_scale, copy=self.copy_X, random_state=random_state) + + bounds_X = (self.bounds_X[0] - X_offset, self.bounds_X[1] - X_offset) + bounds_y = (self.bounds_y[0] - y_offset, self.bounds_y[1] - y_offset) + + objs, obj_coefs = _construct_regression_obj( + X, y, bounds_X, bounds_y, epsilon=self.epsilon * (1 - epsilon_intercept_scale), alpha=0, + random_state=random_state) + coef = np.zeros((n_features, n_targets)) + + for i, obj in enumerate(objs): + opt_result = minimize(obj, np.zeros(n_features), jac=True) + coef[:, i] = opt_result.x + + self.coef_ = coef.T + self._obj_coefs = obj_coefs + + if y.ndim == 1: + self.coef_ = np.ravel(self.coef_) + self._set_intercept(X_offset, y_offset, X_scale) + + self.accountant.spend(self.epsilon, 0) + + return self + + _preprocess_data = staticmethod(_preprocess_data) diff --git a/src/diffprivlib/models/logistic_regression.py b/src/diffprivlib/models/logistic_regression.py new file mode 100644 index 0000000..1d3055a --- /dev/null +++ b/src/diffprivlib/models/logistic_regression.py @@ -0,0 +1,414 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person 
obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# New BSD License +# +# Copyright (c) 2007–2019 The scikit-learn developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# c. Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or +# promote products derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +""" +Logistic Regression classifier satisfying differential privacy. +""" +import numbers +import warnings + +import numpy as np +from joblib import delayed, Parallel +from scipy import optimize +from sklearn.exceptions import ConvergenceWarning +from sklearn import linear_model +from sklearn.utils import check_array, check_consistent_length +from sklearn.utils.multiclass import check_classification_targets +from sklearn.linear_model._linear_loss import LinearModelLoss +from sklearn._loss import HalfBinomialLoss + +# TODO: remove when sklearn v1.6 a min req +try: + from sklearn.utils.validation import validate_data +except ImportError: + from sklearn.base import BaseEstimator + validate_data = BaseEstimator._validate_data + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import Vector +from diffprivlib.utils import PrivacyLeakWarning, warn_unused_args, check_random_state +from diffprivlib.validation import DiffprivlibMixin + + +class LogisticRegression(linear_model.LogisticRegression, DiffprivlibMixin): + r"""Logistic Regression (aka logit, MaxEnt) classifier with differential privacy. 
+ + This class implements regularised logistic regression using :ref:`Scipy's L-BFGS-B algorithm + `. :math:`\epsilon`-Differential privacy is achieved relative to the maximum norm + of the data, as determined by `data_norm`, by the :class:`.Vector` mechanism, which adds a Laplace-distributed + random vector to the objective. Adapted from the work presented in [CMS11]_. + + This class is a child of :obj:`sklearn.linear_model.LogisticRegression`, with amendments to allow for the + implementation of differential privacy. Some parameters of `Scikit Learn`'s model have therefore had to be fixed, + including: + + - The only permitted solver is 'lbfgs'. Specifying the ``solver`` option will result in a warning. + - Consequently, the only permitted penalty is 'l2'. Specifying the ``penalty`` option will result in a warning. + - In the multiclass case, only the one-vs-rest (OvR) scheme is permitted. Specifying the ``multi_class`` option + will result in a warning. + + Parameters + ---------- + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + data_norm : float, optional + The max l2 norm of any row of the data. This defines the spread of data that will be protected by + differential privacy. + + If not specified, the max norm is taken from the data when ``.fit()`` is first called, but will result in a + :class:`.PrivacyLeakWarning`, as it reveals information about the data. To preserve differential privacy fully, + `data_norm` should be selected independently of the data, i.e. with domain knowledge. + + tol : float, default: 1e-4 + Tolerance for stopping criteria. + + C : float, default: 1.0 + Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values + specify stronger regularization. + + fit_intercept : bool, default: True + Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function. 
+ + max_iter : int, default: 100 + Maximum number of iterations taken for the solver to converge. For smaller `epsilon` (more noise), `max_iter` + may need to be increased. + + verbose : int, default: 0 + Set to any positive number for verbosity. + + warm_start : bool, default: False + When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase + the previous solution. + + n_jobs : int, optional + Number of CPU cores used when parallelising over classes. ``None`` means 1 unless in a context. ``-1`` means + using all processors. + + random_state : int or RandomState, optional + Controls the randomness of the model. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Attributes + ---------- + classes_ : array, shape (n_classes, ) + A list of class labels known to the classifier. + + coef_ : array, shape (1, n_features) or (n_classes, n_features) + Coefficient of the features in the decision function. + + `coef_` is of shape (1, n_features) when the given problem is binary. + + intercept_ : array, shape (1,) or (n_classes,) + Intercept (a.k.a. bias) added to the decision function. + + If `fit_intercept` is set to False, the intercept is set to zero. `intercept_` is of shape (1,) when the + given problem is binary. + + n_iter_ : array, shape (n_classes,) or (1, ) + Actual number of iterations for all classes. If binary, it returns only 1 element. 
+ + Examples + -------- + >>> from sklearn.datasets import load_iris + >>> from diffprivlib.models import LogisticRegression + >>> X, y = load_iris(return_X_y=True) + >>> clf = LogisticRegression(data_norm=12, epsilon=2).fit(X, y) + >>> clf.predict(X[:2, :]) + array([0, 0]) + >>> clf.predict_proba(X[:2, :]) + array([[7.35362932e-01, 2.16667422e-14, 2.64637068e-01], + [9.08384378e-01, 3.47767052e-13, 9.16156215e-02]]) + >>> clf.score(X, y) + 0.5266666666666666 + + See also + -------- + sklearn.linear_model.LogisticRegression : The implementation of logistic regression in scikit-learn, upon which this + implementation is built. + .Vector : The mechanism used by the model to achieve differential privacy. + + References + ---------- + .. [CMS11] Chaudhuri, Kamalika, Claire Monteleoni, and Anand D. Sarwate. "Differentially private empirical risk + minimization." Journal of Machine Learning Research 12, no. Mar (2011): 1069-1109. + + """ + + _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints( + linear_model.LogisticRegression, "tol", "C", "fit_intercept", "max_iter", "verbose", "warm_start", "n_jobs", + "random_state") + + def __init__(self, *, epsilon=1.0, data_norm=None, tol=1e-4, C=1.0, fit_intercept=True, max_iter=100, verbose=0, + warm_start=False, n_jobs=None, random_state=None, accountant=None, **unused_args): + super().__init__(penalty='l2', dual=False, tol=tol, C=C, fit_intercept=fit_intercept, intercept_scaling=1.0, + class_weight=None, random_state=random_state, solver='lbfgs', max_iter=max_iter, + verbose=verbose, warm_start=warm_start, n_jobs=n_jobs) + self.epsilon = epsilon + self.data_norm = data_norm + self.classes_ = None + self.accountant = BudgetAccountant.load_default(accountant) + + self._warn_unused_args(unused_args) + + # noinspection PyAttributeOutsideInit + def fit(self, X, y, sample_weight=None): + """Fit the model according to the given training data. 
+ + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Training vector, where n_samples is the number of samples and n_features is the number of features. + + y : array-like, shape (n_samples,) + Target vector relative to X. + + sample_weight : ignored + Ignored by diffprivlib. Present for consistency with sklearn API. + + Returns + ------- + self : class + + """ + self._validate_params() + self.accountant.check(self.epsilon, 0) + + if sample_weight is not None: + self._warn_unused_args("sample_weight") + + random_state = check_random_state(self.random_state) + + X, y = validate_data(self, X, y, accept_sparse='csr', dtype=float, order="C", + accept_large_sparse=True) + check_classification_targets(y) + self.classes_ = np.unique(y) + _, n_features = X.shape + + if self.data_norm is None: + warnings.warn("Data norm has not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. To ensure differential privacy and no additional " + "privacy leakage, specify `data_norm` at initialisation.", PrivacyLeakWarning) + self.data_norm = np.linalg.norm(X, axis=1).max() + + X = self._clip_to_norm(X, self.data_norm) + + n_classes = len(self.classes_) + classes_ = self.classes_ + if n_classes < 2: + raise ValueError("This solver needs samples of at least 2 classes in the data, but the data contains only " + f"one class: {classes_[0]}") + + if len(self.classes_) == 2: + n_classes = 1 + classes_ = classes_[1:] + + if self.warm_start: + warm_start_coef = getattr(self, 'coef_', None) + else: + warm_start_coef = None + if warm_start_coef is not None and self.fit_intercept: + warm_start_coef = np.append(warm_start_coef, self.intercept_[:, np.newaxis], axis=1) + + self.coef_ = [] + self.intercept_ = np.zeros(n_classes) + + if warm_start_coef is None: + warm_start_coef = [None] * n_classes + + path_func = delayed(_logistic_regression_path) + + fold_coefs_ = Parallel(n_jobs=self.n_jobs, 
def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, fit_intercept=True, max_iter=100,
                              tol=1e-4, verbose=0, coef=None, random_state=None, check_input=True, **unused_args):
    """Fit a differentially private binary logistic model for each regularisation value in `Cs`.

    The labels are reduced to "is `pos_class`" vs. "is not", the logistic loss is randomised with the
    :class:`.Vector` mechanism, and the noisy loss is minimised with Scipy's L-BFGS-B solver. The solution found
    for one value of `Cs` is used as the starting point for the next.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data.

    y : array-like, shape (n_samples,)
        Target labels.

    epsilon : float
        Privacy parameter for differential privacy.

    data_norm : float
        Max norm of the data for which differential privacy is satisfied.

    pos_class : int, optional
        The label treated as the positive class. If None, the problem must have at most two labels and the larger
        of the sorted labels is used.

    Cs : int | array-like, shape (n_cs,), default: 10
        Regularisation values to fit. An integer requests that many values, log-spaced between 1e-4 and 1e4.

    fit_intercept : bool, default: True
        Whether to fit an intercept. If True, each returned coefficient vector has one extra trailing entry.

    max_iter : int, default: 100
        Maximum number of iterations for the solver.

    tol : float, default: 1e-4
        Passed to the solver as ``pgtol``.

    verbose : int, default: 0
        Verbosity level, translated into the solver's ``iprint`` setting.

    coef : array-like, shape (n_features,) or (n_features + 1,), optional
        Initial value for the coefficients.

    random_state : int or RandomState, optional
        Controls the randomness of the model. To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    check_input : bool, default: True
        If False, the input arrays X and y will not be checked.

    Returns
    -------
    coefs : ndarray, shape (n_cs, n_features) or (n_cs, n_features + 1)
        Fitted coefficients, one row per value in `Cs`.

    Cs : ndarray
        The regularisation values that were used.

    n_iter : array, shape (n_cs,)
        Number of solver iterations for each value in `Cs`.

    """
    warn_unused_args(unused_args)
    random_state = check_random_state(random_state)

    # An integer is shorthand for "this many log-spaced values"
    if isinstance(Cs, numbers.Integral):
        Cs = np.logspace(-4, 4, int(Cs))

    # Including an intercept increases the data norm
    if fit_intercept:
        data_norm = np.sqrt(data_norm ** 2 + 1)

    # Input validation
    if check_input:
        X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=True)
        y = check_array(y, ensure_2d=False, dtype=None)
        check_consistent_length(X, y)

    n_samples, n_features = X.shape
    n_params = n_features + int(fit_intercept)

    observed_labels = np.unique(y)

    if pos_class is None:
        if observed_labels.size > 2:
            raise ValueError('To fit OvR, use the pos_class argument')
        # np.unique returns sorted labels, so index 1 is the larger one
        pos_class = observed_labels[1]

    unit_weights = np.ones(n_samples, dtype=X.dtype)

    # Starting point for the solver, and the one-vs-rest 0/1 target
    solution = np.zeros(n_params, dtype=X.dtype)
    is_positive = (y == pos_class)
    binary_target = np.ones(y.shape, dtype=X.dtype)
    binary_target[~is_positive] = 0.0

    if coef is not None:
        # The initial coefficients may be given with or without the intercept term
        if coef.size != n_features and coef.size != solution.size:
            raise ValueError(f"Initialization coef is of shape {coef.size}, expected shape {n_features} or "
                             f"{solution.size}")
        solution[:coef.size] = coef

    loss_and_gradient = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept).loss_gradient

    # Lookup tables translating `verbose` into the solver's `iprint`
    verbosity_thresholds = np.array([0, 1, 2, 3])
    iprint_choices = [-1, 50, 1, 100, 101]

    path = []
    iterations = np.zeros(len(Cs), dtype=np.int32)

    for position, C in enumerate(Cs):
        l2_reg_strength = 1.0 / (C * n_samples)

        mechanism = Vector(epsilon=epsilon, dimension=n_params, alpha=1.0 / C, function_sensitivity=0.25,
                           data_sensitivity=data_norm, n=n_samples, random_state=random_state)
        private_loss = mechanism.randomise(loss_and_gradient)

        solver_args = (X, binary_target, unit_weights, l2_reg_strength)
        iprint = iprint_choices[np.searchsorted(verbosity_thresholds, verbose)]

        # `solution` is overwritten, so the next value of C starts from this result
        solution, _, info = optimize.fmin_l_bfgs_b(private_loss, solution, fprime=None, factr=64,
                                                   args=solver_args, iprint=iprint, pgtol=tol, maxiter=max_iter)

        if info["warnflag"] == 1:
            warnings.warn("lbfgs failed to converge. Increase the number of iterations.", ConvergenceWarning)

        path.append(solution.copy())
        iterations[position] = info['nit']

    return np.array(path), np.array(Cs), iterations
+""" +Gaussian Naive Bayes classifier satisfying differential privacy +""" +import warnings + +import numpy as np +import sklearn.naive_bayes as sk_nb +from sklearn.utils.multiclass import _check_partial_fit_first_call + +# TODO: remove when sklearn 1.6 a min req +try: + from sklearn.utils.validation import validate_data +except ImportError: + from sklearn.base import BaseEstimator + validate_data = BaseEstimator._validate_data + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import LaplaceBoundedDomain, GeometricTruncated, LaplaceTruncated +from diffprivlib.utils import PrivacyLeakWarning, warn_unused_args, check_random_state +from diffprivlib.validation import DiffprivlibMixin + + +class GaussianNB(sk_nb.GaussianNB, DiffprivlibMixin): + r"""Gaussian Naive Bayes (GaussianNB) with differential privacy + + Inherits the :class:`sklearn.naive_bayes.GaussianNB` class from Scikit Learn and adds noise to satisfy differential + privacy to the learned means and variances. Adapted from the work presented in [VSB13]_. + + Parameters + ---------- + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon` for the model. + + bounds : tuple, optional + Bounds of the data, provided as a tuple of the form (min, max). `min` and `max` can either be scalars, covering + the min/max of the entire data, or vectors with one entry per feature. If not provided, the bounds are computed + on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`. + + priors : array-like, shape (n_classes,) + Prior probabilities of the classes. If specified the priors are not adjusted according to the data. + + var_smoothing : float, default: 1e-9 + Portion of the largest variance of all features that is added to variances for calculation stability. + + random_state : int or RandomState, optional + Controls the randomness of the model. 
To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Attributes + ---------- + class_prior_ : array, shape (n_classes,) + probability of each class. + + class_count_ : array, shape (n_classes,) + number of training samples observed in each class. + + theta_ : array, shape (n_classes, n_features) + mean of each feature per class + + var_ : array, shape (n_classes, n_features) + variance of each feature per class + + epsilon_ : float + absolute additive value to variances (unrelated to ``epsilon`` parameter for differential privacy) + + References + ---------- + .. [VSB13] Vaidya, Jaideep, Basit Shafiq, Anirban Basu, and Yuan Hong. "Differentially private naive bayes + classification." In 2013 IEEE/WIC/ACM International Joint Conferences on Web Intelligence (WI) and Intelligent + Agent Technologies (IAT), vol. 1, pp. 571-576. IEEE, 2013. + + """ + + def __init__(self, *, epsilon=1.0, bounds=None, priors=None, var_smoothing=1e-9, random_state=None, + accountant=None): + super().__init__(priors=priors, var_smoothing=var_smoothing) + + self.epsilon = epsilon + self.bounds = bounds + self.random_state = random_state + self.accountant = BudgetAccountant.load_default(accountant) + + def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): + self.accountant.check(self.epsilon, 0) + + if sample_weight is not None: + warn_unused_args("sample_weight") + + random_state = check_random_state(self.random_state) + + X, y = validate_data(self, X, y) + + if self.bounds is None: + warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. 
To ensure differential privacy and no additional " + "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning) + self.bounds = (np.min(X, axis=0), np.max(X, axis=0)) + + self.bounds = self._check_bounds(self.bounds, shape=X.shape[1]) + X = self._clip_to_bounds(X, self.bounds) + + self.epsilon_ = self.var_smoothing + + if _refit: + self.classes_ = None + + if _check_partial_fit_first_call(self, classes): + n_features = X.shape[1] + n_classes = len(self.classes_) + self.theta_ = np.zeros((n_classes, n_features)) + self.var_ = np.zeros((n_classes, n_features)) + + self.class_count_ = np.zeros(n_classes, dtype=np.float64) + + if self.priors is not None: + priors = np.asarray(self.priors) + + if len(priors) != n_classes: + raise ValueError("Number of priors must match number of classes.") + if not np.isclose(priors.sum(), 1.0): + raise ValueError("The sum of the priors should be 1.") + if (priors < 0).any(): + raise ValueError("Priors must be non-negative.") + self.class_prior_ = priors + else: + # Initialize the priors to zeros for each class + self.class_prior_ = np.zeros(len(self.classes_), dtype=np.float64) + else: + if X.shape[1] != self.theta_.shape[1]: + raise ValueError(f"Number of features {X.shape[1]} does not match previous " + f"data {self.theta_.shape[1]}.") + # Put epsilon back in each time + self.var_[:, :] -= self.epsilon_ + + classes = self.classes_ + + unique_y = np.unique(y) + unique_y_in_classes = np.isin(unique_y, classes) + + if not np.all(unique_y_in_classes): + raise ValueError(f"The target label(s) {unique_y[~unique_y_in_classes]} in y do not exist in the initial " + f"classes {classes}") + + noisy_class_counts = self._noisy_class_counts(y, random_state=random_state) + + for _i, y_i in enumerate(unique_y): + i = classes.searchsorted(y_i) + X_i = X[y == y_i, :] + + n_i = noisy_class_counts[_i] + + new_theta, new_var = self._update_mean_variance(self.class_count_[i], self.theta_[i, :], self.var_[i, :], + X_i, 
random_state=random_state, n_noisy=n_i) + + self.theta_[i, :] = new_theta + self.var_[i, :] = new_var + self.class_count_[i] += n_i + + self.var_[:, :] += self.epsilon_ + + # Update if only no priors is provided + if self.priors is None: + # Empirical prior, with sample_weight taken into account + self.class_prior_ = self.class_count_ / self.class_count_.sum() + + self.accountant.spend(self.epsilon, 0) + + return self + + def _update_mean_variance(self, n_past, mu, var, X, random_state, sample_weight=None, n_noisy=None): + """Compute online update of Gaussian mean and variance. + + Given starting sample count, mean, and variance, a new set of points X return the updated mean and variance. + (NB - each dimension (column) in X is treated as independent -- you get variance, not covariance). + + Can take scalar mean and variance, or vector mean and variance to simultaneously update a number of + independent Gaussians. + + See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque: + + http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf + + Parameters + ---------- + n_past : int + Number of samples represented in old mean and variance. If sample weights were given, this should contain + the sum of sample weights represented in old mean and variance. + + mu : array-like, shape (number of Gaussians,) + Means for Gaussians in original set. + + var : array-like, shape (number of Gaussians,) + Variances for Gaussians in original set. + + random_state : RandomState + Controls the randomness of the model. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + sample_weight : ignored + Ignored in diffprivlib. + + n_noisy : int, optional + Noisy count of the given class, satisfying differential privacy. + + Returns + ------- + total_mu : array-like, shape (number of Gaussians,) + Updated mean for each Gaussian over the combined set. 
+ + total_var : array-like, shape (number of Gaussians,) + Updated variance for each Gaussian over the combined set. + """ + if n_noisy is None: + warnings.warn("Noisy class count has not been specified and will be read from the data. To use this " + "method correctly, make sure it is run by the parent GaussianNB class.", PrivacyLeakWarning) + n_noisy = X.shape[0] + + if not n_noisy: + return mu, var + + if sample_weight is not None: + warn_unused_args("sample_weight") + + # Split epsilon between each feature, using 1/3 of total budget for each of mean and variance + n_features = X.shape[1] + local_epsilon = self.epsilon / 3 / n_features + + new_mu = np.zeros((n_features,)) + new_var = np.zeros((n_features,)) + + for feature in range(n_features): + temp_x = X[:, feature] + lower, upper = self.bounds[0][feature], self.bounds[1][feature] + local_diameter = upper - lower + + mech_mu = LaplaceTruncated(epsilon=local_epsilon, delta=0, sensitivity=local_diameter, + lower=lower * n_noisy, upper=upper * n_noisy, random_state=random_state) + _mu = mech_mu.randomise(temp_x.sum()) / n_noisy + + local_sq_sens = max(_mu - lower, upper - _mu) ** 2 + mech_var = LaplaceBoundedDomain(epsilon=local_epsilon, delta=0, sensitivity=local_sq_sens, lower=0, + upper=local_sq_sens * n_noisy, random_state=random_state) + _var = mech_var.randomise(((temp_x - _mu) ** 2).sum()) / n_noisy + + new_mu[feature] = _mu + new_var[feature] = _var + + if n_past == 0: + return new_mu, new_var + + n_total = float(n_past + n_noisy) + + # Combine mean of old and new data, taking into consideration + # (weighted) number of observations + total_mu = (n_noisy * new_mu + n_past * mu) / n_total + + # Combine variance of old and new data, taking into consideration + # (weighted) number of observations. 
This is achieved by combining + # the sum-of-squared-differences (ssd) + old_ssd = n_past * var + new_ssd = n_noisy * new_var + total_ssd = old_ssd + new_ssd + (n_past / float(n_noisy * n_total)) * (n_noisy * mu - n_noisy * new_mu) ** 2 + total_var = total_ssd / n_total + + return total_mu, total_var + + def _noisy_class_counts(self, y, random_state): + unique_y = np.unique(y) + n_total = y.shape[0] + + # Use 1/3 of total epsilon budget for getting noisy class counts + mech = GeometricTruncated(epsilon=self.epsilon / 3, sensitivity=1, lower=1, upper=n_total, + random_state=random_state) + noisy_counts = np.array([mech.randomise((y == y_i).sum()) for y_i in unique_y]) + + argsort = np.argsort(noisy_counts) + i = 0 if noisy_counts.sum() > n_total else len(unique_y) - 1 + + while np.sum(noisy_counts) != n_total: + _i = argsort[i] + sgn = np.sign(n_total - noisy_counts.sum()) + noisy_counts[_i] = np.clip(noisy_counts[_i] + sgn, 1, n_total) + + i = (i - sgn) % len(unique_y) + + return noisy_counts diff --git a/src/diffprivlib/models/pca.py b/src/diffprivlib/models/pca.py new file mode 100644 index 0000000..d656169 --- /dev/null +++ b/src/diffprivlib/models/pca.py @@ -0,0 +1,283 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# New BSD License +# +# Copyright (c) 2007–2019 The scikit-learn developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# c. Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or +# promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
# noinspection PyPep8Naming
class PCA(sk_pca.PCA, DiffprivlibMixin):
    r"""Principal component analysis (PCA) with differential privacy.

    This class is a child of :obj:`sklearn.decomposition.PCA`, with amendments to allow for the implementation of
    differential privacy as given in [IS16b]_.  Some parameters of `Scikit Learn`'s model have therefore had to be
    fixed, including:

        - The only permitted `svd_solver` is 'full'.  Specifying the ``svd_solver`` option will result in a warning;
        - The parameters ``tol`` and ``iterated_power`` are not applicable (as a consequence of fixing ``svd_solver =
          'full'``).

    Parameters
    ----------
    n_components : int, float, None or str
        Number of components to keep.
        If n_components is not set all components are kept::

            n_components == min(n_samples, n_features)

        If ``n_components == 'mle'``, Minka's MLE is used to guess the dimension.

        If ``0 < n_components < 1``, select the number of components such that the amount of variance that needs to be
        explained is greater than the percentage specified by n_components.

    epsilon : float, default: 1.0
        Privacy parameter :math:`\epsilon`.  If ``centered=False``, half of epsilon is used to calculate the
        differentially private mean to center the data prior to the calculation of principal components.

    data_norm : float, optional
        The max l2 norm of any row of the data.  This defines the spread of data that will be protected by
        differential privacy.

        If not specified, the max norm is taken from the data when ``.fit()`` is first called, but will result in a
        :class:`.PrivacyLeakWarning`, as it reveals information about the data.  To preserve differential privacy fully,
        `data_norm` should be selected independently of the data, i.e. with domain knowledge.

    centered : bool, default: False
        If False, the data will be centered before calculating the principal components.  This will be calculated with
        differential privacy, consuming privacy budget from epsilon.

        If True, the data is assumed to have been centered previously (e.g. using :class:`.StandardScaler`), and
        therefore will not require the consumption of privacy budget to calculate the mean.

    bounds : tuple, optional
        Bounds of the data, provided as a tuple of the form (min, max).  `min` and `max` can either be scalars, covering
        the min/max of the entire data, or vectors with one entry per feature.  If not provided, the bounds are computed
        on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`.

    copy : bool, default: True
        If False, data passed to fit are overwritten and running fit(X).transform(X) will not yield the expected
        results, use fit_transform(X) instead.

    whiten : bool, default: False
        When True (False by default) the `components_` vectors are multiplied by the square root of n_samples and
        then divided by the singular values to ensure uncorrelated outputs with unit component-wise variances.

        Whitening will remove some information from the transformed signal (the relative variance scales of the
        components) but can sometimes improve the predictive accuracy of the downstream estimators by making their
        data respect some hard-wired assumptions.

    random_state : int or RandomState, optional
        Controls the randomness of the model.  To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    accountant : BudgetAccountant, optional
        Accountant to keep track of privacy budget.

    Attributes
    ----------
    components_ : array, shape (n_components, n_features)
        Principal axes in feature space, representing the directions of maximum variance in the data.  The components
        are sorted by ``explained_variance_``.

    explained_variance_ : array, shape (n_components,)
        The amount of variance explained by each of the selected components.

        Equal to n_components largest eigenvalues of the covariance matrix of X.

    explained_variance_ratio_ : array, shape (n_components,)
        Percentage of variance explained by each of the selected components.

        If ``n_components`` is not set then all components are stored and the sum of the ratios is equal to 1.0.

    singular_values_ : array, shape (n_components,)
        The singular values corresponding to each of the selected components.  The singular values are equal to the
        2-norms of the ``n_components`` variables in the lower-dimensional space.

    mean_ : array, shape (n_features,)
        Per-feature mean used to center the data.  Computed with differential privacy when ``centered=False``, and
        set to zeros when ``centered=True``.

    n_components_ : int
        The estimated number of components.  When n_components is set to 'mle' or a number between 0 and 1 (with
        svd_solver == 'full') this number is estimated from input data.  Otherwise it equals the parameter
        n_components, or the lesser value of n_features and n_samples if n_components is None.

    n_features_in_ : int
        Number of features in the training data.

    n_samples_ : int
        Number of samples in the training data.

    noise_variance_ : float
        The estimated noise covariance following the Probabilistic PCA model from Tipping and Bishop 1999.  See
        "Pattern Recognition and Machine Learning" by C. Bishop, 12.2.1 p. 574 or
        http://www.miketipping.com/papers/met-mppca.pdf.  It is required to compute the estimated data covariance and
        score samples.

        Equal to the average of (min(n_features, n_samples) - n_components) smallest eigenvalues of the covariance
        matrix of X.

    See Also
    --------
    :obj:`sklearn.decomposition.PCA` : Scikit-learn implementation of Principal Component Analysis.

    References
    ----------
    .. [IS16b] Imtiaz, Hafiz, and Anand D. Sarwate. "Symmetric matrix perturbation for differentially-private principal
        component analysis." In 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),
        pp. 2339-2343. IEEE, 2016.
    """

    # Reuse the parent's constraints for the parameters that are still exposed by this class.
    _parameter_constraints = DiffprivlibMixin._copy_parameter_constraints(
        sk_pca.PCA, "n_components", "copy", "whiten", "random_state")

    def __init__(self, n_components=None, *, epsilon=1.0, data_norm=None, centered=False, bounds=None, copy=True,
                 whiten=False, random_state=None, accountant=None, **unused_args):
        # The solver-related options of the parent are pinned; any other keyword ends up in `unused_args` and only
        # triggers a warning.
        super().__init__(n_components=n_components, copy=copy, whiten=whiten, svd_solver='full', tol=0.0,
                         iterated_power='auto', random_state=random_state)
        self.centered = centered
        self.epsilon = epsilon
        self.data_norm = data_norm
        self.bounds = bounds
        self.accountant = BudgetAccountant.load_default(accountant)

        self._warn_unused_args(unused_args)

    def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
        """Fit the model on `X` using a differentially private eigendecomposition of the covariance matrix.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Training data.  It is centered in place with the (private) mean.

        n_components : int, float or 'mle'
            Requested number of components; see the class documentation.

        xp, is_array_api_compliant
            Not used by this implementation.  Presumably accepted so the signature stays compatible with the
            scikit-learn parent -- confirm against the supported scikit-learn versions.

        Returns
        -------
        tuple
            ``(u_mtx, sigma_vec[:n_components], u_mtx.T)``: the eigenvector matrix, the leading singular values and
            the transposed eigenvector matrix.
        """
        # Make sure the budget is available before touching the data; it is only spent at the very end.
        self.accountant.check(self.epsilon, 0)

        random_state = check_random_state(self.random_state)

        n_samples, n_features = X.shape

        if self.centered:
            # Data is assumed to be centered already, so no budget is spent on the mean.
            self.mean_ = np.zeros_like(np.mean(X, axis=0))
        else:
            if self.bounds is None:
                warnings.warn(
                    "Bounds parameter hasn't been specified, so falling back to determining bounds from the data.\n"
                    "This will result in additional privacy leakage.  To ensure differential privacy with no "
                    "additional privacy loss, specify `bounds` for each value returned by np.mean().",
                    PrivacyLeakWarning)

                self.bounds = (np.min(X, axis=0), np.max(X, axis=0))

            self.bounds = self._check_bounds(self.bounds, n_features)
            # Half of the budget goes to the mean.  A throwaway accountant is passed because the total spend is
            # recorded once, on `self.accountant`, at the end of this method.
            self.mean_ = mean(X, epsilon=self.epsilon / 2, bounds=self.bounds, axis=0, random_state=random_state,
                              accountant=BudgetAccountant())

        X -= self.mean_

        if self.data_norm is None:
            warnings.warn("Data norm has not been specified and will be calculated on the data provided.  This will "
                          "result in additional privacy leakage. To ensure differential privacy and no additional "
                          "privacy leakage, specify `data_norm` at initialisation.", PrivacyLeakWarning)
            self.data_norm = np.linalg.norm(X, axis=1).max()

        X = self._clip_to_norm(X, self.data_norm)

        # The whole budget is available here when no mean had to be computed, otherwise the remaining half.
        sigma_vec, u_mtx = covariance_eig(X, epsilon=self.epsilon if self.centered else self.epsilon / 2,
                                          norm=self.data_norm, random_state=random_state,
                                          dims=n_components if isinstance(n_components, Integral) else None)
        u_mtx, _ = svd_flip(u_mtx, np.zeros_like(u_mtx).T)
        # Eigenvalues of the covariance are converted to singular values.
        sigma_vec = np.sqrt(sigma_vec)

        components_ = u_mtx.T

        # Get variance explained by singular values
        explained_variance_ = np.sort((sigma_vec ** 2) / (n_samples - 1))[::-1]
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var
        singular_values_ = sigma_vec.copy()  # Store the singular values.

        # Post-process the number of components required
        if n_components == 'mle':
            n_components = sk_pca._infer_dimension(explained_variance_, n_samples)  # pylint: disable=protected-access
        elif 0 < n_components < 1.0:
            # number of components for which the cumulated explained
            # variance percentage is superior to the desired threshold
            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
            n_components = np.searchsorted(ratio_cumsum, n_components) + 1

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.n_samples_ = n_samples
        self.components_ = components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
        self.singular_values_ = singular_values_[:n_components]

        self.accountant.spend(self.epsilon, 0)

        return u_mtx, sigma_vec[:n_components], u_mtx.T

    @copy_docstring(sk_pca.PCA.fit_transform)
    def fit_transform(self, X, y=None):
        # `y` is accepted for API compatibility only.
        del y

        self._fit(X)

        # The projection is delegated to `transform` on the fitted model.
        return self.transform(X)
+# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# c. Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or +# promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +""" +Standard Scaler with differential privacy +""" +import warnings + +import numpy as np +import sklearn.preprocessing as sk_pp +from sklearn.preprocessing._data import _handle_zeros_in_scale + +# TODO: remove when sklearn 1.6 a min req +try: + from sklearn.utils.validation import validate_data +except ImportError: + from sklearn.base import BaseEstimator + validate_data = BaseEstimator._validate_data + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.utils import PrivacyLeakWarning, check_random_state +from diffprivlib.tools import nanvar, nanmean +from diffprivlib.validation import DiffprivlibMixin + + +def _incremental_mean_and_var(X, epsilon, bounds, last_mean, last_variance, last_sample_count, random_state=None): + # Initialising new accountant, as budget is tracked in main class. Subject to review in line with GH issue #21 + temp_acc = BudgetAccountant() + + # old = stats until now + # new = the current increment + # updated = the aggregated stats + last_sum = last_mean * last_sample_count + + new_mean = nanmean(X, epsilon=epsilon, axis=0, bounds=bounds, random_state=random_state, accountant=temp_acc) + new_sample_count = np.sum(~np.isnan(X), axis=0) + new_sum = new_mean * new_sample_count + updated_sample_count = last_sample_count + new_sample_count + + updated_mean = (last_sum + new_sum) / updated_sample_count + + if last_variance is None: + updated_variance = None + else: + new_unnormalized_variance = nanvar(X, epsilon=epsilon, axis=0, bounds=bounds, random_state=random_state, + accountant=temp_acc) * new_sample_count + last_unnormalized_variance = last_variance * last_sample_count + + with np.errstate(divide='ignore', invalid='ignore'): + last_over_new_count = last_sample_count / new_sample_count + updated_unnormalized_variance = ( + last_unnormalized_variance + new_unnormalized_variance + + last_over_new_count / updated_sample_count * + (last_sum / last_over_new_count - new_sum) ** 2) + + zeros = last_sample_count == 0 + 
updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros] + updated_variance = updated_unnormalized_variance / updated_sample_count + + return updated_mean, updated_variance, updated_sample_count + + +# noinspection PyPep8Naming,PyAttributeOutsideInit +class StandardScaler(sk_pp.StandardScaler, DiffprivlibMixin): + """Standardize features by removing the mean and scaling to unit variance, calculated with differential privacy + guarantees. Differential privacy is guaranteed on the learned scaler with respect to the training sample; the + transformed output will certainly not satisfy differential privacy. + + The standard score of a sample `x` is calculated as: + + z = (x - u) / s + + where `u` is the (differentially private) mean of the training samples or zero if `with_mean=False`, and `s` is the + (differentially private) standard deviation of the training samples or one if `with_std=False`. + + Centering and scaling happen independently on each feature by computing the relevant statistics on the samples in + the training set. Mean and standard deviation are then stored to be used on later data using the `transform` + method. + + For further information, users are referred to :class:`sklearn.preprocessing.StandardScaler`. + + Parameters + ---------- + epsilon : float, default: 1.0 + The privacy budget to be allocated to learning the mean and variance of the training sample. If + `with_std=True`, the privacy budget is split evenly between mean and variance (the mean must be calculated even + when `with_mean=False`, as it is used in the calculation of the variance. + + bounds : tuple, optional + Bounds of the data, provided as a tuple of the form (min, max). `min` and `max` can either be scalars, covering + the min/max of the entire data, or vectors with one entry per feature. If not provided, the bounds are computed + on the data when ``.fit()`` is first called, resulting in a :class:`.PrivacyLeakWarning`. 
+ + copy : boolean, default: True + If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; + e.g. if the data is not a NumPy array, a copy may still be returned. + + with_mean : boolean, True by default + If True, center the data before scaling. + + with_std : boolean, True by default + If True, scale the data to unit variance (or equivalently, unit standard deviation). + + random_state : int or RandomState, optional + Controls the randomness of the model. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Attributes + ---------- + scale_ : ndarray or None, shape (n_features,) + Per feature relative scaling of the data. This is calculated using `np.sqrt(var_)`. Equal to ``None`` when + ``with_std=False``. + + mean_ : ndarray or None, shape (n_features,) + The mean value for each feature in the training set. Equal to ``None`` when ``with_mean=False``. + + var_ : ndarray or None, shape (n_features,) + The variance for each feature in the training set. Used to compute `scale_`. Equal to ``None`` when + ``with_std=False``. + + n_samples_seen_ : int or array, shape (n_features,) + The number of samples processed by the estimator for each feature. If there are not missing samples, the + ``n_samples_seen`` will be an integer, otherwise it will be an array. + Will be reset on new calls to fit, but increments across ``partial_fit`` calls. + + See also + -------- + :class:`sklearn.preprocessing.StandardScaler` + Vanilla scikit-learn version, without differential privacy. + + :class:`.PCA` + Further removes the linear correlation across features with 'whiten=True'. + + Notes + ----- + NaNs are treated as missing values: disregarded in fit, and maintained in transform. 
+ + """ # noqa + def __init__(self, *, epsilon=1.0, bounds=None, copy=True, with_mean=True, with_std=True, random_state=None, + accountant=None): + super().__init__(copy=copy, with_mean=with_mean, with_std=with_std) + self.epsilon = epsilon + self.bounds = bounds + self.random_state = random_state + self.accountant = BudgetAccountant.load_default(accountant) + + def partial_fit(self, X, y=None, sample_weight=None): + """Online computation of mean and std with differential privacy on X for later scaling. All of X is processed + as a single batch. This is intended for cases when `fit` is not feasible due to very large number of + `n_samples` or because X is read from a continuous stream. + + The algorithm for incremental mean and std is given in Equation 1.5a,b in Chan, Tony F., Gene H. Golub, and + Randall J. LeVeque. "Algorithms for computing the sample variance: Analysis and recommendations." The American + Statistician 37.3 (1983): 242-247: + + Parameters + ---------- + X : {array-like}, shape [n_samples, n_features] + The data used to compute the mean and standard deviation used for later scaling along the features axis. + + y + Ignored + + sample_weight + Ignored by diffprivlib. Present for consistency with sklearn API. + + """ + self._validate_params() + self.accountant.check(self.epsilon, 0) + + if sample_weight is not None: + self._warn_unused_args("sample_weight") + + random_state = check_random_state(self.random_state) + + epsilon_0 = self.epsilon / 2 if self.with_std else self.epsilon + + X = validate_data(self, X, accept_sparse=False, copy=self.copy, estimator=self, dtype=float) + + if self.bounds is None: + warnings.warn("Bounds parameter hasn't been specified, so falling back to determining bounds from the " + "data.\n This will result in additional privacy leakage. 
To ensure differential privacy " + "with no additional privacy loss, specify `bounds` for each valued returned by np.mean().", + PrivacyLeakWarning) + self.bounds = (np.min(X, axis=0), np.max(X, axis=0)) + + self.bounds = self._check_bounds(self.bounds, X.shape[1]) + X = self._clip_to_bounds(X, self.bounds) + + # Even in the case of `with_mean=False`, we update the mean anyway. This is needed for the incremental + # computation of the var See incr_mean_variance_axis and _incremental_mean_variance_axis + + # if n_samples_seen_ is an integer (i.e. no missing values), we need to transform it to a NumPy array of + # shape (n_features,) required by incr_mean_variance_axis and _incremental_variance_axis + if hasattr(self, 'n_samples_seen_') and isinstance(self.n_samples_seen_, (int, np.integer)): + self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1]).astype(np.int64) + + if not hasattr(self, 'n_samples_seen_'): + self.n_samples_seen_ = np.zeros(X.shape[1], dtype=np.int64) + + # First pass + if not hasattr(self, 'scale_'): + self.mean_ = .0 + if self.with_std: + self.var_ = .0 + else: + self.var_ = None + + if not self.with_mean and not self.with_std: + self.mean_ = None + self.var_ = None + self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0) + else: + self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var( + X, epsilon_0, self.bounds, self.mean_, self.var_, self.n_samples_seen_, random_state + ) + + # for backward-compatibility, reduce n_samples_seen_ to an integer + # if the number of samples is the same for each feature (i.e. 
no + # missing values) + if np.ptp(self.n_samples_seen_) == 0: + self.n_samples_seen_ = self.n_samples_seen_[0] + + if self.with_std: + self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_)) + else: + self.scale_ = None + + self.accountant.spend(self.epsilon, 0) + + return self diff --git a/src/diffprivlib/models/utils.py b/src/diffprivlib/models/utils.py new file mode 100644 index 0000000..6595a51 --- /dev/null +++ b/src/diffprivlib/models/utils.py @@ -0,0 +1,124 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2020 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+""" +Utilities for use in machine learning models +""" +import warnings +from numbers import Integral + +import numpy as np +from scipy.linalg import null_space + +from diffprivlib.mechanisms import LaplaceBoundedDomain, Bingham +from diffprivlib.utils import PrivacyLeakWarning, check_random_state + + +def covariance_eig(array, epsilon=1.0, norm=None, dims=None, eigvals_only=False, random_state=None): + r""" + Return the eigenvalues and eigenvectors of the covariance matrix of `array`, satisfying differential privacy. + + Paper link: http://papers.nips.cc/paper/9567-differentially-private-covariance-estimation.pdf + + Parameters + ---------- + array : array-like, shape (n_samples, n_features) + Matrix for which the covariance matrix is sought. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + norm : float, optional + The max l2 norm of any row of the input array. This defines the spread of data that will be protected by + differential privacy. + + If not specified, the max norm is taken from the data, but will result in a :class:`.PrivacyLeakWarning`, as it + reveals information about the data. To preserve differential privacy fully, `norm` should be selected + independently of the data, i.e. with domain knowledge. + + dims : int, optional + Number of eigenvectors to return. If `None`, return all eigenvectors. + + eigvals_only : bool, default: False + Only return the eigenvalue estimates. If True, all the privacy budget is spent on estimating the eigenvalues. + + random_state : int or RandomState, optional + Controls the randomness of the model. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + Returns + ------- + w : (n_features) array + The eigenvalues, each repeated according to its multiplicity. + + v : (n_features, dims) array + The normalized (unit "length") eigenvectors, such that the column ``v[:,i]`` is the eigenvector corresponding to + the eigenvalue ``w[i]``. 
+ + """ + + random_state = check_random_state(random_state) + + n_features = array.shape[1] + dims = n_features if dims is None else min(dims, n_features) + if not isinstance(dims, Integral): + raise TypeError(f"Number of requested dimensions must be integer-valued, got {type(dims)}") + if dims < 0: + raise ValueError(f"Number of requested dimensions must be non-negative, got {dims}") + + max_norm = np.linalg.norm(array, axis=1).max() + if norm is None: + warnings.warn("Data norm has not been specified and will be calculated on the data provided. This will result " + "in additional privacy leakage. To ensure differential privacy and no additional privacy " + "leakage, specify `data_norm` at initialisation.", PrivacyLeakWarning) + norm = max_norm + elif max_norm > norm and not np.isclose(max_norm, norm): + raise ValueError(f"Rows of input array must have l2 norm of at most {norm}, got {max_norm}") + + cov = array.T.dot(array) / (norm ** 2) + eigvals = np.sort(np.linalg.eigvalsh(cov))[::-1] + epsilon_0 = epsilon if eigvals_only else epsilon / (dims + (dims != n_features)) + + mech_eigvals = LaplaceBoundedDomain(epsilon=epsilon_0, lower=0, upper=float("inf"), sensitivity=2, + random_state=random_state) + noisy_eigvals = np.array([mech_eigvals.randomise(eigval) for eigval in eigvals]) * (norm ** 2) + + if eigvals_only: + return noisy_eigvals + + # When estimating all eigenvectors, we don't need to spend budget for the dth vector + epsilon_i = epsilon / (dims + (dims != n_features)) + cov_i = cov + proj_i = np.eye(n_features) + + theta = np.zeros((0, n_features)) + mech_cov = Bingham(epsilon=epsilon_i, random_state=random_state) + + for _ in range(dims): + if cov_i.size > 1: + u_i = mech_cov.randomise(cov_i) + else: + u_i = np.ones((1,)) + + theta_i = proj_i.T.dot(u_i) + theta = np.vstack((theta, theta_i)) + + if cov_i.size > 1: + proj_i = null_space(theta).T + cov_i = proj_i.dot(cov).dot(proj_i.T) + + return noisy_eigvals, theta.T diff --git 
a/src/diffprivlib/tools/__init__.py b/src/diffprivlib/tools/__init__.py new file mode 100644 index 0000000..414ab68 --- /dev/null +++ b/src/diffprivlib/tools/__init__.py @@ -0,0 +1,23 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Tools for data analysis with differential privacy. 
+""" +from diffprivlib.tools.histograms import histogram, histogramdd, histogram2d +from diffprivlib.tools.quantiles import quantile, median, percentile +from diffprivlib.tools.utils import count_nonzero, mean, std, sum, var, nanmean, nanstd, nansum, nanvar diff --git a/src/diffprivlib/tools/histograms.py b/src/diffprivlib/tools/histograms.py new file mode 100644 index 0000000..faf5d41 --- /dev/null +++ b/src/diffprivlib/tools/histograms.py @@ -0,0 +1,365 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# Copyright (c) 2005-2019, NumPy Developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# +# * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote +# products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +Differentially private histogram-related functions +Builds upon the histogram functionality of Numpy +""" +import warnings +from sys import maxsize + +import numpy as np + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import GeometricTruncated +from diffprivlib.utils import PrivacyLeakWarning, warn_unused_args, check_random_state + + +# noinspection PyShadowingBuiltins +def histogram(sample, epsilon=1.0, bins=10, range=None, weights=None, density=None, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private histogram of a set of data. + + The histogram is computed using :obj:`numpy.histogram`, and noise added using :class:`.GeometricTruncated` to + satisfy differential privacy. 
If the `range` parameter is not specified correctly, a :class:`.PrivacyLeakWarning` + is thrown. Users are referred to :obj:`numpy.histogram` for more usage notes. + + Parameters + ---------- + sample : array_like + Input data. The histogram is computed over the flattened array. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon` to be applied. + + bins : int or sequence of scalars or str, default: 10 + If `bins` is an int, it defines the number of equal-width bins in the given range (10, by default). If `bins` + is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing + for non-uniform bin widths. + + If `bins` is a string, it defines the method used to calculate the optimal bin width, as defined by + `histogram_bin_edges`. + + range : (float, float), optional + The lower and upper range of the bins. If not provided, range is simply ``(a.min(), a.max())``. Values outside + the range are ignored. The first element of the range must be less than or equal to the second. `range` affects + the automatic bin computation as well. While bin width is computed to be optimal based on the actual data + within `range`, the bin count will fill the entire range including portions containing no data. + + weights : array_like, optional + An array of weights, of the same shape as `a`. Each value in `a` only contributes its associated weight + towards the bin count (instead of 1). If `density` is True, the weights are normalized, so that the integral + of the density over the range remains 1. + + density : bool, optional + If ``False``, the result will contain the number of samples in each bin. If ``True``, the result is the value + of the probability *density* function at the bin, normalized such that the *integral* over the range is 1. + Note that the sum of the histogram values will not be equal to 1 unless bins of unity width are chosen; it is + not a probability *mass* function. 
+ + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + hist : array + The values of the histogram. See `density` and `weights` for a + description of the possible semantics. + bin_edges : array of dtype float + Return the bin edges ``(length(hist)+1)``. + + + See Also + -------- + histogramdd, histogram2d + + Notes + ----- + All but the last (righthand-most) bin is half-open. In other words, if `bins` is:: + + [1, 2, 3, 4] + + then the first bin is ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``. The last bin, however, + is ``[3, 4]``, which *includes* 4. + + """ + warn_unused_args(unused_args) + + random_state = check_random_state(random_state) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + if range is None: + warnings.warn("Range parameter has not been specified. 
Falling back to taking range from the data.\n" + "To ensure differential privacy, and no additional privacy leakage, the range must be " + "specified independently of the data (i.e., using domain knowledge).", PrivacyLeakWarning) + + hist, bin_edges = np.histogram(sample, bins=bins, range=range, weights=weights, density=None) + + dp_mech = GeometricTruncated(epsilon=epsilon, sensitivity=1, lower=0, upper=maxsize, random_state=random_state) + + dp_hist = np.zeros_like(hist) + + for i in np.arange(dp_hist.shape[0]): + dp_hist[i] = dp_mech.randomise(int(hist[i])) + + # dp_hist = dp_hist.astype(float, casting='safe') + + accountant.spend(epsilon, 0) + + if density: + bin_sizes = np.array(np.diff(bin_edges), float) + return dp_hist / bin_sizes / (dp_hist.sum() if dp_hist.sum() else 1), bin_edges + + return dp_hist, bin_edges + + +# noinspection PyShadowingBuiltins +def histogramdd(sample, epsilon=1.0, bins=10, range=None, weights=None, density=None, random_state=None, + accountant=None, **unused_args): + r""" + Compute the differentially private multidimensional histogram of some data. + + The histogram is computed using :obj:`numpy.histogramdd`, and noise added using :class:`.GeometricTruncated` to + satisfy differential privacy. If the `range` parameter is not specified correctly, a :class:`.PrivacyLeakWarning` + is thrown. Users are referred to :obj:`numpy.histogramdd` for more usage notes. + + Parameters + ---------- + sample : (N, D) array, or (D, N) array_like + The data to be histogrammed. + + Note the unusual interpretation of sample when an array_like: + + * When an array, each row is a coordinate in a D-dimensional space - such as + ``histogramgramdd(np.array([p1, p2, p3]))``. + * When an array_like, each element is the list of values for single coordinate - such as + ``histogramgramdd((X, Y, Z))``. + + The first form should be preferred. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon` to be applied. 
+ + bins : sequence or int, default: 10 + The bin specification: + + * A sequence of arrays describing the monotonically increasing bin edges along each dimension. + * The number of bins for each dimension (nx, ny, ... =bins) + * The number of bins for all dimensions (nx=ny=...=bins). + + range : sequence, optional + A sequence of length D, each an optional (lower, upper) tuple giving the outer bin edges to be used if the edges + are not given explicitly in `bins`. + An entry of None in the sequence results in the minimum and maximum values being used for the corresponding + dimension. + The default, None, is equivalent to passing a tuple of D None values. + + density : bool, optional + If False, the default, returns the number of samples in each bin. If True, returns the probability *density* + function at the bin, ``bin_count / sample_count / bin_volume``. + + weights : (N,) array_like, optional + An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is + True. If normed is False, the values of the returned histogram are equal to the sum of the weights belonging to + the samples falling into each bin. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + H : ndarray + The multidimensional histogram of sample x. See normed and weights for the different possible semantics. + edges : list + A list of D arrays describing the bin edges for each dimension. 
+ + See Also + -------- + histogram: 1-D differentially private histogram + histogram2d: 2-D differentially private histogram + + """ + warn_unused_args(unused_args) + + random_state = check_random_state(random_state) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + # Range only required if bin edges not specified + if np.array(bins, dtype=object).ndim == 0 or not np.all([np.ndim(_bin) for _bin in bins]): + if range is None or (isinstance(range, list) and None in range): + warnings.warn("Range parameter has not been specified (or has missing elements). Falling back to taking " + "range from the data.\n " + "To ensure differential privacy, and no additional privacy leakage, the range must be " + "specified for each dimension independently of the data (i.e., using domain knowledge).", + PrivacyLeakWarning) + + hist, bin_edges = np.histogramdd(sample, bins=bins, range=range, weights=weights, density=None) + + dp_mech = GeometricTruncated(epsilon=epsilon, sensitivity=1, lower=0, upper=maxsize, random_state=random_state) + + dp_hist = np.zeros_like(hist) + iterator = np.nditer(hist, flags=['multi_index']) + + while not iterator.finished: + dp_hist[iterator.multi_index] = dp_mech.randomise(int(iterator[0])) + iterator.iternext() + + dp_hist = dp_hist.astype(float, casting='safe') + + if density: + # calculate the probability density function + dims = len(dp_hist.shape) + dp_hist_sum = dp_hist.sum() + for i in np.arange(dims): + shape = np.ones(dims, int) + shape[i] = dp_hist.shape[i] + # noinspection PyUnresolvedReferences + dp_hist = dp_hist / np.diff(bin_edges[i]).reshape(shape) + + if dp_hist_sum > 0: + dp_hist /= dp_hist_sum + + accountant.spend(epsilon, 0) + + return dp_hist, bin_edges + + +# noinspection PyShadowingBuiltins +def histogram2d(array_x, array_y, epsilon=1.0, bins=10, range=None, weights=None, density=None, random_state=None, + accountant=None, **unused_args): + r""" + Compute the differentially private 
bi-dimensional histogram of two data samples. + + Parameters + ---------- + array_x : array_like, shape (N,) + An array containing the x coordinates of the points to be histogrammed. + + array_y : array_like, shape (N,) + An array containing the y coordinates of the points to be histogrammed. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon` to be applied. + + bins : int or array_like or [int, int] or [array, array], default: 10 + The bin specification: + + * If int, the number of bins for the two dimensions (nx=ny=bins). + * If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins). + * If [int, int], the number of bins in each dimension (nx, ny = bins). + * If [array, array], the bin edges in each dimension (x_edges, y_edges = bins). + * A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. + + range : array_like, shape(2,2), optional + The leftmost and rightmost edges of the bins along each dimension (if not specified explicitly in the `bins` + parameters): ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range will be considered outliers and + not tallied in the histogram. + + density : bool, optional + If False, the default, returns the number of samples in each bin. If True, returns the probability *density* + function at the bin, ``bin_count / sample_count / bin_area``. + + weights : array_like, shape(N,), optional + An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. Weights are normalized to 1 if `normed` is + True. If `normed` is False, the values of the returned histogram are equal to the sum of the weights belonging + to the samples falling into each bin. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. 
+ + Returns + ------- + H : ndarray, shape(nx, ny) + The bi-dimensional histogram of samples `x` and `y`. Values in `x` are histogrammed along the first dimension + and values in `y` are histogrammed along the second dimension. + + xedges : ndarray, shape(nx+1,) + The bin edges along the first dimension. + + yedges : ndarray, shape(ny+1,) + The bin edges along the second dimension. + + See Also + -------- + histogram : 1D differentially private histogram + histogramdd : Differentially private Multidimensional histogram + + Notes + ----- + When `normed` is True, then the returned histogram is the sample density, defined such that the sum over bins of the + product ``bin_value * bin_area`` is 1. + + Please note that the histogram does not follow the Cartesian convention where `x` values are on the abscissa and `y` + values on the ordinate axis. Rather, `x` is histogrammed along the first dimension of the array (vertical), and `y` + along the second dimension of the array (horizontal). This ensures compatibility with `histogramdd`. 
+ + """ + warn_unused_args(unused_args) + + try: + num_bins = len(bins) + except TypeError: + num_bins = 1 + + if num_bins not in (1, 2): + xedges = yedges = np.asarray(bins) + bins = [xedges, yedges] + + hist, edges = histogramdd([array_x, array_y], epsilon=epsilon, bins=bins, range=range, weights=weights, + density=density, random_state=random_state, accountant=accountant) + return hist, edges[0], edges[1] diff --git a/src/diffprivlib/tools/quantiles.py b/src/diffprivlib/tools/quantiles.py new file mode 100644 index 0000000..1c95dd9 --- /dev/null +++ b/src/diffprivlib/tools/quantiles.py @@ -0,0 +1,273 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2020 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+""" +Quantile functions with differential privacy +""" +import warnings + +import numpy as np + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import Exponential +from diffprivlib.utils import warn_unused_args, PrivacyLeakWarning, check_random_state +from diffprivlib.validation import clip_to_bounds, check_bounds +from diffprivlib.tools.utils import _wrap_axis + + +def quantile(array, quant, epsilon=1.0, bounds=None, axis=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private quantile of the array. + + Returns the specified quantile with differential privacy. The quantile is calculated over the flattened array. + Differential privacy is achieved with the :class:`.Exponential` mechanism, using the method first proposed by + Smith, 2011. + + Paper link: https://dl.acm.org/doi/pdf/10.1145/1993636.1993743 + + Parameters + ---------- + array : array_like + Array containing numbers whose quantile is sought. If `array` is not an array, a conversion is attempted. + + quant : float or array-like + Quantile or array of quantiles. Each quantile must be in the unit interval [0, 1]. If quant is array-like, + quantiles are returned over the flattened array. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. Differential privacy is achieved over the entire output, with epsilon split + evenly between each output value. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : None or int or tuple of ints, optional + Axis or axes along which a sum is performed. The default, axis=None, will sum all of the elements of the input + array. If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single + axis or all the axes as before. 
+ + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be passed through to the `mean` method of sub-classes + of `ndarray`, however any non-default value will be. If the sub-class' method does not implement `keepdims` any + exceptions will be raised. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + m : ndarray + Returns a new array containing the quantile values. + + See Also + -------- + numpy.quantile : Equivalent non-private method. + + percentile, median + + """ + warn_unused_args(unused_args) + + random_state = check_random_state(random_state) + + if bounds is None: + warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. 
To ensure differential privacy and no additional " + "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning) + bounds = (np.min(array), np.max(array)) + + quant = np.ravel(quant) + + if np.any(quant < 0) or np.any(quant > 1): + raise ValueError("Quantiles must be in the unit interval [0, 1].") + + if len(quant) > 1: + return np.array([quantile(array, q_i, epsilon=epsilon / len(quant), bounds=bounds, axis=axis, keepdims=keepdims, + accountant=accountant, random_state=random_state) for q_i in quant]) + + # Dealing with a single quant from now on + quant = quant.item() + + if axis is not None or keepdims: + return _wrap_axis(quantile, array, quant=quant, epsilon=epsilon, bounds=bounds, axis=axis, keepdims=keepdims, + random_state=random_state, accountant=accountant) + + # Dealing with a scalar output from now on + bounds = check_bounds(bounds, shape=0, min_separation=1e-5) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + # Let's ravel array to be single-dimensional + array = clip_to_bounds(np.ravel(array), bounds) + + k = array.size + array = np.append(array, list(bounds)) + array.sort() + + interval_sizes = np.diff(array) + + # Todo: Need to find a way to do this in a differentially private way, see GH 80 + if np.isnan(interval_sizes).any(): + return np.nan + + mech = Exponential(epsilon=epsilon, sensitivity=1, utility=list(-np.abs(np.arange(0, k + 1) - quant * k)), + measure=list(interval_sizes), random_state=random_state) + idx = mech.randomise() + output = random_state.random() * (array[idx+1] - array[idx]) + array[idx] + + accountant.spend(epsilon, 0) + + return output + + +def percentile(array, percent, epsilon=1.0, bounds=None, axis=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private percentile of the array. + + This method calls :obj:`.quantile`, where quantile = percentile / 100. 
+ + Parameters + ---------- + array : array_like + Array containing numbers whose percentile is sought. If `array` is not an array, a conversion is attempted. + + percent : float or array-like + Percentile or list of percentiles sought. Each percentile must be in [0, 100]. If percent is array-like, + percentiles are returned over the flattened array. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. Differential privacy is achieved over the entire output, with epsilon split + evenly between each output value. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : None or int or tuple of ints, optional + Axis or axes along which a sum is performed. The default, axis=None, will sum all of the elements of the input + array. If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single + axis or all the axes as before. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be passed through to the `mean` method of sub-classes + of `ndarray`, however any non-default value will be. If the sub-class' method does not implement `keepdims` any + exceptions will be raised. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + m : ndarray + Returns a new array containing the percentile values. + + See Also + -------- + numpy.percentile : Equivalent non-private method. 
+ + quantile, median + + """ + warn_unused_args(unused_args) + + quant = np.asarray(percent) / 100 + + if np.any(quant < 0) or np.any(quant > 1): + raise ValueError("Percentiles must be between 0 and 100 inclusive") + + return quantile(array, quant, epsilon=epsilon, bounds=bounds, axis=axis, keepdims=keepdims, + random_state=random_state, accountant=accountant) + + +def median(array, epsilon=1.0, bounds=None, axis=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private median of the array. + + Returns the median with differential privacy. The median is calculated over each axis, or the flattened array + if an axis is not provided. This method calls the :obj:`.quantile` method, for the 0.5 quantile. + + Parameters + ---------- + array : array_like + Array containing numbers whose median is sought. If `array` is not an array, a conversion is attempted. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. Differential privacy is achieved over the entire output, with epsilon split + evenly between each output value. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : None or int or tuple of ints, optional + Axis or axes along which a sum is performed. The default, axis=None, will sum all of the elements of the input + array. If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single + axis or all the axes as before. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be passed through to the `mean` method of sub-classes + of `ndarray`, however any non-default value will be. 
If the sub-class' method does not implement `keepdims` any + exceptions will be raised. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + m : ndarray + Returns a new array containing the median values. + + See Also + -------- + numpy.median : Equivalent non-private method. + + quantile, percentile + + """ + warn_unused_args(unused_args) + + return quantile(array, 0.5, epsilon=epsilon, bounds=bounds, axis=axis, keepdims=keepdims, random_state=random_state, + accountant=accountant) diff --git a/src/diffprivlib/tools/utils.py b/src/diffprivlib/tools/utils.py new file mode 100644 index 0000000..29ae3c6 --- /dev/null +++ b/src/diffprivlib/tools/utils.py @@ -0,0 +1,745 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# Copyright (c) 2005-2019, NumPy Developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +# following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this list of conditions and the following +# disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the +# following disclaimer in the documentation and/or other materials provided with the distribution. +# +# * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote +# products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +General utilities and tools for performing differentially private operations on data. 
+""" +import warnings +from numbers import Integral +import numpy as np + +from diffprivlib.accountant import BudgetAccountant +from diffprivlib.mechanisms import LaplaceBoundedDomain, GeometricTruncated, LaplaceTruncated +from diffprivlib.utils import PrivacyLeakWarning, warn_unused_args, check_random_state +from diffprivlib.validation import check_bounds, clip_to_bounds + +_sum_ = sum + + +def _wrap_axis(func, array, *, axis, keepdims, epsilon, bounds, **kwargs): + """Wrapper for functions with axis and keepdims parameters to ensure the function only needs to be evaluated on + scalar outputs. + + """ + dummy = np.zeros_like(array).sum(axis=axis, keepdims=keepdims) + array = np.asarray(array) + ndim = array.ndim + bounds = check_bounds(bounds, np.size(dummy) if np.ndim(dummy) == 1 else 0) + + if isinstance(axis, int): + axis = (axis,) + elif axis is None: + axis = tuple(range(ndim)) + + # Ensure all axes are non-negative + axis = tuple(ndim + ax if ax < 0 else ax for ax in axis) + + if isinstance(dummy, np.ndarray): + iterator = np.nditer(dummy, flags=['multi_index']) + + while not iterator.finished: + idx = list(iterator.multi_index) # Multi index on 'dummy' + _bounds = (bounds[0][idx], bounds[1][idx]) if np.ndim(dummy) == 1 else bounds + + # Construct slicing tuple on 'array' + if len(idx) + len(axis) > ndim: + full_slice = tuple(slice(None) if ax in axis else idx[ax] for ax in range(ndim)) + else: + idx.reverse() + full_slice = tuple(slice(None) if ax in axis else idx.pop() for ax in range(ndim)) + + dummy[iterator.multi_index] = func(array[full_slice], epsilon=epsilon / dummy.size, bounds=_bounds, + **kwargs) + iterator.iternext() + + return dummy + + return func(array, bounds=bounds, epsilon=epsilon, **kwargs) + + +def count_nonzero(array, epsilon=1.0, axis=None, keepdims=False, random_state=None, accountant=None): + r"""Counts the number of non-zero values in the array ``array`` with differential privacy. 
+ + It is typical to use this function on the result of binary operations, such as ``count_nonzero(array >= 0)``. If + you wish to count the number of elements of an array, use ``count_nonzero(np.ones_like(array))``. + + The word "non-zero" is in reference to the Python 2.x built-in method ``__nonzero__()`` (renamed ``__bool__()`` in + Python 3.x) of Python objects that tests an object's "truthfulness". For example, any number is considered truthful + if it is nonzero, whereas any string is considered truthful if it is not the empty string. Thus, this function + (recursively) counts how many elements in ``array`` (and in sub-arrays thereof) have their ``__nonzero__()`` or + ``__bool__()`` method evaluated to ``True``. + + Parameters + ---------- + array : array_like + The array for which to count non-zeros. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + axis : int or tuple, optional + Axis or tuple of axes along which to count non-zeros. Default is None, meaning that non-zeros will be counted + along a flattened version of ``array``. + + keepdims : bool, default: False + If this is set to True, the axes that are counted are left in the result as dimensions with size one. With this + option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + count : int or array of int + Differentially private number of non-zero values in the array along a given axis. Otherwise, the total number + of non-zero values in the array is returned. 
+ + """ + array = np.asanyarray(array) + + if np.issubdtype(array.dtype, np.character): + array_bool = array != array.dtype.type() + else: + array_bool = array.astype(np.bool_, copy=False) + + return sum(array_bool, axis=axis, dtype=np.intp, bounds=(0, 1), epsilon=epsilon, keepdims=keepdims, + random_state=random_state, accountant=accountant) + + +def mean(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private arithmetic mean along the specified axis. + + Returns the average of the array elements with differential privacy. The average is taken over the flattened array + by default, otherwise over the specified axis. Noise is added using :class:`.Laplace` to satisfy differential + privacy, where sensitivity is calculated using `bounds`. Users are advised to consult the documentation of + :obj:`numpy.mean` for further details, as the behaviour of `mean` closely follows its Numpy variant. + + Parameters + ---------- + array : array_like + Array containing numbers whose mean is desired. If `array` is not an array, a conversion is attempted. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + + If this is a tuple of ints, a mean is performed over multiple axes, instead of a single axis or all the axes as + before. + + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is `float64`; for floating point inputs, it + is the same as the input dtype. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. 
With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + m : ndarray, see dtype parameter above + Returns a new array containing the mean values. + + See Also + -------- + std, var, nanmean + + """ + warn_unused_args(unused_args) + + return _mean(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=False) + + +def nanmean(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements with differential privacy. The average is taken over the flattened array + by default, otherwise over the specified axis. Noise is added using :class:`.Laplace` to satisfy differential + privacy, where sensitivity is calculated using `bounds`. Users are advised to consult the documentation of + :obj:`numpy.mean` for further details, as the behaviour of `mean` closely follows its Numpy variant. + + For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. + + Parameters + ---------- + array : array_like + Array containing numbers whose mean is desired. If `array` is not an array, a conversion is attempted. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. 
+ + If this is a tuple of ints, a mean is performed over multiple axes, instead of a single axis or all the axes as + before. + + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is `float64`; for floating point inputs, it + is the same as the input dtype. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + m : ndarray, see dtype parameter above + Returns a new array containing the mean values. + + See Also + -------- + std, var, mean + + """ + warn_unused_args(unused_args) + + return _mean(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=True) + + +def _mean(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, + accountant=None, nan=False): + random_state = check_random_state(random_state) + + if bounds is None: + warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. 
To ensure differential privacy and no additional " + "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning) + bounds = (np.nanmin(array), np.nanmax(array)) + + if axis is not None or keepdims: + return _wrap_axis(_mean, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=nan) + + lower, upper = check_bounds(bounds, shape=0, dtype=dtype) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + array = clip_to_bounds(np.ravel(array), bounds) + + _func = np.nanmean if nan else np.mean + actual_mean = _func(array, axis=axis, dtype=dtype, keepdims=keepdims) + + mech = LaplaceTruncated(epsilon=epsilon, delta=0, sensitivity=(upper - lower) / array.size, lower=lower, + upper=upper, random_state=random_state) + output = mech.randomise(actual_mean) + + accountant.spend(epsilon, 0) + + return output + + +def var(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private variance along the specified axis. + + Returns the variance of the array elements, a measure of the spread of a distribution, with differential privacy. + The variance is computed for the flattened array by default, otherwise over the specified axis. Noise is added + using :class:`.LaplaceBoundedDomain` to satisfy differential privacy, where sensitivity is calculated using + `bounds`. Users are advised to consult the documentation of :obj:`numpy.var` for further details, as the behaviour + of `var` closely follows its Numpy variant. + + Parameters + ---------- + array : array_like + Array containing numbers whose variance is desired. If `array` is not an array, a conversion is attempted. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). 
+ + axis : int or tuple of ints, optional + Axis or axes along which the variance is computed. The default is to compute the variance of the flattened + array. + + If this is a tuple of ints, a variance is performed over multiple axes, instead of a single axis or all the axes + as before. + + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type the default is `float32`; for arrays of float + types it is the same as the array type. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + variance : ndarray, see dtype parameter above + Returns a new array containing the variance. + + See Also + -------- + std , mean, nanvar + + """ + warn_unused_args(unused_args) + + return _var(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=False) + + +def nanvar(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the differentially private variance along the specified axis, ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a distribution, with differential privacy. + The variance is computed for the flattened array by default, otherwise over the specified axis. Noise is added + using :class:`.LaplaceBoundedDomain` to satisfy differential privacy, where sensitivity is calculated using + `bounds`. 
Users are advised to consult the documentation of :obj:`numpy.var` for further details, as the behaviour + of `var` closely follows its Numpy variant. + + For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. + + Parameters + ---------- + array : array_like + Array containing numbers whose variance is desired. If `array` is not an array, a conversion is attempted. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : int or tuple of ints, optional + Axis or axes along which the variance is computed. The default is to compute the variance of the flattened + array. + + If this is a tuple of ints, a variance is performed over multiple axes, instead of a single axis or all the axes + as before. + + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type the default is `float32`; for arrays of float + types it is the same as the array type. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; otherwise, a reference to the output array is + returned. 
+ + See Also + -------- + std , mean, var + + """ + warn_unused_args(unused_args) + + return _var(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=True) + + +def _var(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + nan=False): + random_state = check_random_state(random_state) + + if bounds is None: + warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. To ensure differential privacy and no additional " + "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning) + bounds = (np.nanmin(array), np.nanmax(array)) + + if axis is not None or keepdims: + return _wrap_axis(_var, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=nan) + + lower, upper = check_bounds(bounds, shape=0, dtype=dtype) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + # Let's ravel array to be single-dimensional + array = clip_to_bounds(np.ravel(array), bounds) + + _func = np.nanvar if nan else np.var + actual_var = _func(array, axis=axis, dtype=dtype, keepdims=keepdims) + + dp_mech = LaplaceBoundedDomain(epsilon=epsilon, delta=0, + sensitivity=((upper - lower) / array.size) ** 2 * (array.size - 1), lower=0, + upper=((upper - lower) ** 2) / 4, random_state=random_state) + output = dp_mech.randomise(actual_var) + + accountant.spend(epsilon, 0) + + return output + + +def std(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the standard deviation along the specified axis. + + Returns the standard deviation of the array elements, a measure of the spread of a distribution, with differential + privacy. 
The standard deviation is computed for the flattened array by default, otherwise over the specified axis. + Noise is added using :class:`.LaplaceBoundedDomain` to satisfy differential privacy, where sensitivity is + calculated using `bounds`. Users are advised to consult the documentation of :obj:`numpy.std` for further details, + as the behaviour of `std` closely follows its Numpy variant. + + Parameters + ---------- + array : array_like + Calculate the standard deviation of these values. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : int or tuple of ints, optional + Axis or axes along which the standard deviation is computed. The default is to compute the standard deviation + of the flattened array. + + If this is a tuple of ints, a standard deviation is performed over multiple axes, instead of a single axis or + all the axes as before. + + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of integer type the default is float64, for arrays + of float types it is the same as the array type. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + Return a new array containing the standard deviation. 
+ + See Also + -------- + var, mean, nanstd + + """ + warn_unused_args(unused_args) + + return _std(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=False) + + +def nanstd(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r""" + Compute the standard deviation along the specified axis, ignoring NaNs. + + Returns the standard deviation of the array elements, a measure of the spread of a distribution, with differential + privacy. The standard deviation is computed for the flattened array by default, otherwise over the specified axis. + Noise is added using :class:`.LaplaceBoundedDomain` to satisfy differential privacy, where sensitivity is + calculated using `bounds`. Users are advised to consult the documentation of :obj:`numpy.std` for further details, + as the behaviour of `std` closely follows its Numpy variant. + + For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. + + Parameters + ---------- + array : array_like + Calculate the standard deviation of these values. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : int or tuple of ints, optional + Axis or axes along which the standard deviation is computed. The default is to compute the standard deviation + of the flattened array. + + If this is a tuple of ints, a standard deviation is performed over multiple axes, instead of a single axis or + all the axes as before. + + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of integer type the default is float64, for arrays + of float types it is the same as the array type. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. 
With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + Return a new array containing the standard deviation. + + See Also + -------- + var, mean, std + + """ + warn_unused_args(unused_args) + + return _std(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=True) + + +def _std(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + nan=False): + ret = _var(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=nan) + + if isinstance(ret, np.ndarray): + ret = np.sqrt(ret) + elif hasattr(ret, 'dtype'): + ret = ret.dtype.type(np.sqrt(ret)) + else: + ret = np.sqrt(ret) + + return ret + + +def sum(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r"""Sum of array elements over a given axis with differential privacy. + + Parameters + ---------- + array : array_like + Elements to sum. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. + + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : None or int or tuple of ints, optional + Axis or axes along which a sum is performed. The default, axis=None, will sum all of the elements of the input + array. If axis is negative it counts from the last to the first axis. 
+ + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single + axis or all the axes as before. + + dtype : dtype, optional + The type of the returned array and of the accumulator in which the elements are summed. The dtype of `array` is + used by default unless `array` has an integer dtype of less precision than the default platform integer. In + that case, if `array` is signed then the platform integer is used while if `array` is unsigned then an unsigned + integer of the same precision as the platform integer is used. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + sum_along_axis : ndarray + An array with the same shape as `array`, with the specified axis removed. If `array` is a 0-d array, or if + `axis` is None, a scalar is returned. + + See Also + -------- + ndarray.sum : Equivalent non-private method. + + mean, nansum + + """ + warn_unused_args(unused_args) + + return _sum(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=False) + + +def nansum(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + **unused_args): + r"""Sum of array elements over a given axis with differential privacy, ignoring NaNs. + + Parameters + ---------- + array : array_like + Elements to sum. + + epsilon : float, default: 1.0 + Privacy parameter :math:`\epsilon`. 
+ + bounds : tuple, optional + Bounds of the values of the array, of the form (min, max). + + axis : None or int or tuple of ints, optional + Axis or axes along which a sum is performed. The default, axis=None, will sum all of the elements of the input + array. If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single + axis or all the axes as before. + + dtype : dtype, optional + The type of the returned array and of the accumulator in which the elements are summed. The dtype of `array` is + used by default unless `array` has an integer dtype of less precision than the default platform integer. In + that case, if `array` is signed then the platform integer is used while if `array` is unsigned then an unsigned + integer of the same precision as the platform integer is used. + + keepdims : bool, default: False + If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With + this option, the result will broadcast correctly against the input array. + + random_state : int or RandomState, optional + Controls the randomness of the algorithm. To obtain a deterministic behaviour during randomisation, + ``random_state`` has to be fixed to an integer. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + Returns + ------- + sum_along_axis : ndarray + An array with the same shape as `array`, with the specified axis removed. If `array` is a 0-d array, or if + `axis` is None, a scalar is returned. If an output array is specified, a reference to `out` is returned. + + See Also + -------- + ndarray.sum : Equivalent non-private method. 
+ + mean, sum + + """ + warn_unused_args(unused_args) + + return _sum(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=True) + + +def _sum(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, random_state=None, accountant=None, + nan=False): + random_state = check_random_state(random_state) + + if bounds is None: + warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will " + "result in additional privacy leakage. To ensure differential privacy and no additional " + "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning) + bounds = (np.nanmin(array), np.nanmax(array)) + + if axis is not None or keepdims: + return _wrap_axis(_sum, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims, + random_state=random_state, accountant=accountant, nan=nan) + + lower, upper = check_bounds(bounds, shape=0, dtype=dtype) + + accountant = BudgetAccountant.load_default(accountant) + accountant.check(epsilon, 0) + + # Let's ravel array to be single-dimensional + array = clip_to_bounds(np.ravel(array), bounds) + + _func = np.nansum if nan else np.sum + actual_sum = _func(array, axis=axis, dtype=dtype, keepdims=keepdims) + + mech = GeometricTruncated if dtype is not None and issubclass(dtype, Integral) else LaplaceTruncated + mech = mech(epsilon=epsilon, sensitivity=upper - lower, lower=lower * array.size, upper=upper * array.size, + random_state=random_state) + output = mech.randomise(actual_sum) + + accountant.spend(epsilon, 0) + + return output diff --git a/src/diffprivlib/utils.py b/src/diffprivlib/utils.py new file mode 100644 index 0000000..467bc89 --- /dev/null +++ b/src/diffprivlib/utils.py @@ -0,0 +1,198 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2019 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# 
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Basic functions and other utilities for the differential privacy library +""" +import secrets +import warnings + +import numpy as np +from sklearn.utils import check_random_state as skl_check_random_state + + +def copy_docstring(source): + """Decorator function to copy a docstring from a `source` function to a `target` function. + + The docstring is only copied if a docstring is present in `source`, and if none is present in `target`. Takes + inspiration from similar in `matplotlib`. + + Parameters + ---------- + source : method + Source function from which to copy the docstring. If ``source.__doc__`` is empty, do nothing. + + Returns + ------- + target : method + Target function with new docstring. + + """ + def copy_func(target): + if source.__doc__ and not target.__doc__: + target.__doc__ = source.__doc__ + return target + return copy_func + + +def warn_unused_args(args): + """Warn the user about supplying unused `args` to a diffprivlib model. 
+ + Arguments can be supplied as a string, a list of strings, or a dictionary as supplied to kwargs. + + Parameters + ---------- + args : str or list or dict + Arguments for which warnings should be thrown. + + Returns + ------- + None + + """ + if isinstance(args, str): + args = [args] + + for arg in args: + warnings.warn(f"Parameter '{arg}' is not functional in diffprivlib. Remove this parameter to suppress this " + "warning.", DiffprivlibCompatibilityWarning) + + +def check_random_state(seed, secure=False): + """Turn seed into a np.random.RandomState or secrets.SystemRandom instance. + + If secure=True, and seed is None (or was generated from a previous None seed), then secrets is used. Otherwise a + np.random.RandomState is used. + + Parameters + ---------- + seed : None, int or instance of RandomState + If seed is None and secure is False, return the RandomState singleton used by np.random. + If seed is None and secure is True, return a SystemRandom instance from secrets. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState or SystemRandom instance, return it. + Otherwise raise ValueError. + + secure : bool, default: False + Specifies if a secure random number generator from secrets can be used. + """ + if secure: + if isinstance(seed, secrets.SystemRandom): + return seed + + if seed is None or seed is np.random.mtrand._rand: # pylint: disable=protected-access + return secrets.SystemRandom() + elif isinstance(seed, secrets.SystemRandom): + raise ValueError("secrets.SystemRandom instance cannot be passed when secure is False.") + + return skl_check_random_state(seed) + + +class Budget(tuple): + """Custom tuple subclass for privacy budgets of the form (epsilon, delta). + + The ``Budget`` class allows for correct comparison/ordering of privacy budget, ensuring that both epsilon and delta + satisfy the comparison (tuples are compared lexicographically). 
Additionally, tuples are represented with added + verbosity, labelling epsilon and delta appropriately. + + Examples + -------- + + >>> from diffprivlib.utils import Budget + >>> Budget(1, 0.5) + (epsilon=1, delta=0.5) + >>> Budget(2, 0) >= Budget(1, 0.5) + False + >>> (2, 0) >= (1, 0.5) # Tuples are compared with lexicographic ordering + True + + """ + def __new__(cls, epsilon, delta): + if epsilon < 0: + raise ValueError("Epsilon must be non-negative") + + if not 0 <= delta <= 1: + raise ValueError("Delta must be in [0, 1]") + + return tuple.__new__(cls, (epsilon, delta)) + + def __gt__(self, other): + if self.__ge__(other) and not self.__eq__(other): + return True + return False + + def __ge__(self, other): + if self[0] >= other[0] and self[1] >= other[1]: + return True + return False + + def __lt__(self, other): + if self.__le__(other) and not self.__eq__(other): + return True + return False + + def __le__(self, other): + if self[0] <= other[0] and self[1] <= other[1]: + return True + return False + + def __repr__(self): + return f"(epsilon={self[0]}, delta={self[1]})" + + +class BudgetError(ValueError): + """Custom exception to capture the privacy budget being exceeded, typically controlled by a + :class:`.BudgetAccountant`. + + For example, this exception may be raised when the user: + + - Attempts to execute a query which would exceed the privacy budget of the accountant. + - Attempts to change the slack of the accountant in such a way that the existing budget spends would exceed the + accountant's budget. + + """ + + +class PrivacyLeakWarning(RuntimeWarning): + """Custom warning to capture privacy leaks resulting from incorrect parameter setting. + + For example, this warning may occur when the user: + + - fails to specify the bounds or range of data to a model where required (e.g., `bounds=None` to + :class:`.GaussianNB`). + - inputs data to a model that falls outside the bounds or range originally specified. 
+ + """ + + +class DiffprivlibCompatibilityWarning(RuntimeWarning): + """Custom warning to capture inherited class arguments that are not compatible with diffprivlib. + + The purpose of the warning is to alert the user of the incompatibility, but to continue execution having fixed the + incompatibility at runtime. + + For example, this warning may occur when the user: + + - passes a parameter value that is not compatible with diffprivlib (e.g., `solver='liblinear'` to + :class:`.LogisticRegression`) + - specifies a non-default value for a parameter that is ignored by diffprivlib (e.g., `intercept_scaling=0.5` + to :class:`.LogisticRegression`. + + """ + + +warnings.simplefilter('always', PrivacyLeakWarning) diff --git a/src/diffprivlib/validation.py b/src/diffprivlib/validation.py new file mode 100644 index 0000000..a12301a --- /dev/null +++ b/src/diffprivlib/validation.py @@ -0,0 +1,221 @@ +# MIT License +# +# Copyright (C) IBM Corporation 2020 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Validation functions for the differential privacy library +""" +from numbers import Real, Integral + +import numpy as np + +from diffprivlib.utils import warn_unused_args + + +def check_epsilon_delta(epsilon, delta, allow_zero=False): + """Checks that epsilon and delta are valid values for differential privacy. Throws an error if checks fail, + otherwise returns nothing. + + As well as the requirements of epsilon and delta separately, both cannot be simultaneously zero, unless + ``allow_zero`` is set to ``True``. + + Parameters + ---------- + epsilon : float + Epsilon parameter for differential privacy. Must be non-negative. + + delta : float + Delta parameter for differential privacy. Must be on the unit interval, [0, 1]. + + allow_zero : bool, default: False + Allow epsilon and delta both be zero. + + """ + if not isinstance(epsilon, Real) or not isinstance(delta, Real): + raise TypeError("Epsilon and delta must be numeric") + + if epsilon < 0: + raise ValueError("Epsilon must be non-negative") + + if not 0 <= delta <= 1: + raise ValueError("Delta must be in [0, 1]") + + if not allow_zero and epsilon + delta == 0: + raise ValueError("Epsilon and Delta cannot both be zero") + + +def check_bounds(bounds, shape=0, min_separation=0.0, dtype=float): + """Input validation for the ``bounds`` parameter. + + Checks that ``bounds`` is composed of a list of tuples of the form (lower, upper), where lower <= upper and both + are numeric. Also checks that ``bounds`` contains the appropriate number of dimensions, and that there is a + ``min_separation`` between the bounds. + + Parameters + ---------- + bounds : tuple + Tuple of bounds of the form (min, max). 
`min` and `max` can either be scalars or 1-dimensional arrays. + + shape : int, default: 0 + Number of dimensions to be expected in ``bounds``. + + min_separation : float, default: 0.0 + The minimum separation between `lower` and `upper` of each dimension. This separation is enforced if not + already satisfied. + + dtype : data-type, default: float + Data type of the returned bounds. + + Returns + ------- + bounds : tuple + + """ + if not isinstance(bounds, tuple): + raise TypeError(f"Bounds must be specified as a tuple of (min, max), got {type(bounds)}.") + if not isinstance(shape, Integral): + raise TypeError(f"shape parameter must be integer-valued, got {type(shape)}.") + + lower, upper = bounds + + if np.asarray(lower).size == 1 or np.asarray(upper).size == 1: + lower = np.ravel(lower).astype(dtype) + upper = np.ravel(upper).astype(dtype) + else: + lower = np.asarray(lower, dtype=dtype) + upper = np.asarray(upper, dtype=dtype) + + if lower.shape != upper.shape: + raise ValueError("lower and upper bounds must be the same shape array") + if lower.ndim > 1: + raise ValueError("lower and upper bounds must be scalar or a 1-dimensional array") + if lower.size not in (1, shape): + raise ValueError(f"lower and upper bounds must have {shape or 1} element(s), got {lower.size}.") + + n_bounds = lower.shape[0] + + for i in range(n_bounds): + _lower = lower[i] + _upper = upper[i] + + if not isinstance(_lower, Real) or not isinstance(_upper, Real): + raise TypeError(f"Each bound must be numeric, got {_lower} ({type(_lower)}) and {_upper} ({type(_upper)}).") + + if _lower > _upper: + raise ValueError(f"For each bound, lower bound must be smaller than upper bound, got {lower}, {upper})") + + if _upper - _lower < min_separation: + mid = (_upper + _lower) / 2 + lower[i] = mid - min_separation / 2 + upper[i] = mid + min_separation / 2 + + if shape == 0: + return lower.item(), upper.item() + + if n_bounds == 1: + lower = np.ones(shape, dtype=dtype) * lower.item() + upper = 
np.ones(shape, dtype=dtype) * upper.item() + + return lower, upper + + +def clip_to_norm(array, clip): + """Clips the examples of a 2-dimensional array to a given maximum norm. + + Parameters + ---------- + array : np.ndarray + Array to be clipped. After clipping, all examples have a 2-norm of at most `clip`. + + clip : float + Norm at which to clip each example + + Returns + ------- + array : np.ndarray + The clipped array. + + """ + if not isinstance(array, np.ndarray): + raise TypeError(f"Input array must be a numpy array, got {type(array)}.") + if array.ndim != 2: + raise ValueError(f"input array must be 2-dimensional, got {array.ndim} dimensions.") + if not isinstance(clip, Real): + raise TypeError(f"Clip value must be numeric, got {type(clip)}.") + if clip <= 0: + raise ValueError(f"Clip value must be strictly positive, got {clip}.") + + norms = np.linalg.norm(array, axis=1) / clip + norms[norms < 1] = 1 + + return array / norms[:, np.newaxis] + + +def clip_to_bounds(array, bounds): + """Clips the examples of a 2-dimensional array to given bounds. + + Parameters + ---------- + array : np.ndarray + Array to be clipped. After clipping, all examples have a 2-norm of at most `clip`. + + bounds : tuple + Tuple of bounds of the form (min, max) which the array is to be clipped to. `min` and `max` must be scalar, + unless array is 2-dimensional. + + Returns + ------- + array : np.ndarray + The clipped array. + + """ + if not isinstance(array, np.ndarray): + raise TypeError(f"Input array must be a numpy array, got {type(array)}.") + + lower, upper = check_bounds(bounds, np.size(bounds[0]), min_separation=0) + clipped_array = array.copy() + + if np.allclose(lower, np.min(lower)) and np.allclose(upper, np.max(upper)): + clipped_array = np.clip(clipped_array, np.min(lower), np.max(upper)) + else: + if array.ndim != 2: + raise ValueError(f"For non-scalar bounds, input array must be 2-dimensional. 
Got {array.ndim} dimensions.") + + for feature in range(array.shape[1]): + clipped_array[:, feature] = np.clip(array[:, feature], lower[feature], upper[feature]) + + return clipped_array + + +class DiffprivlibMixin: # pylint: disable=too-few-public-methods + """Mixin for Diffprivlib models.""" + _check_bounds = staticmethod(check_bounds) + _clip_to_norm = staticmethod(clip_to_norm) + _clip_to_bounds = staticmethod(clip_to_bounds) + _warn_unused_args = staticmethod(warn_unused_args) + + # todo: remove when scikit-learn v1.2 is a min requirement + def _validate_params(self): + pass + + @staticmethod + def _copy_parameter_constraints(cls, *args): + """Copies the parameter constraints for `*args` from `cls` + """ + if not hasattr(cls, "_parameter_constraints"): + return {} + + return {k: cls._parameter_constraints[k] for k in args if k in cls._parameter_constraints} diff --git a/diffprivlib_logger/__init__.py b/src/diffprivlib_logger/__init__.py similarity index 100% rename from diffprivlib_logger/__init__.py rename to src/diffprivlib_logger/__init__.py diff --git a/diffprivlib_logger/constants.py b/src/diffprivlib_logger/constants.py similarity index 100% rename from diffprivlib_logger/constants.py rename to src/diffprivlib_logger/constants.py diff --git a/diffprivlib_logger/deserialise.py b/src/diffprivlib_logger/deserialise.py similarity index 100% rename from diffprivlib_logger/deserialise.py rename to src/diffprivlib_logger/deserialise.py diff --git a/diffprivlib_logger/serialise.py b/src/diffprivlib_logger/serialise.py similarity index 100% rename from diffprivlib_logger/serialise.py rename to src/diffprivlib_logger/serialise.py diff --git a/tests/test_de_serialisation.py b/tests/test_de_serialisation.py index 991c93c..4e79f10 100644 --- a/tests/test_de_serialisation.py +++ b/tests/test_de_serialisation.py @@ -20,7 +20,7 @@ def test_serialize(): ) result_json = serialise_pipeline(example_pipe) - expected_json = """{"module": "diffprivlib", "version": "0.6.0", 
"pipeline": [{"type": "_dpl_type:StandardScaler", "name": "scaler", "params": {"with_mean": true, "with_std": true, "copy": true, "epsilon": 1.0, "bounds": {"_tuple": true, "_items": [[17, 1, 0, 0, 1], [90, 160, 10000, 4356, 99]]}, "random_state": null, "accountant": "_dpl_instance:BudgetAccountant"}}, {"type": "_dpl_type:PCA", "name": "pca", "params": {"n_components": 2, "copy": true, "whiten": false, "random_state": null, "centered": true, "epsilon": 1.0, "data_norm": 5, "bounds": null, "accountant": "_dpl_instance:BudgetAccountant"}}, {"type": "_dpl_type:LogisticRegression", "name": "lr", "params": {"tol": 0.0001, "C": 1.0, "fit_intercept": true, "random_state": null, "max_iter": 100, "verbose": 0, "warm_start": false, "n_jobs": null, "epsilon": 1.0, "data_norm": 5, "accountant": "_dpl_instance:BudgetAccountant"}}]}""" # noqa + expected_json = """{"module": "diffprivlib", "version": "0.6.0", "pipeline": [{"type": "_dpl_type:StandardScaler", "name": "scaler", "params": {"with_mean": true, "with_std": true, "copy": true, "epsilon": 1.0, "bounds": {"_tuple": true, "_items": [[17, 1, 0, 0, 1], [90, 160, 10000, 4356, 99]]}, "random_state": null, "accountant": "_dpl_instance:BudgetAccountant"}}, {"type": "_dpl_type:PCA", "name": "pca", "params": {"n_components": 2, "copy": true, "whiten": false, "random_state": null, "centered": true, "epsilon": 1.0, "data_norm": 5, "bounds": null, "accountant": "_dpl_instance:BudgetAccountant"}}, {"type": "_dpl_type:LogisticRegression", "name": "lr", "params": {"C": 1.0, "tol": 0.0001, "fit_intercept": true, "random_state": null, "max_iter": 100, "verbose": 0, "warm_start": false, "n_jobs": null, "epsilon": 1.0, "data_norm": 5, "accountant": "_dpl_instance:BudgetAccountant"}}]}""" # noqa expected_json_updated = expected_json.replace( "0.6.0", diffprivlib.__version__ ) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..5d6c8d5 --- /dev/null +++ b/uv.lock @@ -0,0 +1,584 @@ +version = 1 +revision = 3 +requires-python = 
">=3.10, <4" +resolution-markers = [ + "python_full_version >= '3.11'", + "python_full_version < '3.11'", +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "diffprivlib-logger" +version = "0.0.4" +source = { editable = "." } +dependencies = [ + { name = "joblib" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "setuptools" }, +] + +[package.optional-dependencies] +test = [ + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [ + { name = "joblib", specifier = ">=0.16.0" }, + { name = "numpy", specifier = ">=2.0.0" }, + { 
name = "pytest", marker = "extra == 'test'", specifier = ">=8.3" }, + { name = "scikit-learn", specifier = ">=0.24" }, + { name = "scikit-learn", specifier = ">=1.4.0" }, + { name = "scipy", specifier = ">=1.13.0" }, + { name = "setuptools", specifier = ">=49.0.0" }, +] +provides-extras = ["test"] + +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + +[[package]] +name = "numpy" +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, + { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = "2025-05-17T21:33:50.273Z" }, + { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash 
= "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = 
"2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, + { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/51/5093a2df15c4dc19da3f79d1021e891f5dcf1d9d1db6ba38891d5590f3fe/numpy-2.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33b3bf58ee84b172c067f56aeadc7ee9ab6de69c5e800ab5b10295d54c581adb", size = 16957183, upload-time = "2026-03-09T07:55:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/b5/7c/c061f3de0630941073d2598dc271ac2f6cbcf5c83c74a5870fea07488333/numpy-2.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ba7b51e71c05aa1f9bc3641463cd82308eab40ce0d5c7e1fd4038cbf9938147", size = 14968734, upload-time = "2026-03-09T07:56:00.494Z" }, + { url = "https://files.pythonhosted.org/packages/ef/27/d26c85cbcd86b26e4f125b0668e7a7c0542d19dd7d23ee12e87b550e95b5/numpy-2.4.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a1988292870c7cb9d0ebb4cc96b4d447513a9644801de54606dc7aabf2b7d920", size = 5475288, upload-time 
= "2026-03-09T07:56:02.857Z" }, + { url = "https://files.pythonhosted.org/packages/2b/09/3c4abbc1dcd8010bf1a611d174c7aa689fc505585ec806111b4406f6f1b1/numpy-2.4.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:23b46bb6d8ecb68b58c09944483c135ae5f0e9b8d8858ece5e4ead783771d2a9", size = 6805253, upload-time = "2026-03-09T07:56:04.53Z" }, + { url = "https://files.pythonhosted.org/packages/21/bc/e7aa3f6817e40c3f517d407742337cbb8e6fc4b83ce0b55ab780c829243b/numpy-2.4.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a016db5c5dba78fa8fe9f5d80d6708f9c42ab087a739803c0ac83a43d686a470", size = 15969479, upload-time = "2026-03-09T07:56:06.638Z" }, + { url = "https://files.pythonhosted.org/packages/78/51/9f5d7a41f0b51649ddf2f2320595e15e122a40610b233d51928dd6c92353/numpy-2.4.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:715de7f82e192e8cae5a507a347d97ad17598f8e026152ca97233e3666daaa71", size = 16901035, upload-time = "2026-03-09T07:56:09.405Z" }, + { url = "https://files.pythonhosted.org/packages/64/6e/b221dd847d7181bc5ee4857bfb026182ef69499f9305eb1371cbb1aea626/numpy-2.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2ddb7919366ee468342b91dea2352824c25b55814a987847b6c52003a7c97f15", size = 17325657, upload-time = "2026-03-09T07:56:12.067Z" }, + { url = "https://files.pythonhosted.org/packages/eb/b8/8f3fd2da596e1063964b758b5e3c970aed1949a05200d7e3d46a9d46d643/numpy-2.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a315e5234d88067f2d97e1f2ef670a7569df445d55400f1e33d117418d008d52", size = 18635512, upload-time = "2026-03-09T07:56:14.629Z" }, + { url = "https://files.pythonhosted.org/packages/5c/24/2993b775c37e39d2f8ab4125b44337ab0b2ba106c100980b7c274a22bee7/numpy-2.4.3-cp311-cp311-win32.whl", hash = "sha256:2b3f8d2c4589b1a2028d2a770b0fc4d1f332fb5e01521f4de3199a896d158ddd", size = 6238100, upload-time = "2026-03-09T07:56:17.243Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/1d/edccf27adedb754db7c4511d5eac8b83f004ae948fe2d3509e8b78097d4c/numpy-2.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:77e76d932c49a75617c6d13464e41203cd410956614d0a0e999b25e9e8d27eec", size = 12609816, upload-time = "2026-03-09T07:56:19.089Z" }, + { url = "https://files.pythonhosted.org/packages/92/82/190b99153480076c8dce85f4cfe7d53ea84444145ffa54cb58dcd460d66b/numpy-2.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:eb610595dd91560905c132c709412b512135a60f1851ccbd2c959e136431ff67", size = 10485757, upload-time = "2026-03-09T07:56:21.753Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ed/6388632536f9788cea23a3a1b629f25b43eaacd7d7377e5d6bc7b9deb69b/numpy-2.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:61b0cbabbb6126c8df63b9a3a0c4b1f44ebca5e12ff6997b80fcf267fb3150ef", size = 16669628, upload-time = "2026-03-09T07:56:24.252Z" }, + { url = "https://files.pythonhosted.org/packages/74/1b/ee2abfc68e1ce728b2958b6ba831d65c62e1b13ce3017c13943f8f9b5b2e/numpy-2.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7395e69ff32526710748f92cd8c9849b361830968ea3e24a676f272653e8983e", size = 14696872, upload-time = "2026-03-09T07:56:26.991Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d1/780400e915ff5638166f11ca9dc2c5815189f3d7cf6f8759a1685e586413/numpy-2.4.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:abdce0f71dcb4a00e4e77f3faf05e4616ceccfe72ccaa07f47ee79cda3b7b0f4", size = 5203489, upload-time = "2026-03-09T07:56:29.414Z" }, + { url = "https://files.pythonhosted.org/packages/0b/bb/baffa907e9da4cc34a6e556d6d90e032f6d7a75ea47968ea92b4858826c4/numpy-2.4.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:48da3a4ee1336454b07497ff7ec83903efa5505792c4e6d9bf83d99dc07a1e18", size = 6550814, upload-time = "2026-03-09T07:56:32.225Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/12/8c9f0c6c95f76aeb20fc4a699c33e9f827fa0d0f857747c73bb7b17af945/numpy-2.4.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32e3bef222ad6b052280311d1d60db8e259e4947052c3ae7dd6817451fc8a4c5", size = 15666601, upload-time = "2026-03-09T07:56:34.461Z" }, + { url = "https://files.pythonhosted.org/packages/bd/79/cc665495e4d57d0aa6fbcc0aa57aa82671dfc78fbf95fe733ed86d98f52a/numpy-2.4.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7dd01a46700b1967487141a66ac1a3cf0dd8ebf1f08db37d46389401512ca97", size = 16621358, upload-time = "2026-03-09T07:56:36.852Z" }, + { url = "https://files.pythonhosted.org/packages/a8/40/b4ecb7224af1065c3539f5ecfff879d090de09608ad1008f02c05c770cb3/numpy-2.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:76f0f283506c28b12bba319c0fab98217e9f9b54e6160e9c79e9f7348ba32e9c", size = 17016135, upload-time = "2026-03-09T07:56:39.337Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b1/6a88e888052eed951afed7a142dcdf3b149a030ca59b4c71eef085858e43/numpy-2.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:737f630a337364665aba3b5a77e56a68cc42d350edd010c345d65a3efa3addcc", size = 18345816, upload-time = "2026-03-09T07:56:42.31Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8f/103a60c5f8c3d7fc678c19cd7b2476110da689ccb80bc18050efbaeae183/numpy-2.4.3-cp312-cp312-win32.whl", hash = "sha256:26952e18d82a1dbbc2f008d402021baa8d6fc8e84347a2072a25e08b46d698b9", size = 5960132, upload-time = "2026-03-09T07:56:44.851Z" }, + { url = "https://files.pythonhosted.org/packages/d7/7c/f5ee1bf6ed888494978046a809df2882aad35d414b622893322df7286879/numpy-2.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:65f3c2455188f09678355f5cae1f959a06b778bc66d535da07bf2ef20cd319d5", size = 12316144, upload-time = "2026-03-09T07:56:47.057Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/46/8d1cb3f7a00f2fb6394140e7e6623696e54c6318a9d9691bb4904672cf42/numpy-2.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:2abad5c7fef172b3377502bde47892439bae394a71bc329f31df0fd829b41a9e", size = 10220364, upload-time = "2026-03-09T07:56:49.849Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" }, + { url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" }, + { url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 18334581, upload-time = "2026-03-09T07:57:09.114Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" }, + { url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" }, + { url = "https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = "2026-03-09T07:57:18.564Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" }, + { url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" }, + { url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" }, + { url = "https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" }, + { url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" }, + { url = "https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" }, + { url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" }, + { url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" }, + { url = "https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = 
"sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" }, + { url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" }, + { url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" }, + { url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 18339941, upload-time = "2026-03-09T07:58:00.577Z" }, + { url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, 
upload-time = "2026-03-09T07:58:03.331Z" }, + { url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" }, + { url = "https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" }, + { url = "https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" }, + { url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" }, + { url = "https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" }, + { url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" }, + { url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" }, + { url = "https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/e4/4dab9fb43c83719c29241c535d9e07be73bea4bc0c6686c5816d8e1b6689/numpy-2.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c6b124bfcafb9e8d3ed09130dbee44848c20b3e758b6bbf006e641778927c028", size = 16834892, upload-time = "2026-03-09T07:58:35.334Z" }, + { url = "https://files.pythonhosted.org/packages/c9/29/f8b6d4af90fed3dfda84ebc0df06c9833d38880c79ce954e5b661758aa31/numpy-2.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:76dbb9d4e43c16cf9aa711fcd8de1e2eeb27539dcefb60a1d5e9f12fae1d1ed8", size = 14893070, upload-time = "2026-03-09T07:58:37.7Z" }, + { url = "https://files.pythonhosted.org/packages/9a/04/a19b3c91dbec0a49269407f15d5753673a09832daed40c45e8150e6fa558/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:29363fbfa6f8ee855d7569c96ce524845e3d726d6c19b29eceec7dd555dab152", size = 5399609, upload-time = "2026-03-09T07:58:39.853Z" }, + { url = "https://files.pythonhosted.org/packages/79/34/4d73603f5420eab89ea8a67097b31364bf7c30f811d4dd84b1659c7476d9/numpy-2.4.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:bc71942c789ef415a37f0d4eab90341425a00d538cd0642445d30b41023d3395", size = 6714355, upload-time = "2026-03-09T07:58:42.365Z" }, + { url = "https://files.pythonhosted.org/packages/58/ad/1100d7229bb248394939a12a8074d485b655e8ed44207d328fdd7fcebc7b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e58765ad74dcebd3ef0208a5078fba32dc8ec3578fe84a604432950cd043d79", size = 15800434, upload-time = "2026-03-09T07:58:44.837Z" }, + { url = "https://files.pythonhosted.org/packages/0c/fd/16d710c085d28ba4feaf29ac60c936c9d662e390344f94a6beaa2ac9899b/numpy-2.4.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e236dbda4e1d319d681afcbb136c0c4a8e0f1a5c58ceec2adebb547357fe857", size = 16729409, upload-time = "2026-03-09T07:58:47.972Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = 
"sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = 
"threadpoolctl", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, + { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, + { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, + { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, + { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, + { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, + { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, + { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, + { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, + { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, + { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, + { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, + { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, + { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, + { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, + { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, + { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, + { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/92/53ea2181da8ac6bf27170191028aee7251f8f841f8d3edbfdcaf2008fde9/scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da", size = 8595835, upload-time = "2025-12-10T07:07:39.385Z" }, + { url = "https://files.pythonhosted.org/packages/01/18/d154dc1638803adf987910cdd07097d9c526663a55666a97c124d09fb96a/scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1", size = 8080381, upload-time = "2025-12-10T07:07:41.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/44/226142fcb7b7101e64fdee5f49dbe6288d4c7af8abf593237b70fca080a4/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b", size = 8799632, upload-time = "2025-12-10T07:07:43.899Z" }, + { url = "https://files.pythonhosted.org/packages/36/4d/4a67f30778a45d542bbea5db2dbfa1e9e100bf9ba64aefe34215ba9f11f6/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1", size = 9103788, upload-time = "2025-12-10T07:07:45.982Z" }, + { url = "https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b", size = 8081706, upload-time = "2025-12-10T07:07:48.111Z" }, + { url = "https://files.pythonhosted.org/packages/3d/46/5416595bb395757f754feb20c3d776553a386b661658fb21b7c814e89efe/scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961", size = 7688451, upload-time = "2025-12-10T07:07:49.873Z" }, + { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" }, + { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" }, + { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" }, + { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" }, + { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" }, + { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" }, + { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" }, + { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" }, + { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" }, + { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" }, + { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" }, + { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, +] + +[[package]] +name = "scipy" +version = "1.15.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/78/2f/4966032c5f8cc7e6a60f1b2e0ad686293b9474b65246b0c642e3ef3badd0/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c", size = 38702770, upload-time = "2025-05-08T16:04:20.849Z" }, + { url = "https://files.pythonhosted.org/packages/a0/6e/0c3bf90fae0e910c274db43304ebe25a6b391327f3f10b5dcc638c090795/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253", size = 30094511, upload-time = "2025-05-08T16:04:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/ea/b1/4deb37252311c1acff7f101f6453f0440794f51b6eacb1aad4459a134081/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f", size = 22368151, upload-time = "2025-05-08T16:04:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/38/7d/f457626e3cd3c29b3a49ca115a304cebb8cc6f31b04678f03b216899d3c6/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92", size = 25121732, upload-time = "2025-05-08T16:04:36.596Z" }, + { url = "https://files.pythonhosted.org/packages/db/0a/92b1de4a7adc7a15dcf5bddc6e191f6f29ee663b30511ce20467ef9b82e4/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82", size = 35547617, upload-time = "2025-05-08T16:04:43.546Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40", size = 37662964, upload-time = "2025-05-08T16:04:49.431Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/e1/3df8f83cb15f3500478c889be8fb18700813b95e9e087328230b98d547ff/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e", size = 37238749, upload-time = "2025-05-08T16:04:55.215Z" }, + { url = "https://files.pythonhosted.org/packages/93/3e/b3257cf446f2a3533ed7809757039016b74cd6f38271de91682aa844cfc5/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c", size = 40022383, upload-time = "2025-05-08T16:05:01.914Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/55bc4881973d3f79b479a5a2e2df61c8c9a04fcb986a213ac9c02cfb659b/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13", size = 41259201, upload-time = "2025-05-08T16:05:08.166Z" }, + { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255, upload-time = "2025-05-08T16:05:14.596Z" }, + { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035, upload-time = "2025-05-08T16:05:20.152Z" }, + { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499, upload-time = "2025-05-08T16:05:24.494Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602, upload-time = "2025-05-08T16:05:29.313Z" }, + { url = "https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415, upload-time = "2025-05-08T16:05:34.699Z" }, + { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622, upload-time = "2025-05-08T16:05:40.762Z" }, + { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796, upload-time = "2025-05-08T16:05:48.119Z" }, + { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684, upload-time = "2025-05-08T16:05:54.22Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504, upload-time = "2025-05-08T16:06:00.437Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, + { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, + { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, + { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" }, + { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, + { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, + { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, + { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, + { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, + { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, + { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, + { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, + { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, + { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, + { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, + { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, +] + +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = "https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, +] + +[[package]] +name = "setuptools" +version = "82.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, +] + +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + +[[package]] +name = "tomli" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, + { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = 
"2026-01-11T11:21:46.873Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, + { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, + { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, + { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, + { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, + { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, + { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, + { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, + { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, + { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, + { url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, + { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, + { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, + { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, + { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, + { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, + { url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, + { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, + { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, + { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, + { url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, + { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, + { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, + { url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, + { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, + { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +]