
wip for quantile transform #2

Open · wants to merge 1 commit into base: main
11 changes: 11 additions & 0 deletions afsklearn/__init__.py
@@ -4,6 +4,7 @@
 import yaml

 app_dir = Path(__file__).resolve().parent
+__version__ = '0.1.0'


 def load_yaml_file(name: str, directory: Path = app_dir) -> Any:
@@ -13,3 +14,13 @@ def load_yaml_file(name: str, directory: Path = app_dir) -> Any:


 patches_info = load_yaml_file("patched_modules.yml")
+
+from .patcher import Patcher
+
+def patch_sklearn():
+    Patcher.patch_all()
+
+def unpatch_sklearn():
+    Patcher.rollback_all()
+
+__all__ = ['Patcher', 'patch_sklearn', 'unpatch_sklearn']
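
The new module-level helpers give afsklearn a patch/unpatch entry point. A minimal usage sketch, assuming Patcher.patch_all() and Patcher.rollback_all() swap scikit-learn estimators for the ArrayFire-backed versions listed in patched_modules.yml, as the names suggest:

import afsklearn

afsklearn.patch_sklearn()
# scikit-learn classes resolved from here on are the patched, ArrayFire-backed ones
# ... build, fit, and score estimators as usual ...
afsklearn.unpatch_sklearn()  # restore stock scikit-learn behaviour
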
3 changes: 1 addition & 2 deletions afsklearn/_nn_utils.py
@@ -4,10 +4,9 @@
 # License: BSD 3 clause

 import arrayfire as af
 #import numpy as np
-import numpy as np
 import numpy
-from ._type_utils import typemap
+from ..utils._type_utils import typemap


def logistic_sigmoid(x):
154 changes: 154 additions & 0 deletions afsklearn/afClassifierMixin.py
@@ -0,0 +1,154 @@
import arrayfire as af
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.utils import check_consistent_length, column_or_1d
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.sparsefuncs import count_nonzero


def _weighted_sum(sample_score, sample_weight, normalize=False):
    if normalize:
        return np.average(sample_score, weights=sample_weight)
    elif sample_weight is not None:
        return np.dot(sample_score, sample_weight)
    else:
        return sample_score.sum()
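
# Hedged doctest-style example of the helper above (sample_score is
# typically a boolean array of per-sample hits, as produced by
# accuracy_score below):
# >>> _weighted_sum(np.array([True, False, True, True]), None, normalize=True)
# 0.75
# >>> _weighted_sum(np.array([True, False, True, True]), None)
# 3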

def _check_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    This converts multiclass or binary types to a common shape, and raises a
    ValueError for a mix of multilabel and multiclass targets, a mix of
    multilabel formats, for the presence of continuous-valued or multioutput
    targets, or for targets of different lengths.

    Column vectors are squeezed to 1d, while multilabel formats are returned
    as CSR sparse label indicators.

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    Returns
    -------
    type_true : one of {'multilabel-indicator', 'multiclass', 'binary'}
        The type of the true target data, as output by
        ``utils.multiclass.type_of_target``.

    y_true : array or indicator matrix

    y_pred : array or indicator matrix
    """
    check_consistent_length(y_true, y_pred)
    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    y_type = {type_true, type_pred}
    if y_type == {"binary", "multiclass"}:
        y_type = {"multiclass"}

    if len(y_type) > 1:
        raise ValueError("Classification metrics can't handle a mix of {0} "
                         "and {1} targets".format(type_true, type_pred))

    # y_type now holds exactly one value, so the set is no longer needed
    y_type = y_type.pop()

    # No metrics support the "multiclass-multioutput" format
    if y_type not in ["binary", "multiclass", "multilabel-indicator"]:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type in ["binary", "multiclass"]:
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)
        if y_type == "binary":
            unique_values = np.union1d(y_true, y_pred)
            if len(unique_values) > 2:
                y_type = "multiclass"

    if y_type.startswith('multilabel'):
        y_true = csr_matrix(y_true)
        y_pred = csr_matrix(y_pred)
        y_type = 'multilabel-indicator'

    return y_type, y_true, y_pred
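
# A doctest-style sketch of the behaviour (hedged; assumes the stock
# scikit-learn helpers imported above):
# >>> _check_targets([0, 1, 1], [0, 2, 1])
# ('multiclass', array([0, 1, 1]), array([0, 2, 1]))
# A binary y_true mixed with a multiclass y_pred is coerced to 'multiclass';
# mixing multilabel targets with either raises a ValueError.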




def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
    """Accuracy classification score.

    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.

    Read more in the :ref:`User Guide <accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    score : float
        If ``normalize == True``, return the fraction of correctly
        classified samples (float), else returns the number of correctly
        classified samples (int).

        The best performance is 1 with ``normalize == True`` and the number
        of samples with ``normalize == False``.

    See also
    --------
    jaccard_score, hamming_loss, zero_one_loss

    Notes
    -----
    In binary and multiclass classification, this function is equal
    to the ``jaccard_score`` function.

    Examples
    --------
    >>> from sklearn.metrics import accuracy_score
    >>> y_pred = [0, 2, 1, 3]
    >>> y_true = [0, 1, 2, 3]
    >>> accuracy_score(y_true, y_pred)
    0.5
    >>> accuracy_score(y_true, y_pred, normalize=False)
    2

    In the multilabel case with binary label indicators:

    >>> import numpy as np
    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
    0.5
    """
    # Compute accuracy for each possible representation
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)

    if y_type.startswith('multilabel'):
        differing_labels = count_nonzero(y_true - y_pred, axis=1)
        score = differing_labels == 0
    else:
        score = y_true == y_pred

    return _weighted_sum(score, sample_weight, normalize)

class afClassifierMixin:
    """ArrayFire-enabled mixin class for all classifiers in scikit-learn."""

    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None):
        """
        Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy,
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of ``self.predict(X)`` w.r.t. y.
        """
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)

    def _more_tags(self):
        return {'requires_y': True}
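
As a quick sanity check of the mixin, a hedged usage sketch: the toy _ConstantClassifier below is hypothetical and not part of this PR; it exists only to exercise afClassifierMixin.score (the import path follows this file's location in the diff):

import numpy as np
from afsklearn.afClassifierMixin import afClassifierMixin

class _ConstantClassifier(afClassifierMixin):
    """Toy estimator that always predicts class 0."""
    def predict(self, X):
        return np.zeros(len(X), dtype=int)

clf = _ConstantClassifier()
X = np.zeros((4, 2))
y = np.array([0, 0, 1, 0])
print(clf.score(X, y))  # 0.75: three of the four labels match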