Skip to content
Binary file modified .gitignore
Binary file not shown.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ This is a Python stats toolkit for easy and powerful statistical operations.
- Perform hypothesis testing
- Simplify data visualization

[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
[![PyPI version](https://img.shields.io/pypi/v/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
[![PyPI downloads](https://img.shields.io/pypi/dm/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Tests](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/tests.yml/badge.svg)](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/tests.yml)
[![Publish](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/publish.yml/badge.svg)](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/publish.yml)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
[![Linting: flake8](https://img.shields.io/badge/linting-flake8-yellowgreen)](https://flake8.pycqa.org/)
Expand Down
2 changes: 1 addition & 1 deletion py_stats_toolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"""

# Version du toolkit
__version__ = "1.0.1"
__version__ = "1.0.4"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


# Imports des modules principaux
try:
Expand Down
117 changes: 117 additions & 0 deletions py_stats_toolkit/capsules/BaseCapsule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
=====================================================================
File : BaseCapsule.py
=====================================================================
version : 1.0.0
release : 15/06/2025
author : Phoenix Project
contact : contact@phoenixproject.onmicrosoft.fr
license : MIT
=====================================================================
Copyright (c) 2025, Phoenix Project
All rights reserved.
Description of the BaseCapsule.py module:
Base class for all statistical analysis capsules/modules.
Provides common interface and functionality for data processing.
tags : module, base, capsule
=====================================================================
"""

from typing import Any, Dict, Union

import numpy as np
import pandas as pd


class BaseCapsule:
    """
    Base class for all statistical analysis modules.

    Provides a common interface for data validation, configuration,
    and the processing workflow.

    Attributes:
        data: Input data being processed (initialised to ``None``).
        parameters: Configuration parameters collected by ``configure``.
        result: Analysis results (initialised to ``None``).
    """

    def __init__(self):
        """Initialize BaseCapsule with default attributes."""
        self.data = None
        self.parameters = {}
        self.result = None

    def configure(self, **kwargs) -> None:
        """
        Configure the module parameters.

        New values are merged into the existing parameters; keys that are
        already present are overwritten.

        Args:
            **kwargs: Configuration parameters
        """
        self.parameters.update(kwargs)

    def validate_data(
        self, data: Union[pd.DataFrame, pd.Series, np.ndarray, list]
    ) -> None:
        """
        Validate input data.

        Args:
            data: Data to validate

        Raises:
            ValueError: If data is ``None``, contains no elements, or cannot
                be converted to a NumPy array.
        """
        if data is None:
            raise ValueError("Data cannot be None")

        if isinstance(data, (pd.DataFrame, pd.Series)):
            is_empty = data.empty
        elif isinstance(data, np.ndarray):
            # ``size`` (not ``len``) so that 0-d arrays do not raise TypeError
            # and shapes such as (3, 0) are recognised as holding no elements.
            is_empty = data.size == 0
        elif isinstance(data, list):
            is_empty = len(data) == 0
        else:
            # Unknown container: try to convert to an array. Only the
            # conversion is guarded, so the emptiness error raised below is
            # never re-wrapped as an "Invalid data type" error.
            try:
                is_empty = np.array(data).size == 0
            except (TypeError, ValueError) as exc:
                raise ValueError(
                    f"Invalid data type: {type(data)}. Error: {exc}"
                ) from exc

        if is_empty:
            raise ValueError("Data cannot be empty")

    def process(
        self, data: Union[pd.DataFrame, pd.Series, np.ndarray], **kwargs
    ) -> Dict[str, Any]:
        """
        Process data and perform analysis.

        This method should be overridden by subclasses.

        Args:
            data: Input data to process
            **kwargs: Additional processing parameters

        Returns:
            Dict[str, Any]: Analysis results

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement the process method")

    def get_result(self) -> Any:
        """
        Get the analysis result.

        Returns:
            The current value of ``self.result``.
        """
        return self.result

    def reset(self) -> None:
        """Reset the module to its initial state (no data, parameters, result)."""
        self.data = None
        self.parameters = {}
        self.result = None
96 changes: 96 additions & 0 deletions py_stats_toolkit/stats/correlation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Correlation analysis module.
Provides the CorrelationAnalysis class for computing correlations between variables.
"""

from typing import Any, Dict, Union

import numpy as np
import pandas as pd
from scipy import stats


class CorrelationAnalysis:
    """
    Correlation analysis class.

    Computes correlation coefficients between variables with support for
    different correlation methods (Pearson, Spearman, Kendall).
    """

    # Supported method names, in the order they are listed in error messages.
    _VALID_METHODS = ("pearson", "spearman", "kendall")

    def __init__(self, method: str = "pearson"):
        """
        Initialize CorrelationAnalysis.

        Args:
            method: Correlation method ('pearson', 'spearman', or 'kendall')

        Raises:
            ValueError: If ``method`` is not one of the supported methods.
        """
        # Validate eagerly so a bad method fails at construction time rather
        # than on the first call to ``analyze``.
        if method not in self._VALID_METHODS:
            raise ValueError(
                f"Unknown correlation method: {method}. "
                f"Expected one of: {', '.join(self._VALID_METHODS)}"
            )
        self.method = method

    def analyze(
        self,
        data: Union[pd.DataFrame, pd.Series, np.ndarray],
        y: Union[pd.Series, np.ndarray, None] = None,
    ) -> Dict[str, Any]:
        """
        Perform correlation analysis.

        Args:
            data: Input data (DataFrame, Series, or array)
            y: Optional second variable for bivariate correlation

        Returns:
            Dictionary containing correlation results:
            - Series/array without ``y``: ``correlation`` (always 1.0),
              ``method`` and ``n``.
            - DataFrame: ``correlation_matrix`` and ``method`` (``y`` is
              not used in this case).
            - ``data`` with ``y``: ``correlation``, ``p_value``, ``method``
              and ``n``.

        Raises:
            ValueError: If the input combination is not supported, or if
                ``self.method`` was changed to an unsupported value.
        """
        # Single variable: its correlation with itself is reported as 1.0.
        if y is None and isinstance(data, (pd.Series, np.ndarray)):
            return {"correlation": 1.0, "method": self.method, "n": len(data)}

        # DataFrame: pairwise correlation matrix of its columns.
        if isinstance(data, pd.DataFrame):
            self._require_known_method()
            return {
                "correlation_matrix": data.corr(method=self.method),
                "method": self.method,
            }

        if y is not None:
            return self._bivariate(data, y)

        raise ValueError(
            "Invalid input: provide either a DataFrame or two arrays/Series"
        )

    def _require_known_method(self) -> None:
        """Raise ValueError if ``self.method`` was reassigned to a bad value."""
        if self.method not in self._VALID_METHODS:
            raise ValueError(f"Unknown correlation method: {self.method}")

    @staticmethod
    def _to_array(values: Any) -> Any:
        """Unwrap a Series and convert non-ndarray input to an ndarray."""
        if isinstance(values, pd.Series):
            values = values.values
        return values if isinstance(values, np.ndarray) else np.array(values)

    def _bivariate(self, x: Any, y: Any) -> Dict[str, Any]:
        """Compute the coefficient and p-value between two variables."""
        x = self._to_array(x)
        y = self._to_array(y)
        self._require_known_method()

        compute = {
            "pearson": stats.pearsonr,
            "spearman": stats.spearmanr,
            "kendall": stats.kendalltau,
        }[self.method]
        corr, pval = compute(x, y)

        return {
            "correlation": corr,
            "p_value": pval,
            "method": self.method,
            "n": len(x),
        }

Check notice on line 96 in py_stats_toolkit/stats/correlation.py

View check run for this annotation

codefactor.io / CodeFactor

py_stats_toolkit/stats/correlation.py#L31-L96

Complex Method
57 changes: 57 additions & 0 deletions py_stats_toolkit/stats/descriptives.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Descriptive statistics module.
Provides the DescriptiveStatistics class for computing descriptive
statistics on various data types (lists, arrays, Series, DataFrames).
"""

from typing import Any, Dict, Union

import numpy as np
import pandas as pd


class DescriptiveStatistics:
    """
    Class for computing descriptive statistics.

    Handles various data types and provides comprehensive statistical measures
    including central tendency, dispersion, and percentiles.

    The class holds no state, so no ``__init__`` is defined and the default
    constructor is used.
    """

    def analyze(
        self, data: Union[list, np.ndarray, pd.Series, pd.DataFrame]
    ) -> Dict[str, Any]:
        """
        Analyze data and compute descriptive statistics.

        Args:
            data: Input data (list, array, Series, or DataFrame)

        Returns:
            Dictionary containing statistical measures (``count``, ``mean``,
            ``std``, ``min``, ``max``, ``median``, ``q25``, ``q75``). For a
            DataFrame whose column count is not exactly one, a dictionary
            mapping each column name to such a dictionary is returned
            instead.

        Raises:
            ValueError: If the data contains no observations.
        """
        if isinstance(data, pd.DataFrame):
            if len(data.columns) != 1:
                # Analyze each column independently.
                return {col: self.analyze(data[col]) for col in data.columns}
            data = data.iloc[:, 0].values
        elif isinstance(data, pd.Series):
            data = data.values
        elif isinstance(data, list):
            data = np.array(data)

        # Fail early with a clear message instead of letting NumPy emit
        # runtime warnings and then fail inside ``np.min``.
        if len(data) == 0:
            raise ValueError("Data cannot be empty")

        return {
            "count": len(data),
            "mean": np.mean(data),
            "std": np.std(data),  # NumPy default: population std (ddof=0)
            "min": np.min(data),
            "max": np.max(data),
            "median": np.median(data),
            "q25": np.percentile(data, 25),
            "q75": np.percentile(data, 75),
        }
97 changes: 97 additions & 0 deletions py_stats_toolkit/stats/regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
Linear regression module.

Provides the LinearRegression class for performing linear regression analysis.
"""

from typing import Any, Dict, Union

import numpy as np
from sklearn.linear_model import LinearRegression as SKLearnLinearRegression
from sklearn.metrics import mean_squared_error, r2_score


class LinearRegression:
    """
    Linear regression analysis class.

    Wraps the scikit-learn ``LinearRegression`` estimator imported by this
    module and tracks whether it has been fitted.
    """

    def __init__(self):
        """Create the underlying estimator in an unfitted state."""
        self.model = SKLearnLinearRegression()
        self.is_fitted = False

    @staticmethod
    def _as_ndarray(values):
        """Return ``values`` unchanged if it is an ndarray, else convert it."""
        if isinstance(values, np.ndarray):
            return values
        return np.array(values)

    def fit(
        self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]
    ) -> "LinearRegression":
        """
        Fit the linear regression model.

        Args:
            X: Feature matrix
            y: Target vector

        Returns:
            This instance, so that calls can be chained.
        """
        features = self._as_ndarray(X)
        targets = self._as_ndarray(y)

        self.model.fit(features, targets)
        self.is_fitted = True
        return self

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Make predictions using the fitted model.

        Args:
            X: Feature matrix

        Returns:
            Predicted values

        Raises:
            RuntimeError: If the model has not been fitted yet
        """
        if self.is_fitted:
            return self.model.predict(self._as_ndarray(X))
        raise RuntimeError("Model must be fitted before making predictions")

    def analyze(
        self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]
    ) -> Dict[str, Any]:
        """
        Perform complete regression analysis.

        Fits the model on ``X`` and ``y``, predicts on the same ``X`` and
        scores those predictions against ``y``.

        Args:
            X: Feature matrix
            y: Target vector

        Returns:
            Dictionary with ``coefficients``, ``intercept``, ``predictions``,
            ``mse`` and ``r2``.
        """
        self.fit(X, y)
        fitted_values = self.predict(X)
        estimator = self.model

        report: Dict[str, Any] = {}
        report["coefficients"] = estimator.coef_
        report["intercept"] = estimator.intercept_
        report["predictions"] = fitted_values
        report["mse"] = mean_squared_error(y, fitted_values)
        report["r2"] = r2_score(y, fitted_values)
        return report

    @property
    def coef_(self):
        """Model coefficients, or ``None`` while the model is unfitted."""
        if not self.is_fitted:
            return None
        return self.model.coef_

    @property
    def intercept_(self):
        """Model intercept, or ``None`` while the model is unfitted."""
        if not self.is_fitted:
            return None
        return self.model.intercept_
Loading