ThePhoenixAgency · EthanThePhoenix38 · Dec 16, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025
diff --git a/.gitignore b/.gitignore
diff --git a/README.md b/README.md
@@ -7,8 +7,14 @@ This is a Python stats toolkit for easy and powerful statistical operations.
 - Perform hypothesis testing
 - Simplify data visualization
 
+[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+[![PyPI version](https://img.shields.io/pypi/v/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
+[![PyPI pyversions](https://img.shields.io/pypi/pyversions/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
+[![PyPI downloads](https://img.shields.io/pypi/dm/py-stats-toolkit.svg)](https://pypi.org/project/py-stats-toolkit/)
 [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Tests](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/tests.yml/badge.svg)](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/tests.yml)
+[![Publish](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/publish.yml/badge.svg)](https://github.com/ThePhoenixAgency/py-stats-toolkit/actions/workflows/publish.yml)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
 [![Linting: flake8](https://img.shields.io/badge/linting-flake8-yellowgreen)](https://flake8.pycqa.org/)

diff --git a/py_stats_toolkit/__init__.py b/py_stats_toolkit/__init__.py
@@ -13,7 +13,7 @@
 """
 
 # Version du toolkit
-__version__ = "1.0.1"
+__version__ = "1.0.4"
 
 # Imports des modules principaux
 try:

diff --git a/py_stats_toolkit/capsules/BaseCapsule.py b/py_stats_toolkit/capsules/BaseCapsule.py
@@ -0,0 +1,117 @@
+"""
+=====================================================================
+File : BaseCapsule.py
+=====================================================================
+version : 1.0.0
+release : 15/06/2025
+author : Phoenix Project
+contact : contact@phoenixproject.onmicrosoft.fr
+license : MIT
+=====================================================================
+Copyright (c) 2025, Phoenix Project
+All rights reserved.
+
+Module description for BaseCapsule.py
+
+Base class for all statistical analysis capsules/modules.
+Provides common interface and functionality for data processing.
+
+tags : module, base, capsule
+=====================================================================
+"""
+
+from typing import Any, Dict, Union
+
+import numpy as np
+import pandas as pd
+
+
+class BaseCapsule:
+    """
+    Base class for all statistical analysis modules.
+
+    Provides a common interface for data validation, configuration,
+    and the processing workflow.
+
+    Attributes:
+        data: Input data being processed (None after init and reset)
+        parameters: Configuration parameters collected by configure()
+        result: Analysis results (None after init and reset)
+    """
+
+    def __init__(self):
+        """Initialize BaseCapsule with default attributes."""
+        self.data = None
+        self.parameters = {}
+        self.result = None
+
+    def configure(self, **kwargs) -> None:
+        """
+        Configure the module parameters.
+
+        Args:
+            **kwargs: Configuration parameters merged into ``parameters``;
+                values for keys that already exist are overwritten
+        """
+        self.parameters.update(kwargs)
+
+    def validate_data(
+        self, data: Union[pd.DataFrame, pd.Series, np.ndarray, list]
+    ) -> None:
+        """
+        Validate input data.
+
+        Args:
+            data: Data to validate
+
+        Raises:
+            ValueError: If data is None, empty, or not convertible to an array
+        """
+        if data is None:
+            raise ValueError("Data cannot be None")
+
+        if isinstance(data, (pd.DataFrame, pd.Series)):
+            is_empty = data.empty
+        elif isinstance(data, (np.ndarray, list)):
+            is_empty = len(data) == 0
+        else:
+            # Guard only the conversion: the "empty" error raised below must
+            # not be caught and re-wrapped as an "Invalid data type" error.
+            try:
+                is_empty = np.array(data).size == 0
+            except Exception as e:
+                message = f"Invalid data type: {type(data)}. Error: {e}"
+                raise ValueError(message) from e
+
+        if is_empty:
+            raise ValueError("Data cannot be empty")
+
+    def process(
+        self, data: Union[pd.DataFrame, pd.Series, np.ndarray], **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Process data and perform analysis.
+
+        This method should be overridden by subclasses.
+
+        Args:
+            data: Input data to process
+            **kwargs: Additional processing parameters
+
+        Returns:
+            Dict[str, Any]: Analysis results
+
+        Raises:
+            NotImplementedError: Always, in this base implementation
+        """
+        raise NotImplementedError("Subclasses must implement the process method")
+
+    def get_result(self) -> Any:
+        """Return the stored analysis result (None after init and reset)."""
+        return self.result
+
+    def reset(self) -> None:
+        """Reset the module to initial state."""
+        self.data = None
+        self.parameters = {}
+        self.result = None
diff --git a/py_stats_toolkit/stats/correlation.py b/py_stats_toolkit/stats/correlation.py
@@ -0,0 +1,96 @@
+"""
+Correlation analysis module.
+
+Provides the CorrelationAnalysis class for computing correlations between variables.
+"""
+
+from typing import Any, Dict, Union
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+
+class CorrelationAnalysis:
+    """
+    Correlation analysis class.
+
+    Computes correlation coefficients between variables with support for
+    different correlation methods (Pearson, Spearman, Kendall).
+    """
+
+    # Supported method names; a tuple keeps the error message order stable.
+    _METHODS = ("pearson", "spearman", "kendall")
+
+    def __init__(self, method: str = "pearson"):
+        """
+        Initialize CorrelationAnalysis.
+
+        Args:
+            method: Correlation method ('pearson', 'spearman', or 'kendall')
+
+        Raises:
+            ValueError: If method is not one of the supported names
+        """
+        if method not in self._METHODS:
+            raise ValueError(
+                f"Method must be one of {self._METHODS}, got '{method}'"
+            )
+        self.method = method
+
+    def analyze(
+        self,
+        data: Union[pd.DataFrame, pd.Series, np.ndarray],
+        y: Union[pd.Series, np.ndarray, None] = None,
+    ) -> Dict[str, Any]:
+        """
+        Perform correlation analysis.
+
+        Args:
+            data: Input data (DataFrame, Series, or array)
+            y: Optional second variable for bivariate correlation
+
+        Returns:
+            Dictionary containing correlation results. A Series/array without
+            y gives 'correlation' (always 1.0), 'method' and 'n'; a DataFrame
+            gives 'correlation_matrix' and 'method' (y is ignored); otherwise
+            'correlation', 'p_value', 'method' and 'n' are returned.
+
+        Raises:
+            ValueError: If the method is unknown or the input is unsupported
+        """
+        # Univariate case: a variable is perfectly correlated with itself
+        if y is None and isinstance(data, (pd.Series, np.ndarray)):
+            return {"correlation": 1.0, "method": self.method, "n": len(data)}
+
+        # DataFrame case - compute correlation matrix
+        if isinstance(data, pd.DataFrame):
+            # self.method may have been reassigned after construction
+            if self.method not in self._METHODS:
+                raise ValueError(f"Unknown correlation method: {self.method}")
+            corr_matrix = data.corr(method=self.method)
+            return {"correlation_matrix": corr_matrix, "method": self.method}
+
+        if y is None:
+            raise ValueError(
+                "Invalid input: provide either a DataFrame or two arrays/Series"
+            )
+
+        # Bivariate case: scipy returns (statistic, p-value) for each method
+        bivariate = {
+            "pearson": stats.pearsonr,
+            "spearman": stats.spearmanr,
+            "kendall": stats.kendalltau,
+        }
+        x = np.asarray(data)
+        y = np.asarray(y)
+        if self.method not in bivariate:
+            raise ValueError(f"Unknown correlation method: {self.method}")
+        corr, pval = bivariate[self.method](x, y)
+
+        return {
+            "correlation": corr,
+            "p_value": pval,
+            "method": self.method,
+            "n": len(x),
+        }
diff --git a/py_stats_toolkit/stats/descriptives.py b/py_stats_toolkit/stats/descriptives.py
@@ -0,0 +1,57 @@
+"""
+Descriptive statistics module.
+
+Provides the DescriptiveStatistics class for computing descriptive
+statistics on various data types (lists, arrays, Series, DataFrames).
+"""
+
+from typing import Any, Dict, Union
+
+import numpy as np
+import pandas as pd
+
+
+class DescriptiveStatistics:
+    """
+    Class for computing descriptive statistics.
+
+    Handles various data types and provides comprehensive statistical measures
+    including central tendency, dispersion, and percentiles.
+    """
+
+    def __init__(self):
+        """Initialize DescriptiveStatistics (the class keeps no state)."""
+
+    def analyze(
+        self, data: Union[list, np.ndarray, pd.Series, pd.DataFrame]
+    ) -> Dict[str, Any]:
+        """
+        Analyze data and compute descriptive statistics.
+
+        Args:
+            data: Input data (list, array, Series, or DataFrame)
+
+        Returns:
+            Dictionary of measures (one per column for a multi-column DataFrame)
+
+        Raises:
+            ValueError: If the data contains no values
+        """
+        if isinstance(data, pd.DataFrame):
+            if len(data.columns) != 1:
+                return {col: self.analyze(data[col]) for col in data.columns}
+            data = data.iloc[:, 0]
+        values = np.asarray(data)
+        if values.size == 0:
+            raise ValueError("Data cannot be empty")
+
+        return {
+            "count": len(values),
+            "mean": np.mean(values),
+            "std": np.std(values),  # population std (NumPy default ddof=0)
+            "min": np.min(values),
+            "max": np.max(values),
+            "median": np.median(values),
+            "q25": np.percentile(values, 25),
+            "q75": np.percentile(values, 75),
+        }
diff --git a/py_stats_toolkit/stats/regression.py b/py_stats_toolkit/stats/regression.py
@@ -0,0 +1,97 @@
+"""
+Linear regression module.
+
+Provides the LinearRegression class for performing linear regression analysis.
+"""
+
+from typing import Any, Dict, Union
+
+import numpy as np
+from sklearn.linear_model import LinearRegression as SKLearnLinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+
+
+class LinearRegression:
+    """
+    Linear regression analysis class.
+
+    Wraps a scikit-learn linear model and tracks whether it has been fitted,
+    offering fit, predict and a combined analysis call.
+    """
+
+    def __init__(self):
+        """Create the underlying estimator in an unfitted state."""
+        self.model = SKLearnLinearRegression()
+        self.is_fitted = False
+
+    def fit(
+        self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]
+    ) -> "LinearRegression":
+        """
+        Fit the linear regression model.
+
+        Args:
+            X: Feature matrix
+            y: Target vector
+
+        Returns:
+            This instance, so calls can be chained
+        """
+        features = X if isinstance(X, np.ndarray) else np.array(X)
+        targets = y if isinstance(y, np.ndarray) else np.array(y)
+
+        self.model.fit(features, targets)
+        self.is_fitted = True
+        return self
+
+    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
+        """
+        Predict targets with the fitted model.
+
+        Args:
+            X: Feature matrix
+
+        Returns:
+            Predicted values
+
+        Raises:
+            RuntimeError: If model hasn't been fitted
+        """
+        if self.is_fitted:
+            features = X if isinstance(X, np.ndarray) else np.array(X)
+            return self.model.predict(features)
+
+        raise RuntimeError("Model must be fitted before making predictions")
+
+    def analyze(
+        self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]
+    ) -> Dict[str, Any]:
+        """
+        Fit the model on (X, y) and report in-sample metrics.
+
+        Args:
+            X: Feature matrix
+            y: Target vector
+
+        Returns:
+            Dictionary with 'coefficients', 'intercept', 'predictions',
+            'mse' and 'r2', all computed on the data passed in
+        """
+        self.fit(X, y)
+        y_hat = self.predict(X)
+        report = {"coefficients": self.model.coef_}
+        report["intercept"] = self.model.intercept_
+        report["predictions"] = y_hat
+        report["mse"] = mean_squared_error(y, y_hat)
+        report["r2"] = r2_score(y, y_hat)
+        return report
+
+    @property
+    def coef_(self):
+        """Model coefficients, or None while the model is unfitted."""
+        return None if not self.is_fitted else self.model.coef_
+
+    @property
+    def intercept_(self):
+        """Model intercept, or None while the model is unfitted."""
+        return None if not self.is_fitted else self.model.intercept_