Skip to content

Commit

Permalink
adding is_custom to pandas (#361)
Browse files Browse the repository at this point in the history
Co-authored-by: jonatank <[email protected]>
  • Loading branch information
jkkronk and jonatank authored Dec 14, 2024
1 parent 9218e17 commit 6bb3adb
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 1 deletion.
24 changes: 23 additions & 1 deletion cuallee/pandas_validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Union, List
from typing import Callable, Dict, Union, List
from cuallee import Check, Rule, CheckStatus
import pandas as pd # type: ignore
import operator
Expand All @@ -7,6 +7,7 @@
from toolz import first # type: ignore
from numbers import Number
from cuallee import utils as cuallee_utils
from cuallee import CustomComputeException
from itertools import repeat


Expand Down Expand Up @@ -235,6 +236,27 @@ def workflow(dataframe):

return workflow(dataframe.loc[:, rule.column])

def is_custom(self, rule: Rule, dataframe: pd.DataFrame) -> Union[bool, int]:
    """Validate a dataframe with a user-supplied function and count truthy results.

    ``rule.value`` is called with the whole ``dataframe``. Its return value is
    reduced to a single Series: the last column when a DataFrame is returned,
    the Series itself when a Series is returned, otherwise ``pd.Series(result)``.
    That Series is cast to bool and the number of truthy entries is returned.

    Args:
        rule: Rule whose ``value`` attribute holds the custom callable.
        dataframe: Data handed unchanged to the custom callable.

    Returns:
        The count of truthy entries in the resulting Series.

    Raises:
        CustomComputeException: If ``rule.value`` is not callable, if the
            callable returns a DataFrame without columns, or if any other
            error occurs while computing the result. The original error is
            attached as ``__cause__``.
    """
    try:
        # Explicit raises instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let invalid input slip through
        # and surface later as an unrelated error message.
        if not callable(rule.value):
            raise TypeError("Please provide a Callable/Function for validation")

        result = rule.value(dataframe)

        if isinstance(result, pd.DataFrame):
            if len(result.columns) < 1:
                raise ValueError("Custom function should return at least one column")
            # By convention the verdict is the last column of the returned frame.
            result = result.iloc[:, -1]
        elif not isinstance(result, pd.Series):
            result = pd.Series(result)

        return result.astype(bool).astype(int).sum()
    except Exception as err:
        # Keep the message callers match on, and chain the cause so the
        # original traceback is preserved for debugging.
        raise CustomComputeException(str(err)) from err


def compute(rules: Dict[str, Rule]):
"""Pandas computes directly on the predicates"""
Expand Down
44 changes: 44 additions & 0 deletions test/unit/pandas_dataframe/test_is_custom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest

from cuallee import Check, CheckLevel, CustomComputeException
import pandas as pd


def test_positive(check: Check):
    """A custom predicate that holds for every row passes with zero violations."""
    frame = pd.DataFrame({"id": range(10)})
    # The fixture argument is replaced by a freshly built Check.
    check = Check(CheckLevel.WARNING, "pytest")
    check.is_custom("id", lambda data: data.assign(test=(data["id"] >= 0)))
    first_row = check.validate(frame).iloc[0]
    assert first_row.status == "PASS"
    assert first_row.violations == 0
    assert first_row.pass_threshold == 1.0


def test_negative(check: Check):
    """A predicate true for only half of the ten rows fails with five violations."""
    frame = pd.DataFrame({"id": range(10)})
    # The fixture argument is replaced by a freshly built Check.
    check = Check(CheckLevel.WARNING, "pytest")
    check.is_custom("id", lambda data: data.assign(test=(data["id"] >= 5)))
    first_row = check.validate(frame).iloc[0]
    assert first_row.status == "FAIL"
    assert first_row.violations == 5
    assert first_row.pass_threshold == 1.0


def test_parameters(check: Check):
    """Passing a non-callable as the custom value raises CustomComputeException."""
    frame = pd.DataFrame({"id": range(10)})
    expected_message = "Please provide a Callable/Function for validation"
    with pytest.raises(CustomComputeException, match=expected_message):
        # The fixture argument is replaced by a freshly built Check.
        check = Check(CheckLevel.WARNING, "pytest")
        check.is_custom("id", "wrong value")
        check.validate(frame)


def test_coverage(check: Check):
    """With a 0.4 threshold, a predicate true for half of the rows passes."""
    frame = pd.DataFrame({"id": range(10)})
    # The fixture argument is replaced by a freshly built Check.
    check = Check(CheckLevel.WARNING, "pytest")
    check.is_custom(
        "id",
        lambda data: data.assign(test=(data["id"] >= 5)),
        0.4,
    )
    first_row = check.validate(frame).iloc[0]
    assert first_row.status == "PASS"
    assert first_row.pass_threshold == 0.4

0 comments on commit 6bb3adb

Please sign in to comment.