Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion howso/utilities/feature_attributes/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,9 +719,11 @@ def _infer_floating_point_attributes(self, feature_name: str) -> dict:

# Determine number of decimal places using
# np.format_float_positional to handle scientific notation.
# Convert to numpy array for faster iteration
col_array = col.to_numpy()
decimals = max([
len((str(np.format_float_positional(r))).split('.')[1])
for r in col
for r in col_array
])

# specify decimal place. Proceed with training but issue a warning.
Expand Down
23 changes: 14 additions & 9 deletions howso/utilities/feature_attributes/relational.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,24 +878,29 @@ def _infer_feature_bounds( # noqa: C901
# This loop grabs all the distinct values, then converts
# them according to the `format_dt` to a proper datetime
# instance, then compares them to find min and max values.
min_date_obj = datetime.datetime.max
max_date_obj = datetime.datetime.min

try:
unique_values = self._get_unique_values(feature_name)
# Collect all date objects first, then find min/max once
# (more efficient than comparing in loop, especially for many values)
date_objects = []
# The comma in this loop is necessary since
# unique_values is a list of sqlalchemy Row values
for dt_str, in unique_values:
# Parse using the `format_dt` into a datetime
if dt_str: # skip any empty values
date_obj = datetime.datetime.strptime(dt_str, format_dt)
min_date_obj = min(min_date_obj, date_obj)
max_date_obj = max(max_date_obj, date_obj)
date_objects.append(date_obj)
else:
warnings.warn(
f'Cannot guess the bounds for feature '
f'"{feature_name}" without samples.')
return None
# If no valid dates were found, warn and return None
if not date_objects:
warnings.warn(
f'Cannot guess the bounds for feature '
f'"{feature_name}" without samples.')
return None

# Compute min/max from collected date objects
min_date_obj = min(date_objects)
max_date_obj = max(date_objects)
except Exception: # noqa: Intentionally broad
warnings.warn(
f'Feature "{feature_name}" does not match the '
Expand Down
Loading