Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions colorizer_data/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ def _write_data(
if seg_ids is None:
logging.warning(
f"No object ID data found in the dataset for column name '{config.segmentation_id_column}'."
+ "\n The pixel value for each object in image frames will be assumed to be (= row index + 1)."
+ "\n This may cause issues if the dataset does not have globally-unique object IDs in the image."
+ "\n\tThe pixel value for each object in image frames will be assumed to be (= row index + 1)."
+ "\n\tThis may cause issues if the dataset does not have globally-unique object IDs in the image."
)
seg_ids = np.arange(1, len(dataset) + 1)

Expand Down Expand Up @@ -618,7 +618,7 @@ def convert_colorizer_data(
# TODO: Add validation step to check for either frames or frames3d property
_validate_manifest(writer)
writer.write_manifest(metadata=metadata)
logging.info("Dataset conversion completed successfully.")
logging.info("Dataset conversion completed successfully.\n")
except Exception as e:
raise e
finally:
Expand Down
35 changes: 10 additions & 25 deletions colorizer_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ def infer_feature_type(data: np.ndarray, info: FeatureInfo) -> FeatureType:
elif kind in {"f"}:
return FeatureType.CONTINUOUS
else:
logging.warning(
logging.info(
"Feature '{}' has non-numeric data, and will be assumed to be type CATEGORICAL.".format(
info.get_name()
)
Expand Down Expand Up @@ -681,28 +681,22 @@ def cast_feature_to_info_type(
info = info.clone()

if info.type == FeatureType.INDETERMINATE:
logging.warning(
"Info type for feature '{}' is INDETERMINATE. Will attempt to infer feature type.".format(
info.get_name()
)
logging.debug(
f"Info type for feature '{info.get_name()}' is INDETERMINATE. Will attempt to infer feature type."
)
info.type = infer_feature_type(data, info)

kind = data.dtype.kind
if info.type == FeatureType.CONTINUOUS:
if kind not in {"f", "u", "i"}:
raise RuntimeError(
"Feature '{}' has type set to CONTINUOUS, but has non-numeric data.".format(
info.get_name()
)
f"Feature '{info.get_name()}' has type set to CONTINUOUS, but has non-numeric data."
)
return (data.astype(float), info)
if info.type == FeatureType.DISCRETE:
if kind not in {"f", "u", "i"}:
raise RuntimeError(
"Feature '{}' has type set to DISCRETE, but has non-numeric data.".format(
info.get_name()
)
f"Feature '{info.get_name()}' has type set to DISCRETE, but has non-numeric data."
)
return (safely_cast_array_to_int(data), info)
if info.type == FeatureType.CATEGORICAL:
Expand All @@ -711,29 +705,20 @@ def cast_feature_to_info_type(
return (safely_cast_array_to_int(data), info)
# Attempt to parse the data
if info.categories is None:
logging.warning(
"Feature '{}' has type set to CATEGORICAL, but is missing a categories array.".format(
info.get_name()
)
)
logging.warning(
"Categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior."
logging.info(
f"Feature '{info.get_name()}' is missing a categories array, so categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior."
)
info.categories = get_categories_from_feature_array(data)
else:
# Feature has predefined categories, warn that we are mapping to preexisting categories.
logging.warning(
"CATEGORICAL feature '{}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories.".format(
info.get_name()
)
logging.info(
f"Feature '{info.get_name()}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories."
)
indexed_data = remap_categorical_feature_array(data, info.categories)
dropped_categories = get_unused_categories(data, info.categories)
if len(dropped_categories) > 0:
logging.warning(
"\tThe following values were not in the categories array and will be replaced with NaN (up to first 25): {}".format(
dropped_categories
)
f"Feature '{info.get_name()}' had values not present in the categories array, which will be replaced with NaN (up to first 25 shown): {dropped_categories[:25]}"
)
return (safely_cast_array_to_int(indexed_data), info)

Expand Down
31 changes: 14 additions & 17 deletions colorizer_data/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,13 @@ def write_categorical_feature(
categories, indexed_data = np.unique(data.astype(str), return_inverse=True)
if len(categories) > MAX_CATEGORIES:
logging.warning(
"write_feature_categorical: Too many unique categories in provided data for feature column '{}' ({} > max {}).".format(
"write_categorical_feature: Too many unique categories were present in feature column '{}' ({} > max {}). Feature will be skipped.".format(
info.get_name(), len(categories), MAX_CATEGORIES
)
+ "\n\tCategories provided (up to first 25 shown): {}".format(
categories[:25]
)
)
logging.warning("\tFEATURE WILL BE SKIPPED.")
logging.warning("\tCategories provided: {}".format(categories))
return
info.categories = categories.tolist()
info.type = FeatureType.CATEGORICAL
Expand Down Expand Up @@ -189,7 +190,7 @@ def write_feature(
except RuntimeError as error:
logging.error("RuntimeError: {}".format(error))
logging.warning(
"Could not parse feature '{}'. FEATURE WILL BE SKIPPED.".format(
"Could not parse feature '{}'. Feature will be skipped.".format(
info.get_name()
)
)
Expand All @@ -198,24 +199,20 @@ def write_feature(
if info.type == FeatureType.CATEGORICAL:
if len(info.categories) > MAX_CATEGORIES:
logging.warning(
"Feature '{}' has too many categories ({} > max {}).".format(
"Feature '{}' has too many categories ({} > max {}) and will be skipped.".format(
info.get_name(), len(info.categories), MAX_CATEGORIES
)
)
logging.warning("\tFEATURE WILL BE SKIPPED.")
logging.warning(
"\tCategories provided (up to first 25): {}".format(
+ "\n\tCategories provided (up to first 25 shown): {}".format(
info.categories[:25]
)
)
return
if np.min(data) < 0 or np.max(data) >= len(info.categories):
logging.warning(
"Feature '{}' has values out of range of the defined categories.".format(
"Feature '{}' has values out of range of the defined categories. Bad values will be replaced with NaN.".format(
info.get_name()
)
)
logging.warning("\tBad values will be replaced with NaN.")
replace_out_of_bounds_values_with_nan(data, 0, len(info.categories) - 1)

num_features = len(self.features.keys())
Expand Down Expand Up @@ -315,9 +312,9 @@ def write_feature(
# Throw a warning that we are overwriting data
old_feature_data = self.features[key]
logging.warning(
"Feature key '{}' already exists in manifest. Feature '{}' will overwrite existing feature '{}'. Overwriting...".format(
key,
"Feature '{}' has an identical key '{}' as the existing feature '{}' and will overwrite it. Set `FeatureInfo.key` to a unique value to avoid this.".format(
label,
key,
old_feature_data["name"],
)
)
Expand Down Expand Up @@ -508,7 +505,7 @@ def set_frame_paths(self, paths: List[str]) -> None:

def set_3d_frame_data(self, data: Frames3dMetadata) -> None:
if data.total_frames is None:
logging.warning(
logging.info(
"ColorizerDatasetWriter: The `total_frames` property of the Frames3dMetadata object is `None`. Will attempt to infer the number of frames from the provided data."
)
data.total_frames = _get_frame_count_from_3d_source(data.source)
Expand Down Expand Up @@ -674,7 +671,7 @@ def validate_dataset(
for i in range(min(10, len(gaps))):
time, segId1, segId2 = gaps[i]
logging.warning(
f" Time {time}: Segmentation ID gap between {segId1} and {segId2}."
f"\tTime {time}: Segmentation ID gap between {segId1} and {segId2}."
)

# Check that all features + backdrops have unique keys. This should be guaranteed because
Expand All @@ -688,7 +685,7 @@ def validate_dataset(
# Check for missing frames
if "frames" not in self.manifest and "frames3d" not in self.manifest:
logging.warning(
"No frames are provided! Did you forget to call `set_frame_paths` on the writer?"
"No frames are provided! Did you forget to call `set_frame_paths()` on the writer?"
)
elif "frames" in self.manifest:
# Check that all the 2D frame paths exist
Expand All @@ -705,7 +702,7 @@ def validate_dataset(
)
for i in range(len(missing_frames)):
index, path = missing_frames[i]
logging.warning(" {}: '{}'".format(index, path))
logging.warning("\t{}: '{}'".format(index, path))
logging.warning(
"For auto-generated frame numbers, check that no frames are missing data in the original dataset,"
+ " or add an offset if your frame numbers do not start at 0."
Expand Down
Loading