diff --git a/colorizer_data/converter.py b/colorizer_data/converter.py index 9d3005c..58c4ff2 100644 --- a/colorizer_data/converter.py +++ b/colorizer_data/converter.py @@ -144,8 +144,8 @@ def _write_data( if seg_ids is None: logging.warning( f"No object ID data found in the dataset for column name '{config.segmentation_id_column}'." - + "\n The pixel value for each object in image frames will be assumed to be (= row index + 1)." - + "\n This may cause issues if the dataset does not have globally-unique object IDs in the image." + + "\n\tThe pixel value for each object in image frames will be assumed to be (= row index + 1)." + + "\n\tThis may cause issues if the dataset does not have globally-unique object IDs in the image." ) seg_ids = np.arange(1, len(dataset) + 1) @@ -618,7 +618,7 @@ def convert_colorizer_data( # TODO: Add validation step to check for either frames or frames3d property _validate_manifest(writer) writer.write_manifest(metadata=metadata) - logging.info("Dataset conversion completed successfully.") + logging.info("Dataset conversion completed successfully.\n") except Exception as e: raise e finally: diff --git a/colorizer_data/utils.py b/colorizer_data/utils.py index 18fc6f1..953e7a2 100644 --- a/colorizer_data/utils.py +++ b/colorizer_data/utils.py @@ -630,7 +630,7 @@ def infer_feature_type(data: np.ndarray, info: FeatureInfo) -> FeatureType: elif kind in {"f"}: return FeatureType.CONTINUOUS else: - logging.warning( + logging.info( "Feature '{}' has non-numeric data, and will be assumed to be type CATEGORICAL.".format( info.get_name() ) @@ -681,10 +681,8 @@ def cast_feature_to_info_type( info = info.clone() if info.type == FeatureType.INDETERMINATE: - logging.warning( - "Info type for feature '{}' is INDETERMINATE. Will attempt to infer feature type.".format( - info.get_name() - ) + logging.debug( + f"Info type for feature '{info.get_name()}' is INDETERMINATE. Will attempt to infer feature type." ) info.type = infer_feature_type(data, info) @@ -692,17 +690,13 @@ def cast_feature_to_info_type( if info.type == FeatureType.CONTINUOUS: if kind not in {"f", "u", "i"}: raise RuntimeError( - "Feature '{}' has type set to CONTINUOUS, but has non-numeric data.".format( - info.get_name() - ) + f"Feature '{info.get_name()}' has type set to CONTINUOUS, but has non-numeric data." ) return (data.astype(float), info) if info.type == FeatureType.DISCRETE: if kind not in {"f", "u", "i"}: raise RuntimeError( - "Feature '{}' has type set to DISCRETE, but has non-numeric data.".format( - info.get_name() - ) + f"Feature '{info.get_name()}' has type set to DISCRETE, but has non-numeric data." ) return (safely_cast_array_to_int(data), info) if info.type == FeatureType.CATEGORICAL: @@ -711,29 +705,20 @@ def cast_feature_to_info_type( return (safely_cast_array_to_int(data), info) # Attempt to parse the data if info.categories is None: - logging.warning( - "Feature '{}' has type set to CATEGORICAL, but is missing a categories array.".format( - info.get_name() - ) - ) - logging.warning( - "Categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior." + logging.info( + f"Feature '{info.get_name()}' is missing a categories array, so categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior." ) info.categories = get_categories_from_feature_array(data) else: # Feature has predefined categories, warn that we are mapping to preexisting categories. - logging.warning( - "CATEGORICAL feature '{}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories.".format( - info.get_name() - ) + logging.info( + f"Feature '{info.get_name()}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories." ) indexed_data = remap_categorical_feature_array(data, info.categories) dropped_categories = get_unused_categories(data, info.categories) if len(dropped_categories) > 0: logging.warning( - "\tThe following values were not in the categories array and will be replaced with NaN (up to first 25): {}".format( - dropped_categories - ) + f"Feature '{info.get_name()}' had values not present in the categories array, which will be replaced with NaN (up to first 25 shown): {dropped_categories[:25]}" ) return (safely_cast_array_to_int(indexed_data), info) diff --git a/colorizer_data/writer.py b/colorizer_data/writer.py index 086610f..4e41caa 100644 --- a/colorizer_data/writer.py +++ b/colorizer_data/writer.py @@ -137,12 +137,13 @@ def write_categorical_feature( categories, indexed_data = np.unique(data.astype(str), return_inverse=True) if len(categories) > MAX_CATEGORIES: logging.warning( - "write_feature_categorical: Too many unique categories in provided data for feature column '{}' ({} > max {}).".format( + "write_categorical_feature: Too many unique categories were present in feature column '{}' ({} > max {}). Feature will be skipped.".format( info.get_name(), len(categories), MAX_CATEGORIES ) + + "\n\tCategories provided (up to first 25 shown): {}".format( + categories[:25] + ) ) - logging.warning("\tFEATURE WILL BE SKIPPED.") - logging.warning("\tCategories provided: {}".format(categories)) return info.categories = categories.tolist() info.type = FeatureType.CATEGORICAL @@ -189,7 +190,7 @@ def write_feature( except RuntimeError as error: logging.error("RuntimeError: {}".format(error)) logging.warning( - "Could not parse feature '{}'. FEATURE WILL BE SKIPPED.".format( + "Could not parse feature '{}'. Feature will be skipped.".format( info.get_name() ) ) @@ -198,24 +199,20 @@ def write_feature( if info.type == FeatureType.CATEGORICAL: if len(info.categories) > MAX_CATEGORIES: logging.warning( - "Feature '{}' has too many categories ({} > max {}).".format( + "Feature '{}' has too many categories ({} > max {}) and will be skipped.".format( info.get_name(), len(info.categories), MAX_CATEGORIES ) - ) - logging.warning("\tFEATURE WILL BE SKIPPED.") - logging.warning( - "\tCategories provided (up to first 25): {}".format( + + "\n\tCategories provided (up to first 25 shown): {}".format( info.categories[:25] ) ) return if np.min(data) < 0 or np.max(data) >= len(info.categories): logging.warning( - "Feature '{}' has values out of range of the defined categories.".format( + "Feature '{}' has values out of range of the defined categories. Bad values will be replaced with NaN.".format( info.get_name() ) ) - logging.warning("\tBad values will be replaced with NaN.") replace_out_of_bounds_values_with_nan(data, 0, len(info.categories) - 1) num_features = len(self.features.keys()) @@ -315,9 +312,9 @@ def write_feature( # Throw a warning that we are overwriting data old_feature_data = self.features[key] logging.warning( - "Feature key '{}' already exists in manifest. Feature '{}' will overwrite existing feature '{}'. Overwriting...".format( - key, + "Feature '{}' has an identical key '{}' as the existing feature '{}' and will overwrite it. Set `FeatureInfo.key` to a unique value to avoid this.".format( label, + key, old_feature_data["name"], ) ) @@ -508,7 +505,7 @@ def set_frame_paths(self, paths: List[str]) -> None: def set_3d_frame_data(self, data: Frames3dMetadata) -> None: if data.total_frames is None: - logging.warning( + logging.info( "ColorizerDatasetWriter: The `total_frames` property of the Frames3dMetadata object is `None`. Will attempt to infer the number of frames from the provided data." ) data.total_frames = _get_frame_count_from_3d_source(data.source) @@ -674,7 +671,7 @@ def validate_dataset( for i in range(min(10, len(gaps))): time, segId1, segId2 = gaps[i] logging.warning( - f" Time {time}: Segmentation ID gap between {segId1} and {segId2}." + f"\tTime {time}: Segmentation ID gap between {segId1} and {segId2}." ) # Check that all features + backdrops have unique keys. This should be guaranteed because @@ -688,7 +685,7 @@ def validate_dataset( # Check for missing frames if "frames" not in self.manifest and "frames3d" not in self.manifest: logging.warning( - "No frames are provided! Did you forget to call `set_frame_paths` on the writer?" + "No frames are provided! Did you forget to call `set_frame_paths()` on the writer?" ) elif "frames" in self.manifest: # Check that all the 2D frame paths exist @@ -705,7 +702,7 @@ def validate_dataset( ) for i in range(len(missing_frames)): index, path = missing_frames[i] - logging.warning(" {}: '{}'".format(index, path)) + logging.warning("\t{}: '{}'".format(index, path)) logging.warning( "For auto-generated frame numbers, check that no frames are missing data in the original dataset," + " or add an offset if your frame numbers do not start at 0."