allen-cell-animated · ShrimpCryptid · Nov 25, 2025 · Nov 20, 2025 · Nov 20, 2025
@@ -144,8 +144,8 @@ def _write_data(
     if seg_ids is None:
         logging.warning(
             f"No object ID data found in the dataset for column name '{config.segmentation_id_column}'."
-            + "\n  The pixel value for each object in image frames will be assumed to be (= row index + 1)."
-            + "\n  This may cause issues if the dataset does not have globally-unique object IDs in the image."
+            + "\n\tThe pixel value for each object in image frames will be assumed to be (= row index + 1)."
+            + "\n\tThis may cause issues if the dataset does not have globally-unique object IDs in the image."
         )
         seg_ids = np.arange(1, len(dataset) + 1)
 
@@ -618,7 +618,7 @@ def convert_colorizer_data(
         # TODO: Add validation step to check for either frames or frames3d property
         _validate_manifest(writer)
         writer.write_manifest(metadata=metadata)
-        logging.info("Dataset conversion completed successfully.")
+        logging.info("Dataset conversion completed successfully.\n")
     except Exception as e:
         raise e
     finally:

@@ -630,7 +630,7 @@ def infer_feature_type(data: np.ndarray, info: FeatureInfo) -> FeatureType:
     elif kind in {"f"}:
         return FeatureType.CONTINUOUS
     else:
-        logging.warning(
+        logging.info(
             "Feature '{}' has non-numeric data, and will be assumed to be type CATEGORICAL.".format(
                 info.get_name()
             )
@@ -681,28 +681,22 @@ def cast_feature_to_info_type(
     info = info.clone()
 
     if info.type == FeatureType.INDETERMINATE:
-        logging.warning(
-            "Info type for feature '{}' is INDETERMINATE. Will attempt to infer feature type.".format(
-                info.get_name()
-            )
+        logging.debug(
+            f"Info type for feature '{info.get_name()}' is INDETERMINATE. Will attempt to infer feature type."
         )
         info.type = infer_feature_type(data, info)
 
     kind = data.dtype.kind
     if info.type == FeatureType.CONTINUOUS:
         if kind not in {"f", "u", "i"}:
             raise RuntimeError(
-                "Feature '{}' has type set to CONTINUOUS, but has non-numeric data.".format(
-                    info.get_name()
-                )
+                f"Feature '{info.get_name()}' has type set to CONTINUOUS, but has non-numeric data."
             )
         return (data.astype(float), info)
     if info.type == FeatureType.DISCRETE:
         if kind not in {"f", "u", "i"}:
             raise RuntimeError(
-                "Feature '{}' has type set to DISCRETE, but has non-numeric data.".format(
-                    info.get_name()
-                )
+                f"Feature '{info.get_name()}' has type set to DISCRETE, but has non-numeric data."
             )
         return (safely_cast_array_to_int(data), info)
     if info.type == FeatureType.CATEGORICAL:
@@ -711,29 +705,20 @@ def cast_feature_to_info_type(
             return (safely_cast_array_to_int(data), info)
         # Attempt to parse the data
         if info.categories is None:
-            logging.warning(
-                "Feature '{}' has type set to CATEGORICAL, but is missing a categories array.".format(
-                    info.get_name()
-                )
-            )
-            logging.warning(
-                "Categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior."
+            logging.info(
+                f"Feature '{info.get_name()}' is missing a categories array, so categories will be automatically inferred from the data. Set `FeatureInfo.categories` to override this behavior."
             )
             info.categories = get_categories_from_feature_array(data)
         else:
             # Feature has predefined categories, warn that we are mapping to preexisting categories.
-            logging.warning(
-                "CATEGORICAL feature '{}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories.".format(
-                    info.get_name()
-                )
+            logging.info(
+                f"Feature '{info.get_name()}' has a categories array defined, but data type is not an int or float. Feature values will be mapped as integer indexes to categories."
             )
         indexed_data = remap_categorical_feature_array(data, info.categories)
         dropped_categories = get_unused_categories(data, info.categories)
         if len(dropped_categories) > 0:
             logging.warning(
-                "\tThe following values were not in the categories array and will be replaced with NaN (up to first 25): {}".format(
-                    dropped_categories
-                )
+                f"Feature '{info.get_name()}' had values not present in the categories array, which will be replaced with NaN (up to first 25 shown): {dropped_categories[:25]}"
             )
         return (safely_cast_array_to_int(indexed_data), info)
 

@@ -137,12 +137,13 @@ def write_categorical_feature(
         categories, indexed_data = np.unique(data.astype(str), return_inverse=True)
         if len(categories) > MAX_CATEGORIES:
             logging.warning(
-                "write_feature_categorical: Too many unique categories in provided data for feature column '{}' ({} > max {}).".format(
+                "write_categorical_feature: Too many unique categories were present in feature column '{}' ({} > max {}). Feature will be skipped.".format(
                     info.get_name(), len(categories), MAX_CATEGORIES
                 )
+                + "\n\tCategories provided (up to first 25 shown): {}".format(
+                    categories[:25]
+                )
             )
-            logging.warning("\tFEATURE WILL BE SKIPPED.")
-            logging.warning("\tCategories provided: {}".format(categories))
             return
         info.categories = categories.tolist()
         info.type = FeatureType.CATEGORICAL
@@ -189,7 +190,7 @@ def write_feature(
         except RuntimeError as error:
             logging.error("RuntimeError: {}".format(error))
             logging.warning(
-                "Could not parse feature '{}'. FEATURE WILL BE SKIPPED.".format(
+                "Could not parse feature '{}'. Feature will be skipped.".format(
                     info.get_name()
                 )
             )
@@ -198,24 +199,20 @@ def write_feature(
         if info.type == FeatureType.CATEGORICAL:
             if len(info.categories) > MAX_CATEGORIES:
                 logging.warning(
-                    "Feature '{}' has too many categories ({} > max {}).".format(
+                    "Feature '{}' has too many categories ({} > max {}) and will be skipped.".format(
                         info.get_name(), len(info.categories), MAX_CATEGORIES
                     )
-                )
-                logging.warning("\tFEATURE WILL BE SKIPPED.")
-                logging.warning(
-                    "\tCategories provided (up to first 25): {}".format(
+                    + "\n\tCategories provided (up to first 25 shown): {}".format(
                         info.categories[:25]
                     )
                 )
                 return
             if np.min(data) < 0 or np.max(data) >= len(info.categories):
                 logging.warning(
-                    "Feature '{}' has values out of range of the defined categories.".format(
+                    "Feature '{}' has values out of range of the defined categories. Bad values will be replaced with NaN.".format(
                         info.get_name()
                     )
                 )
-                logging.warning("\tBad values will be replaced with NaN.")
                 replace_out_of_bounds_values_with_nan(data, 0, len(info.categories) - 1)
 
         num_features = len(self.features.keys())
@@ -315,9 +312,9 @@ def write_feature(
             # Throw a warning that we are overwriting data
             old_feature_data = self.features[key]
             logging.warning(
-                "Feature key '{}' already exists in manifest. Feature '{}' will overwrite existing feature '{}'. Overwriting...".format(
-                    key,
+                "Feature '{}' has an identical key '{}' as the existing feature '{}' and will overwrite it. Set `FeatureInfo.key` to a unique value to avoid this.".format(
                     label,
+                    key,
                     old_feature_data["name"],
                 )
             )
@@ -508,7 +505,7 @@ def set_frame_paths(self, paths: List[str]) -> None:
 
     def set_3d_frame_data(self, data: Frames3dMetadata) -> None:
         if data.total_frames is None:
-            logging.warning(
+            logging.info(
                 "ColorizerDatasetWriter: The `total_frames` property of the Frames3dMetadata object is `None`. Will attempt to infer the number of frames from the provided data."
             )
             data.total_frames = _get_frame_count_from_3d_source(data.source)
@@ -674,7 +671,7 @@ def validate_dataset(
                 for i in range(min(10, len(gaps))):
                     time, segId1, segId2 = gaps[i]
                     logging.warning(
-                        f"  Time {time}: Segmentation ID gap between {segId1} and {segId2}."
+                        f"\tTime {time}: Segmentation ID gap between {segId1} and {segId2}."
                     )
 
         # Check that all features + backdrops have unique keys. This should be guaranteed because
@@ -688,7 +685,7 @@ def validate_dataset(
         # Check for missing frames
         if "frames" not in self.manifest and "frames3d" not in self.manifest:
             logging.warning(
-                "No frames are provided! Did you forget to call `set_frame_paths` on the writer?"
+                "No frames are provided! Did you forget to call `set_frame_paths()` on the writer?"
             )
         elif "frames" in self.manifest:
             # Check that all the 2D frame paths exist
@@ -705,7 +702,7 @@ def validate_dataset(
                 )
                 for i in range(len(missing_frames)):
                     index, path = missing_frames[i]
-                    logging.warning("  {}: '{}'".format(index, path))
+                    logging.warning("\t{}: '{}'".format(index, path))
                 logging.warning(
                     "For auto-generated frame numbers, check that no frames are missing data in the original dataset,"
                     + " or add an offset if your frame numbers do not start at 0."