Merged

Changes from all commits (28 commits)
b0edca9
support for logging in `Moon` & `Planet`
sgathrid Oct 15, 2025
65454a6
suppress sklearn deprecation warnings thrown by kmapper + hdbscan
sgathrid Oct 15, 2025
6e28e0f
implement filter functions + fix filter handling in `Galaxy` class
sgathrid Oct 15, 2025
9b80463
add logging to galaxy, remove old print statements
sgathrid Oct 15, 2025
bdc7152
update `Planet.writeParams_toYaml()` to work if a yaml file does NOT a…
sgathrid Oct 15, 2025
ab77b53
fix params yaml creation in `Planet` class
sgathrid Oct 15, 2025
beb39f0
OOPS! Add min-coverage filter for Retire use
sgathrid Oct 15, 2025
07ab478
support for filtering + running curvature on a list of filepaths, in …
sgathrid Oct 15, 2025
a6f87d7
fix bug in `Galaxy.collapse()` preventing a `filter=None` from runnin…
sgathrid Oct 15, 2025
af54e48
update `tqdm` handling in notebooks
sgathrid Oct 15, 2025
2dfa6b3
preference refactor: no need for 2 util files in `thema` module. Main en…
jeremy-wayland Oct 16, 2025
eb0b67f
add collapse to `Thema.galaxy_genesis`
jeremy-wayland Oct 16, 2025
5c0a672
add detailed DEBUG logging for multiverse of objects
sgathrid Oct 20, 2025
4cab13d
add filter support via YAML file
sgathrid Oct 20, 2025
1f3a36c
support graph filtering params in YAML
sgathrid Oct 20, 2025
ee359b2
move filter config into `config.py` for best practices
sgathrid Oct 20, 2025
7a63b91
chore(format): black-format starFilters utilities
jeremy-wayland Oct 21, 2025
9514eb6
fix(planet): robust outDir handling and richer progress logging
jeremy-wayland Oct 21, 2025
e7e0630
refactor(oort): detailed logging, timing, and output delta reporting
jeremy-wayland Oct 21, 2025
f5df11e
feat(galaxy): robust filter setup, safer path handling, and richer co…
jeremy-wayland Oct 21, 2025
e85ebde
feat(geodesics): set default curvature to ollivier_ricci_curvature an…
jeremy-wayland Oct 21, 2025
e366a99
fix(thema): initialize and populate selected_model_files; tidy doc ex…
jeremy-wayland Oct 21, 2025
13e4bf4
refactor(galaxy): prefer callable filter_fn branch and log provided f…
jeremy-wayland Oct 21, 2025
40c9d95
[copilot]: catch `ImportError` on `tqdm` import
sgathrid Oct 21, 2025
7e5f862
[copilot]: remove duplicate import
sgathrid Oct 21, 2025
e7e5ca9
[copilot]: Update yaml logging in thema/multiverse/system/inner/plane…
sgathrid Oct 21, 2025
183bcca
docs(geodesics): expand curvature parameter docstring with explanatio…
sgathrid Oct 21, 2025
2e3f46a
remove redundant comments
sgathrid Oct 21, 2025
87 changes: 87 additions & 0 deletions thema/__init__.py
@@ -27,6 +27,91 @@
from .multiverse.universe.utils.starGraph import starGraph
from .thema import Thema

import logging
import warnings

# Suppress sklearn deprecation warnings about force_all_finite -> ensure_all_finite
warnings.filterwarnings(
"ignore",
message=".*'force_all_finite' was renamed to 'ensure_all_finite'.*",
category=FutureWarning,
module="sklearn.*",
)


def enable_logging(level="INFO"):
"""
Enable thema logging for interactive use (e.g., notebooks).

Parameters
----------
level : str, optional
Logging level ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
Default is 'INFO' for moderate verbosity.
Use 'DEBUG' for detailed operational info.
Use 'WARNING' for warnings and errors only.
Use 'ERROR' for errors only.

Examples
--------
>>> import thema
>>> thema.enable_logging('DEBUG') # Detailed logging
>>> thema.enable_logging('INFO') # Moderate logging
>>> thema.enable_logging('WARNING') # Warnings/errors only
"""
thema_logger = logging.getLogger("thema")

for handler in thema_logger.handlers[:]:
if not isinstance(handler, logging.NullHandler):
thema_logger.removeHandler(handler)

handler = logging.StreamHandler()
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
thema_logger.addHandler(handler)

# Set level
level_map = {
"DEBUG": logging.DEBUG,
"INFO": logging.INFO,
"WARNING": logging.WARNING,
"ERROR": logging.ERROR,
"CRITICAL": logging.CRITICAL,
}
log_level = level_map.get(level.upper(), logging.INFO)
thema_logger.setLevel(log_level)

for name in list(logging.Logger.manager.loggerDict.keys()):
if name.startswith("thema."):
child_logger = logging.getLogger(name)
# Reset level to NOTSET so parent logger controls the level
child_logger.setLevel(logging.NOTSET)

thema_logger.propagate = False

print(f"Thema logging enabled at {level.upper()} level")


def disable_logging():
"""
Disable thema logging (return to quiet mode).

Examples
--------
>>> import thema
>>> thema.disable_logging()
"""
thema_logger = logging.getLogger("thema")

for handler in thema_logger.handlers[:]:
if not isinstance(handler, logging.NullHandler):
thema_logger.removeHandler(handler)

thema_logger.setLevel(logging.ERROR)

print("Thema logging disabled (errors only)")


# Package metadata
__version__ = "0.1.3"
__author__ = "Krv-Analytics"
@@ -41,4 +126,6 @@
"Star",
"Galaxy",
"starGraph",
"enable_logging",
"disable_logging",
]
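
The two toggles above are meant to be flipped from an interactive session. A minimal usage sketch (the printed lines come from the print calls in the diff; the rest is illustrative):

import thema

thema.enable_logging("DEBUG")    # prints: Thema logging enabled at DEBUG level
# ... run thema operations; module loggers under the "thema" namespace now
# propagate their records up to the handler configured on the "thema" logger ...
thema.disable_logging()          # prints: Thema logging disabled (errors only)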
24 changes: 24 additions & 0 deletions thema/config.py
@@ -146,3 +146,27 @@ class jmapObservatoryConfig:
star_to_observatory = {
"jmapStar": "jmapObservatoryConfig",
}

# Map from filter YAML tags to filter functions and their parameter names
filter_configs = {
"component_count": {
"function": "component_count_filter",
"params": {"target_components": 1}
},
"component_count_range": {
"function": "component_count_range_filter",
"params": {"min_components": 1, "max_components": 10}
},
"minimum_nodes": {
"function": "minimum_nodes_filter",
"params": {"min_nodes": 3}
},
"minimum_edges": {
"function": "minimum_edges_filter",
"params": {"min_edges": 2}
},
"minimum_unique_items": {
"function": "minimum_unique_items_filter",
"params": {"min_unique_items": 10}
}
}
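
Note that filter_configs only maps YAML tags to function names and default parameters; the actual lookup happens elsewhere in this PR (the `Galaxy` filter handling). A sketch of what such a resolver could look like; the `resolve_filter` helper here is hypothetical and not part of this diff:

from thema.config import filter_configs

def resolve_filter(tag, overrides=None):
    # Hypothetical helper: merge YAML-supplied overrides over the defaults
    # declared in filter_configs and return the target function's name.
    spec = filter_configs[tag]
    params = {**spec["params"], **(overrides or {})}
    return spec["function"], params

fn_name, params = resolve_filter("minimum_nodes", {"min_nodes": 5})
# fn_name == "minimum_nodes_filter", params == {"min_nodes": 5}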
129 changes: 67 additions & 62 deletions thema/multiverse/system/inner/moon.py
@@ -1,9 +1,9 @@
# File: multiverse/system/inner/moon.py
# Last Update: 05/15/24
# Updated By: JW
# Last Update: 10/15/25
# Updated By: SG

import pickle
import warnings
import logging

import category_encoders as ce
import pandas as pd
@@ -12,6 +12,9 @@
from ....core import Core
from . import inner_utils

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


class Moon(Core):
"""
@@ -122,81 +125,79 @@ def __init__(
self.seed = seed
self.imputeData = None

def fit(self):
"""
Performs the cleaning procedure according to the constructor arguments.
Initializes the imputeData member as a DataFrame, which is a scaled,
numeric, and complete representation of the original raw data set.
# Log initial state
logger.debug(f"Moon initialized with data shape: {self.data.shape}")
logger.debug(f"Drop columns: {self.dropColumns}")
logger.debug(f"Impute columns: {self.imputeColumns}")
logger.debug(f"Impute methods: {self.imputeMethods}")
logger.debug(
f"Encoding: {self.encoding}, Scaler: {self.scaler}, Seed: {self.seed}"
)

Examples
----------
>>> moon = Moon()
>>> moon.fit()
"""
def fit(self):
# Add imputed flags
self.imputeData = inner_utils.add_imputed_flags(self.data, self.imputeColumns)
logger.debug("Added imputed flags to columns")
logger.debug(f"Data shape after adding flags: {self.imputeData.shape}")

self.imputeData = inner_utils.add_imputed_flags(
self.data, self.imputeColumns
)
# Apply imputation
for index, column in enumerate(self.imputeColumns):
impute_function = getattr(inner_utils, self.imputeMethods[index])
self.imputeData[column] = impute_function(
self.data[column], self.seed
self.imputeData[column] = impute_function(self.data[column], self.seed)
logger.debug(
f"Column '{column}' imputed using '{self.imputeMethods[index]}'. "
f"NaNs remaining: {self.imputeData[column].isna().sum()}"
)

self.dropColumns = [
col for col in self.dropColumns if col in self.data.columns
]
# Drop Columns
if not self.dropColumns == []:
self.imputeData = self.data.drop(columns=self.dropColumns)
# Drop specified columns
self.dropColumns = [col for col in self.dropColumns if col in self.data.columns]
if self.dropColumns:
before_drop = self.imputeData.shape
self.imputeData = self.imputeData.drop(columns=self.dropColumns)
logger.debug(
f"Dropped columns: {self.dropColumns}. Shape before: {before_drop}, after: {self.imputeData.shape}"
)

# Drop Rows with Nans
# Drop rows with NaNs
nan_cols = self.imputeData.columns[self.imputeData.isna().any()]
logger.debug(f"Columns with NaN values before dropping rows: {list(nan_cols)}")
self.imputeData.dropna(axis=0, inplace=True)
logger.debug(f"Shape after dropping rows with NaNs: {self.imputeData.shape}")

if type(self.encoding) == str:
# Ensure encoding is a list
if isinstance(self.encoding, str):
self.encoding = [
self.encoding
for _ in range(
len(
self.imputeData.select_dtypes(
include=["object"]
).columns
)
len(self.imputeData.select_dtypes(include=["object"]).columns)
)
]

# Encoding
assert len(self.encoding) == len(
self.imputeData.select_dtypes(include=["object"]).columns
), f"length of encoding: {len(self.encoding)}, length of cat variables: {len(self.imputeData.select_dtypes(include=['object']).columns)}"

for i, column in enumerate(
self.imputeData.select_dtypes(include=["object"]).columns
):
encoding = self.encoding[i]

if encoding == "one_hot":
if self.imputeData[column].dtype == object:
self.imputeData = pd.get_dummies(
self.imputeData, prefix=f"OH_{column}", columns=[column]
)

elif encoding == "integer":
if self.imputeData[column].dtype == object:
vals = self.imputeData[column].values
self.imputeData[column] = inner_utils.integer_encoder(vals)

elif encoding == "hash":
if self.imputeData[column].dtype == object:
hashing_encoder = ce.HashingEncoder(
cols=[column], n_components=10
)
self.imputeData = hashing_encoder.fit_transform(
self.imputeData
)

else:
pass
cat_cols = self.imputeData.select_dtypes(include=["object"]).columns
assert len(self.encoding) == len(cat_cols), (
f"length of encoding: {len(self.encoding)}, "
f"length of categorical variables: {len(cat_cols)}"
)
for i, column in enumerate(cat_cols):
encoding_method = self.encoding[i]
if encoding_method == "one_hot" and self.imputeData[column].dtype == object:
self.imputeData = pd.get_dummies(
self.imputeData, prefix=f"OH_{column}", columns=[column]
)
logger.debug(f"Column '{column}' one-hot encoded")

elif (
encoding_method == "integer" and self.imputeData[column].dtype == object
):
vals = self.imputeData[column].values
self.imputeData[column] = inner_utils.integer_encoder(vals)
logger.debug(f"Column '{column}' integer encoded")

elif encoding_method == "hash" and self.imputeData[column].dtype == object:
hashing_encoder = ce.HashingEncoder(cols=[column], n_components=10)
self.imputeData = hashing_encoder.fit_transform(self.imputeData)
logger.debug(f"Column '{column}' hash encoded")

# Scaling
assert self.scaler in ["standard"], "Invalid Scaler"
@@ -206,6 +207,9 @@ def fit(self):
scaler.fit_transform(self.imputeData),
columns=list(self.imputeData.columns),
)
logger.debug(
f"Data scaled using StandardScaler. Final shape: {self.imputeData.shape}"
)

def save(self, file_path):
"""
@@ -224,3 +228,4 @@
"""
with open(file_path, "wb") as f:
pickle.dump(self, f)
logger.debug(f"Moon object saved to {file_path}")