Merge branch 'main' into updated_color_correction_tutorial

nfahlgren · web-flow · commit c476da55d83f · 2024-01-08T17:09:00.000-06:00
diff --git a/docs/outputs.md b/docs/outputs.md
@@ -48,32 +48,29 @@ Methods are accessed as plantcv.outputs.*method*.
 
 * scale: Units of the measurement or a scale in which the observations are expressed; if possible, standard units and scales should be used and mapped to existing ontologies; in case of a non-standard scale a full explanation should be given.
 
-* datatype: The type of data to be stored. In JSON, values must be one of the following data types:
-    - a string
-    - a number
-    - an array
-    - a boolean
-    - null
-    - a JSON object
-    
-    They are equilvalent to python data types of the following:
-    - 'str'
-    - 'int' or 'float'
-    - 'list' or 'tuple'
-    - 'bool'
-    - 'NoneType'
-    - 'dict'
+* datatype: The type of data to be stored. See note below for supported data types.
 
 * value: The data itself. Make sure the data type of value matches the data type stated in "datatype". 
 
 * label:  The label for each value, which will be useful when the data is a frequency table (e.g. hues). 
 
+**add_metadata**(*term, datatype, value*): Add a metadata term (information about the image, experiment, etc.) to the outputs; adding an existing term replaces its previous value
+
+* term: Metadata term/name
+
+* datatype: The type of data to be stored. See note below for supported data types.
+
+* value: The data itself. Make sure the data type of value matches the data type stated in "datatype". 
+
 **save_results**(*filename, outformat="json"*): Save results to a file
 
 * filename: Path and name of the output file
 
 * outformat: Output file format (default = "json"). Supports "json" and "csv" formats
 
+!!!note
+    Supported data types for JSON output are: int, float, str, list, bool, tuple, dict, NoneType, numpy.float64.
+
 **Example use:**
     - [Use In VIS/NIR Tutorial](tutorials/vis_nir_tutorial.md)
 
@@ -119,6 +116,9 @@ pcv.outputs.add_observation(sample='default', variable='percent_diseased',
                             method='ratio of pixels', scale='percent', datatype=float,
                             value=percent_diseased, label='percent')
 
+# Add metadata (saved with the observations in the results file)
+pcv.outputs.add_metadata(term="genotype", datatype=str, value="wildtype")
+
 # Write custom data to results file
 pcv.outputs.save_results(filename=args.result, outformat="json")
 
diff --git a/docs/updating.md b/docs/updating.md
@@ -731,6 +731,11 @@ pages for more details on the input and output variable types.
 * post v3.3: **plantcv.outputs.add_observation**(*variable, trait, method, scale, datatype, value, label*)
 * post v3.11: **plantcv.outputs.add_observation**(*sample, variable, trait, method, scale, datatype, value, label*)
 
+#### plantcv.outputs.add_metadata
+
+* pre v4.1: NA
+* post v4.1: **plantcv.outputs.add_metadata**(*term, datatype, value*)
+
 #### plantcv.outputs.clear
 
 * pre v3.2: NA
diff --git a/plantcv/plantcv/classes.py b/plantcv/plantcv/classes.py
@@ -70,12 +70,14 @@ def __init__(self):
         self.measurements = {}
         self.images = []
         self.observations = {}
+        self.metadata = {}
 
         # Add a method to clear measurements
     def clear(self):
         self.measurements = {}
         self.images = []
         self.observations = {}
+        self.metadata = {}
 
     # Method to add observation to outputs
     def add_observation(self, sample, variable, trait, method, scale, datatype, value, label):
@@ -108,16 +110,8 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
         if sample not in self.observations:
             self.observations[sample] = {}
 
-        # Supported data types
-        supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
-        # Supported class types
-        class_list = [f"<class '{cls}'>" for cls in supported_dtype]
-
-        # Send an error message if datatype is not supported by json
-        if str(type(value)) not in class_list:
-            # String list of supported types
-            type_list = ', '.join(map(str, supported_dtype))
-            fatal_error(f"The Data type {type(value)} is not compatible with JSON! Please use only these: {type_list}!")
+        # Validate that the data type is supported by JSON
+        _ = _validate_data_type(value)
 
         # Save the observation for the sample and variable
         self.observations[sample][variable] = {
@@ -129,6 +123,32 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
             "label": label
         }
 
+    # Method to add metadata instance to outputs
+    def add_metadata(self, term, datatype, value):
+        """Add a metadata term and value to outputs.
+
+        Parameters
+        ----------
+        term : str
+            Metadata term/name.
+        datatype : type
+            The type of data to be stored, e.g. 'int', 'float', 'str', 'list', 'bool', etc.
+        value : any
+            The data itself.
+        """
+        # Create an empty entry for the metadata term if it does not exist
+        if term not in self.metadata:
+            self.metadata[term] = {}
+
+        # Validate that the data type is supported by JSON
+        _ = _validate_data_type(value)
+
+        # Save the datatype and value for the metadata term (replaces any previous entry)
+        self.metadata[term] = {
+            "datatype": str(datatype),
+            "value": value
+        }
+
     # Method to save observations to a file
     def save_results(self, filename, outformat="json"):
         """Save results to a file.
@@ -145,16 +165,26 @@ def save_results(self, filename, outformat="json"):
                 with open(filename, 'r') as f:
                     hierarchical_data = json.load(f)
                     hierarchical_data["observations"] = self.observations
+                    existing_metadata = hierarchical_data["metadata"]
+                    for term in self.metadata:
+                        save_term = term
+                        if term in existing_metadata:
+                            save_term = f"{term}_1"
+                        hierarchical_data["metadata"][save_term] = self.metadata[term]
             else:
-                hierarchical_data = {"metadata": {}, "observations": self.observations}
-
+                hierarchical_data = {"metadata": self.metadata, "observations": self.observations}
             with open(filename, mode='w') as f:
                 json.dump(hierarchical_data, f)
+
         elif outformat.upper() == "CSV":
             # Open output CSV file
             csv_table = open(filename, "w")
+            # Gather any additional metadata
+            metadata_key_list = list(self.metadata.keys())
+            metadata_val_list = [val["value"] for val in self.metadata.values()]
             # Write the header
-            csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n")
+            header = metadata_key_list + ["sample", "trait", "value", "label"]
+            csv_table.write(",".join(map(str, header)) + "\n")
             # Iterate over data samples
             for sample in self.observations:
                 # Iterate over traits for each sample
@@ -168,23 +198,18 @@ def save_results(self, filename, outformat="json"):
                             # Skip list of tuple data types
                             if not isinstance(value, tuple):
                                 # Save one row per value-label
-                                row = [sample, var, value, label]
+                                row = metadata_val_list + [sample, var, value, label]
                                 csv_table.write(",".join(map(str, row)) + "\n")
                     # If the data type is Boolean, store as a numeric 1/0 instead of True/False
                     elif isinstance(val, bool):
-                        row = [sample,
-                               var,
-                               int(self.observations[sample][var]["value"]),
-                               self.observations[sample][var]["label"]]
+                        row = metadata_val_list + [sample, var, int(self.observations[sample][var]["value"]),
+                                                   self.observations[sample][var]["label"]]
                         csv_table.write(",".join(map(str, row)) + "\n")
                     # For all other supported data types, save one row per trait
                     # Assumes no unusual data types are present (possibly a bad assumption)
                     else:
-                        row = [sample,
-                               var,
-                               self.observations[sample][var]["value"],
-                               self.observations[sample][var]["label"]
-                               ]
+                        row = metadata_val_list + [sample, var, self.observations[sample][var]["value"],
+                                                   self.observations[sample][var]["label"]]
                         csv_table.write(",".join(map(str, row)) + "\n")
 
     def plot_dists(self, variable):
@@ -233,6 +258,38 @@ def plot_dists(self, variable):
         return chart
 
 
+def _validate_data_type(data):
+    """Validate that the data type is supported by JSON.
+
+    Parameters
+    ----------
+    data : any
+        Data to be validated.
+
+    Returns
+    -------
+    bool
+        True if the data type is supported by JSON.
+
+    Raises
+    ------
+    RuntimeError
+        If the data type is not supported by JSON (raised via fatal_error).
+    """
+    # Supported data types
+    supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
+    # Supported class types
+    class_list = [f"<class '{cls}'>" for cls in supported_dtype]
+
+    # Send an error message if datatype is not supported by json
+    if str(type(data)) not in class_list:
+        # String list of supported types
+        type_list = ', '.join(map(str, supported_dtype))
+        fatal_error(f"The Data type {type(data)} is not compatible with JSON! Please use only these: {type_list}!")
+
+    return True
+
+
 class Spectral_data:
     """PlantCV Hyperspectral data class"""
 
diff --git a/tests/plantcv/test_outputs.py b/tests/plantcv/test_outputs.py
@@ -35,6 +35,7 @@ def test_save_results_json_newfile(tmpdir):
     outputs = Outputs()
     outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
                             datatype=str, value="test", label="none")
+    outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
     outputs.save_results(filename=outfile, outformat="json")
     with open(outfile, "r") as fp:
         results = json.load(fp)
@@ -51,6 +52,8 @@ def test_save_results_json_existing_file(test_data, tmpdir):
     outputs = Outputs()
     outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
                             datatype=str, value="test", label="none")
+    outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
+    outputs.add_metadata(term="camera", datatype="str", value="TV")
     outputs.save_results(filename=outfile, outformat="json")
     with open(outfile, "r") as fp:
         results = json.load(fp)
@@ -81,6 +84,30 @@ def test_save_results_csv(test_data, tmpdir):
     assert results == test_results
 
 
+def test_save_results_csv_add_metadata(tmpdir):
+    """Test that save_results writes metadata terms as leading columns in the CSV header."""
+    # Create a test tmp directory
+    outfile = tmpdir.mkdir("cache").join("results.csv")
+    # Create output instance
+    outputs = Outputs()
+    outputs.add_observation(sample='default', variable='string', trait='string variable', method='string', scale='none',
+                            datatype=str, value="string", label="none")
+    outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
+    outputs.save_results(filename=outfile, outformat="csv")
+    with open(outfile, "r") as fp:
+        results = fp.read()
+    x = slice(0, 33)
+    assert results[x] == "add_date,sample,trait,value,label"
+
+
+def test_add_metadata_invalid_type():
+    """Test that add_metadata raises an error for a value type JSON cannot store."""
+    # Create output instance
+    outputs = Outputs()
+    with pytest.raises(RuntimeError):
+        outputs.add_metadata(term="bad_dtype", datatype="str", value=np.array([2]))
+
+
 def test_clear_outputs():
     """Test for PlantCV."""
     # Create output instance