Skip to content

Commit 30c6a1c

Browse files
committed
Issue #324 add DataCube.print_json() and discourage flat_graph() stronger for general use
Also add `to_json`/`print_json` usage docs
1 parent 1290234 commit 30c6a1c

File tree

6 files changed

+135
-32
lines changed

6 files changed

+135
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
- Allow passing raw JSON string, JSON file path or URL to `Connection.download()`,
1313
`Connection.execute()` and `Connection.create_job()`
1414
- Add support for reverse math operators on DataCube in `apply` mode ([#323](https://github.com/Open-EO/openeo-python-client/issues/323))
15+
- Add `DataCube.print_json()` to simplify exporting process graphs in Jupyter or other interactive environments ([#324](https://github.com/Open-EO/openeo-python-client/issues/324))
1516

1617

1718
### Changed

docs/cookbook/tricks.rst

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,58 @@ Miscellaneous tips and tricks
33
===============================
44

55

6+
.. _process_graph_export:
7+
8+
Export a process graph
9+
-----------------------
10+
11+
You can export the underlying process graph of
12+
a :py:class:`~openeo.rest.datacube.DataCube`, :py:class:`~openeo.rest.vectorcube.VectorCube`, etc,
13+
to a standardized JSON format, which allows interoperability with other openEO tools.
14+
15+
For example, use :py:meth:`~openeo.rest.datacube.DataCube.print_json()` to directly print the JSON representation
16+
in your interactive Jupyter or Python session:
17+
18+
.. code-block:: pycon
19+
20+
>>> cube.print_json()
21+
{
22+
"process_graph": {
23+
"loadcollection1": {
24+
"process_id": "load_collection",
25+
...
26+
27+
Or save it to a file, by getting the JSON representation first as a string
28+
with :py:meth:`~openeo.rest.datacube.DataCube.to_json()`:
29+
30+
.. code-block:: python
31+
32+
# Export as JSON string
33+
dump = cube.to_json()
34+
35+
# Write to file in `pathlib` style
36+
export_path = pathlib.Path("path/to/export.json")
37+
export_path.write_text(dump, encoding="utf8")
38+
39+
# Write to file in `open()` style
40+
with open("path/to/export.json", "w", encoding="utf8") as f:
41+
f.write(dump)
42+
43+
44+
.. warning::
45+
46+
Avoid using methods like :py:meth:`~openeo.rest.datacube.DataCube.flat_graph()`,
47+
which are mainly intended for internal use.
48+
Not only are these methods subject to change, they also lead to representations
49+
with interoperability and reuse issues.
50+
For example, naively printing or automatic (``repr``) rendering of
51+
:py:meth:`~openeo.rest.datacube.DataCube.flat_graph()` output will roughly look like JSON,
52+
but is in fact invalid: it uses single quotes (instead of double quotes)
53+
and boolean values are title-case (instead of lower case).
54+
55+
56+
57+
658
Execute a process graph directly from raw JSON
759
-----------------------------------------------
860

openeo/internal/graph_building.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def _deep_copy(x):
114114
return _deep_copy(self)
115115

116116
def flat_graph(self) -> dict:
117-
"""Get the process graph in flat dict representation."""
117+
"""Get the process graph in internal flat dict representation."""
118118
return GraphFlattener().flatten(node=self)
119119

120120
flatten = legacy_alias(flat_graph, name="flatten")
@@ -145,7 +145,7 @@ def from_flat_graph(flat_graph: dict, parameters: Optional[dict] = None) -> 'PGN
145145

146146
def as_flat_graph(x: Union[dict, Any]) -> dict:
147147
"""
148-
Convert given object to a flat dict graph representation.
148+
Convert given object to an internal flat dict graph representation.
149149
"""
150150
if isinstance(x, dict):
151151
return x

openeo/internal/processes/builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def process(cls, process_id: str, arguments: dict = None, namespace: Union[str,
5353
return cls(PGNode(process_id=process_id, arguments=arguments, namespace=namespace))
5454

5555
def flat_graph(self) -> dict:
56-
"""Get the process graph in flat dict representation"""
56+
"""Get the process graph in internal flat dict representation."""
5757
return self.pgnode.flat_graph()
5858

5959
def from_node(self) -> PGNode:

openeo/rest/_datacube.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import logging
33
import typing
4-
from typing import Optional
4+
from typing import Optional, Union, Tuple
55

66
from openeo.internal.graph_building import PGNode, _FromNodeMixin
77
from openeo.util import legacy_alias
@@ -32,23 +32,54 @@ def __str__(self):
3232

3333
def flat_graph(self) -> dict:
3434
"""
35-
Get the process graph in flat dict representation
35+
Get the process graph in internal flat dict representation.
3636
37-
.. note:: This method is mainly for internal use, subject to change and not recommended for general usage.
38-
Instead, use :py:meth:`to_json()` to get a JSON representation of the process graph.
37+
.. warning:: This method is mainly intended for internal use.
38+
It is not recommended for general use and is *subject to change*.
39+
40+
Instead, it is recommended to use
41+
:py:meth:`to_json()` or :py:meth:`print_json()`
42+
to obtain a standardized, interoperable JSON representation of the process graph.
43+
See :ref:`process_graph_export` for more information.
3944
"""
4045
# TODO: wrap in {"process_graph":...} by default/optionally?
4146
return self._pg.flat_graph()
4247

4348
flatten = legacy_alias(flat_graph, name="flatten")
4449

45-
def to_json(self, indent=2, separators=None) -> str:
50+
def to_json(self, *, indent: Union[int, None] = 2, separators: Optional[Tuple[str, str]] = None) -> str:
4651
"""
47-
Get JSON representation of (flat dict) process graph.
52+
Get interoperable JSON representation of the process graph.
53+
54+
See :py:meth:`DataCube.print_json` to directly print the JSON representation
55+
and :ref:`process_graph_export` for more usage information.
56+
57+
Also see ``json.dumps`` docs for more information on the JSON formatting options.
58+
59+
:param indent: JSON indentation level.
60+
:param separators: (optional) tuple of item/key separators.
61+
:return: JSON string
4862
"""
4963
pg = {"process_graph": self.flat_graph()}
5064
return json.dumps(pg, indent=indent, separators=separators)
5165

66+
def print_json(self, *, file=None, indent: Union[int, None] = 2, separators: Optional[Tuple[str, str]] = None):
67+
"""
68+
Print interoperable JSON representation of the process graph.
69+
70+
See :py:meth:`DataCube.to_json` to get the JSON representation as a string
71+
and :ref:`process_graph_export` for more usage information.
72+
73+
Also see ``json.dumps`` docs for more information on the JSON formatting options.
74+
75+
:param file: file-like object (stream) to print to (current ``sys.stdout`` by default).
76+
:param indent: JSON indentation level.
77+
:param separators: (optional) tuple of item/key separators.
78+
79+
.. versionadded:: 0.12.0
80+
"""
81+
print(self.to_json(indent=indent, separators=separators), file=file)
82+
5283
@property
5384
def _api_version(self):
5485
return self._connection.capabilities().api_version_check

tests/rest/datacube/test_datacube100.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
55
"""
66
import collections
7+
import io
78
import pathlib
89
import re
910
import sys
@@ -1428,32 +1429,34 @@ def test_save_result_format(con100, requests_mock):
14281429
cube.save_result(format="pNg")
14291430

14301431

1432+
EXPECTED_JSON_EXPORT_S2_NDVI = textwrap.dedent('''\
1433+
{
1434+
"process_graph": {
1435+
"loadcollection1": {
1436+
"process_id": "load_collection",
1437+
"arguments": {
1438+
"id": "S2",
1439+
"spatial_extent": null,
1440+
"temporal_extent": null
1441+
}
1442+
},
1443+
"ndvi1": {
1444+
"process_id": "ndvi",
1445+
"arguments": {
1446+
"data": {
1447+
"from_node": "loadcollection1"
1448+
}
1449+
},
1450+
"result": true
1451+
}
1452+
}
1453+
}''')
1454+
1455+
14311456
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires 'insertion ordered' dicts from python3.6 or higher")
14321457
def test_to_json(con100):
14331458
ndvi = con100.load_collection("S2").ndvi()
1434-
expected = textwrap.dedent('''\
1435-
{
1436-
"process_graph": {
1437-
"loadcollection1": {
1438-
"process_id": "load_collection",
1439-
"arguments": {
1440-
"id": "S2",
1441-
"spatial_extent": null,
1442-
"temporal_extent": null
1443-
}
1444-
},
1445-
"ndvi1": {
1446-
"process_id": "ndvi",
1447-
"arguments": {
1448-
"data": {
1449-
"from_node": "loadcollection1"
1450-
}
1451-
},
1452-
"result": true
1453-
}
1454-
}
1455-
}''')
1456-
assert ndvi.to_json() == expected
1459+
assert ndvi.to_json() == EXPECTED_JSON_EXPORT_S2_NDVI
14571460

14581461

14591462
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires 'insertion ordered' dicts from python3.6 or higher")
@@ -1465,6 +1468,22 @@ def test_to_json_compact(con100):
14651468
assert ndvi.to_json(indent=None, separators=(',', ':')) == expected
14661469

14671470

1471+
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires 'insertion ordered' dicts from python3.6 or higher")
1472+
def test_print_json_default(con100, capsys):
1473+
ndvi = con100.load_collection("S2").ndvi()
1474+
ndvi.print_json()
1475+
stdout, stderr = capsys.readouterr()
1476+
assert stdout == EXPECTED_JSON_EXPORT_S2_NDVI + "\n"
1477+
1478+
1479+
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires 'insertion ordered' dicts from python3.6 or higher")
1480+
def test_print_json_file(con100):
1481+
ndvi = con100.load_collection("S2").ndvi()
1482+
f = io.StringIO()
1483+
ndvi.print_json(file=f)
1484+
assert f.getvalue() == EXPECTED_JSON_EXPORT_S2_NDVI + "\n"
1485+
1486+
14681487
def test_sar_backscatter_defaults(con100):
14691488
cube = con100.load_collection("S2").sar_backscatter()
14701489
assert _get_leaf_node(cube) == {

0 commit comments

Comments
 (0)