Commit ac38a61

Author: Sailesh Mukil (committed)
Abstract away "database" implementations and remove strong coupling of DB implementation with APIs
1. Abstracts SpannerDatabase and MockSpannerDatabase with clear APIs
2. Introduces CloudSpannerDatabase as an implementation of SpannerDatabase
3. Removes further tight coupling with the Cloud Spanner client by adding a SpannerFieldInfo dataclass to replace usage of StructType.Field (a sketch of these abstractions follows the file summary below)
1 parent 39635ea commit ac38a61

File tree: 7 files changed, +337 −175 lines

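The abstract base classes and the SpannerFieldInfo dataclass named in the commit message live in spanner_graphs/database.py, which is one of the seven changed files but is not shown in this excerpt. Below is a minimal sketch of what those definitions might look like, inferred only from how cloud_database.py and conversion.py use them in the diffs that follow; everything beyond the attribute names name/typename, the SpannerQueryResult constructor arguments, and get_as_field_info_list is an assumption.

# Hypothetical sketch of spanner_graphs/database.py (not part of this excerpt);
# shapes are inferred from usage in cloud_database.py and conversion.py below.
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from google.cloud.spanner_v1.types import StructType


@dataclass
class SpannerFieldInfo:
    """Client-agnostic description of a result column (replaces StructType.Field)."""
    name: str
    typename: str  # e.g. "JSON", "ARRAY", "STRING"


@dataclass
class SpannerQueryResult:
    """Container returned by every database implementation."""
    data: Dict[str, List[Any]]
    fields: List[SpannerFieldInfo]
    rows: List[List[Any]]
    schema_json: Optional[Any]
    error: Optional[Exception]


def get_as_field_info_list(fields: List[StructType.Field]) -> List[SpannerFieldInfo]:
    """Convert Cloud Spanner field metadata into the neutral dataclass."""
    return [SpannerFieldInfo(name=f.name, typename=f.type_.code.name) for f in fields]


class SpannerDatabase(ABC):
    """Abstract API that every concrete database must provide."""

    @abstractmethod
    def execute_query(self, query: str, limit: int | None = None,
                      is_test_query: bool = False) -> SpannerQueryResult:
        ...


class MockSpannerDatabase(ABC):
    """Abstract API for mock databases used in tests and demos."""

    @abstractmethod
    def execute_query(self, _: str, limit: int = 5) -> SpannerQueryResult:
        ...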

spanner_graphs/cloud_database.py

Lines changed: 218 additions & 0 deletions
@@ -0,0 +1,218 @@
# Copyright 2024 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module contains the cloud-specific implementation for talking to a Spanner database.
"""

from __future__ import annotations

import csv
import json
import os
from typing import Any, Dict, List, Tuple

from google.cloud import spanner
from google.cloud.spanner_v1 import JsonObject
from google.api_core.client_options import ClientOptions
from google.cloud.spanner_v1.types import StructType, Type, TypeCode
import pydata_google_auth

from spanner_graphs.database import SpannerDatabase, MockSpannerDatabase, SpannerQueryResult, SpannerFieldInfo, get_as_field_info_list


def _get_default_credentials_with_project():
    return pydata_google_auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"], use_local_webserver=False)


class CloudSpannerDatabase(SpannerDatabase):
    """Concrete implementation for Spanner database on the cloud."""

    def __init__(self, project_id: str, instance_id: str,
                 database_id: str) -> None:
        credentials, _ = _get_default_credentials_with_project()
        self.client = spanner.Client(
            project=project_id, credentials=credentials,
            client_options=ClientOptions(quota_project_id=project_id))
        self.instance = self.client.instance(instance_id)
        self.database = self.instance.database(database_id)
        self.schema_json: Any | None = None

    def __repr__(self) -> str:
        return (f"<CloudSpannerDatabase["
                f"project:{self.client.project_name},"
                f"instance:{self.instance.name},"
                f"db:{self.database.name}]>")

    def _extract_graph_name(self, query: str) -> str:
        words = query.strip().split()
        if len(words) < 3:
            raise ValueError("invalid query: must contain at least (GRAPH, graph_name and query)")

        if words[0].upper() != "GRAPH":
            raise ValueError("invalid query: GRAPH must be the first word")

        return words[1]

    def _get_schema_for_graph(self, graph_query: str) -> Any | None:
        try:
            graph_name = self._extract_graph_name(graph_query)
        except ValueError:
            return None

        with self.database.snapshot() as snapshot:
            schema_query = """
                SELECT property_graph_name, property_graph_metadata_json
                FROM information_schema.property_graphs
                WHERE property_graph_name = @graph_name
            """
            params = {"graph_name": graph_name}
            param_type = {"graph_name": spanner.param_types.STRING}

            result = snapshot.execute_sql(schema_query, params=params, param_types=param_type)
            schema_rows = list(result)

            if schema_rows:
                return schema_rows[0][1]
            else:
                return None

    def execute_query(
        self,
        query: str,
        limit: int | None = None,
        is_test_query: bool = False,
    ) -> SpannerQueryResult:
        """
        Executes the provided `query`.

        Args:
            query: The SQL query to execute against the database
            limit: An optional limit for the number of rows to return
            is_test_query: If true, skips schema fetching for graph queries.

        Returns:
            A `SpannerQueryResult`
        """
        self.schema_json = None
        if not is_test_query:
            self.schema_json = self._get_schema_for_graph(query)

        with self.database.snapshot() as snapshot:
            params = None
            param_types = None
            if limit and limit > 0:
                params = dict(limit=limit)

            try:
                results = snapshot.execute_sql(query, params=params, param_types=param_types)
                rows = list(results)
            except Exception as e:
                # Surface the error through the result object instead of raising.
                return SpannerQueryResult(
                    data={},
                    fields=[],
                    rows=[],
                    schema_json=self.schema_json,
                    error=e
                )

            fields: List[SpannerFieldInfo] = get_as_field_info_list(results.fields)
            data = {field.name: [] for field in fields}

            if len(fields) == 0:
                return SpannerQueryResult(
                    data=data,
                    fields=fields,
                    rows=rows,
                    schema_json=self.schema_json,
                    error=None
                )

            for row_data in rows:
                for field, value in zip(fields, row_data):
                    if isinstance(value, JsonObject):
                        data[field.name].append(json.loads(value.serialize()))
                    else:
                        data[field.name].append(value)

            return SpannerQueryResult(
                data=data,
                fields=fields,
                rows=rows,
                schema_json=self.schema_json,
                error=None
            )


class CloudMockSpannerResult:
    """Reads mock query results from a CSV file of JSON-encoded columns."""

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.fields: List[StructType.Field] = []
        self._rows: List[List[Any]] = []
        self._load_data()

    def _load_data(self):
        with open(self.file_path, "r", encoding="utf-8") as csvfile:
            csv_reader = csv.reader(csvfile)
            headers = next(csv_reader)
            self.fields = [
                StructType.Field(name=header, type_=Type(code=TypeCode.JSON))
                for header in headers
            ]

            for row in csv_reader:
                parsed_row = []
                for value in row:
                    try:
                        js = bytes(value, "utf-8").decode("unicode_escape")
                        parsed_row.append(json.loads(js))
                    except json.JSONDecodeError:
                        pass
                self._rows.append(parsed_row)

    def __iter__(self):
        return iter(self._rows)


class CloudMockSpannerDatabase(MockSpannerDatabase):
    """Cloud Mock database class"""

    def __init__(self):
        dirname = os.path.dirname(__file__)
        self.graph_csv_path = os.path.join(
            dirname, "graph_mock_data.csv")
        self.schema_json_path = os.path.join(
            dirname, "graph_mock_schema.json")
        self.schema_json: dict = {}

    def execute_query(
        self,
        _: str,
        limit: int = 5
    ) -> SpannerQueryResult:
        """Mock execution of query"""

        # Before the actual query we fetch the schema as well
        with open(self.schema_json_path, "r", encoding="utf-8") as js:
            self.schema_json = json.load(js)

        results = CloudMockSpannerResult(self.graph_csv_path)
        fields: List[SpannerFieldInfo] = get_as_field_info_list(results.fields)
        rows = list(results)
        data = {field.name: [] for field in fields}

        if len(fields) == 0:
            return SpannerQueryResult(
                data=data,
                fields=fields,
                rows=rows,
                schema_json=self.schema_json,
                error=None
            )

        for i, row in enumerate(results):
            if limit is not None and i >= limit:
                break
            for field, value in zip(fields, row):
                data[field.name].append(value)

        return SpannerQueryResult(
            data=data,
            fields=fields,
            rows=rows,
            schema_json=self.schema_json,
            error=None
        )
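
A hedged usage sketch of the new class follows. The project, instance, and database IDs and the graph query are placeholders rather than values taken from this commit; the call shape mirrors execute_query above.

# Hypothetical usage of CloudSpannerDatabase; all identifiers are placeholders.
from spanner_graphs.cloud_database import CloudSpannerDatabase

db = CloudSpannerDatabase(project_id="my-project",
                          instance_id="my-instance",
                          database_id="my-database")

result = db.execute_query("GRAPH MyGraph MATCH (n) RETURN TO_JSON(n) AS n LIMIT 10")

if result.error is not None:
    raise result.error

# Each result column is keyed by its SpannerFieldInfo name.
for field in result.fields:
    print(field.name, field.typename, len(result.data[field.name]))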

spanner_graphs/conversion.py

Lines changed: 6 additions & 5 deletions
@@ -23,10 +23,11 @@

 from google.cloud.spanner_v1.types import TypeCode, StructType

+from spanner_graphs.database import SpannerFieldInfo
 from spanner_graphs.graph_entities import Node, Edge
 from spanner_graphs.schema_manager import SchemaManager

-def get_nodes_edges(data: Dict[str, List[Any]], fields: List[StructType.Field], schema_json: dict = None) -> Tuple[List[Node], List[Edge]]:
+def get_nodes_edges(data: Dict[str, List[Any]], fields: List[SpannerFieldInfo], schema_json: dict = None) -> Tuple[List[Node], List[Edge]]:
     schema_manager = SchemaManager(schema_json)
     nodes: List[Node] = []
     edges: List[Edge] = []
@@ -37,15 +38,15 @@ def get_nodes_edges(data: Dict[str, List[Any]], fields: List[StructType.Field],
     for field in fields:
         column_name = field.name
         column_data = data[column_name]
-
+
         # Only process JSON and Array of JSON types
-        if field.type_.code not in [TypeCode.JSON, TypeCode.ARRAY]:
+        if field.typename not in ["JSON", "ARRAY"]:
             continue

         # Process each value in the column
         for value in column_data:
             items_to_process = []
-
+
             # Handle both single JSON and arrays of JSON
             if isinstance(value, list):
                 items_to_process.extend(value)
@@ -92,4 +93,4 @@ def get_nodes_edges(data: Dict[str, List[Any]], fields: List[StructType.Field],
                 nodes.append(Node.make_intermediate(identifier))
                 node_identifiers.add(identifier)

-    return nodes, edges
+    return nodes, edges
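
A practical effect of this change is that get_nodes_edges no longer depends on Cloud Spanner client types, so callers and tests can build the field metadata directly. A minimal sketch, assuming the SpannerFieldInfo shape inferred earlier and that SchemaManager accepts a None schema, as the default argument suggests:

# Hypothetical test-style call; no google.cloud.spanner types are needed anywhere.
from spanner_graphs.conversion import get_nodes_edges
from spanner_graphs.database import SpannerFieldInfo

fields = [SpannerFieldInfo(name="result", typename="JSON")]
data = {"result": []}  # no rows; this only exercises the decoupled signature

nodes, edges = get_nodes_edges(data, fields, schema_json=None)
assert nodes == [] and edges == []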
