diff --git a/experiment.ipynb b/experiment.ipynb
new file mode 100644
index 00000000..e181bdc5
--- /dev/null
+++ b/experiment.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6eba6c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ee\n",
+    "import geetools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f2d1935",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ee.Authenticate.geetools.list_user()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8cda9cb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ee.Initialize.geetools.from_user(name=\"pierrick\", project=\"ee-geetools\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee7f0fd7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ee.Initialize.geetools.project_id()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19d7c112",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ee.Number(1).getInfo()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f932c8e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# initialize the Google Cloud Storage client to save the generated tiff files\n",
+    "from google.cloud.storage.client import Client\n",
+    "\n",
+    "client = Client(\n",
+    "    project=ee.Initialize.geetools.project_id(),\n",
+    "    credentials=ee.Initialize.geetools.get_credentials(\"pierrick\")\n",
+    ")\n",
+    "list(client.list_buckets())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "100f7815",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOAA CPC Historical Weather Data for GEE\n",
+    "\n",
+    "import re\n",
+    "import tempfile\n",
+    "from datetime import datetime as dtm\n",
+    "from datetime import timedelta\n",
+    "from pathlib import Path\n",
+    "from functools import partial\n",
+    "\n",
+    "import requests\n",
+    "import rioxarray  # noqa: F401 -- imported for its side effect of registering the .rio accessor\n",
+    "import xarray as xr\n",
+    "from tqdm import tqdm\n",
+    "from xarray.coding.times import CFDatetimeCoder\n",
+    "\n",
+    "\n",
+    "# Start the process timer for performance monitoring\n",
+    "now = dtm.now()\n",
+    "\n",
+    "# Configuration constants\n",
+    "DEFAULT_ASSET_PATH = \"~/assets/cpc_noaa/cpc_daily_raw\"\n",
+    "\n",
+    "# Base URLs for the NOAA CPC data servers - each contains the filename prefix for that data type\n",
+    "CPC_URLS = {\n",
+    "    \"precip\": \"https://downloads.psl.noaa.gov/Datasets/cpc_global_precip/precip.\",\n",
+    "    \"tmin\": \"https://downloads.psl.noaa.gov/Datasets/cpc_global_temp/tmin.\",\n",
+    "    \"tmax\": \"https://downloads.psl.noaa.gov/Datasets/cpc_global_temp/tmax.\",\n",
+    "}\n",
+    "\n",
+    "print(\"=== Starting NOAA CPC Data Processing ===\")\n",
+    "\n",
+    "# Create the Earth Engine collection asset if it doesn't exist\n",
+    "collection = ee.Asset(DEFAULT_ASSET_PATH).expanduser()\n",
+    "collection.mkdir(parents=True, exist_ok=True, image_collection=True)\n",
+    "\n",
+    "# Update the collection description with the README content every time so it stays up to date\n",
+    "readme = (Path.cwd() / \"README.md\").read_text()  # __file__ is not defined in a notebook\n",
+    "collection.setProperties(**{\"description\": readme})\n",
+    "\n",
+    "# Discover available years from the NOAA CPC servers\n",
+    "# We check each data type (precip, tmin, tmax) and keep the years that exist in ALL datasets\n",
+    "available_years = None  # filled with the intersection of years across data types\n",
+    "for data_type, base_url in CPC_URLS.items():\n",
+    "    # Remove the data type prefix to get the directory listing URL\n",
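+    "    # For precip, for example, the stripped URL resolves to the listing at\n",
+    "    #   https://downloads.psl.noaa.gov/Datasets/cpc_global_precip/\n",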
+    "    response = requests.get(base_url.replace(data_type + \".\", \"\"), timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "\n",
+    "    # Extract years from filenames like \"precip.1979.nc\" or \"tmin.2023.nc\"\n",
+    "    # using a regex that finds 4-digit years in NetCDF filenames\n",
+    "    pattern = rf\"{data_type}\\.(\\d{{4}})\\.nc\"\n",
+    "    matches = re.findall(pattern, response.text)\n",
+    "    years_for_datatype = {int(year) for year in matches}\n",
+    "\n",
+    "    # Keep only years that exist in ALL datasets (intersection)\n",
+    "    # First iteration: set the initial years\n",
+    "    # Subsequent iterations: intersect with previous results\n",
+    "    available_years = years_for_datatype if available_years is None else available_years\n",
+    "    available_years &= years_for_datatype\n",
+    "\n",
+    "available_years = sorted([year for year in available_years if year < dtm.now().year])\n",
+    "print(f\"Processing noaa_cpc data for years: {available_years}\")\n",
+    "\n",
+    "# Download and process data year by year to manage memory efficiently\n",
+    "# Process each available year sequentially\n",
+    "task_list, failed_tasks = [], []\n",
+    "for year in available_years:\n",
+    "    with tempfile.TemporaryDirectory() as temp_dir:\n",
+    "        # Download all three data files (precip, tmin, tmax) for this year\n",
+    "        for var in [\"precip\", \"tmin\", \"tmax\"]:\n",
+    "            local_path = Path(temp_dir) / f\"{var}.{year}.nc\"\n",
+    "            url = f\"{CPC_URLS[var]}{year}.nc\"\n",
+    "\n",
+    "            # Only download if the file doesn't already exist locally\n",
+    "            if not local_path.exists():\n",
+    "                response = requests.get(url, timeout=300)\n",
+    "                response.raise_for_status()\n",
+    "                local_path.write_bytes(response.content)\n",
+    "\n",
+    "        # Open the three NetCDF datasets we just downloaded\n",
+    "        time_coder = CFDatetimeCoder(use_cftime=True)  # handle the time encoding properly\n",
+    "        precip_file = Path(temp_dir) / f\"precip.{year}.nc\"\n",
+    "        tmin_file = Path(temp_dir) / f\"tmin.{year}.nc\"\n",
+    "        tmax_file = Path(temp_dir) / f\"tmax.{year}.nc\"\n",
+    "\n",
+    "        # Use chunking to process data efficiently - one day at a time\n",
+    "        # The lazy load avoids allocating the entire year into memory at once\n",
+    "        # Note: CPC files use \"lat\"/\"lon\" coordinates, not \"latitude\"/\"longitude\"\n",
+    "        chunks = {\"time\": 1, \"lat\": -1, \"lon\": -1}\n",
+    "        open_ds = partial(xr.open_dataset, decode_times=time_coder, chunks=chunks, engine=\"netcdf4\")\n",
+    "        ds_precip, ds_tmin, ds_tmax = open_ds(precip_file), open_ds(tmin_file), open_ds(tmax_file)\n",
+    "\n",
+    "        # Find the actual variable names in each dataset\n",
+    "        # NOAA CPC uses different variable names across datasets\n",
+    "        precip_var = next(var for var in ds_precip.data_vars if var.lower().startswith(\"precip\"))\n",
+    "        tmin_var = next(var for var in ds_tmin.data_vars if var.lower().startswith(\"tmin\"))\n",
+    "        tmax_var = next(var for var in ds_tmax.data_vars if var.lower().startswith(\"tmax\"))\n",
+    "\n",
+    "        # Extract and rename the variables to standard names for Earth Engine\n",
+    "        ds_precip = ds_precip[precip_var].rename(\"tp\")  # Total precipitation\n",
+    "        ds_tmin = ds_tmin[tmin_var].rename(\"tmin\")  # Minimum temperature\n",
+    "        ds_tmax = ds_tmax[tmax_var].rename(\"tmax\")  # Maximum temperature\n",
+    "\n",
+    "        # Combine all three variables into a single dataset\n",
+    "        # This creates a dataset with 3 data variables: tp, tmin, tmax\n",
+    "        ds_list = [ds_precip, ds_tmin, ds_tmax]\n",
+    "        ds_merged = xr.merge(ds_list)\n",
+    "\n",
+    "        # Close the individual datasets since we won't use them anymore\n",
+    "        # This releases file handles and saves memory\n",
+    "        for ds in ds_list:\n",
+    "            ds.close()\n",
+    "\n",
+    "        # Standardize the coordinate names to longitude/latitude\n",
+    "        # Some datasets use \"lon/lat\", others use \"longitude/latitude\"\n",
+    "        std_keys = {\"lon\": \"longitude\", \"lat\": \"latitude\"}\n",
+    "        std_keys = {k: v for k, v in std_keys.items() if k in ds_merged.coords}\n",
+    "        ds_merged = ds_merged.rename(std_keys)\n",
+    "\n",
+    "        # Convert longitude from [0, 360] to [-180, 180] format\n",
+    "        # This is the standard format expected by Earth Engine\n",
+    "        adjusted_lon = (ds_merged[\"longitude\"].values - 180) % 360 - 180\n",
+    "        ds_merged = ds_merged.assign_coords(longitude=adjusted_lon).sortby(\"longitude\")\n",
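+    "\n",
+    "        # Worked example of the wrap formula above (illustrative values):\n",
+    "        #   0.25   -> (0.25 - 180) % 360 - 180   =    0.25\n",
+    "        #   180.25 -> (180.25 - 180) % 360 - 180 = -179.75\n",
+    "        #   359.75 -> (359.75 - 180) % 360 - 180 =   -0.25\n",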
+    "\n",
+    "        # Get the total number of days in this year's dataset\n",
+    "        total_time_steps = len(ds_merged.time)\n",
+    "\n",
+    "        # Process each day individually to minimize memory usage\n",
+    "        for day_index in tqdm(range(total_time_steps), desc=f\"Processing {year} days\", unit=\"day\"):\n",
+    "            # Extract the date for this day (cftime is a troublesome format: it cannot be safely\n",
+    "            # converted even with pd.to_datetime, so rebuild the datetime field by field)\n",
+    "            cftime_date = ds_merged.time.values[day_index]\n",
+    "            start_time = dtm(cftime_date.year, cftime_date.month, cftime_date.day)\n",
+    "\n",
+    "            # Create a unique asset ID for this day's data\n",
+    "            filename = f\"cpc_daily_root_{start_time:%Y%m%d}\"\n",
+    "            asset_id = ee.Asset(DEFAULT_ASSET_PATH) / filename\n",
+    "            if asset_id.exists():\n",
+    "                continue\n",
+    "\n",
+    "            # Extract data for just this one day (memory efficient)\n",
+    "            day_slice = ds_merged.isel(time=day_index)\n",
+    "\n",
+    "            # Convert to a DataArray with bands in the correct order\n",
+    "            # Earth Engine expects the data as a multi-band image\n",
+    "            desired_order = [\"tp\", \"tmin\", \"tmax\"]\n",
+    "            ds_merged_gee = day_slice[desired_order].to_array().rio.write_crs(\"epsg:4326\")\n",
+    "\n",
+    "            # Export this day's data to Google Earth Engine\n",
+    "            # Create a temporary GeoTIFF file in the same temp directory\n",
+    "            tiff_file = Path(temp_dir) / f\"cpc_daily_{start_time:%Y%m%d}.tiff\"\n",
+    "\n",
+    "            # Write the data to the temporary GeoTIFF file\n",
+    "            ds_merged_gee.rio.to_raster(tiff_file, driver=\"GTiff\", dtype=\"float32\", compress=\"lzw\")\n",
+    "\n",
+    "            # Prepare metadata properties for the Earth Engine asset\n",
+    "            properties = {\"upload_time\": int(dtm.now().timestamp() * 1000)}\n",
+    "\n",
+    "            # send the file to GCP\n",
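+    "            # A minimal sketch of that upload, assuming the storage client created\n",
+    "            # in the cell above and a hypothetical staging bucket name:\n",
+    "            # blob = client.bucket(\"my-cpc-staging\").blob(tiff_file.name)\n",
+    "            # blob.upload_from_filename(str(tiff_file))\n",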
+    "\n",
+    "\n",
+    "# # Submit the upload task to Earth Engine\n",
+    "# try:\n",
+    "#     task = ee.batch.Export.ldc.geotiff.toAsset(\n",
+    "#         filename=str(tiff_file),\n",
+    "#         assetId=asset_id,\n",
+    "#         start_time=start_time,\n",
+    "#         end_time=start_time + timedelta(days=1),\n",
+    "#         properties=properties,\n",
+    "#         band_names=desired_order\n",
+    "#     )\n",
+    "#     task.start()\n",
+    "#     task_list.append(task)\n",
+    "# except Exception as e:\n",
+    "#     logger.error(f\"Error occurred while submitting task for {start_time}: {e}\")\n",
+    "#     failed_tasks.append(asset_id)\n",
+    "#     continue  # skip this day and move on to the next one\n",
+    "\n",
+    "# # Clean up the temporary DataArray\n",
+    "# ds_merged_gee.close()\n",
+    "\n",
+    "# # Close the merged dataset after processing all days for this year\n",
+    "# # This is crucial for Windows file handle management\n",
+    "# ds_merged.close()\n",
+    "# logger.info(f\"Completed generating tasks for year {year}, all datasets closed\")\n",
+    "\n",
+    "# # Monitor Tasks\n",
+    "# logger.warning(\"Do not shut down the process\")\n",
+    "# logger.info(f\"Waiting for {len(task_list)} tasks to complete...\")\n",
+    "# start_wait = dtm.now()\n",
+    "# TM = LDCGEETools.TaskMonitor(task_list)\n",
+    "# TM.run()\n",
+    "# elapsed_wait_time = dtm.now() - start_wait\n",
+    "\n",
+    "# # update the collection start and end time\n",
+    "# ic = ee.ImageCollection(collection.as_posix())\n",
+    "# collection.setProperties(**{\n",
+    "#     \"system:time_start\": ic.aggregate_min(\"system:time_start\").getInfo(),\n",
+    "#     \"system:time_end\": ic.aggregate_max(\"system:time_start\").getInfo(),\n",
+    "# })\n",
+    "\n",
+    "# # log the total computation time, including the time spent waiting for Google\n",
+    "# logger.info(\"Processing completed. All tasks have been submitted for export to GEE.\")\n",
+    "# elapsed_time = dtm.now() - now\n",
+    "# logger.info(\n",
+    "#     f\"Total processing time: {elapsed_time}\"\n",
+    "#     f\" including {elapsed_wait_time} waiting for GEE tasks to complete.\"\n",
+    "# )\n",
+    "\n",
+    "# # log if some tasks could not be sent to GEE\n",
+    "# if len(failed_tasks) > 0:\n",
+    "#     logger.error(\"Some tasks failed to submit to GEE:\")\n",
+    "#     log = \"Task ({}) failed with error: {}\"\n",
+    "#     [logger.error(log.format(task, \"Failed to submit to GEE\")) for task in failed_tasks]\n",
+    "\n",
+    "# # log if the data are all there\n",
+    "# not_completed = [t for t in task_list if t.state() not in [\"COMPLETED\", \"SUCCEEDED\"]]\n",
+    "# if len(not_completed) > 0:\n",
+    "#     logger.error(\"Some tasks did not complete successfully:\")\n",
+    "#     log = \"Task ({}) failed with status: {}\"\n",
+    "#     [logger.error(log.format(task.task_id, task.state())) for task in not_completed]\n",
+    "\n",
+    "# if len(not_completed) > 0 or len(failed_tasks) > 0:\n",
+    "#     exit(1)  # exit with a non-zero status to signal the failure\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/geetools/ee_asset.py b/geetools/ee_asset.py
index abd93c12..d8f8e733 100644
--- a/geetools/ee_asset.py
+++ b/geetools/ee_asset.py
@@ -1,6 +1,7 @@
 """An Asset management class mimicking the ``pathlib.Path`` class behaviour."""
 from __future__ import annotations
 
+import json
 import os
 import re
 from datetime import date, datetime
@@ -8,6 +9,10 @@
 
 import ee
 import ee.data
+import rasterio as rio
+import requests
+from google.auth.transport.requests import AuthorizedSession
+from google.cloud import storage
 
 from .accessors import _register_extention
 from .utils import format_description
@@ -572,19 +577,46 @@ def move(self, new_asset: os.PathLike, overwrite: bool = False) -> Asset:
 
         return new_asset
 
-    def delete(self, recursive: bool = False, dry_run: bool | None = None) -> list:
+    def is_gcp_backed(self, raised: bool = False) -> bool:
+        """Return True if the asset is backed by a GCP file.
+
+        Args:
+            raised: If True, raise an exception if the asset is not GCP backed. Defaults to False.
+
+        Examples:
+            .. code-block:: python
+
+                asset = ee.Asset("projects/ee-geetools/assets/folder/image")
+                asset.is_gcp_backed()
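+                # True only for assets registered through ``register_cog_asset`` below,
+                # which tags them with e.g. {"gcp_backed": "COG", "gcp_uri": "gs://..."}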
+        """
+        self.exists(raised=True)
+        properties = ee.data.getAsset(self.as_posix()).get("properties", {})
+        is_gcp = "gcp_backed" in properties
+        if not is_gcp and raised:
+            raise ValueError(f"Asset {self.as_posix()} is not a GCP backed asset.")
+        return is_gcp
+
+    def delete(self, recursive: bool = False, dry_run: bool | None = None, delete_src: bool = False) -> list:
         """Remove the asset.
 
         This method will delete an asset (any type) asset and all its potential children. By default, it is not
         recursive and will raise an error if the container is not empty. By setting the recursive argument to True,
         the method will delete all the children and the container asset (including potential subfolders).
         To avoid deleting important assets by accident the method is set to dry_run by default.
 
+        Warning:
+            In the special case of a GCP backed asset (COG, TIF, SHP, etc.)
+            the method will not delete the source file by default. To delete the source file
+            you need to set the ``delete_src`` argument to True. Be careful as this action is irreversible.
+
         Note:
             A container is an asset containing other assets, it can be a ``Folder`` or an ``ImageCollection``.
 
         Args:
             recursive: If True, delete all the children and the container asset. Defaults to False.
             dry_run: If True, do not delete the asset simply pass them to the output list. Defaults to True.
+            delete_src: Try to delete the source file in the case of a GCP backed asset. Defaults to False.
 
         Returns:
             The list of deleted assets.
@@ -599,6 +631,7 @@
         # if we run a recursive rmdir the dry_run is set to True to avoid deleting too many things by accident
         # if we run a non-recursive rmdir the dry_run is set to False to delete the folder only
         dry_run = dry_run if dry_run is not None else recursive
+        delete_src = delete_src if dry_run is False else False
 
         # define a delete function to change the behaviour of the method depending of the mode
         # in dry mode, the function only store the assets to be destroyed as a dictionary.
@@ -626,7 +659,20 @@ def delete(asset):
         # delete all items starting from the more nested ones
         assets_ordered = dict(sorted(assets_ordered.items(), reverse=True))
         for lvl in assets_ordered:
             [delete(asset) for asset in assets_ordered[lvl]]
+
+        # if required delete the source file in the case of a GCP backed asset
+        if delete_src is True and self.is_gcp_backed():
+            properties = ee.data.getAsset(self.as_posix()).get("properties", {})
+            gcp_uri = properties.get("gcp_uri", "")
+            try:
+                client = storage.Client()
+                bucket_name, blob_name = gcp_uri.replace("gs://", "").split("/", 1)
+                bucket = client.get_bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                blob.delete()
+            except Exception as e:
+                print(f"Failed to delete GCS source file {gcp_uri}: {e}")
 
         # delete the initial folder/asset
         delete(self)
@@ -818,3 +864,102 @@ def date_in_str(d: str | int | float | datetime | date) -> str:
         )
 
         return self
+
+    def register_cog_asset(
+        self,
+        uri: str,
+        bands: list | None = None,
+        properties: dict | None = None,
+        start_time: str | datetime | date | None = None,
+        end_time: str | datetime | date | None = None,
+        overwrite: bool = False,
+    ) -> Asset:
+        """Register a Cloud Optimized GeoTIFF (COG) as an Earth Engine Image asset.
+
+        Args:
+            uri: The URI of the COG file. The only supported scheme is "gs://".
+            bands: A list of band names to assign to the image. If None, the band names from the COG will be used. Defaults to None.
+            properties: A dictionary of properties to set on the image asset. Defaults to None.
+            start_time: The start time of the image asset. Can be a string in ISO format, a datetime or a date object. Defaults to None.
+            end_time: The end time of the image asset. Can be a string in ISO format, a datetime or a date object. Defaults to None.
+            overwrite: If True, overwrite the destination asset if it exists. Defaults to False.
+
+        Returns:
+            The new Image asset instance.
+
+        Examples:
+            .. code-block:: python
+
+                asset = ee.Asset("projects/ee-geetools/assets/folder/image")
+                asset.register_cog_asset("gs://bucket/path/to/cog.tif", bands=["B1", "B2"], overwrite=True)
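+
+                # Under the hood this posts (roughly) the following manifest to the
+                # v1alpha ``image:importExternal`` endpoint (values illustrative):
+                # {
+                #   "imageManifest": {
+                #     "name": "projects/ee-geetools/assets/folder/image",
+                #     "tilesets": [{"id": "0", "sources": [{"uris": ["gs://bucket/path/to/cog.tif"]}]}],
+                #     "bands": [
+                #       {"id": "B1", "tilesetId": "0", "tilesetBandIndex": 0},
+                #       {"id": "B2", "tilesetId": "0", "tilesetBandIndex": 1}
+                #     ],
+                #     "properties": {"gcp_backed": "COG", "gcp_uri": "gs://bucket/path/to/cog.tif"}
+                #   }
+                # }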
+        """
+        # exit if the destination asset exists and overwrite is False
+        if self.exists():
+            if overwrite is False:
+                raise ValueError(f"Asset {self.as_posix()} already exists.")
+            else:
+                self.delete()
+
+        # Start an incomplete manifest request
+        request = {
+            "imageManifest": {
+                "name": self.as_posix(),
+                "tilesets": [{"id": "0", "sources": [{"uris": [uri]}]}],
+            },
+        }
+
+        # Define the band information based on the content of the file and/or the
+        # provided band names
+        with rio.open(uri) as src:
+            bands = bands or src.descriptions or [f"b{i}" for i in range(1, src.count + 1)]
+            count = src.count
+
+        if len(bands) != count:
+            raise ValueError(
+                f"Number of bands in COG ({count}) does not match the number of band names provided ({len(bands)})."
+            )
+        request["imageManifest"]["bands"] = [
+            {"id": b, "tilesetId": "0", "tilesetBandIndex": i} for i, b in enumerate(bands)
+        ]
+
+        # add time properties if they are set
+        if start_time is not None:
+            start_time = start_time if isinstance(start_time, str) else start_time.isoformat() + "Z"
+            request["imageManifest"]["startTime"] = start_time
+
+        if end_time is not None:
+            end_time = end_time if isinstance(end_time, str) else end_time.isoformat() + "Z"
+            request["imageManifest"]["endTime"] = end_time
+
+        # set the properties of the file and add two specific ones: the URI and a tag
+        # indicating that the file is COG backed
+        properties = properties or {}
+        properties = {"gcp_backed": "COG", "gcp_uri": uri, **properties}
+        request["imageManifest"]["properties"] = properties
+
+        # register the COG as an Image asset
+        project = ee.Initialize.geetools.project_id()
+        creds = ee.data.get_persistent_credentials()
+        session = AuthorizedSession(creds.with_quota_project(project))
+        url = f"https://earthengine.googleapis.com/v1alpha/projects/{project}/image:importExternal"
+        response = session.post(url=url, data=json.dumps(request))
+
+        # raise an error if something went wrong
+        if response.status_code != requests.codes.ok:
+            raise ValueError(f"Error registering COG asset: {response.content}")
+
+        return self
diff --git a/geetools/ee_authenticate.py b/geetools/ee_authenticate.py
index 350558e0..55edbae0 100644
--- a/geetools/ee_authenticate.py
+++ b/geetools/ee_authenticate.py
@@ -17,7 +17,7 @@ class AuthenticateAccessor:
     """Create an accessor for the :py:func:`ee.Authenticate` function."""
 
     @staticmethod
-    def new_user(name: str = "", credential_pathname: str | os.PathLike = ""):
+    def new_user(name: str = "default", credential_pathname: str | os.PathLike = ""):
         """Authenticate the user and save the credentials in a specific folder.
 
         Equivalent to :py:func:`ee.Authenticate` but where the registered user will not be the default one (the one you get when running :py:func:`ee.Initialize`).
@@ -56,7 +56,7 @@ def new_user(name: str = "", credential_pathname: str | os.PathLike = ""):
             move(Path(dir) / default.name, default)
 
     @staticmethod
-    def delete_user(name: str = "", credential_pathname: str | os.PathLike = ""):
+    def delete_user(name: str = "default", credential_pathname: str | os.PathLike = ""):
         """Delete a user credential file.
 
         Args:
@@ -77,8 +77,11 @@ def delete_user(name: str = "", credential_pathname: str | os.PathLike = ""):
         name = f"credentials{name}"
         credential_pathname = credential_pathname or ee.oauth.get_credentials_path()
         credential_path = Path(credential_pathname).parent
-        with suppress(FileNotFoundError):
+
+        try:
             (credential_path / name).unlink()
+        except FileNotFoundError:
+            raise FileNotFoundError(f"The credential file {name} does not exist") from None
 
     @staticmethod
     def list_user(credential_pathname: str | os.PathLike = "") -> list[str]:
@@ -103,10 +106,10 @@
         credential_pathname = credential_pathname or ee.oauth.get_credentials_path()
         credential_path = Path(credential_pathname).parent
         files = [f for f in credential_path.glob("credentials*") if f.is_file()]
-        return [f.name.replace("credentials", "") or "default" for f in files]
+        return [f.name.replace("credentials", "") for f in files]
 
     @staticmethod
-    def rename_user(new: str, old: str = "", credential_pathname: str | os.PathLike = ""):
+    def rename_user(new: str, old: str = "default", credential_pathname: str | os.PathLike = ""):
         """Rename a user without changing the credentials.
 
         Args:
@@ -129,5 +132,10 @@
         new = f"credentials{new}"
         credential_pathname = credential_pathname or ee.oauth.get_credentials_path()
         credential_path = Path(credential_pathname).parent
-        with suppress(FileNotFoundError):
+
+        try:
             (credential_path / old).rename(credential_path / new)
+        except FileNotFoundError:
+            raise FileNotFoundError(f"The credential file {old} does not exist") from None
+        except FileExistsError:
+            raise FileExistsError(f"The credential file {new} already exists") from None
diff --git a/geetools/ee_initialize.py b/geetools/ee_initialize.py
index 5feb398b..c971ea16 100644
--- a/geetools/ee_initialize.py
+++ b/geetools/ee_initialize.py
@@ -43,28 +43,17 @@ def from_user(name: str = "", credential_pathname: str | os.PathLike = "", proje
     # gather global variable to be modified
     global _project_id
 
+    # Using the saved credentials, initialize the EE API
+    ee.Initialize(ee.Initialize.geetools.get_credentials(name))
+
     # set the user profile information
     name = f"credentials{name}"
     credential_pathname = credential_pathname or ee.oauth.get_credentials_path()
     credential_folder = Path(credential_pathname).parent
     credential_path = credential_folder / name
 
-    # check if the user exists
-    if not credential_path.exists():
-        msg = "Please register this user first by using geetools.User.create first"
-        raise ee.EEException(msg)
-
-    # Set the credential object and Init GEE API
-    tokens = json.loads((credential_path / name).read_text())
-    credentials = Credentials(
-        None,
-        refresh_token=tokens["refresh_token"],
-        token_uri=ee.oauth.TOKEN_URI,
-        client_id=tokens["client_id"],
-        client_secret=tokens["client_secret"],
-        scopes=ee.oauth.SCOPES,
-    )
-    ee.Initialize(credentials)
+    # read the tokens a second time to extract the project_id
+    tokens = json.loads(credential_path.read_text())
 
     # save the project_id in a dedicated global variable as it's not saved
     # from GEE side
@@ -116,5 +105,56 @@
                 ee.Initialize.geetools.project_id()
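+                # -> e.g. "ee-geetools" when initialized via from_user(project="ee-geetools")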
""" if _project_id is None: - raise RuntimeError("The GEE account is not initialized") + raise RuntimeError( + "The GEE account is not initialized or was initialized outside of geetools." + " Please use from_user or from_service_account to get access to the project_id" + ) return _project_id + + @staticmethod + def get_credentials(name="default") -> Credentials: + """Get the credentials of a specific user to use for other Google API. + + Return the credential oAuth object necessary to authenticate to other Python Google APIs. + The scope of the registered API will entirely depend on the setting passed by the user at authentication time. + + Args: + name: The name of the user as saved when created. use default if not set + + Returns: + The Google Credentials object of the connected profile. + + Examples: + .. code-block:: python + + import ee, geetools + from google.cloud.storage.client import Client + + credentials = ee.Initialize.geetools.get_credentials("") + + service = build('drive', 'v3', credentials=credentials) + client = Client(credentials=credentials, project=ee.Initialize.geetools.project_id()) + """ + name = f"credentials{name}" + credential_pathname = ee.oauth.get_credentials_path() + credential_folder = Path(credential_pathname).parent + credential_path = credential_folder / name + + # check if the user exists + if not credential_path.exists(): + msg = "Please register this user first by using ee.Authenticate.geetools.new_user method first" + raise ee.EEException(msg) + + # Set the credential object and Init GEE API + tokens = json.loads((credential_path).read_text()) + credentials = Credentials( + None, + refresh_token=tokens["refresh_token"], + token_uri=ee.oauth.TOKEN_URI, + client_id=tokens["client_id"], + client_secret=tokens["client_secret"], + scopes=ee.oauth.SCOPES, + ) + return credentials diff --git a/pyproject.toml b/pyproject.toml index 7a146bd1..306bda7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,11 +30,13 @@ dependencies = [ "requests", "pandas", "geopandas", + "rasterio", "deprecated", "xee>=0.0.11", # xee change dtype management "yamlable", "matplotlib", "anyascii", + "google-cloud-storage", ] [[project.authors]]