diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6e57501..0000000 --- a/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -# Logs and databases # -###################### -slurm-*.out -output*.out -ERA5_createForcing_CONUS*.sh -core.* - -# notebook stuff -.ipynb_checkpoints \ No newline at end of file diff --git a/0_control_files/control_Bow_at_Banff.txt b/0_control_files/control_Bow_at_Banff.txt index ff9201d..1fa32b9 100644 --- a/0_control_files/control_Bow_at_Banff.txt +++ b/0_control_files/control_Bow_at_Banff.txt @@ -6,7 +6,7 @@ # Modeling domain settings -root_path | /project/gwf/gwf_cmt/wknoben/summaWorkflow_data # Root folder where data will be stored. +root_path | /project/gwf/gwf_cmt/wknoben/summaWorkflow_data # Root folder where data will be stored. domain_name | BowAtBanff # Used as part of the root folder name for the prepared data. @@ -225,4 +225,4 @@ visualization_folder | default # If ' | |_ installs |_ mizuRoute - |_ SUMMA \ No newline at end of file + |_ SUMMA diff --git a/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.ipynb b/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.ipynb index eb8abde..5259b60 100644 --- a/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.ipynb +++ b/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.ipynb @@ -16,15 +16,20 @@ "metadata": {}, "outputs": [], "source": [ - "# modules\n", - "import os\n", + "# Import Modules\n", + "import os,sys,glob\n", "import time\n", "import shutil\n", "import requests\n", "from netrc import netrc\n", "from pathlib import Path\n", "from shutil import copyfile\n", - "from datetime import datetime" + "from concurrent.futures import ThreadPoolExecutor\n", + "import subprocess\n", + "import ipynbname\n", + "thisFile = ipynbname.name()+'.ipynb'\n", + "#Import local modules\n", + "from workflow_utility_functions import read_from_control,make_default_path, create_log_file\n" ] }, { @@ -60,24 +65,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Function to extract a given setting from the control file\n", - "def read_from_control( file, setting ):\n", - " \n", - " # Open 'control_active.txt' and ...\n", - " with open(file) as contents:\n", - " for line in contents:\n", - " \n", - " # ... find the line with the requested setting\n", - " if setting in line and not line.startswith('#'):\n", - " break\n", - " \n", - " # Extract the setting's value\n", - " substring = line.split('|',1)[1] # Remove the setting's name (split into 2 based on '|', keep only 2nd part)\n", - " substring = substring.split('#',1)[0] # Remove comments, does nothing if no '#' is found\n", - " substring = substring.strip() # Remove leading and trailing whitespace, tabs, newlines\n", - " \n", - " # Return this value \n", - " return substring" + "def request_get(file_url,output_file, usr, pwd):\n", + " '''Function to request and download data, given user credentials'''\n", + "\n", + " try:\n", + " res = requests.get(file_url, verify=True, stream=True, auth=(usr, pwd))\n", + "\n", + " # Decode the response\n", + " res.raw.decode_content = True\n", + " content = res.raw\n", + "\n", + " # Write to file\n", + " with open(output_file, 'wb') as data:\n", + " shutil.copyfileobj(content, data)\n", + "\n", + " except:\n", + " logger.warning(f'File {file_url} was not downloaded correctly, on attempt {retries_cur} of {retires_max}')\n", + " retries_cur += 1\n", + "\n", + " return None" ] }, { @@ -86,20 +92,53 @@ "metadata": {}, "outputs": [], "source": [ - "# Function to specify a default path\n", - "def make_default_path(suffix):\n", - " \n", - " # Get the root path\n", - " rootPath = Path( read_from_control(controlFolder/controlFile,'root_path') )\n", - " \n", - " # Get the domain folder\n", - " domainName = read_from_control(controlFolder/controlFile,'domain_name')\n", - " domainFolder = 'domain_' + domainName\n", - " \n", - " # Specify the forcing path\n", - " defaultPath = rootPath / domainFolder / suffix\n", - " \n", - " return defaultPath" + "def run_modis_download(file_list,usr,pwd,modis_path):\n", + " '''Download the needed files using Threading'''\n", + "\n", + " with ThreadPoolExecutor() as executor:\n", + " futures = []\n", + " for file_url_raw in file_list:\n", + "\n", + " file_url = file_url_raw.strip()\n", + " file_name = file_url.split('/')[-1].strip() # Get the last part of the url, strip whitespace and characters\n", + "\n", + " #Check if file already exists and move to next file if so\n", + " if (modis_path / file_name).is_file():\n", + " logger.debug(f'File {file_name} exists, skipping download')\n", + " else:\n", + " #Set the output file name, and submit the download request\n", + " output_file = os.path.join(modis_path, file_name)\n", + " futures.append(executor.submit(request_get, file_url, output_file, usr, pwd))\n", + "\n", + " logger.info(f'Downloading file: {file_name} from: {file_url}')\n", + "\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def download_check(file_list, modis_path,retries_cur):\n", + " '''This function checks that all needed files are downloaded, and if not will try again '''\n", + "\n", + " check_folder = str(modis_path) + \"/*.hdf\"\n", + " file_list_check = glob.glob(check_folder)\n", + "\n", + " file_list.sort()\n", + " file_list_check.sort()\n", + "\n", + " if len(file_list) == len(file_list_check):\n", + " logger.info(f'All required files have been downloaded')\n", + " download_complete_bool = True\n", + " else:\n", + " logger.warning(f'Required files were not downloaded, another attempt will be made')\n", + " download_complete_bool = False\n", + " retries_cur += 1\n", + "\n", + " return download_complete_bool,retries_cur" ] }, { @@ -111,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -122,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -145,20 +184,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Specify the default paths if required \n", "if modis_path == 'default':\n", - " modis_path = make_default_path('parameters/landclass/1_MODIS_raw_data') # outputs a Path()\n", + " modis_path = make_default_path('parameters/landclass/1_MODIS_raw_data',controlFolder,controlFile) # outputs a Path()\n", "else:\n", " modis_path = Path(modis_path) # make sure a user-specified path is a Path()" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -166,6 +205,32 @@ "modis_path.mkdir(parents=True, exist_ok=True)" ] }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2021-05-10 10:21:25,241 - INFO - Log file /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/20210510_modis_download__log.txt generated by download_modis_mcd12q1_v6.ipynb on 2021/05/10 10:21:25\n" + ] + } + ], + "source": [ + "# Set the log path and file name\n", + "logPath = modis_path\n", + "log_suffix = '_modis_download_'\n", + "\n", + "# Create a log folder\n", + "logFolder = '_workflow_log'\n", + "Path(logPath / logFolder).mkdir(parents=True, exist_ok=True)\n", + "\n", + "#Create a logging file\n", + "logger = create_log_file(logPath / logFolder,thisFile,suffix=log_suffix)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -175,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -185,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -223,84 +288,62 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2021-05-10 10:21:25,262 - INFO - Downloading MODIS MCD12Q1_V6 data with global coverage.\n" + ] + } + ], "source": [ - "# Retry settings: connection can be unstable, so specify a number of retries\n", - "retries_max = 100 " + "logger.info('Downloading MODIS MCD12Q1_V6 data with global coverage.')\n", + "\n", + "#Read all files in folder, in order to compare to download list\n", + "check_folder = str(modis_path)+\"/*.hdf\"\n", + "file_list_check = glob.glob(check_folder)\n", + "\n", + "file_list.sort()\n", + "file_list_check.sort()\n", + "\n", + "download_complete_bool = False\n", + "retries_cur = 1\n", + "retries_max = 10" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Successfully downloaded: MCD12Q1.A2001001.h02v10.006.2018142183006.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h03v09.006.2018142183026.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h01v10.006.2018142182931.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h03v10.006.2018142183036.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h00v08.006.2018142182903.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h03v07.006.2018142183028.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h04v11.006.2018142183101.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h00v09.006.2018142182901.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h02v11.006.2018142183012.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h01v08.006.2018142182920.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h03v11.006.2018142183043.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h04v09.006.2018142183050.hdf\n", - "Error downloading MCD12Q1.A2001001.h01v09.006.2018142182927.hdf on try 1\n", - "Successfully downloaded: MCD12Q1.A2001001.h01v09.006.2018142182927.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h02v08.006.2018142182955.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h02v06.006.2018142182941.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h01v11.006.2018142182942.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h03v06.006.2018142183016.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h00v10.006.2018142182916.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h02v09.006.2018142182957.hdf\n", - "Successfully downloaded: MCD12Q1.A2001001.h04v10.006.2018142183058.hdf\n" + "2021-05-10 10:21:25,681 - INFO - All required files have been downloaded\n" ] } ], "source": [ - "# Loop over the download files\n", - "for file_url in file_list:\n", - " \n", - " # Make the file name\n", - " file_name = file_url.split('/')[-1].strip() # Get the last part of the url, strip whitespace and characters\n", - " \n", - " # Check if file already exists (i.e. interupted earlier download) and move to next file if so\n", - " if (modis_path / file_name).is_file():\n", - " continue \n", - " \n", - " # Make sure the connection is re-tried if it fails\n", - " retries_cur = 1\n", - " while retries_cur <= retries_max:\n", - " try:\n", - " # Send a HTTP request to the server and save the HTTP response in a response object called resp\n", - " # 'stream = True' ensures that only response headers are downloaded initially (and not all file contents too, which are 2GB+)\n", - " with requests.get(file_url.strip(), verify=True, stream=True, auth=(usr,pwd)) as response:\n", - " \n", - " # Decode the response\n", - " response.raw.decode_content = True\n", - " content = response.raw \n", - " \n", - " # Write to file\n", - " with open(modis_path / file_name, 'wb') as data:\n", - " shutil.copyfileobj(content, data)\n", - " \n", - " # Progress\n", - " print('Successfully downloaded: {}'.format(file_name))\n", - " time.sleep(3) # sleep for a bit so we don't overwhelm the server\n", - " \n", - " except:\n", - " print('Error downloading ' + file_name + ' on try ' + str(retries_cur))\n", - " retries_cur += 1\n", - " continue\n", - " else:\n", - " break" + "\"\"\"This is the main download loop\"\"\"\n", + "while download_complete_bool == False:\n", + "\n", + " #Run download given complete list\n", + " run_modis_download(file_list, usr, pwd, modis_path)\n", + " #Check if number of files meets the length of the list\n", + " download_complete_bool,retries_cur = download_check(file_list, modis_path,retries_cur)\n", + "\n", + " #Break when all files are downloaded\n", + " if download_complete_bool == True:\n", + " break\n", + "\n", + " #If there are too many retries, then break\n", + " if retries_cur >= retries_max:\n", + " logger.error(f'Maximum number of tries ({retries_max}) has been reached, aborting')\n", + " break" ] }, { @@ -311,63 +354,28 @@ "Generates a basic log file in the domain folder and copies the control file and itself there." ] }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# Set the log path and file name\n", - "logPath = modis_path\n", - "log_suffix = '_modis_download_log.txt'" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a log folder\n", - "logFolder = '_workflow_log'\n", - "Path( logPath / logFolder ).mkdir(parents=True, exist_ok=True)" - ] - }, { "cell_type": "code", "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2021-05-10 10:24:36,701 - INFO - File: (../../../0_control_files/control_active.txt) has been moved to /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/control_active.txt\n", + "2021-05-10 10:24:36,703 - INFO - File: (download_modis_mcd12q1_v6.ipynb) has been moved to /Users/drc858/Data/workflow_output/domain_BowAtBanff/parameters/landclass/1_MODIS_raw_data/_workflow_log/download_modis_mcd12q1_v6.ipynb\n" + ] + } + ], "source": [ + "# Generates copies the control file and itself there.\n", + "#Copy the control file\n", + "copyfile(controlFolder / controlFile, logPath / logFolder / controlFile)\n", + "logger.info(f'File: ({controlFolder / controlFile}) has been moved to {logPath / logFolder / controlFile}')\n", "# Copy this script\n", - "thisFile = 'download_modis_mcd12q1_v6.ipynb'\n", - "copyfile(thisFile, logPath / logFolder / thisFile);" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# Get current date and time\n", - "now = datetime.now()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a log file \n", - "logFile = now.strftime('%Y%m%d') + log_suffix\n", - "with open( logPath / logFolder / logFile, 'w') as file:\n", - " \n", - " lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\\n',\n", - " 'Downloaded MODIS MCD12Q1_V6 data with global coverage.']\n", - " for txt in lines:\n", - " file.write(txt) " + "copyfile(thisFile, logPath / logFolder / thisFile)\n", + "logger.info(f'File: ({thisFile}) has been moved to {logPath / logFolder / thisFile}')\n" ] }, { @@ -380,9 +388,9 @@ ], "metadata": { "kernelspec": { - "display_name": "summa-env", + "display_name": "summa-venv", "language": "python", - "name": "summa-env" + "name": "summa-venv" }, "language_info": { "codemirror_mode": { @@ -394,7 +402,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.py b/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.py index 17d12b3..4b1ef2c 100644 --- a/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.py +++ b/3b_parameters/MODIS_MCD12Q1_V6/1_download/download_modis_mcd12q1_v6.py @@ -1,17 +1,29 @@ -# Download MODIS MCD12Q1_V6 -# Script based on example provided on: https://git.earthdata.nasa.gov/projects/LPDUR/repos/daac_data_download_python/browse -# Requires a `.netrc` file in user's home directory with login credentials for `urs.earthdata.nasa.gov`. See: https://lpdaac.usgs.gov/resources/e-learning/how-access-lp-daac-data-command-line/ +""" -# modules -import os +Download MODIS MCD12Q1_V6 + +This script downloads all required MODIS data, approximately 5669 files with a total of 24.67 GB +Based on example provided on: https://git.earthdata.nasa.gov/projects/LPDUR/repos/daac_data_download_python/browse + +***NOTE*** +Requires a `.netrc` file in user's home directory with login credentials for `urs.earthdata.nasa.gov`. +See: https://lpdaac.usgs.gov/resources/e-learning/how-access-lp-daac-data-command-line/ +""" + +# Import Modules +import os,sys,glob import time import shutil import requests from netrc import netrc from pathlib import Path from shutil import copyfile -from datetime import datetime +from concurrent.futures import ThreadPoolExecutor +import subprocess +#Import local modules +from workflow_utility_functions import read_from_control,make_default_path, create_log_file +thisFile = os.path.basename(sys.argv[0]) # --- Control file handling # Easy access to control file folder @@ -20,41 +32,67 @@ # Store the name of the 'active' file in a variable controlFile = 'control_active.txt' -# Function to extract a given setting from the control file -def read_from_control( file, setting ): - - # Open 'control_active.txt' and ... - with open(file) as contents: - for line in contents: - - # ... find the line with the requested setting - if setting in line and not line.startswith('#'): - break - - # Extract the setting's value - substring = line.split('|',1)[1] # Remove the setting's name (split into 2 based on '|', keep only 2nd part) - substring = substring.split('#',1)[0] # Remove comments, does nothing if no '#' is found - substring = substring.strip() # Remove leading and trailing whitespace, tabs, newlines - - # Return this value - return substring - -# Function to specify a default path -def make_default_path(suffix): - - # Get the root path - rootPath = Path( read_from_control(controlFolder/controlFile,'root_path') ) - - # Get the domain folder - domainName = read_from_control(controlFolder/controlFile,'domain_name') - domainFolder = 'domain_' + domainName - - # Specify the forcing path - defaultPath = rootPath / domainFolder / suffix - - return defaultPath - - +def request_get(file_url,output_file, usr, pwd): + '''Function to request and download data, given user credentials''' + + try: + res = requests.get(file_url, verify=True, stream=True, auth=(usr, pwd)) + + # Decode the response + res.raw.decode_content = True + content = res.raw + + # Write to file + with open(output_file, 'wb') as data: + shutil.copyfileobj(content, data) + + except: + logger.warning(f'File {file_url} was not downloaded correctly, on attempt {retries_cur} of {retires_max}') + retries_cur += 1 + + return None + +def run_modis_download(file_list,usr,pwd,modis_path): + '''Download the needed files using Threading''' + + with ThreadPoolExecutor() as executor: + futures = [] + for file_url_raw in file_list: + + file_url = file_url_raw.strip() + file_name = file_url.split('/')[-1].strip() # Get the last part of the url, strip whitespace and characters + + #Check if file already exists and move to next file if so + if (modis_path / file_name).is_file(): + logger.debug(f'File {file_name} exists, skipping download') + else: + #Set the output file name, and submit the download request + output_file = os.path.join(modis_path, file_name) + futures.append(executor.submit(request_get, file_url, output_file, usr, pwd)) + + logger.info(f'Downloading file: {file_name} from: {file_url}') + + return None + +def download_check(file_list, modis_path,retries_cur): + '''This function checks that all needed files are downloaded, and if not will try again ''' + + check_folder = str(modis_path) + "/*.hdf" + file_list_check = glob.glob(check_folder) + + file_list.sort() + file_list_check.sort() + + if len(file_list) == len(file_list_check): + logger.info(f'All required files have been downloaded') + download_complete_bool = True + else: + logger.warning(f'Required files were not downloaded, another attempt will be made') + download_complete_bool = False + retries_cur += 1 + + return download_complete_bool,retries_cur + # --- Get the download settings # Path and name of file with download links links_path = read_from_control(controlFolder/controlFile,'parameter_land_list_path') @@ -71,15 +109,24 @@ def make_default_path(suffix): # Specify the default paths if required if modis_path == 'default': - modis_path = make_default_path('parameters/landclass/1_MODIS_raw_data') # outputs a Path() + modis_path = make_default_path('parameters/landclass/1_MODIS_raw_data',controlFolder,controlFile) # outputs a Path() else: modis_path = Path(modis_path) # make sure a user-specified path is a Path() # Make output dir modis_path.mkdir(parents=True, exist_ok=True) +# Set the log path and file name +logPath = modis_path +log_suffix = '_modis_download' + +# Create a log folder +logFolder = '_workflow_log' +Path(logPath / logFolder).mkdir(parents=True, exist_ok=True) +#Create a logging file +logger = create_log_file(logPath / logFolder,thisFile,suffix=log_suffix) -# --- Get the authentication info +# Get the authentication info # authentication url url = 'urs.earthdata.nasa.gov' @@ -95,70 +142,40 @@ def make_default_path(suffix): # Get the download links from file file_list = open(links_file, 'r').readlines() -# Retry settings: connection can be unstable, so specify a number of retries -retries_max = 100 +logger.info('Downloading MODIS MCD12Q1_V6 data with global coverage.') -# Loop over the download files -for file_url in file_list: - - # Make the file name - file_name = file_url.split('/')[-1].strip() # Get the last part of the url, strip whitespace and characters - - # Check if file already exists (i.e. interupted earlier download) and move to next file if so - if (modis_path / file_name).is_file(): - continue - - # Make sure the connection is re-tried if it fails - retries_cur = 1 - while retries_cur <= retries_max: - try: - # Send a HTTP request to the server and save the HTTP response in a response object called resp - # 'stream = True' ensures that only response headers are downloaded initially (and not all file contents too, which are 2GB+) - with requests.get(file_url.strip(), verify=True, stream=True, auth=(usr,pwd)) as response: - - # Decode the response - response.raw.decode_content = True - content = response.raw - - # Write to file - with open(modis_path / file_name, 'wb') as data: - shutil.copyfileobj(content, data) - - # Progress - print('Successfully downloaded: {}'.format(file_name)) - time.sleep(3) # sleep for a bit so we don't overwhelm the server - - except: - print('Error downloading ' + file_name + ' on try ' + str(retries_cur)) - retries_cur += 1 - continue - else: - break - +check_folder = str(modis_path)+"/*.hdf" +file_list_check = glob.glob(check_folder) -# --- Code provenance -# Generates a basic log file in the domain folder and copies the control file and itself there. +file_list.sort() +file_list_check.sort() -# Set the log path and file name -logPath = modis_path -log_suffix = '_modis_download_log.txt' +download_complete_bool = False +retries_cur = 1 +retries_max = 10 -# Create a log folder -logFolder = '_workflow_log' -Path( logPath / logFolder ).mkdir(parents=True, exist_ok=True) +"""This is the main download loop""" +while download_complete_bool == False: + #Run download given complete list + run_modis_download(file_list, usr, pwd, modis_path) + #Check if number of files meets the length of the list + download_complete_bool,retries_cur = download_check(file_list, modis_path,retries_cur) + + #Break when all files are downloaded + if download_complete_bool == True: + break + + #If there are too many retries, then break + if retries_cur >= retries_max: + logger.error(f'Maximum number of tries ({retries_max}) has been reached, aborting') + break + +# --- Code provenance +# Generates copies the control file and itself there. +#Copy the control file +copyfile(controlFolder / controlFile, logPath / logFolder / controlFile) # Copy this script -thisFile = 'download_modis_mcd12q1_v6.py' -copyfile(thisFile, logPath / logFolder / thisFile); +copyfile(thisFile, logPath / logFolder / thisFile) -# Get current date and time -now = datetime.now() -# Create a log file -logFile = now.strftime('%Y%m%d') + log_suffix -with open( logPath / logFolder / logFile, 'w') as file: - - lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n', - 'Downloaded MODIS MCD12Q1_V6 data with global coverage.'] - for txt in lines: - file.write(txt) \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3027e95 --- /dev/null +++ b/setup.py @@ -0,0 +1,32 @@ +from setuptools import setup, find_packages +import pathlib + +here = pathlib.Path(__file__).parent.resolve() + +# Get the long description from the README file +long_description = (here / 'README.md').read_text(encoding='utf-8') + +# Arguments marked as "Required" below must be included for upload to PyPI. +# Fields marked as "Optional" may be commented out. + +setup( + name='summaWorkflow_public', # Required + version='0.1', # Required + description='SUMMA workflow repository', # Optional + long_description=long_description, # Optional + long_description_content_type='text/markdown', # Optional + url='https://github.com/CH-Earth/summaWorkflow_public', # Optional + author='Wouter Knoben', # Optional + author_email='Wouter.knoben@usask.ca', # Optional + keywords='hydrology, SUMMA', # Optional + + # When your source code is in a subdirectory under the project root, e.g. + # `src/`, it is necessary to specify the `package_dir` argument. + package_dir={'': 'utility_scripts'}, # Optional + packages=find_packages(where='utlity_scripts'), # Required + + + #python_requires='>=3.6, <4', # Optional + #install_requires=requirements # Optional + +) diff --git a/utility_scripts/logger_config.ini b/utility_scripts/logger_config.ini new file mode 100644 index 0000000..c85a10f --- /dev/null +++ b/utility_scripts/logger_config.ini @@ -0,0 +1,36 @@ +[loggers] +keys=root,sLogger + +[handlers] +keys=consoleHandler,fileHandler + +[formatters] +keys=fileFormatter,consoleFormatter + +[logger_root] +level=DEBUG +handlers=consoleHandler + +[logger_sLogger] +level=DEBUG +handlers=consoleHandler,fileHandler +qualname=sLogger +propagate=0 + +[handler_consoleHandler] +class=StreamHandler +level=INFO +formatter=consoleFormatter +args=(sys.stdout,) + +[handler_fileHandler] +class=FileHandler +level=DEBUG +formatter=fileFormatter +args=('%(logfilename)s',) + +[formatter_fileFormatter] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s + +[formatter_consoleFormatter] +format=%(asctime)s - %(levelname)s - %(message)s diff --git a/utility_scripts/workflow_utility_functions.py b/utility_scripts/workflow_utility_functions.py new file mode 100644 index 0000000..16f619e --- /dev/null +++ b/utility_scripts/workflow_utility_functions.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# coding: utf-8 + +''' +Workflow Utility Functions + +Contains commonly used functions in the SUMMA workflow generation +This functions can be directly called by other Python scripts or Jupyter Notebooks. + +''' + +# Modules +import os +from pathlib import Path +from shutil import copyfile +from datetime import datetime +import logging +import logging.config + +# Function to extract a given setting from the control file +def read_from_control( file, setting ): + """Read line item from the SUMMAworkflow (https://github.com/CH-Earth/summaWorkflow_public) + format control file. + + Parameters + ---------- + file : SUMMAworkflow control file path + i.e./summaWorkflow_public/0_control_files/control_active.txt + + setting : line item of SUMMA workflow control file + i.e. catchment_shp_name + + Returns + ------- + substring : configuration setting from control file + i.e. bow_distributed_elevation_zone.shp + """ + # Open 'control_active.txt' and ... + with open(file) as contents: + for line in contents: + + # ... find the line with the requested setting + if setting in line: + break + + # Extract the setting's value + substring = line.split('|',1)[1] # Remove the setting's name (split into 2 based on '|', keep only 2nd part) + substring = substring.split('#',1)[0] # Remove comments, does nothing if no '#' is found + substring = substring.strip() # Remove leading and trailing whitespace, tabs, newlines + + # Return this value + return substring + + +# Function to specify a default path +def make_default_path(suffix,controlFolder,controlFile): + + """Return a default path based on values read from the control file + + Parameters + ---------- + suffix : str + suffix to be used to set directory + + controlFolder : str + complete path (without file name) + + controlFile : str + Name of control file + + Returns + ------- + defaultPath : str + defaultPath for output folder + """ + + # Get the root path + rootPath = Path(read_from_control(controlFolder / controlFile, 'root_path')) + + # Get the domain folder + domainName = read_from_control(controlFolder / controlFile, 'domain_name') + domainFolder = 'domain_' + domainName + + # Specify the forcing path + defaultPath = rootPath / domainFolder / suffix + + return defaultPath + +def create_log_file(logfile_folder,filename,suffix=None): + """ Create an output log file based on an existing log configuration file. + Note that the logger configuration file is located at ../utility_scripts/logger_config.ini + + See examples for logging.conf at https://docs.python.org/3/howto/logging.html + + Parameters + ---------- + logfile_folder : str + output folder for log file + + filename : str + log file name + + suffix (optional) : str + additional string to be added to logfile anme + + Returns + ------- + logger : + logger object to be used by other functions + """ + #Construct name and location of log file + now = datetime.now() + suffix_str = isstr(suffix) #Returns the suffix if defined, otherwise '' + log_file_name = now.strftime('%Y%m%d') + suffix_str + '_log.txt' + logfile = os.path.join(logfile_folder,log_file_name) + #Delete previous file if it exists + if os.path.exists(logfile): + os.remove(logfile) + + #Read in logger configuration, note it must be in same directory as this file + logger_config_path = os.path.dirname(os.path.realpath(__file__)) + logging.config.fileConfig(os.path.join(logger_config_path,'logger_config.ini'),defaults={'logfilename': logfile},disable_existing_loggers=False) + + logger = logging.getLogger('sLogger') + logger.info(f'Log file {logfile} generated by {filename}' + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S')) + + return logger + +def isstr(s): + # Returns the suffix if defined, otherwise '' + return '' if s is None else str(s)