# -*- coding: utf-8 -*-
__version__ = 1.0
__author__ = "Justin Beckers, YorkU/UAlberta"


def append_files(outdir, custom="_concat", fileList=None, header=0,
                 skiprows=0, sep=',', dropna=0):
    '''
    Append (row-wise concatenate) a list of delimited files into one file.

    The files may or may not have a header row.  If ``header`` is an int,
    that row of each file is read as the header and the output file carries
    a header; if ``header`` is None the files are read headerless and the
    output has no header row.

    Parameters
    ----------
    outdir : str
        Output directory.
    custom : str
        String inserted into the output file name.  Default "_concat".
    fileList : list of str
        Files to append, in order.  The output name is built from the part
        of the first file's basename before the first underscore.
        NOTE: originally this was a required positional parameter placed
        after a defaulted one (a SyntaxError); it now defaults to None and
        is validated explicitly so existing positional callers still work.
    header : int or None
        Header row number, or None for headerless files.  Default 0.
    skiprows : int
        Rows to skip at the top of each file before the header.
    sep : str
        Field delimiter.  Default ','; use '\\s+' for whitespace-delimited.
    dropna : int
        If 1, drop rows containing any NaN before writing.

    Returns
    -------
    None.  Writes ``<prefix><custom>.csv`` into ``outdir``.

    Raises
    ------
    ValueError
        If ``fileList`` is empty or None.
    '''
    import os
    import pandas as pd

    if not fileList:
        raise ValueError("fileList must be a non-empty list of file paths")

    # Output name: <part of first basename before '_'><custom>.csv
    outname = os.path.join(
        outdir, os.path.basename(fileList[0]).split('_')[0] + custom + '.csv')

    # Read every file with identical options and concatenate row-wise.
    # (The original duplicated this logic across two branches.)
    frames = [pd.read_csv(fname, sep=sep, header=header, skiprows=skiprows,
                          index_col=None, na_values=['nan', 'NaN'])
              for fname in fileList]
    out = pd.concat(frames, ignore_index=True, axis=0)

    # Optionally remove rows where any value is NaN.
    if dropna == 1:
        out = out.dropna()

    # If further per-dataset processing (new columns, sorting, ...) is
    # needed, insert it here before writing.

    if header is None:
        # Headerless input -> headerless output, same delimiter.
        out.to_csv(path_or_buf=outname, sep=sep, header=False, index=False,
                   mode='w')
    else:
        # Input had a header -> write it out, with NaN rendered as 'nan'.
        out.to_csv(path_or_buf=outname, sep=sep, na_rep='nan', header=True,
                   index=False, mode='w')


if __name__ == '__main__':
    import glob
    import time  # Used to roughly time the run time of the code.

    # The setup.
    indir = '/Volumes/JustinBeckers_2TBPortable/CryoSat_Lakes/PARLSAR_CS2Data/'
    files = glob.glob(indir + "*_positions.csv")
    outdir = '/Volumes/JustinBeckers_2TBPortable/CryoSat_Lakes/PARLSAR_CS2Data'
    custom = 'concat'

    # Run.  time.clock() was removed in Python 3.8; time.time() is the
    # portable wall-clock replacement for this rough timing.
    t0 = time.time()
    append_files(outdir, custom, files, header=0, skiprows=0, dropna=0, sep=',')
    t1 = time.time()
    print("Time to run the script (s): %s" % (t1 - t0))
__version__ = 1.0
__author__ = "Justin Beckers, YorkU/UAlberta"


def concatenate_output(indir, completeflag=1, version="_A_V001"):
    '''
    Join the per-sensor resampled/subset output files in a directory into a
    single CVLDAT_* CSV file with a fixed 201-column layout.

    Searches ``indir`` (non-recursively; plain string concatenation, so
    ``indir`` must end with a path separator) for the ESA CryoSat-2 file
    (CS2L2I*), the AWI CryoSat-2 file (CS2AWI*) and the resampled airborne
    files (AEM*, ALS*, ASR*, OIB*) sharing the same ``version`` string.
    Explicitly excludes CVLDAT* (this script's own output) and CS2ORB*
    (corner-coordinate files).  Data types missing for the golden day are
    padded with columns of NaN so the output always has the full column set
    in a fixed order.

    Parameters
    ----------
    indir : str
        Input directory, ending with a path separator.
    completeflag : int
        1 (default) when all files for the golden day are present; 0 appends
        "_incomplete" to the output file name.
    version : str
        Processing-version code used to select matching files.

    Returns
    -------
    None.  Writes ``CVLDAT_*<version>[_incomplete].csv`` into ``indir``.

    Raises
    ------
    IOError
        If neither a CS2L2I* nor a CS2AWI* file is found: the output name
        and record length are derived from them.  (The original code fell
        through and crashed later with an undefined name.)
    '''
    # Import Python modules.
    import os
    import glob
    import pandas as pd
    import numpy as np

    # ------------------------------------------------------------------
    # Fixed column vocabularies, defined once and reused for the missing-
    # data padding, the OIB quicklook rename, and the final column order.
    # (The original duplicated the 55-name OIB list verbatim.)
    # ------------------------------------------------------------------
    esa_cols = ['ID', 'CryoSatDateTime', 'Year', 'Month', 'Day', 'Hour',
        'Minute', 'Second', 'Orbit', 'Latitude', 'Longitude', 'Freeboard',
        'FreeboardFlag', 'Height_1', 'Peakiness', 'Sigma0_1', 'SHA',
        'Interp_SHA', 'Interp_OceanHt_Error', 'Interp_OceanHt_CNT_FWD',
        'Interp_OceanHt_CNT_BKWD', 'N_Avg', 'IceConc', 'SnowDepth',
        'SnowDensity', 'SAR_DISCRIM', 'MSS_Mod', 'Geoid_Mod', 'ODLE_Mod',
        'DryT_C', 'WetT_C', 'IB_C', 'DAC_C', 'IONO_GIM', 'IONO_Mod',
        'H_OT_C', 'H_LPEOT_C', 'H_OLT_C', 'H_SET_C', 'H_GPT_C',
        'Beam_Center', 'Beam_Amp', 'Beam_Kurt', 'Beam_SD', 'Beam_SDAng',
        'Beam_Skew', 'Beam_Center_ang', 'BadDataFlag', 'Corr_Error_F']
    awi_cols = ['AWICS2_time', 'AWICS2_lon', 'AWICS2_lat', 'AWICS2_fb_stat',
        'AWICS2_fb_syst', 'AWICS2_fb_unc', 'AWICS2_fb', 'AWICS2_sit_stat',
        'AWICS2_sit_syst', 'AWICS2_sit_unc', 'AWICS2_sit_fb_syst',
        'AWICS2_sit_snow_syst', 'AWICS2_sit_snow_syst_iav',
        'AWICS2_sit_rho_i_syst', 'AWICS2_sit_rho_s_syst', 'AWICS2_sit',
        'AWICS2_ssha_unc', 'AWICS2_ssh', 'AWICS2_ssha', 'AWICS2_sd',
        'AWICS2_rho_i', 'AWICS2_rho_s', 'AWICS2_ice_type',
        'AWICS2_surface_type', 'AWICS2_ice_conc']
    aem_cols = ['AEM_Year', 'AEM_Month', 'AEM_Day', 'AEM_Hour', 'AEM_Minute',
        'AEM_Seconds', 'AEM_lon', 'AEM_lat', 'AEM_dT', 'AEM_navg',
        'AEM_Mean', 'AEM_Median', 'AEM_stdev', 'AEM_Mode']

    def _stat_cols(prefix):
        # The ALS/ASR resampled products share one 14-column layout.
        return [prefix + s for s in ('Year', 'Month', 'Day', 'Hour', 'Minute',
                                     'Seconds', 'lon', 'lat', 'dT', 'navg',
                                     'Mean', 'Median', 'stdev', 'Mode')]

    als_fb_cols = _stat_cols('ALS_fb_')
    asr_fb_cols = _stat_cols('ASR_fb_')
    asr_ssh_cols = _stat_cols('ASR_ssh_')
    asr_ssha_cols = _stat_cols('ASR_ssha_')
    oib_cols = ['OIB_lat_mean', 'OIB_lon_mean', 'OIB_thickness_mean',
        'OIB_thickness_unc_mean', 'OIB_mean_fb_mean', 'OIB_ATM_fb_mean',
        'OIB_fb_unc_mean', 'OIB_snow_depth_mean', 'OIB_snow_depth_unc_mean',
        'OIB_ssh_mean', 'OIB_ssh_sd_mean', 'OIB_ssh_tp_dist_mean',
        'OIB_thickness_stdev', 'OIB_thickness_unc_stdev',
        'OIB_mean_fb_stdev', 'OIB_ATM_fb_stdev', 'OIB_fb_unc_stdev',
        'OIB_snow_depth_stdev', 'OIB_snow_depth_unc_stdev', 'OIB_ssh_stdev',
        'OIB_ssh_sd_stdev', 'OIB_thickness_median',
        'OIB_thickness_unc_median', 'OIB_mean_fb_median',
        'OIB_ATM_fb_median', 'OIB_fb_unc_median', 'OIB_snow_depth_median',
        'OIB_snow_depth_unc_median', 'OIB_ssh_median', 'OIB_ssh_sd_median',
        'OIB_thickness_max', 'OIB_thickness_unc_max', 'OIB_mean_fb_max',
        'OIB_ATM_fb_max', 'OIB_fb_unc_max', 'OIB_snow_depth_max',
        'OIB_snow_depth_unc_max', 'OIB_ssh_max', 'OIB_ssh_sd_max',
        'OIB_ssh_tp_dist_max', 'OIB_thickness_min', 'OIB_thickness_unc_min',
        'OIB_mean_fb_min', 'OIB_ATM_fb_min', 'OIB_fb_unc_min',
        'OIB_snow_depth_min', 'OIB_snow_depth_unc_min', 'OIB_ssh_min',
        'OIB_ssh_sd_min', 'OIB_ssh_tp_dist_min', 'OIB_MYIflag_mode',
        'OIB_dt_time', 'OIB_fp_lat', 'OIB_fp_lon', 'OIB_n_pts']

    # Find the relevant files in the input directory.
    cs2file = glob.glob(indir + '*CS2L2I*' + version + '*')
    AWIfile = glob.glob(indir + '*CS2AWI*' + version + '*')
    resdata = set(glob.glob(indir + '*' + version + '*')) - \
        set(glob.glob(indir + '*CS2*')) - \
        set(glob.glob(indir + '*CS2ORB*')) - \
        set(glob.glob(indir + '*CVLDAT*'))
    # Sort into alphabetical order (AEM, ALS, ASR, OIB).
    resdata = sorted(resdata)

    if not cs2file and not AWIfile:
        # Fail early with a clear message; originally 'foo' and 'addarr'
        # were left undefined here and the code crashed much later.
        raise IOError('No CS2L2I or CS2AWI file found in %s' % indir)

    # If a golden day does not include all data types, pad the output with
    # columns of NaN under the correct column names.
    addcols = 0
    addhead = []
    if not AWIfile:                                   # No AWI CS2 file
        addcols += len(awi_cols)
        addhead += awi_cols
    if not cs2file:                                   # No ESA CS2 file
        addcols += len(esa_cols)
        addhead += esa_cols
    if not any("AEMSIT" in s for s in resdata):       # No AEM file
        addcols += len(aem_cols)
        addhead += aem_cols
    if not any("ALS_nodes" in s for s in resdata):    # No ALS nodes file
        addcols += 1
        addhead += ['ALS_leads']
    if not any("ASR_nodes" in s for s in resdata):    # No ASR nodes file
        addcols += 1
        addhead += ['ASR_leads']
    if not any("ASRRFB" in s for s in resdata):       # No ASR FB file
        addcols += len(asr_fb_cols)
        addhead += asr_fb_cols
    if not any("ASRSSH" in s for s in resdata):       # No ASR SSH file
        addcols += len(asr_ssh_cols)
        addhead += asr_ssh_cols
    if not any("ASRSSA" in s for s in resdata):       # No ASR SSA file
        addcols += len(asr_ssha_cols)
        addhead += asr_ssha_cols
    if not any("ALSLFB" in s for s in resdata):       # No ALS FB file
        addcols += len(als_fb_cols)
        addhead += als_fb_cols
    if not any("OIBSI" in s for s in resdata):        # No OIB file
        addcols += len(oib_cols)
        addhead += oib_cols

    # Report the files we found (parenthesised prints work on Py2 and Py3).
    print("CS2file: %s" % cs2file)
    print("AWIfile: %s" % AWIfile)
    print("Airborne Files: %s" % resdata)

    def _cvldat_name(src):
        # Output name CVLDAT_<fields 1-5 of the source name>_<version>.csv,
        # with "_incomplete" appended when completeflag == 0.
        parts = os.path.basename(src).split('_')
        suffix = '' if completeflag == 1 else '_incomplete'
        return os.path.join(indir, 'CVLDAT_' + '_'.join(parts[1:6]) + '_' +
                            version + suffix + '.csv')

    # Prefer the ESA file for the output name; fall back to the AWI file.
    # Fixed: the original tested `AWIfile[0] != []`, which raises IndexError
    # whenever AWIfile is empty.
    foo = _cvldat_name(cs2file[0] if cs2file else AWIfile[0])

    # Read whichever CryoSat-2 files exist and join them column-wise.
    frames = []
    if cs2file:
        frames.append(pd.read_csv(cs2file[0], sep=',', header=0,
                                  index_col=False, na_values=['nan']))
    if AWIfile:
        frames.append(pd.read_csv(AWIfile[0], sep=',', header=0,
                                  index_col=False, na_values=['nan']))
    out = pd.concat(frames, ignore_index=False, axis=1)

    # Read in the resampled, subset airborne data.
    for rdata in resdata:
        data = pd.read_csv(rdata, sep=',', header=0, index_col=False,
                           na_values=['nan'])
        # Quicklook OIB product: rename its columns to the IDCSI naming.
        if "OIBSIQ" in rdata:
            data.columns = oib_cols
        out = pd.concat([out, data], ignore_index=False, axis=1)

    # Add any missing data records as NaN columns.
    # Fixed: the original indexed out["Latitude"] even in the branch where
    # only "AWICS2_lat" existed (KeyError); the row count is simply len(out).
    addarr = np.full((len(out.index), addcols), np.nan)
    addpd = pd.DataFrame(data=addarr, columns=addhead)
    out = pd.concat([out, addpd], ignore_index=False, axis=1)

    # Fixed output order: ESA, AWI, then airborne sensors alphabetically.
    sortorder = (esa_cols + awi_cols + aem_cols + als_fb_cols +
                 ['ALS_leads'] + asr_fb_cols + asr_ssh_cols +
                 asr_ssha_cols + ['ASR_leads'] + oib_cols)
    out = out[sortorder]

    # Write the output data.
    out.to_csv(path_or_buf=foo, sep=',', na_rep='nan', header=True,
               index=False, mode='w', float_format='%f')


if __name__ == '__main__':
    import os
    import fnmatch
    indir = os.getcwd()
    dirList = []
    version = "A_V001"
    # Walk all subdirectories; any directory holding an ESA CS2 file of the
    # requested version gets concatenated.  os.path.dirname(filename) is ''
    # for a bare name, so os.path.join(root, '') yields root + separator,
    # which the string-concatenation globs in concatenate_output require.
    for root, dirnames, filenames in os.walk(indir):
        for filename in fnmatch.filter(filenames, 'CS2L2I*' + version + '.csv'):
            dirList.append(os.path.join(root, os.path.dirname(filename)))

    for direct in dirList:
        # Fixed: the original passed the non-existent keyword arguments
        # flag= and append=, which raised TypeError on every call.
        concatenate_output(direct, completeflag=1, version=version)
0cm - 500cm, bin size = 10cm 500cm - 2000cm, bin size = 50cm + + SSH PDF -3000cm - 3000cm, bin size = 5cm Arguments: datatype : str - Valid choices: 'freeboard' | 'thickness' + Valid choices: 'freeboard' | 'thickness' | 'ssh' """ if datatype.lower() == 'freeboard': self.set_pdf_bins(np.concatenate((np.linspace(0, 50, 50/2+1), @@ -448,8 +455,10 @@ def set_data_type(self, datatype): elif datatype.lower() == 'thickness': self.set_pdf_bins(np.concatenate((np.linspace(0, 500, 500/10+1), np.linspace(550, 2000, 1500/50)))*0.01) + elif datatype.lower() == 'ssh': + self.set_pdf_bins((np.linspace(-3000,3000,6000/5+1))*0.01) else: - raise ValueError('Wrong datatype string:\nValid choices: \'freeboard\', \'thickness\'') + raise ValueError('Wrong datatype string:\nValid choices: \'freeboard\', \'thickness\', \'ssh\'') def set_pdf_bins(self, bins): @@ -494,12 +503,15 @@ def set_data_unit(self, unit): if not isinstance(unit, str): raise ValueError('Argument needs to be of type string') self.data_unit = unit + + - - def resample(self): + def resample(self,timer=False): """ Perform the data resampling + Set timer to true to time execution """ + import time # Check preconditions: if not self.__flag_orbit_pos: raise ValueError('Use method add_cs2orbit to add footprint loctions before resampling') @@ -508,7 +520,7 @@ def resample(self): if not self.__flag_pdf_bins: raise ValueError('Use method set_data_type of set_pdf_bins to add binning information before resampling') - # Prepare data fiels + # Prepare data fields self.n_samples = np.zeros(shape=(self.cs2fp.n_records), dtype=np.int32) self.time_offset_seconds = np.ones(shape=(self.cs2fp.n_records), dtype=np.float32)*np.nan self.res_mean = np.zeros(shape=(self.cs2fp.n_records), dtype=np.float32)*np.nan @@ -518,17 +530,27 @@ def resample(self): self.pdf_scaled = np.zeros(shape=(self.cs2fp.n_records, self._nbins), dtype=np.float32) self.pdf_scale_fact = np.zeros(shape=(self.cs2fp.n_records), dtype=np.float32) self.cs2fp_points_list = 
np.ndarray(shape=(self.cs2fp.n_records), dtype=object) - + num=0 + if timer: + StartTimeRes=time.clock() # All clear, start resampling for i in np.arange(self.cs2fp.n_records): + totalnum=self.cs2fp.n_records + #if float(num) % 10.0 == 0: + # print "Record"+str(num)+" of "+str(totalnum) + if i % int(totalnum/20.0) == 0: + print (str(num*5)+"%, Time:"+str(datetime.datetime.now())) + num+=1 # Define CS-2 footprint as polygon cs2fp_x = [self.cs2fp.lon_ur[i], self.cs2fp.lon_ul[i], self.cs2fp.lon_ll[i], self.cs2fp.lon_lr[i]] cs2fp_y = [self.cs2fp.lat_ur[i], self.cs2fp.lat_ul[i], self.cs2fp.lat_ll[i], self.cs2fp.lat_lr[i]] # CryoSat-2 tracks are mostly norths-south in cal/val regions # -> Use a latitude filter to speed things up again # -> look only at values +/- 0.02 degree latitude around footprint center coordinate - data_subset = np.where(np.abs(self.cs2fp.lat[i] - self.cv_lat) < 0.02)[0] - + data_subset = np.where(np.abs(self.cs2fp.lat[i] - self.cv_lat) < 0.02)[0] #for standard footprint + #data_subset = np.where(np.abs(self.cs2fp.lat[i] - self.cv_lat) < 0.2)[0] #for large footprints(JB) + if len(np.shape(data_subset)) > 1: + raise ValueError("Error datasubset dimensions too large.") # Check if any Cal/Val data point near footprint center coordinate if len(data_subset) == 0: continue @@ -548,14 +570,14 @@ def resample(self): self.cs2fp_points_list[i] = cs2fp_points # Analyze statistics of footprint cal/val data self.n_samples[i] = len(cs2fp_points) - self.res_mean[i] = np.nanmean(self.cv_data[cs2fp_points]) - self.res_median[i] = np.median(self.cv_data[cs2fp_points]) - self.res_sdev[i] = np.nanstd(self.cv_data[cs2fp_points]) + self.res_mean[i] = bn.nanmean(self.cv_data[cs2fp_points]) + self.res_median[i] = bn.median(self.cv_data[cs2fp_points]) + self.res_sdev[i] = bn.nanstd(self.cv_data[cs2fp_points]) # Calculate maximum time difference time_delta = self.cs2fp.time[i] - self.cv_time[cs2fp_points] max_index = np.argmax(np.abs(time_delta)) self.time_offset_seconds[i] 
# NOTE: the two defs below are methods of pyoval's CS2OrbitResData class
# (added wholesale by this patch) and belong at method indentation inside it.
def resampleIDX(self, timer=False):
    '''
    Perform the data resampling using an R-tree spatial index, Shapely
    polygons and prepared polygons (with speedups enabled), and bottleneck
    for the per-footprint statistics instead of numpy.

    Same preconditions and result fields as resample(); only the
    point-in-footprint search strategy differs.

    Usage:
        instead of calling resample() call resampleIDX()

    Arguments:
        timer : bool
            If True, print timing for index generation and resampling.
    '''
    # Check preconditions
    if not self.__flag_orbit_pos:
        raise ValueError('Use method add_cs2orbit to add footprint loctions before resampling')
    if not self.__flag_calval_data:
        raise ValueError('Use method add_calval_data to add high resolution data before resampling')
    if not self.__flag_pdf_bins:
        raise ValueError('Use method set_data_type of set_pdf_bins to add binning information before resampling')

    # Third-party modules are imported lazily so the rest of pyoval does
    # not require rtree/shapely/bottleneck unless this method is used.
    import time
    from rtree import index
    from shapely.geometry import Point, Polygon
    from shapely import speedups
    from shapely.prepared import prep
    import bottleneck as bn
    speedups.enable()

    # Prepare result fields (one entry per CS-2 footprint).
    n_rec = self.cs2fp.n_records
    self.n_samples = np.zeros(shape=(n_rec), dtype=np.int32)
    self.time_offset_seconds = np.ones(shape=(n_rec), dtype=np.float32) * np.nan
    self.res_mean = np.zeros(shape=(n_rec), dtype=np.float32) * np.nan
    self.res_median = np.zeros(shape=(n_rec), dtype=np.float32) * np.nan
    self.res_sdev = np.zeros(shape=(n_rec), dtype=np.float32) * np.nan
    self.res_mode = np.zeros(shape=(n_rec), dtype=np.float32) * np.nan
    self.pdf_scaled = np.zeros(shape=(n_rec, self._nbins), dtype=np.float32)
    self.pdf_scale_fact = np.zeros(shape=(n_rec), dtype=np.float32)
    self.cs2fp_points_list = np.ndarray(shape=(n_rec), dtype=object)
    self.polygons = np.ndarray(shape=(n_rec), dtype=object)

    if timer:
        # time.clock() was removed in Python 3.8; wall-clock suffices here.
        t_idx0 = time.time()
    # Bulk-load the spatial index to disk, then reopen it for querying.
    # Fixed: the original called the unbound names generator() and idx;
    # both are attributes of self.
    self.idx = index.Rtree('bulk', self.generator(), overwrite=1)
    del self.idx
    self.idx = index.Index("bulk")
    if timer:
        t_idx1 = time.time()
        t_res0 = t_idx1

    # Do the resampling.
    for i in np.arange(n_rec):
        # Footprint polygon, closed by repeating the first corner.
        fp_x = [self.cs2fp.lon_ur[i], self.cs2fp.lon_ul[i],
                self.cs2fp.lon_ll[i], self.cs2fp.lon_lr[i],
                self.cs2fp.lon_ur[i]]
        fp_y = [self.cs2fp.lat_ur[i], self.cs2fp.lat_ul[i],
                self.cs2fp.lat_ll[i], self.cs2fp.lat_lr[i],
                self.cs2fp.lat_ur[i]]
        # Fixed: the original wrote into polygon_points without ever
        # allocating it (NameError on the first iteration).
        self.polygons[i] = Polygon(list(zip(fp_x, fp_y)))

        # Coarse pass: candidate points from the index's bounding-box query.
        result = np.array(list(self.idx.intersection(
            self.polygons[i].bounds, objects="raw")))
        if len(result) == 0:
            continue

        # Fine pass: exact containment test with a prepared polygon.
        prp = prep(self.polygons[i])
        contains = [prp.contains(Point(lo, la))
                    for lo, la in zip(self.cv_lon[result],
                                      self.cv_lat[result])]
        hits = np.nonzero(contains)[0]
        if len(hits) == 0:
            continue
        # Skip footprints whose contained data is entirely NaN.
        if len(np.where(np.isfinite(self.cv_data[result[hits]]))[0]) == 0:
            continue

        # Per-footprint statistics (bottleneck nan-aware versions).
        self.cs2fp_points_list[i] = result[hits]
        self.n_samples[i] = len(hits)
        self.res_mean[i] = bn.nanmean(self.cv_data[result[hits]])
        self.res_median[i] = bn.median(self.cv_data[result[hits]])
        self.res_sdev[i] = bn.nanstd(self.cv_data[result[hits]])
        # Largest |time offset| between footprint and its cal/val points.
        time_delta = self.cs2fp.time[i] - self.cv_time[result[hits]]
        max_index = np.argmax(np.abs(time_delta))
        self.time_offset_seconds[i] = time_delta[max_index].total_seconds()
        # Scaled pdf: number of points per bin = pdf_scaled * pdf_scale_fact.
        freq, bins = np.histogram(self.cv_data[result[hits]],
                                  bins=self.bins, density=True)
        self.pdf_scale_fact[i] = np.amax(freq)
        self.pdf_scaled[i, :] = freq.astype(np.float32) / self.pdf_scale_fact[i]
        self.res_mode[i] = self.bin_center[np.argmax(freq)]

    if timer:
        t_res1 = time.time()
        print("Time to Generate Index = %s" % (t_idx1 - t_idx0))
        print("Time to Resample Data = %s" % (t_res1 - t_res0))
        print("Total ResTime = %s" % (t_res1 - t_idx0))

    # Get overlap between orbit and cal/val data
    # (remove empty footprints at end & beginning of orbit).
    self._get_overlap()


def generator(self):
    '''
    Yield (id, (minx, miny, maxx, maxy), obj) records for bulk-loading the
    R-tree: a zero-area box at each cal/val point location.
    '''
    # Fixed: the original referenced the unbound names cv_lon/cv_lat
    # instead of the instance attributes.
    for i in np.arange(len(self.cv_lat)):
        yield (i, (self.cv_lon[i], self.cv_lat[i],
                   self.cv_lon[i], self.cv_lat[i]), i)
= self.cs2fp.time[i1].strftime(str_fmt_dt_filename) - filename = str_fmt_filename% (self.data_label, self.cs2fp.orbit, start_time, stop_time) + print start_time + print stop_time + filename = str_fmt_filename % (self.data_label, self.cs2fp.orbit, start_time, stop_time) fileout = os.path.join(folder, filename) # Write data to file fhandle = open(fileout,'w') @@ -1091,7 +1218,7 @@ def __geo_getr(self, lat): """ Mean earth ellipsoidial radius at latitude range in meter """ sma = 6378137.0 smn = 6356752.3142755 - mlat = np.nanmean(lat) + mlat = bn.nanmean(lat) r = np.sqrt( ((sma**2.0 / np.cos(mlat))**2.0 + (smn**2.0 / np.sin(mlat))**2.0) / \ ((sma / np.cos(mlat))**2.0 + (smn / np.sin(mlat))**2.0) ) return r @@ -1195,7 +1322,9 @@ def points_in_polygon(x, y, px, py, return_type='bool'): """ from shapely.geometry import Polygon from shapely.geometry import Point - + from shapely import speedups + from shapely.prepared import prep + speedups.enable() # Sanity checks if np.shape(x) != np.shape(y): raise ValueError('Position of input points are of different shape') @@ -1203,20 +1332,35 @@ def points_in_polygon(x, y, px, py, return_type='bool'): raise ValueError('Position of polygon points are of different shape') if len(px) < 3: raise ValueError('Not enough points to span polygon (px, py)') + + #print np.shape(x) + n_points = len(x) n_polygon_points = len(px) # Set up the polygon px_closed = np.concatenate((px, [px[0]])) py_closed = np.concatenate((py, [py[0]])) polygon_points = np.ndarray(shape=(n_polygon_points+1, 2), dtype=np.float32) - for j in np.arange(n_polygon_points+1): - polygon_points[j, 0] = px_closed[j] - polygon_points[j, 1] = py_closed[j] - polygon = Polygon(polygon_points) + polygon_points[:, 0] = px_closed[:] #this is one of the advantages of ndarrays + polygon_points[:, 1] = py_closed[:] #this is one of the advantages of ndarrays + #for j in np.arange(n_polygon_points+1): + # polygon_points[j, 0] = px_closed[j] + # polygon_points[j, 1] = py_closed[j] + 
#polygon = Polygon(polygon_points) # Check if each point is in Polygon - in_polygon = np.ndarray(shape=(n_points), dtype=np.bool) - for i in np.arange(n_points): - in_polygon[i] = polygon.contains(Point(x[i], y[i])) + + #Modification (Justin Beckers, March 17,2016) + prp = prep(polygon) + in_polygon = np.asarray(map(prp.contains,(map(Point,x,y))),dtype=bool) + #Original pyoval code + #in_polygon = np.ndarray(shape=(n_points), dtype=np.bool) + #for i in np.arange(n_points): + # #if polygon.area >0.1: #when crossing the international dateline + # # cv(polygon_points[:,0]) + # # polygon=Polygon(polygon_points) + # # in_polygon[i]=polygon.contains(Point(x[i],y[i])) + # #else: + # in_polygon[i] = polygon.contains(Point(x[i], y[i])) if return_type == 'bool': return in_polygon elif return_type == 'masked': diff --git a/resample_OIBIDCSI.py b/resample_OIBIDCSI.py new file mode 100644 index 0000000..d80016b --- /dev/null +++ b/resample_OIBIDCSI.py @@ -0,0 +1,501 @@ +import traceback +import numpy as np +from rtree import index +import pandas as pd +__version__=1.0 +__author__="Justin Beckers, YorkU/UAlberta" + +''' + + Resamples the NASA Operation IceBridge IDCSI data product. + Takes the IDCSI file and resamples it to the footprint corner coordinates + file, providing statistics for each footprint. + + This code requires that the CS2ORB file (orbit file sized to Cryosat2 file) + has already been generated. If not, the file will produce a file for the + extent of any cc file (but then contains many polygons with no data and no + CryoSat-2 data). However this functionality may be useful. + The code performs similar operations to the pyoval CS2OrbitResData.resample + function. However, as of version 1.0 this script does not produce a map or + other pdf summary. 
Furthermore, because the NASA OIB IDCSI product is a 40m + average product, and this code is intended to resample the data to CryoSat + footprints which are 300m along track (thus ~7-9 points per footprint), a + statistical probability distribution is not calculated, thus we have no + calculation of the modal value in each footprint. + Mode/PDF calculations may be implemented in a future release. + + + Author: Justin Beckers + Author Contact: beckers@ualberta.ca + + Verion: 1.0 + Version Notes: + 1.0: Initial release. + + Release Date: 2016/03/17 + + Inputs: + oibfile: Operation IceBridge IDCSI data file. + ccfile: CryoSat-2 corner coordinate file (CS2ORB*, cs2_reforbit*) + resample: Flag set to 1 if you wish to resample the OIB IDCSI file + exp_dat: Flag set to 1 if writing the resampled data to a file + version: File version output code string. + Outputs: + OIBSIX*.csv: where X is either 2,4 or Q for the NASA OIB IDCSI + quicklook product + + Usage Notes: + Case 1: Using Main + From commandline/terminal, can simply run: + >>>python process_OIB.py + This executes the program with the parameters set on + lines 497 - 511. Modify this section in the script file text to + meet your requirements + if __name__=='__main__': + Case 2: Import functions: + import process_OIB + import deconvert_lon + import resample_OIB + import read_OIB + import export_dat + + ''' + +def process_OIB(oibfile,ccfile, resample=1,exp_dat=1,version=''): + ''' + The main processing script. Calls the other functions to read, resample, + export the data. 
+ ''' + try: + print "Processing OIB file: ", oibfile + print "Processing Orbit file: ",ccfile + oibdata, ccdata = read_OIB(oibfile,ccfile) + print "Length of Orbit file: ", len(ccdata) + print "Original Length of IceBridge File: ", len(oibdata) + res=resample + if res == 1: + if "ql" in oibfile: + print "OIB file is a quicklook file" + outdata = resample_OIB(oibdata,ccdata,ql=1) + else: + print "OIB file is a final version file" + outdata = resample_OIB(oibdata,ccdata,ql=0) + + if outdata.shape[0] == len(ccdata): + print "OIB output file and orbit file are same length: SUCCESS!!" + else: + print "Strange!? The OIB output and orbit file are not the same length. Error!","\n" + if exp_dat == 1: + if "ql" in oibfile: + outfile = os.path.join(os.path.dirname(ccfile),'OIBSIQ_'+ + os.path.basename(ccfile).split('_')[1]+'_'+ + os.path.basename(ccfile).split('_')[2]+'_'+ + os.path.basename(ccfile).split('_')[3]+'_'+ + os.path.basename(ccfile).split('_')[4]+version+ + '_V001.csv') + else: + outfile = os.path.join(os.path.dirname(ccfile),'OIBSI'+ + os.path.basename(oibfile).split('_')[0][-1]+'_'+ + os.path.basename(ccfile).split('_')[1]+'_'+ + os.path.basename(ccfile).split('_')[2]+'_'+ + os.path.basename(ccfile).split('_')[3]+'_'+ + os.path.basename(ccfile).split('_')[4]+version+ + '_V001.csv') + export_dat(outdata,outfile) + print "Output path: ",outfile,"\n" + except: + print traceback.format_exc() + pass +def export_dat(outdata,filename): + '''Exports the resampled data to a csv file''' + + a=list(outdata.columns.values) + + del a[-1] #needed because we were hanging onto the polygon objects. 
def export_dat(outdata, filename):
    '''
    Export the resampled data frame to a comma separated file.

    Drops the last column (it holds the shapely polygon objects) and any
    column whose name contains "empty" before writing, since those are
    not serialisable as plain CSV.

    Parameters:
        outdata:  pandas DataFrame of resampled data; the final column
                  is assumed to hold non-serialisable polygon objects.
        filename: output CSV path.

    Returns "Success" when the file was written, "Error" otherwise.
    '''
    a = list(outdata.columns.values)
    del a[-1]  # last column holds the shapely polygon objects
    b = [x for x in a if not "empty" in x]  # drop empty-polygon columns
    try:
        # NOTE(review): index_label is ignored when index=False —
        # harmless, kept for fidelity with the original call.
        outdata.to_csv(path_or_buf=filename, sep=',', columns=b,
                       na_rep='nan', header=True, index=False, mode='w',
                       index_label='ID')
        Error = "Success"
    # Bug fix: the bare except also trapped SystemExit and
    # KeyboardInterrupt; catch Exception and still report the traceback.
    except Exception:
        Error = "Error"
        print(traceback.format_exc())
    return Error
+ oibdata=np.empty(1) + #read using numpy + bar=np.genfromtxt(oibfile,delimiter=',',names=True,usecols=cols, + missing_values=['-99999','-99999.','-99999.0','-99999.00','-99999.000', + '-999.','-999','-99999.0000','-99999.00000','-99999.000000', + '-99999.0000000','-99999.00000000','-99999.000000000', + '-99999.0000000000','nan',' -99999',str(np.int64(-99999.000000)), + str(np.int64(-99999)),np.int64(-99999),-99999.,-99999,-99999.0, + -99999.00,-99999.000,-99999.0000,-99999.00000,-99999.000000, + -99999.0000000,-99999.00000000,-99999.000000000,int(-99999), + np.float64(-99999),np.int64(-99999.000000)], + filling_values=np.nan,usemask=False) + #Convert loaded numpy data array to Pandas dataframe + oibdata=pd.DataFrame(data=bar,columns = list(bar.dtype.names)) + oibdata.replace(['-99999','-99999.','-99999.0','-99999.00','-99999.000', + '-99999.0000','-99999.00000','-99999.000000','-99999.0000000', + '-999.','-999','-99999.00000000','-99999.000000000', + '-99999.0000000000','nan',str(np.int64(-99999.000000)), + str(np.int64(-99999)),np.int64(-99999),' -99999',' -99999', + -99999.,-99999,-99999.0,-99999.00,-99999.000,-99999.0000, + -99999.00000,-99999.000000,-99999.0000000,-99999.00000000, + -99999.000000000,int(-99999),np.float64(-99999), + np.int64(-99999.000000)],value=np.nan,inplace=True) + + #Read in the cc file. + cv = lambda x: float(x)+360.0 if float(x) < 0.0 else float(x) + ccdata = np.genfromtxt(ccfile,converters={6:cv,8:cv,10:cv,12:cv,14:cv}) + + return oibdata,ccdata + +def resample_OIB(oibdata,ccdata,ql=1): + ''' + Does the resampling of the OIB IDCSI files. + Uses a spatial index to search points fast. Does not use prepared polygons + ''' + from shapely.geometry import Point, Polygon + from shapely import speedups + import datetime + import numpy as np + import pytz + #Setup which columns we want to calculate which statistics for. 
+ meancols = [0,1,2,3,4,5,6,7,8,11,12,13] + stdevcols = [2,3,4,5,6,7,8,11,12] + mediancols = [2,3,4,5,6,7,8,11,12] + maxmincols = [2,3,4,5,6,7,8,11,12,13] + speedups.enable() + #Check for crossings of IDL/prime meridian + if ((oibdata['lon'] >=359.).any(skipna=True)==1 and + (oibdata['lon']<1.).any(skipna=True)==1 and + np.logical_and((179.=359.).any(skipna=True)==0 and + (oibdata['lon']<1.).any(skipna=True)==0 and + np.logical_and((179.=359.).any(skipna=True)==1 and + (oibdata['lon']<1.).any(skipna=True)==1 and + np.logical_and((179.=359.).any(skipna=True)==0 and + (oibdata['lon']<1.).any(skipna=True)==0 and + np.logical_and((179.0.1: + #because polygon is crossing prime meridian area is large + #Let's transform the polygon coordinates back to -180/180 + #Transform the points to -180/180 and test + print poly + #left in so that we notice when these cases happen. + points=list(poly.exterior.coords) + points_x,points_y=zip(*points) + newx=[] + for i in np.arange(len(points_x)): + if points_x[i]>180.0: + newx.append(points_x[i]-360.0) + else: + newx.append(points_x[i]) + points_x=newx + n_polygon_points=len(points_x) + polygon_points = np.ndarray(shape=(n_polygon_points, 2), dtype=np.float64) + for k in np.arange(n_polygon_points): + polygon_points[k,0]=points_x[k] + polygon_points[k,1]=points_y[k] + poly=Polygon(polygon_points) + #Test that the point is in the polygon + if poly.contains( + Point(deconvert_lon(oibdata.iloc[j]['lon']), + oibdata.iloc[j]['lat'])): + newpts.append(j) + n_pts+=1 + #If the area is okay, let's move on and check if the point + #is in the polygon + else: + if poly.contains(Point(oibdata.iloc[j]['lon'], + oibdata.iloc[j]['lat'])): + newpts.append(j) + n_pts+=1 + #let's append the polygon number (is keeping all polygons). + #Relies on having already calculated orbit extent for CS2 file. 
+ newpolys.append(i) + #Based on the number of points, calculate the statistics + #If no points, values are nan or 0 (for navg) + if n_pts == 0: + u[i] = np.empty(len(meancols))*np.nan + s[i] = np.empty(len(stdevcols))*np.nan + m[i] = np.empty(len(mediancols))*np.nan + h[i] = np.empty(len(maxmincols))*np.nan + l[i] =np.empty(len(maxmincols))*np.nan + myiflag[i] = np.empty(1)*np.nan + #foobar will hold the median point of the points in a poly + foobar[i]= 0.0 # + navg[i]=0 + #If 1 point, calculate the statistics, but std will be nan, + #others will just be the value of the point + elif n_pts == 1: + u[i] = oibdata.iloc[newpts,meancols].mean() + s[i] = oibdata.iloc[newpts,stdevcols].std() + m[i] = oibdata.iloc[newpts,mediancols].median() + h[i] = oibdata.iloc[newpts,maxmincols].max() + l[i] = oibdata.iloc[newpts,maxmincols].min() + myiflag[i] = oibdata.iloc[newpts,14].mean() + navg[i] = n_pts + foobar[i]=np.floor(np.median(newpts)) + #If more than one point (what we expect), calculate the + #statistics. Note that the mode (most common value) is + #calculated for all fields, but only the value for the + #MY_ICE_FLAG is kept. + elif n_pts > 1: + foo = oibdata.iloc[newpts].mode() + u[i] = oibdata.iloc[newpts,meancols].mean() + s[i] = oibdata.iloc[newpts,stdevcols].std() + m[i] = oibdata.iloc[newpts,mediancols].median() + h[i] = oibdata.iloc[newpts,maxmincols].max() + l[i] = oibdata.iloc[newpts,maxmincols].min() + myiflag[i]=foo['my_ice_flag'].iloc[0] + navg[i] = n_pts + foobar[i]=np.floor(np.median(newpts)) + except: + print traceback.format_exc() + #Newpolys sould be unique anyways, but doublecheck and return only the + #unique polygon records (no doubles) + polys=ccdata[np.unique(newpolys),:] + #Concatenate the variables holding the resampled data. + out=np.concatenate((u,s,m,h,l,myiflag),axis=1) + #Out should have lenght of newpolys so again check for uniqueness + out=out[np.unique(newpolys),:] + #Same with navg. 
+ navg=navg[np.unique(newpolys),:] + #Lets rewrite the header here to separate out IDCSI and IDCSI quickooks + if ql==1: + outhead=['OIBql_lat_mean','OIBql_lon_mean','OIBql_thickness_mean','OIBql_thickness_unc_mean','OIBql_mean_fb_mean', + 'OIBql_ATM_fb_mean','OIBql_fb_unc_mean','OIBql_snow_depth_mean','OIBql_snow_depth_unc_mean', + 'OIBql_ssh_mean','OIBql_ssh_sd_mean','OIBql_ssh_tp_dist_mean', + 'OIBql_thickness_stdev','OIBql_thickness_unc_stdev','OIBql_mean_fb_stdev', + 'OIBql_ATM_fb_stdev','OIBql_fb_unc_stdev','OIBql_snow_depth_stdev','OIBql_snow_depth_unc_stdev', + 'OIBql_ssh_stdev','OIBql_ssh_sd_stdev', + 'OIBql_thickness_median','OIBql_thickness_unc_median','OIBql_mean_fb_median','OIBql_ATM_fb_median', + 'OIBql_fb_unc_median','OIBql_snow_depth_median','OIBql_snow_depth_unc_median', + 'OIBql_ssh_median','OIBql_ssh_sd_median', + 'OIBql_thickness_max','OIBql_thickness_unc_max','OIBql_mean_fb_max','OIBql_ATM_fb_max','OIBql_fb_unc_max', + 'OIBql_snow_depth_max','OIBql_snow_depth_unc_max','OIBql_ssh_max','OIBql_ssh_sd_max','OIBql_ssh_tp_dist_max', + 'OIBql_thickness_min','OIBql_thickness_unc_min', + 'OIBql_mean_fb_min','OIBql_ATM_fb_min','OIBql_fb_unc_min','OIBql_snow_depth_min','OIBql_snow_depth_unc_min', + 'OIBql_ssh_min','OIBql_ssh_sd_min','OIBql_ssh_tp_dist_min','OIBql_MYIflag_mode'] + else: + outhead=['OIB_lat_mean','OIB_lon_mean','OIB_thickness_mean','OIB_thickness_unc_mean','OIB_mean_fb_mean', + 'OIB_ATM_fb_mean','OIB_fb_unc_mean','OIB_snow_depth_mean','OIB_snow_depth_unc_mean', + 'OIB_ssh_mean','OIB_ssh_sd_mean','OIB_ssh_tp_dist_mean', + 'OIB_thickness_stdev','OIB_thickness_unc_stdev','OIB_mean_fb_stdev', + 'OIB_ATM_fb_stdev','OIB_fb_unc_stdev','OIB_snow_depth_stdev','OIB_snow_depth_unc_stdev', + 'OIB_ssh_stdev','OIB_ssh_sd_stdev', + 'OIB_thickness_median','OIB_thickness_unc_median','OIB_mean_fb_median','OIB_ATM_fb_median', + 'OIB_fb_unc_median','OIB_snow_depth_median','OIB_snow_depth_unc_median', + 'OIB_ssh_median','OIB_ssh_sd_median', + 
'OIB_thickness_max','OIB_thickness_unc_max','OIB_mean_fb_max','OIB_ATM_fb_max','OIB_fb_unc_max', + 'OIB_snow_depth_max','OIB_snow_depth_unc_max','OIB_ssh_max','OIB_ssh_sd_max','OIB_ssh_tp_dist_max', + 'OIB_thickness_min','OIB_thickness_unc_min', + 'OIB_mean_fb_min','OIB_ATM_fb_min','OIB_fb_unc_min','OIB_snow_depth_min','OIB_snow_depth_unc_min', + 'OIB_ssh_min','OIB_ssh_sd_min','OIB_ssh_tp_dist_min','OIB_MYIflag_mode'] + + #Let's create some of the other variables, like the timestamp + oib_time = np.ndarray(shape=(len(oibdata)),dtype=object) + secs = np.empty(len(oibdata))*np.nan + msecs = np.empty(len(oibdata))*np.nan + secs = np.floor(oibdata['elapsed'].values.tolist()) + msecs = np.floor(1e6 * (oibdata['elapsed'].values.tolist() - np.floor(secs))) + date = oibdata['date'].values.tolist() + date=map(int,date) + date=map(str,date) + #Lets calculate the OIB timestamp. + for i in np.arange(len(date)): + oib_time[i] = datetime.datetime.strptime(date[i],"%Y%m%d").replace( + tzinfo=pytz.utc) +\ + datetime.timedelta(seconds=int(secs[i]), microseconds=int( + msecs[i])) + foobar=foobar.astype(int) + #Get the for the middle point of all OIB points in each footprint + oib_time=np.where(foobar==0,0,oib_time[foobar]) + + #Let's calculate the CS2 timestamp from the CC data. + cc_time=np.ndarray(shape=(len(polys)),dtype=object) + for i in np.arange(len(cc_time)): + cc_time[i] = datetime.datetime(int(ccdata[i,0]), int(ccdata[i,1]),int(ccdata[i,2]), + int(ccdata[i,3]), int(ccdata[i,4]), int(ccdata[i,5]),int(1e6*(ccdata[i,5] - np.floor(ccdata[i,5]))), + tzinfo=pytz.utc) + #Let's calculate the difference between the CS2 time and OIB. + dt_time=np.ndarray(shape=(len(cc_time)),dtype=object) + for i in np.arange(len(cc_time)): + if oib_time[i]==0: + dt_time[i]=np.nan + else: + dt_time[i]=(cc_time[i]-oib_time[i]).total_seconds() + + #Check for uniqueness in the shapely polygon objects. 
+ g = np.unique(newpolys) + c = [p[x] for x in g] + #Setup the output dataframe + outdf=pd.DataFrame(data=out,columns=outhead) + + #Add in the delta time, footprint latitude,longitude, and npts + if ql == 1: + outdf['OIB_dt_time']=dt_time + outdf['OIBql_fp_lat']=polys[:,7] + outdf['OIBql_fp_lon']=polys[:,6] + outdf['OIBql_n_pts']=navg[:,0] + if converted == 1: + outdf['OIBql_fp_lon']=deconvert_lon(outdf['OIBql_fp_lon']) + outdf['OIBql_lon_mean']=deconvert_lon(outdf['OIBql_lon_mean']) + nfinite = np.count_nonzero(np.isfinite(outdf['OIBql_mean_fb_mean'])) + else: + outdf['OIB_dt_time']=dt_time + outdf['OIB_fp_lat']=polys[:,7] + outdf['OIB_fp_lon']=polys[:,6] + outdf['OIB_n_pts']=navg[:,0] + if converted == 1: + outdf['OIB_fp_lon']=deconvert_lon(outdf['OIB_fp_lon']) + outdf['OIB_lon_mean']=deconvert_lon(outdf['OIB_lon_mean']) + nfinite = np.count_nonzero(np.isfinite(outdf['OIB_mean_fb_mean'])) + #Let's add in the polygon geometry objects + + outdf['OIB_geometry']=c + #Print out a bit of info about the resampling result and return data. 
+ print "Number of Resampled IceBridge Data Points: ", outdf.shape[0] + print "Number of finite freeboard values: ",nfinite + return outdf + + except ValueError: + pass + +if __name__=='__main__': + import os + import time + try: + ccfileList=[u'/Volumes/Data/OneDrive/CryoVal-SI/GoldenDays_V001/20120315_010262/CS2ORB_010262_20120315T165011_20120315T165246_C001_V001.dat'] + oibfileList=[u'/Volumes/Data/OneDrive/CryoVal-SI/GoldenDays_V001/20120315_010262/IDCSI4_20120315.txt'] + a=time.clock() + for i,ofile in enumerate(oibfileList): + try: + process_OIB(oibfileList[i],ccfileList[i], resample=1,exp_dat=1,version='') + except: + print traceback.format_exc() + pass + b=time.clock() + print "Time to execute the script (s): ",b-a + except: + print traceback.format_exc() + diff --git a/subset_CS2.py b/subset_CS2.py new file mode 100644 index 0000000..c8591b9 --- /dev/null +++ b/subset_CS2.py @@ -0,0 +1,349 @@ +__version__ = 1.0 +__author__ = "Justin Beckers, YorkU/UAlberta" +def subset_CS2(cs2file,ccfile,cc=0,version='V001'): + ''' + subset_cs2 + Takes the CryoSat-2 L2 output from IDL, and the corner coordinate file + produced by pyoval. The CC file is cut to the size of the CryoSat-2 + file. Furthermore, if you give it both an AWI and an ESA CS2 file, it + makes sure that both have the same points and same extent (in case any + differences do exist, we use the ESA CS2 file as the standard (since + the CryoVal project is designed to examine the ESA CS2 data.) + module: subset_cs2 + parameters: + cs2file: the .csv file with the CS2 L2 Baseline C data. + this can be an ESA file or AWI CS2 file, or both. + ccfile: the .dat file of corner coordinates + cc: flag should be set to 1 if you wish to output the + CS2ORB corner coordinate file that is now sized to + the extent of the CS2 file. + version: processing version code string. + Author: Justin Beckers + Author Contact: beckers@ualberta.ca + + Verion: 1.0 + Version Notes: + 1.0: Initial release. 
+ + Release Date: 2016/03/17 + + Usage Notes: + Case 1: Using Main + From commandline/terminal, can simply run: + >>>python subset_CS2.py + This executes the program with the parameters set on + lines 327 - 349. Modify this section in the script file text to + meet your requirements + if __name__=='__main__': + ''' + + + #Import python modules. + import os + import numpy as np + from shapely.geometry import Polygon,Point + from rtree import index + from shapely import speedups + speedups.enable() + + #Add a quick converter to convert longitude to 0/360 from -180/180. + cv = lambda x: float(x)+360.0 if float(x) < 0.0 else float(x) + + #Load in the Cryosat-2 Corner Coordinates file. Convert the Longitude to + #0-360 + ccdata = np.genfromtxt(ccfile,converters={6:cv,8:cv,10:cv,12:cv,14:cv}) + + #Load the Cryosat-2 L2 Data from AWI or ESA. Convert the Coordinates to + #0-360 + if os.path.basename(cs2file).startswith('CS2AWI'): #If it is an AWI file + cs2data=np.genfromtxt(cs2file,delimiter=',',skip_header=1, + missing_values='nan',filling_values=np.nan,converters={1:cv}) + + else: # It is assumed to be an ESA CS L2 file + cs2data=np.genfromtxt(cs2file,delimiter=',',skip_header=1, + missing_values='nan',filling_values=np.nan,converters={10:cv}) + + #The AWI CS L2 data contains NaNs in the Lon,Lat, and Time fields that + #result from the L1B missing waveforms. These NaN values cause problems for + #the polygon generation later on, and we don't really need them as they are + #not in the ESA CS2 output from IDL (already cut out) so they are cut. + foo=[] #A temporary variable + #If of the longitude,latitude, or time are NaN, keep a list of the row. + for j in np.arange(len(cs2data)): + if (np.isnan(cs2data[j,1]) or np.isnan(cs2data[j,2]) or + np.isnan(cs2data[j,0])): + foo.append(j) + cs2data=np.delete(cs2data,foo,axis=0) #Cut the bad rows. 
+ + #Calculating a polygon over the Prime Meridian or the International Date + #Line can be troublesome (polygons sometimes wrap around the earth in the + #wrong direction, depending on your coordinate system). Here we check + #if we cross either the Prime Meridian or the I.D.L., and either convert to + #0/360 coordinate system (already done), or back to -180/180 + converted=1 #Flag to indicate if we needed to convert the longitudes. + if os.path.basename(cs2file).startswith('CS2AWI'): #If AWI CS2 file: + if (np.any(cs2data[:,1] >=359.)==1 and np.any(cs2data[:,1] <=1.)==1 and + np.any(np.logical_and(179.=359.)==0 and np.any(cs2data[:,1] <=1.)==0 + and np.any(np.logical_and( + 179.=359.)==1 and np.any(cs2data[:,1] <=1.)==1 + and np.any(np.logical_and(179.=359.)==0 and np.any(cs2data[:,1] <=1.)==0 + and np.any(np.logical_and(179.=359.) and np.any(cs2data[:,10] <=1.) and + np.any(np.logical_and(179.=359.)==0 and np.any(cs2data[:,10] <=1.)==0 + and np.any(np.logical_and(179.=359.)==1 and np.any(cs2data[:,10] <=1.)==1 + and np.any(np.logical_and(179.=359.)==0 and np.any(cs2data[:,10] <=1.)==0 + and np.any(np.logical_and(179.0.01: #because polygon is crossing prime meridian but + #in the wrong direction. Let's transform the polygon + #coordinates back to -180/180 system, transform the points to + #-180/180 and test for containment of points. + points=list(poly.exterior.coords) + points_x,points_y=zip(*points) + newx=[] #holds the retransformed points + for i in np.arange(len(points_x)): + if points_x[i]>180.0: + newx.append(points_x[i]-360.0) + else: + newx.append(points_x[i]) + points_x=newx + polygon_points[:,0]=points_x[:] + polygon_points[:,1]=points_y[:] + newpoly=Polygon(polygon_points) + #Do the actual test for containment. 
+ if os.path.basename(cs2file).startswith('CS2AWI'): + if (newpoly.contains(Point(deconvert_lon(cs2data[j,1]), + cs2data[j,2]))): + newpolys.append(ccdata[i]) + newcs2.append(cs2data[j]) + outpoly.append(poly) + t=np.append(t,cs2data[j,0]) + else: + newcs2.append(nanline) + newpolys.append(ccdata[i]) + outpoly.append(poly) + pass + else: + if (newpoly.contains(Point(deconvert_lon(cs2data[j,10]), + cs2data[j,9]))==1): + newpolys.append(ccdata[i]) + newcs2.append(cs2data[j]) + outpoly.append(poly) + else: + pass + else: + if os.path.basename(cs2file).startswith('CS2AWI'): + if poly.contains(Point(cs2data[j,1],cs2data[j,2]))==1: + newpolys.append(ccdata[i]) + newcs2.append(cs2data[j]) + outpoly.append(poly) + t=np.append(t,cs2data[j,0]) + else: + pass #So if no points are found in the polygon, then we + #do not write out the polygon. This limits the output + #to the CS2 file extent. So if you wanted to keep each + #polygon, despite no CS2 data, you can do so here. + else: #ESA CS2 file + if poly.contains(Point(cs2data[j,10],cs2data[j,9]))==1: + newpolys.append(ccdata[i]) + newcs2.append(cs2data[j]) + outpoly.append(poly) + else: + pass + + #Now we do some back conversion of lat/lon to -180/180 system + if os.path.basename(cs2file).startswith('CS2AWI'): + if converted==1: + for i in np.arange(len(newcs2)): + newcs2[i][1]=deconvert_lon(newcs2[i][1]) + newpolys[i][6]=deconvert_lon(newpolys[i][6]) + newpolys[i][8]=deconvert_lon(newpolys[i][8]) + newpolys[i][10]=deconvert_lon(newpolys[i][10]) + newpolys[i][12]=deconvert_lon(newpolys[i][12]) + newpolys[i][14]=deconvert_lon(newpolys[i][14]) + else: #Was not converted out of -180/180 so we can leave it + pass + else: + if converted ==1: + for i in np.arange(len(newcs2)) : + newcs2[i][10]=deconvert_lon(newcs2[i][10]) + newpolys[i][6]=deconvert_lon(newpolys[i][6]) + newpolys[i][8]=deconvert_lon(newpolys[i][8]) + newpolys[i][10]=deconvert_lon(newpolys[i][10]) + newpolys[i][12]=deconvert_lon(newpolys[i][12]) + 
newpolys[i][14]=deconvert_lon(newpolys[i][14]) + else:#Was not converted out of -180/180 so we can leave it + pass + + #Print some information to the user. This is a useful check that the file + #was correctly sized. + print "Length of CCFile to start: ", len(ccdata) + print "Length of CS2data: ",len(newcs2)," Length of CCs: ",len(newpolys) + print "Length of CCs == Length of CS2data: ",len(newcs2)==len(newpolys) + print "" + + #Let's write the new corner coordinate file to CS2ORB_*.dat? + if cc==1:#Yes let's write it + if os.path.basename(cs2file).startswith('CS2AWI'): + ccoutfile=os.path.join(os.path.dirname(ccfile), + 'CS2ORB_'+ + os.path.basename(ccfile).split('_')[2].zfill(6)+'_'+ + os.path.basename(cs2file).split('_')[2]+'_'+ + os.path.basename(cs2file).split('_')[3]+'_'+ + os.path.basename(cs2file).split('_')[4][:-4]+ + '_'+version+os.path.basename(ccfile)[-4:]) + else: + + + ccoutfile=os.path.join(os.path.dirname(ccfile), + 'CS2ORB_'+ + os.path.basename(ccfile).split('_')[2].zfill(6)+'_'+ + os.path.basename(cs2file).split('_')[2]+'_'+ + os.path.basename(cs2file).split('_')[3]+'_'+ + os.path.basename(cs2file).split('_')[4][:-4]+ + '_'+version+os.path.basename(ccfile)[-4:]) + with open(ccoutfile,'w') as ccout: + np.savetxt(ccout,newpolys,delimiter=' ',fmt='%14.8f', newline='\n') + + #Setup the output CS2 file names. 
def deconvert_lon(x):
    '''
    Map longitudes given in the 0-360 convention back onto -180/180.

    Values strictly above 180 are shifted down by 360 degrees; all
    other values pass through unchanged.  Accepts scalars or numpy
    arrays and always returns a numpy result.
    '''
    import numpy as np
    return np.where(x > 180.0, x - 360.0, x)
+ ''' + import os + import traceback + + ccfileList=[u'/Volumes/Data/Projects/CryoVal-SI_CCN/20140326_021016/REFERENCE/cs2_reforbit_021016_20140326T171524_20140326T173439_C001.dat'] + cs2fileList=["/Volumes/Data/Projects/CryoVal-SI_CCN/20140326_021016/CS2L2I_21016_20140326T172805_20140326T172902_C001.csv"] + + for i,cfile in enumerate(ccfileList): + print "File: ",i,": ","GoldenDay: ",os.path.dirname(cfile).split("/")[3] + print "Orbit File: ",os.path.basename(cfile) + print "CS2 file: ",os.path.basename(cs2fileList[i]) + orb = os.path.basename(ccfileList[i]).split('_')[2] + print "Orbit: ",orb + try: + subset_CS2(cs2fileList[i],ccfileList[i],cc=1,version='A_V001') + except: + print traceback.format_exc() + pass \ No newline at end of file diff --git a/subset_resdata.py b/subset_resdata.py new file mode 100644 index 0000000..3fe97e9 --- /dev/null +++ b/subset_resdata.py @@ -0,0 +1,211 @@ +__version__= 1.1 +__author__= "Justin Beckers, YorkU/UAlberta" + +def subset_resdata(ccfile,subdatafile,version="A_V001"): + '''Takes the resampled airborne data (cs2_resdata_ABCDEFn*) and cuts and + extends it to the extent of the relevent CryoSat-2 file.The input ccfile + is therefore the CS2ORB files produced by subset_cs2 code that ensures that + the ESA and AWI files are of the size extent. + + Author: Justin Beckers + Author Contact: beckers@ualberta.ca + + Verion: 1.1 + Version Notes: + 1.0: Initial release. + 1.1: Added in check for more than one resampled point per polygon. + This has been done post CryoVal-SI analysis work. + Release Date: 2016/03/14 + + Inputs: + ccfile: CryoSat-2 corner coordinate file (CS2ORB*, cs2_reforbit*) + subdatafile: File to be subset to ccfile extent. + Version: file version output code string. + Outputs: + AEMSIT* + ALSLFB* + ASRRFB* + ASRSSH* + ASRSSA* + + Usage Notes: + Case 1: Using Main + From commandline/terminal, can simply run: + >>>python susbset_resdata.py + This executes the program with the parameters set on + lines 205 - 213. 
Modify this section in the script file text to + meet your requirements + if __name__=='__main__': + Case 2: Import function: + import subset_resdata + subset_resdata(ccfile,subdatafile,version="versionstr") + ''' + + + #Load in the required python libraries + import os + import numpy as np + import pandas as pd + from shapely.geometry import Point,Polygon + from shapely import speedups + from rtree import index + import re + speedups.enable() #enable faster analysis of polygons and points with shapely + + #Convert from -180/180 to 0-360 longitude systems + cv = lambda x: float(x)+360.0 if float(x) <0.0 else float(x) + + #Read the corner coordinate file. + ccdata = np.genfromtxt(ccfile,converters={6:cv,8:cv,10:cv,12:cv,14:cv}) + + #Quick message just to indicate that + print "Assumes that airborne data has already been resampled."\ + " If not, you need to run pyoval resampling scripts first." + + #Extract the type of airborne data from the subdatafile filename. + t = '_'+os.path.basename(subdatafile).split('_')[2]+'_' + v = re.sub("[^A-Za-z]","",t) + z='_'+v+'_' + + #Use the type of airborne data extracted on lines 47-49 above to setup the + #output filenames and field names. 
+ if "asr" in v: + if z=='_asr_': + z='_fb_' + x="ASRRFB_" + elif z=='_asrssha_': + z='_ssha_' + x='ASRSSA_' + elif z=='_asrssh_': + z='_ssh_' + x='ASRSSH_' + nam=['ASR'+z+'Year','ASR'+z+'Month','ASR'+z+'Day','ASR'+z+'Hour', + 'ASR'+z+'Minute','ASR'+z+'Seconds','ASR'+z+'lon','ASR'+z+'lat', + 'ASR'+z+'dT','ASR'+z+'nazg','ASR'+z+'Mean','ASR'+z+'Median', + 'ASR'+z+'stdez','ASR'+z+'Mode'] + elif "als" in v: + if z=='_als_': + z='_fb_' + x="ALSLFB_" + elif z=='_alsssha_': + z='_ssha_' + x='ALSSSA_' + elif z=='_alsssh_': + z='_ssh_' + x='ALSSSH_' + nam=['ALS'+z+'Year','ALS'+z+'Month','ALS'+z+'Day','ALS'+z+'Hour', + 'ALS'+z+'Minute','ALS'+z+'Seconds','ALS'+z+'lon','ALS'+z+'lat', + 'ALS'+z+'dT','ALS'+z+'nazg','ALS'+z+'Mean','ALS'+z+'Median', + 'ALS'+z+'stdez','ALS'+z+'Mode'] + elif "aem" in v: + nam=['AEM_Year','AEM_Month','AEM_Day','AEM_Hour', + 'AEM_Minute','AEM_Seconds','AEM_lon','AEM_lat', + 'AEM_dT','AEM_navg','AEM_Mean','AEM_Median', + 'AEM_stdev','AEM_Mode'] + else: + print "Error input file has not been resampled, or is unsupported." + + + #Read in the subdatafile with pandas. Convert the longitude field to 0-360. + #Here is also where we could expand future output to include the + #probability distribution by setting usecols=None. + subdata=pd.read_table(subdatafile,sep='\s+',header=None, + usecols=np.arange(14),converters={6:cv},index_col=False, + na_values=['nan'],names=nam) + + #Setup the spatial index and relationship tree. + idx=index.Index() + p=[] #empty list to hold the polygons + n_records=len(ccdata) #Number of records in the corner coordinate file. 
+ #Empty array to hold results + newsub=np.empty((n_records,len(subdata.iloc[0])))*np.nan + #A line of nans the size of the input airborne resampled data + nanline=np.empty(len(subdata.iloc[0]))*np.nan + n_polygon_points=5 #number of points in each polygon + #Empty array to hold the polygon points + polygon_points = np.ndarray(shape=(n_polygon_points, 2), dtype=np.float64) + #For every coorner coordinate, generate the polygon. + for i in np.arange(n_records): + fp_x = [ccdata[i,8],ccdata[i,10],ccdata[i,14],ccdata[i,12], + ccdata[i,8]] + fp_y = [ccdata[i,9],ccdata[i,11],ccdata[i,15],ccdata[i,13], + ccdata[i,9]] + for j in np.arange(n_polygon_points): + polygon_points[j,0]=fp_x[j] + polygon_points[j,1]=fp_y[j] + p.append(Polygon(polygon_points)) + #Generate a spatial index + for j in np.arange(len(subdata[nam[6]])): + idx.insert(j,Point(subdata.iloc[j][nam[6]], + subdata.iloc[j][nam[7]]).bounds) + ''' + There should only be one resampled datapoint for each footprint. + V1.1: Let's test that this is true, and let the user know. During CryoVal- + SI several concatenated files (include ALS files split into 5ths during + resampling) were tested and none showed overlapping footprints. ASR files + were also tested. + + Since this code is used to subset the resampled data to the extent of + other data, we want to keep a record for each polygon. Polygons with no + resampled data should get a NaN value. + ''' + newpts=[] #holds the rows where there are airborne points within a polygon + out=[] #holds the polygon where airborne data contains points. 
+ nanlines=[] #holds the rows with no airborne data points in a polygon + for i,poly in enumerate(p): + n_pts=0 + for j in idx.intersection(poly.bounds): + if poly.contains(Point(subdata.iloc[j][nam[6]], + subdata.iloc[j][nam[7]]))==1: + n_pts+=1 + newpts.append(j) + out.append(i) + + #If no points in polygon, we will fill it with nans, so keep track + if n_pts == 0: + nanlines.append(i) + elif n_pts >1: + print "Hmm, found %i points in this polygon: %i " %n_pts,i + + newsub[out]=subdata.iloc[newpts] + newsub[nanlines]=nanline #not entirely sure this is needed. + outdf=pd.DataFrame(data=newsub,columns=nam) + outdf[nam[6]]=deconvert_lon(outdf[nam[6]]) #convert the longitude back. + + #Print some statistics so we know that everything is okay. + nfinite = np.count_nonzero(np.isfinite(outdf[nam[10]])) + print v+" file: ",os.path.basename(subdatafile) + print "CC file: ",os.path.basename(ccfile) + print "Number of input "+v+" data points: ",len(subdata) + print "Number of footprints in Orbit file: ",len(ccdata) + print "Number of output "+v+" Data Points: ", outdf.shape[0] + print "Number of valid "+v+" footprints: ",nfinite + + #Create the output filename + outfile=os.path.join(os.path.dirname(subdatafile),x+ + os.path.basename(ccfile).split('_')[1]+'_'+ + os.path.basename(ccfile).split('_')[2]+'_'+ + os.path.basename(ccfile).split('_')[3]+'_'+ + os.path.basename(ccfile).split('_')[4]+'_'+version+ + '.csv') + print "Output file name: ",os.path.basename(outfile) + + #Write out the data. + outdf.to_csv(path_or_buf=outfile,sep=',',na_rep='nan',header=True, + index=False,mode='w',float_format='%14.8f',index_label='ID') + +def deconvert_lon(x): + '''Converts from 0-360 to -180/180 longitude systems''' + import numpy as np + x=np.where(x>180.0,x-360.0,x) + return x + +if __name__=='__main__': + import time #Used to roughly time the run time of the code. 
+ + ccfile=['/Volumes/Data/Projects/CryoVal-SI/GoldenDays_V001/20110415_005399/CS2ORB_005399_20110415T142828_20110415T143357_C001_A_V001.dat'] + alsfile=['/Volumes/Data/Projects/CryoVal-SI/GoldenDays_V001/20110415_005399/ASIRAS/cs2_resdata_asr_005399_20110415T142828_20110415T142828_concat.csv'] + a=time.clock() + subset_resdata(ccfile[0],alsfile[0],version="A_V001") + b=time.clock() + print "Time to run the script (s): ",b-a \ No newline at end of file