diff --git a/examples/.ipynb_checkpoints/agc_following-checkpoint.py b/examples/.ipynb_checkpoints/agc_following-checkpoint.py index 082c37f..577f22b 100644 --- a/examples/.ipynb_checkpoints/agc_following-checkpoint.py +++ b/examples/.ipynb_checkpoints/agc_following-checkpoint.py @@ -5,73 +5,106 @@ import plotly.express as px # Specify where we will be caching the raw AEMO data. -raw_data_cache = 'C:/Users/nick/Desktop/cache' +raw_data_cache = "C:/Users/nick/Desktop/cache" # Time window to pull data from. -start_time = '2021/04/27 00:00:00' -end_time = '2021/04/28 00:00:00' +start_time = "2021/04/27 00:00:00" +end_time = "2021/04/28 00:00:00" # Download the latest FCAS causer pays elements file. The update_static_file=True argument forces nemosis to # download a new copy of the file from AEMO even if a copy already exists in the cache. -fcas_causer_pays_elements = static_table(table_name='ELEMENTS_FCAS_4_SECOND', raw_data_location=raw_data_cache, - update_static_file=True) +fcas_causer_pays_elements = static_table( + table_name="ELEMENTS_FCAS_4_SECOND", + raw_data_location=raw_data_cache, + update_static_file=True, +) # Using filtering and manual inspection, find which fcas element numbers belong to Hornsdale Power Reserve. -elements_for_honsdale_power_reserve = \ - fcas_causer_pays_elements[fcas_causer_pays_elements['EMSNAME'].str.contains('HPR')] +elements_for_honsdale_power_reserve = fcas_causer_pays_elements[ + fcas_causer_pays_elements["EMSNAME"].str.contains("HPR") +] # Check which variable numbers we will need. -fcas_causer_pays_elements = static_table(table_name='ELEMENTS_FCAS_4_SECOND', raw_data_location=raw_data_cache, - update_static_file=True) - -scada_4s_resolution = dynamic_data_compiler(start_time, end_time, table_name='FCAS_4_SECOND', - raw_data_location=raw_data_cache, - filter_cols=['ELEMENTNUMBER', 'VARIABLENUMBER'], - filter_values=([330, 331], [2, 5]), fformat='parquet') - -scada_5min_resolution = dynamic_data_compiler(start_time, end_time, 'DISPATCHLOAD', raw_data_cache, - select_columns=['SETTLEMENTDATE', 'DUID', 'INITIALMW', - 'TOTALCLEARED'], - filter_cols=['DUID'], filter_values=(['HPRG1', 'HPRL1'],)) - -elements = { - 330: 'HPRG1', - 331: 'HPRL1' -} - -variables = { - 2: 'scada_value', - 5: 'regulation_target' -} - -scada_4s_resolution['DUID'] = scada_4s_resolution['ELEMENTNUMBER'].apply(lambda x: elements[x]) -scada_4s_resolution['variable'] = scada_4s_resolution['VARIABLENUMBER'].apply(lambda x: variables[x]) - -scada_4s_resolution = scada_4s_resolution.pivot(index=['TIMESTAMP', 'DUID'], columns='variable', values='VALUE') +fcas_causer_pays_elements = static_table( + table_name="ELEMENTS_FCAS_4_SECOND", + raw_data_location=raw_data_cache, + update_static_file=True, +) + +scada_4s_resolution = dynamic_data_compiler( + start_time, + end_time, + table_name="FCAS_4_SECOND", + raw_data_location=raw_data_cache, + filter_cols=["ELEMENTNUMBER", "VARIABLENUMBER"], + filter_values=([330, 331], [2, 5]), + fformat="parquet", +) + +scada_5min_resolution = dynamic_data_compiler( + start_time, + end_time, + "DISPATCHLOAD", + raw_data_cache, + select_columns=["SETTLEMENTDATE", "DUID", "INITIALMW", "TOTALCLEARED"], + filter_cols=["DUID"], + filter_values=(["HPRG1", "HPRL1"],), +) + +elements = {330: "HPRG1", 331: "HPRL1"} + +variables = {2: "scada_value", 5: "regulation_target"} + +scada_4s_resolution["DUID"] = scada_4s_resolution["ELEMENTNUMBER"].apply( + lambda x: elements[x] +) +scada_4s_resolution["variable"] = scada_4s_resolution["VARIABLENUMBER"].apply( + lambda x: 
variables[x] +) + +scada_4s_resolution = scada_4s_resolution.pivot( + index=["TIMESTAMP", "DUID"], columns="variable", values="VALUE" +) scada_4s_resolution.reset_index(inplace=True) -scada = pd.merge_asof(scada_4s_resolution, scada_5min_resolution, left_on='TIMESTAMP', - right_on='SETTLEMENTDATE', by='DUID', direction='forward') - -scada['fraction_ramp_complete'] = 1 - ((scada['SETTLEMENTDATE'] - scada['TIMESTAMP']) / timedelta(minutes=5)) - -scada['linear_ramp_target'] = scada['INITIALMW'] + \ - (scada['TOTALCLEARED'] - scada['INITIALMW']) * scada['fraction_ramp_complete'] - -scada['linear_ramp_target'] = np.where(scada['DUID'] == 'HPRL1', -1 * scada['linear_ramp_target'], - scada['linear_ramp_target']) -scada['scada_value'] = np.where(scada['DUID'] == 'HPRL1', -1 * scada['scada_value'], - scada['scada_value']) -scada['regulation_target'] = np.where(scada['DUID'] == 'HPRL1', -1 * scada['regulation_target'], - scada['regulation_target']) - -scada = scada.groupby('TIMESTAMP', as_index=False).agg( - {'linear_ramp_target': 'sum', 'scada_value': 'sum', 'regulation_target': 'sum'}) - -scada['target'] = scada['linear_ramp_target'] + scada['regulation_target'] - -fig = px.line(scada, x='TIMESTAMP', y=['target', 'scada_value']) +scada = pd.merge_asof( + scada_4s_resolution, + scada_5min_resolution, + left_on="TIMESTAMP", + right_on="SETTLEMENTDATE", + by="DUID", + direction="forward", +) + +scada["fraction_ramp_complete"] = 1 - ( + (scada["SETTLEMENTDATE"] - scada["TIMESTAMP"]) / timedelta(minutes=5) +) + +scada["linear_ramp_target"] = ( + scada["INITIALMW"] + + (scada["TOTALCLEARED"] - scada["INITIALMW"]) * scada["fraction_ramp_complete"] +) + +scada["linear_ramp_target"] = np.where( + scada["DUID"] == "HPRL1", + -1 * scada["linear_ramp_target"], + scada["linear_ramp_target"], +) +scada["scada_value"] = np.where( + scada["DUID"] == "HPRL1", -1 * scada["scada_value"], scada["scada_value"] +) +scada["regulation_target"] = np.where( + scada["DUID"] == "HPRL1", + -1 * scada["regulation_target"], + scada["regulation_target"], +) + +scada = scada.groupby("TIMESTAMP", as_index=False).agg( + {"linear_ramp_target": "sum", "scada_value": "sum", "regulation_target": "sum"} +) + +scada["target"] = scada["linear_ramp_target"] + scada["regulation_target"] + +fig = px.line(scada, x="TIMESTAMP", y=["target", "scada_value"]) fig.show() - - diff --git a/nemosis/custom_errors.py b/nemosis/custom_errors.py index bb6e16a..6554f00 100644 --- a/nemosis/custom_errors.py +++ b/nemosis/custom_errors.py @@ -11,4 +11,4 @@ class DataMismatchError(Exception): class DataFormatError(Exception): - pass \ No newline at end of file + pass diff --git a/nemosis/custom_tables.py b/nemosis/custom_tables.py index 7c42432..c39d7df 100644 --- a/nemosis/custom_tables.py +++ b/nemosis/custom_tables.py @@ -5,131 +5,160 @@ from nemosis import defaults, data_fetch_methods, filters -def fcas4s_scada_match(start_time, end_time, table_name, raw_data_location, select_columns=None, filter_cols=None, - filter_values=None): +def fcas4s_scada_match( + start_time, + end_time, + table_name, + raw_data_location, + select_columns=None, + filter_cols=None, + filter_values=None, +): # Pull in the 4 second fcas data. - table_name_fcas4s = 'FCAS_4_SECOND' + table_name_fcas4s = "FCAS_4_SECOND" fcas4s = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table_name_fcas4s, raw_data_location) + start_time, end_time, table_name_fcas4s, raw_data_location + ) # Pull in the 4 second fcas variable types. 
- table_name_variable_types = 'VARIABLES_FCAS_4_SECOND' - fcas4s_variable_types = data_fetch_methods.static_table(table_name_variable_types, - raw_data_location) + table_name_variable_types = "VARIABLES_FCAS_4_SECOND" + fcas4s_variable_types = data_fetch_methods.static_table( + table_name_variable_types, raw_data_location + ) # Select the variable types that measure MW on an interconnector and Gen_MW from a dispatch unit. - fcas4s_variable_types = fcas4s_variable_types[fcas4s_variable_types['VARIABLETYPE'].isin([ - 'MW', 'Gen_MW'])] - fcas4s = fcas4s[fcas4s['VARIABLENUMBER'].isin( - fcas4s_variable_types['VARIABLENUMBER'])] + fcas4s_variable_types = fcas4s_variable_types[ + fcas4s_variable_types["VARIABLETYPE"].isin(["MW", "Gen_MW"]) + ] + fcas4s = fcas4s[ + fcas4s["VARIABLENUMBER"].isin(fcas4s_variable_types["VARIABLENUMBER"]) + ] # Select just the fcas 4 second data variable columns that we need. - fcas4s = fcas4s.loc[:, ('TIMESTAMP', 'ELEMENTNUMBER', 'VALUE')] + fcas4s = fcas4s.loc[:, ("TIMESTAMP", "ELEMENTNUMBER", "VALUE")] # Convert the fcas MW measured values to numeric type. - fcas4s['VALUE'] = pd.to_numeric(fcas4s['VALUE']) + fcas4s["VALUE"] = pd.to_numeric(fcas4s["VALUE"]) # Rename the 4 second measurements to the timestamp of the start of the 5 min interval i.e. round down to the nearest # 5 min interval. - fcas4s = fcas4s[(fcas4s['TIMESTAMP'].dt.minute.isin(list(range(0, 60, 5)))) & - (fcas4s['TIMESTAMP'].dt.second < 20)] - fcas4s['TIMESTAMP'] = fcas4s['TIMESTAMP'].apply( - lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute)) + fcas4s = fcas4s[ + (fcas4s["TIMESTAMP"].dt.minute.isin(list(range(0, 60, 5)))) + & (fcas4s["TIMESTAMP"].dt.second < 20) + ] + fcas4s["TIMESTAMP"] = fcas4s["TIMESTAMP"].apply( + lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute) + ) # Pull in the dispatch unit scada data. - table_name_scada = 'DISPATCH_UNIT_SCADA' + table_name_scada = "DISPATCH_UNIT_SCADA" scada = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table_name_scada, raw_data_location) - scada['SETTLEMENTDATE'] = scada['SETTLEMENTDATE'] - timedelta(minutes=5) - scada = scada.loc[:, ('SETTLEMENTDATE', 'DUID', 'SCADAVALUE')] - scada.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE'] - scada['SCADAVALUE'] = pd.to_numeric(scada['SCADAVALUE']) + start_time, end_time, table_name_scada, raw_data_location + ) + scada["SETTLEMENTDATE"] = scada["SETTLEMENTDATE"] - timedelta(minutes=5) + scada = scada.loc[:, ("SETTLEMENTDATE", "DUID", "SCADAVALUE")] + scada.columns = ["SETTLEMENTDATE", "MARKETNAME", "SCADAVALUE"] + scada["SCADAVALUE"] = pd.to_numeric(scada["SCADAVALUE"]) # Pull in the interconnector scada data and use the intervention records where they exist. 
- table_name_inter_flow = 'DISPATCHINTERCONNECTORRES' - inter_flows = data_fetch_methods.dynamic_data_compiler(start_time, end_time, table_name_inter_flow, - raw_data_location) - inter_flows['METEREDMWFLOW'] = pd.to_numeric(inter_flows['METEREDMWFLOW']) - inter_flows = inter_flows.sort_values('INTERVENTION') + table_name_inter_flow = "DISPATCHINTERCONNECTORRES" + inter_flows = data_fetch_methods.dynamic_data_compiler( + start_time, end_time, table_name_inter_flow, raw_data_location + ) + inter_flows["METEREDMWFLOW"] = pd.to_numeric(inter_flows["METEREDMWFLOW"]) + inter_flows = inter_flows.sort_values("INTERVENTION") inter_flows = inter_flows.groupby( - ['SETTLEMENTDATE', 'INTERCONNECTORID'], as_index=False).last() - inter_flows = inter_flows.loc[:, - ('SETTLEMENTDATE', 'INTERCONNECTORID', 'METEREDMWFLOW')] - inter_flows['SETTLEMENTDATE'] = inter_flows['SETTLEMENTDATE'] - \ - timedelta(minutes=5) - inter_flows.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE'] + ["SETTLEMENTDATE", "INTERCONNECTORID"], as_index=False + ).last() + inter_flows = inter_flows.loc[ + :, ("SETTLEMENTDATE", "INTERCONNECTORID", "METEREDMWFLOW") + ] + inter_flows["SETTLEMENTDATE"] = inter_flows["SETTLEMENTDATE"] - timedelta(minutes=5) + inter_flows.columns = ["SETTLEMENTDATE", "MARKETNAME", "SCADAVALUE"] # Combine scada data from interconnectors and dispatch units. scada_elements = pd.concat([scada, inter_flows], sort=False) # Merge the fcas and scada data based on time stamp, this leads every scada element to be joined to every fcas # element, which then allows them to be compared. - profile_comp = pd.merge(fcas4s, scada_elements, 'inner', - left_on='TIMESTAMP', right_on='SETTLEMENTDATE') + profile_comp = pd.merge( + fcas4s, scada_elements, "inner", left_on="TIMESTAMP", right_on="SETTLEMENTDATE" + ) # Calculate the error between each measurement. - profile_comp['ERROR'] = profile_comp['VALUE'] - profile_comp['SCADAVALUE'] - profile_comp['ERROR'] = profile_comp['ERROR'].abs() + profile_comp["ERROR"] = profile_comp["VALUE"] - profile_comp["SCADAVALUE"] + profile_comp["ERROR"] = profile_comp["ERROR"].abs() # Choose the fcas value that best matches the scada value during the 5 min interval. - profile_comp = profile_comp.sort_values('ERROR') + profile_comp = profile_comp.sort_values("ERROR") error_comp = profile_comp.groupby( - ['MARKETNAME', 'ELEMENTNUMBER', 'TIMESTAMP'], as_index=False).first() + ["MARKETNAME", "ELEMENTNUMBER", "TIMESTAMP"], as_index=False + ).first() # Aggregate the error to compare each scada and fcas element potential match. error_comp = error_comp.groupby( - ['MARKETNAME', 'ELEMENTNUMBER'], as_index=False).sum() + ["MARKETNAME", "ELEMENTNUMBER"], as_index=False + ).sum() # Sort the comparisons based on aggregate error. - error_comp = error_comp.sort_values('ERROR') + error_comp = error_comp.sort_values("ERROR") # Drop duplicates of element numbers and scada element names, keeping the record for each with the least error. # Don't include units with 0 values for scada. - best_matches_scada = error_comp[error_comp['SCADAVALUE'].abs() > 0] + best_matches_scada = error_comp[error_comp["SCADAVALUE"].abs() > 0] best_matches_scada = best_matches_scada.drop_duplicates( - 'ELEMENTNUMBER', keep='first') - best_matches_scada = best_matches_scada.drop_duplicates( - 'MARKETNAME', keep='first') + "ELEMENTNUMBER", keep="first" + ) + best_matches_scada = best_matches_scada.drop_duplicates("MARKETNAME", keep="first") # Remove fcas elements where a match only occurred because both fcas and scada showed no dispatch. 
- best_matches_scada['ELEMENTNUMBER'] = pd.to_numeric( - best_matches_scada['ELEMENTNUMBER']) - best_matches_scada = best_matches_scada.sort_values('ELEMENTNUMBER') - best_matches_scada['ELEMENTNUMBER'] = best_matches_scada['ELEMENTNUMBER'].astype( - str) + best_matches_scada["ELEMENTNUMBER"] = pd.to_numeric( + best_matches_scada["ELEMENTNUMBER"] + ) + best_matches_scada = best_matches_scada.sort_values("ELEMENTNUMBER") + best_matches_scada["ELEMENTNUMBER"] = best_matches_scada["ELEMENTNUMBER"].astype( + str + ) # Give error as a percentage. - best_matches_scada['ERROR'] = best_matches_scada['ERROR'] / \ - best_matches_scada['SCADAVALUE'] + best_matches_scada["ERROR"] = ( + best_matches_scada["ERROR"] / best_matches_scada["SCADAVALUE"] + ) # drop matches with error greater than 100 % - best_matches_scada = best_matches_scada[( - best_matches_scada['ERROR'] < 1) & (best_matches_scada['ERROR'] > -1)] + best_matches_scada = best_matches_scada[ + (best_matches_scada["ERROR"] < 1) & (best_matches_scada["ERROR"] > -1) + ] - best_matches_scada = best_matches_scada.loc[:, - ('ELEMENTNUMBER', 'MARKETNAME', 'ERROR')] + best_matches_scada = best_matches_scada.loc[ + :, ("ELEMENTNUMBER", "MARKETNAME", "ERROR") + ] if select_columns is not None: best_matches_scada = best_matches_scada.loc[:, select_columns] if filter_cols is not None: best_matches_scada = filters.filter_on_column_value( - best_matches_scada, filter_cols, filter_values) + best_matches_scada, filter_cols, filter_values + ) return best_matches_scada def capacity_factor(capacity_and_scada_grouped): - scada_data = np.where(capacity_and_scada_grouped['SCADAVALUE'].isnull(), 0.0, - capacity_and_scada_grouped['SCADAVALUE']) - cf = scada_data / capacity_and_scada_grouped['MAXCAPACITY'] + scada_data = np.where( + capacity_and_scada_grouped["SCADAVALUE"].isnull(), + 0.0, + capacity_and_scada_grouped["SCADAVALUE"], + ) + cf = scada_data / capacity_and_scada_grouped["MAXCAPACITY"] cf = cf.mean() return cf def volume(capacity_and_scada_grouped): # Assumes 5 min Scada data - return capacity_and_scada_grouped['SCADAVALUE'].sum()/12 + return capacity_and_scada_grouped["SCADAVALUE"].sum() / 12 def volume_weighted_average_price(output, prices): @@ -141,41 +170,48 @@ def volume_weighted_average_price(output, prices): def volume_weighted_average_trading_price(capacity_and_scada_grouped): - return volume_weighted_average_price(capacity_and_scada_grouped['TRADING_TOTALCLEARED'], - capacity_and_scada_grouped['TRADING_RRP']) + return volume_weighted_average_price( + capacity_and_scada_grouped["TRADING_TOTALCLEARED"], + capacity_and_scada_grouped["TRADING_RRP"], + ) def volume_weighted_average_spot_price(capacity_and_scada_grouped): - return volume_weighted_average_price(capacity_and_scada_grouped['SCADAVALUE'], - capacity_and_scada_grouped['DISPATCH_RRP']) + return volume_weighted_average_price( + capacity_and_scada_grouped["SCADAVALUE"], + capacity_and_scada_grouped["DISPATCH_RRP"], + ) def alt_volume_weighted_average_trading_price(capacity_and_scada_grouped): - return volume_weighted_average_price(capacity_and_scada_grouped['TRADING_TOTALCLEARED'], - capacity_and_scada_grouped['TRADING_VWAP']) + return volume_weighted_average_price( + capacity_and_scada_grouped["TRADING_TOTALCLEARED"], + capacity_and_scada_grouped["TRADING_VWAP"], + ) def performance_at_nodal_peak(capacity_and_scada_grouped): - index_max = capacity_and_scada_grouped['TOTALDEMAND'].idxmax() + index_max = capacity_and_scada_grouped["TOTALDEMAND"].idxmax() try: - output_at_peak = 
capacity_and_scada_grouped['SCADAVALUE'][index_max] + output_at_peak = capacity_and_scada_grouped["SCADAVALUE"][index_max] except: x = 1 if np.isnan(output_at_peak): output_at_peak = 0 - performance = output_at_peak / \ - capacity_and_scada_grouped['MAXCAPACITY'][index_max] + performance = output_at_peak / capacity_and_scada_grouped["MAXCAPACITY"][index_max] return performance def capacity_factor_over_90th_percentile_of_nodal_demand(capacity_and_scada_grouped): - data_entries = len(capacity_and_scada_grouped['TOTALDEMAND']) - enteries_in_90th_percentile = math.ceil(data_entries/10) + data_entries = len(capacity_and_scada_grouped["TOTALDEMAND"]) + enteries_in_90th_percentile = math.ceil(data_entries / 10) capacity_and_scada_grouped = capacity_and_scada_grouped.sort_values( - 'TOTALDEMAND', ascending=False) - capacity_and_scada_grouped = capacity_and_scada_grouped.reset_index( - drop=True) - capacity_and_scada_grouped = capacity_and_scada_grouped.iloc[:enteries_in_90th_percentile, :] + "TOTALDEMAND", ascending=False + ) + capacity_and_scada_grouped = capacity_and_scada_grouped.reset_index(drop=True) + capacity_and_scada_grouped = capacity_and_scada_grouped.iloc[ + :enteries_in_90th_percentile, : + ] cf = capacity_factor(capacity_and_scada_grouped) return cf @@ -185,227 +221,375 @@ def stats_for_group(capacity_and_scada_grouped): v = volume(capacity_and_scada_grouped) tvwap = volume_weighted_average_trading_price(capacity_and_scada_grouped) dvwap = volume_weighted_average_spot_price(capacity_and_scada_grouped) - #alttvwap = alt_volume_weighted_average_trading_price(capacity_and_scada_grouped) + # alttvwap = alt_volume_weighted_average_trading_price(capacity_and_scada_grouped) peak = performance_at_nodal_peak(capacity_and_scada_grouped) peak_percentile = capacity_factor_over_90th_percentile_of_nodal_demand( - capacity_and_scada_grouped) - month = list(capacity_and_scada_grouped['MONTH'])[0] - duid = list(capacity_and_scada_grouped['DUID'])[0] - cf_df = pd.DataFrame({'Month': [month], 'DUID': [duid], 'CapacityFactor': [cf], 'Volume': [v], - 'TRADING_VWAP': [tvwap], 'DISPATCH_VWAP': [dvwap], 'NodalPeakCapacityFactor': peak, - 'Nodal90thPercentileCapacityFactor': [peak_percentile], - # 'ALT_TRADING_VWAP': alttvwap - }) + capacity_and_scada_grouped + ) + month = list(capacity_and_scada_grouped["MONTH"])[0] + duid = list(capacity_and_scada_grouped["DUID"])[0] + cf_df = pd.DataFrame( + { + "Month": [month], + "DUID": [duid], + "CapacityFactor": [cf], + "Volume": [v], + "TRADING_VWAP": [tvwap], + "DISPATCH_VWAP": [dvwap], + "NodalPeakCapacityFactor": peak, + "Nodal90thPercentileCapacityFactor": [peak_percentile], + # 'ALT_TRADING_VWAP': alttvwap + } + ) return cf_df def stats_by_month_and_plant(capacity_and_scada): # - timedelta(seconds=1) - capacity_and_scada['effective_set_date'] = capacity_and_scada['SETTLEMENTDATE'] - capacity_and_scada['MONTH'] = capacity_and_scada['effective_set_date'].dt.year.astype(str) + '-' + \ - capacity_and_scada['effective_set_date'].dt.month.astype( - str).str.zfill(2) + capacity_and_scada["effective_set_date"] = capacity_and_scada["SETTLEMENTDATE"] + capacity_and_scada["MONTH"] = ( + capacity_and_scada["effective_set_date"].dt.year.astype(str) + + "-" + + capacity_and_scada["effective_set_date"].dt.month.astype(str).str.zfill(2) + ) capacity_factors = capacity_and_scada.groupby( - ['MONTH', 'DUID'], as_index=False).apply(stats_for_group) + ["MONTH", "DUID"], as_index=False + ).apply(stats_for_group) return capacity_factors -def 
merge_tables_for_plant_stats(timeseries_df, gen_max_cap, gen_region, scada, trading_load, dispatch_price, - trading_price, region_summary): - gen_max_cap = gen_max_cap.sort_values('EFFECTIVEDATE') - scada = scada.sort_values('SETTLEMENTDATE') +def merge_tables_for_plant_stats( + timeseries_df, + gen_max_cap, + gen_region, + scada, + trading_load, + dispatch_price, + trading_price, + region_summary, +): + gen_max_cap = gen_max_cap.sort_values("EFFECTIVEDATE") + scada = scada.sort_values("SETTLEMENTDATE") merged_data = pd.merge_asof( - scada, gen_max_cap, left_on='SETTLEMENTDATE', right_on='EFFECTIVEDATE', by='DUID') - gen_region = gen_region.sort_values('START_DATE') + scada, + gen_max_cap, + left_on="SETTLEMENTDATE", + right_on="EFFECTIVEDATE", + by="DUID", + ) + gen_region = gen_region.sort_values("START_DATE") merged_data = pd.merge_asof( - merged_data, gen_region, left_on='SETTLEMENTDATE', right_on='START_DATE', by='DUID') - merged_data = merged_data[~merged_data['REGIONID'].isnull()] - merged_data = pd.merge(merged_data, trading_load, - 'left', on=['SETTLEMENTDATE', 'DUID']) - merged_data = pd.merge(merged_data, dispatch_price, 'left', on=[ - 'SETTLEMENTDATE', 'REGIONID']) - merged_data = pd.merge(merged_data, trading_price, 'left', on=[ - 'SETTLEMENTDATE', 'REGIONID']) - merged_data = pd.merge(merged_data, region_summary, 'left', on=[ - 'SETTLEMENTDATE', 'REGIONID']) - merged_data = merged_data.loc[:, ('DUID', 'EFFECTIVEDATE', 'REGIONID', 'SETTLEMENTDATE', 'MAXCAPACITY', - 'SCADAVALUE', 'TOTALCLEARED', 'RRP_x', 'RRP_y', 'TOTALDEMAND')] - merged_data.columns = ['DUID', 'DUDETAIL_EFFECTIVEDATE', 'REGIONID', 'SETTLEMENTDATE', 'MAXCAPACITY', 'SCADAVALUE', - 'TRADING_TOTALCLEARED', 'DISPATCH_RRP', 'TRADING_RRP', 'TOTALDEMAND'] + merged_data, + gen_region, + left_on="SETTLEMENTDATE", + right_on="START_DATE", + by="DUID", + ) + merged_data = merged_data[~merged_data["REGIONID"].isnull()] + merged_data = pd.merge( + merged_data, trading_load, "left", on=["SETTLEMENTDATE", "DUID"] + ) + merged_data = pd.merge( + merged_data, dispatch_price, "left", on=["SETTLEMENTDATE", "REGIONID"] + ) + merged_data = pd.merge( + merged_data, trading_price, "left", on=["SETTLEMENTDATE", "REGIONID"] + ) + merged_data = pd.merge( + merged_data, region_summary, "left", on=["SETTLEMENTDATE", "REGIONID"] + ) + merged_data = merged_data.loc[ + :, + ( + "DUID", + "EFFECTIVEDATE", + "REGIONID", + "SETTLEMENTDATE", + "MAXCAPACITY", + "SCADAVALUE", + "TOTALCLEARED", + "RRP_x", + "RRP_y", + "TOTALDEMAND", + ), + ] + merged_data.columns = [ + "DUID", + "DUDETAIL_EFFECTIVEDATE", + "REGIONID", + "SETTLEMENTDATE", + "MAXCAPACITY", + "SCADAVALUE", + "TRADING_TOTALCLEARED", + "DISPATCH_RRP", + "TRADING_RRP", + "TOTALDEMAND", + ] return merged_data def select_intervention_if_present(data, primary_key): - data = data.sort_values(['INTERVENTION']) - data = data.groupby([col for col in primary_key if col != - 'INTERVENTION'], as_index=False).last() + data = data.sort_values(["INTERVENTION"]) + data = data.groupby( + [col for col in primary_key if col != "INTERVENTION"], as_index=False + ).last() return data def select_highest_version_number(data, primary_key): - data['VERSIONNO'] = pd.to_numeric(data['VERSIONNO']) - data = data.sort_values(['VERSIONNO']) - data['VERSIONNO'] = data['VERSIONNO'].astype(int).astype(str) - data = data.groupby([col for col in primary_key if col != - 'VERSIONNO'], as_index=False).last() + data["VERSIONNO"] = pd.to_numeric(data["VERSIONNO"]) + data = data.sort_values(["VERSIONNO"]) + data["VERSIONNO"] = 
data["VERSIONNO"].astype(int).astype(str) + data = data.groupby( + [col for col in primary_key if col != "VERSIONNO"], as_index=False + ).last() return data def calc_trading_vwap_price(regional_demand, dispatch_price): - demand_and_price = pd.merge(regional_demand, dispatch_price, 'inner', on=[ - 'SETTLEMENTDATE', 'REGIONID']) - demand_and_price['COST'] = demand_and_price['RRP'] * \ - demand_and_price['TOTALDEMAND'] - demand_and_price = demand_and_price.set_index('SETTLEMENTDATE') - demand_and_price = demand_and_price.groupby('REGIONID').resample('30T', label='right', closed='right').aggregate( - {'COST': 'sum', 'TOTALDEMAND': 'sum'}) - demand_and_price['TRADING_VWAP'] = demand_and_price['COST'] / \ - demand_and_price['TOTALDEMAND'] + demand_and_price = pd.merge( + regional_demand, dispatch_price, "inner", on=["SETTLEMENTDATE", "REGIONID"] + ) + demand_and_price["COST"] = demand_and_price["RRP"] * demand_and_price["TOTALDEMAND"] + demand_and_price = demand_and_price.set_index("SETTLEMENTDATE") + demand_and_price = ( + demand_and_price.groupby("REGIONID") + .resample("30T", label="right", closed="right") + .aggregate({"COST": "sum", "TOTALDEMAND": "sum"}) + ) + demand_and_price["TRADING_VWAP"] = ( + demand_and_price["COST"] / demand_and_price["TOTALDEMAND"] + ) demand_and_price.reset_index(inplace=True) - demand_and_price = demand_and_price.loc[:, - ('SETTLEMENTDATE', 'REGIONID', 'TRADING_VWAP')] + demand_and_price = demand_and_price.loc[ + :, ("SETTLEMENTDATE", "REGIONID", "TRADING_VWAP") + ] return demand_and_price def calc_trading_load(scada): trading_load = scada.copy() - trading_load = trading_load.set_index('SETTLEMENTDATE') - trading_load['SCADAVALUE'] = pd.to_numeric(trading_load['SCADAVALUE']) - trading_load = trading_load.groupby('DUID').resample('30T', label='right', closed='right').aggregate( - {'SCADAVALUE': 'mean'}) + trading_load = trading_load.set_index("SETTLEMENTDATE") + trading_load["SCADAVALUE"] = pd.to_numeric(trading_load["SCADAVALUE"]) + trading_load = ( + trading_load.groupby("DUID") + .resample("30T", label="right", closed="right") + .aggregate({"SCADAVALUE": "mean"}) + ) trading_load.reset_index(inplace=True) - trading_load.columns = ['DUID', 'SETTLEMENTDATE', 'TOTALCLEARED'] + trading_load.columns = ["DUID", "SETTLEMENTDATE", "TOTALCLEARED"] return trading_load def calc_trading_price(dispatch_price): - dispatch_price = dispatch_price.set_index('SETTLEMENTDATE') - dispatch_price = dispatch_price.groupby('REGIONID').resample( - '30T', label='right', closed='right').aggregate({'RRP': 'mean'}) + dispatch_price = dispatch_price.set_index("SETTLEMENTDATE") + dispatch_price = ( + dispatch_price.groupby("REGIONID") + .resample("30T", label="right", closed="right") + .aggregate({"RRP": "mean"}) + ) dispatch_price.reset_index(inplace=True) return dispatch_price -def plant_stats(start_time, end_time, table_name, raw_data_location, select_columns=None, filter_cols=None, - filter_values=None): - - ix = pd.date_range(start=datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S'), - end=datetime.strptime( - end_time, '%Y/%m/%d %H:%M:%S') - timedelta(minutes=5), - freq='5T') +def plant_stats( + start_time, + end_time, + table_name, + raw_data_location, + select_columns=None, + filter_cols=None, + filter_values=None, +): + + ix = pd.date_range( + start=datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S"), + end=datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=5), + freq="5T", + ) timeseries_df = pd.DataFrame(index=ix) timeseries_df.reset_index(inplace=True) - 
timeseries_df.columns = ['SETTLEMENTDATE'] - - gen_max_cap = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'DUDETAIL', raw_data_location, - select_columns=['EFFECTIVEDATE', 'DUID', 'VERSIONNO', - 'MAXCAPACITY'], filter_cols=filter_cols, - filter_values=filter_values) + timeseries_df.columns = ["SETTLEMENTDATE"] + + gen_max_cap = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "DUDETAIL", + raw_data_location, + select_columns=["EFFECTIVEDATE", "DUID", "VERSIONNO", "MAXCAPACITY"], + filter_cols=filter_cols, + filter_values=filter_values, + ) gen_max_cap = select_highest_version_number( - gen_max_cap, defaults.table_primary_keys['DUDETAIL']) - gen_region = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'DUDETAILSUMMARY', raw_data_location, - select_columns=[ - 'START_DATE', 'END_DATE', 'DUID', 'REGIONID'], - filter_cols=filter_cols, filter_values=filter_values) - scada = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'DISPATCH_UNIT_SCADA', raw_data_location, - select_columns=['SETTLEMENTDATE', 'DUID', 'SCADAVALUE']) - dispatch_price = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'DISPATCHPRICE', raw_data_location, - select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP', - 'INTERVENTION']) + gen_max_cap, defaults.table_primary_keys["DUDETAIL"] + ) + gen_region = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "DUDETAILSUMMARY", + raw_data_location, + select_columns=["START_DATE", "END_DATE", "DUID", "REGIONID"], + filter_cols=filter_cols, + filter_values=filter_values, + ) + scada = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "DISPATCH_UNIT_SCADA", + raw_data_location, + select_columns=["SETTLEMENTDATE", "DUID", "SCADAVALUE"], + ) + dispatch_price = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "DISPATCHPRICE", + raw_data_location, + select_columns=["SETTLEMENTDATE", "REGIONID", "RRP", "INTERVENTION"], + ) dispatch_price = select_intervention_if_present( - dispatch_price, defaults.table_primary_keys['DISPATCHPRICE']) - trading_price = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'TRADINGPRICE', raw_data_location, - select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP']) - dispatch_price['RRP'] = pd.to_numeric(dispatch_price['RRP']) - trading_price['RRP'] = pd.to_numeric(trading_price['RRP']) - #trading_price = calc_trading_price(dispatch_price) - - region_summary = data_fetch_methods.dynamic_data_compiler(start_time, end_time, 'DISPATCHREGIONSUM', - raw_data_location, - select_columns=['SETTLEMENTDATE', 'REGIONID', - 'TOTALDEMAND', 'INTERVENTION', - 'DISPATCHINTERVAL']) + dispatch_price, defaults.table_primary_keys["DISPATCHPRICE"] + ) + trading_price = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "TRADINGPRICE", + raw_data_location, + select_columns=["SETTLEMENTDATE", "REGIONID", "RRP"], + ) + dispatch_price["RRP"] = pd.to_numeric(dispatch_price["RRP"]) + trading_price["RRP"] = pd.to_numeric(trading_price["RRP"]) + # trading_price = calc_trading_price(dispatch_price) + + region_summary = data_fetch_methods.dynamic_data_compiler( + start_time, + end_time, + "DISPATCHREGIONSUM", + raw_data_location, + select_columns=[ + "SETTLEMENTDATE", + "REGIONID", + "TOTALDEMAND", + "INTERVENTION", + "DISPATCHINTERVAL", + ], + ) region_summary = select_intervention_if_present( - region_summary, defaults.table_primary_keys['DISPATCHREGIONSUM']) + region_summary, defaults.table_primary_keys["DISPATCHREGIONSUM"] + 
) scada_list = [] - for gen in scada.groupby(['DUID'], as_index=False): - temp = pd.merge(timeseries_df, gen[1], 'left', on='SETTLEMENTDATE') - temp['SCADAVALUE'] = np.where( - temp['SCADAVALUE'].isnull(), 0.0, temp['SCADAVALUE']) - temp['DUID'] = np.where(temp['DUID'].isnull(), gen[0], temp['DUID']) + for gen in scada.groupby(["DUID"], as_index=False): + temp = pd.merge(timeseries_df, gen[1], "left", on="SETTLEMENTDATE") + temp["SCADAVALUE"] = np.where( + temp["SCADAVALUE"].isnull(), 0.0, temp["SCADAVALUE"] + ) + temp["DUID"] = np.where(temp["DUID"].isnull(), gen[0], temp["DUID"]) scada_list.append(temp) scada = pd.concat(scada_list) trading_load = calc_trading_load(scada) - combined_data = merge_tables_for_plant_stats(timeseries_df, gen_max_cap, gen_region, scada, trading_load, - dispatch_price, trading_price, region_summary) - combined_data['SCADAVALUE'] = pd.to_numeric(combined_data['SCADAVALUE']) - combined_data['MAXCAPACITY'] = pd.to_numeric(combined_data['MAXCAPACITY']) - combined_data['TRADING_TOTALCLEARED'] = pd.to_numeric( - combined_data['TRADING_TOTALCLEARED']) - - combined_data['TOTALDEMAND'] = pd.to_numeric(combined_data['TOTALDEMAND']) + combined_data = merge_tables_for_plant_stats( + timeseries_df, + gen_max_cap, + gen_region, + scada, + trading_load, + dispatch_price, + trading_price, + region_summary, + ) + combined_data["SCADAVALUE"] = pd.to_numeric(combined_data["SCADAVALUE"]) + combined_data["MAXCAPACITY"] = pd.to_numeric(combined_data["MAXCAPACITY"]) + combined_data["TRADING_TOTALCLEARED"] = pd.to_numeric( + combined_data["TRADING_TOTALCLEARED"] + ) + + combined_data["TOTALDEMAND"] = pd.to_numeric(combined_data["TOTALDEMAND"]) stats = stats_by_month_and_plant(combined_data) return stats def trading_and_dispatch_cost(): - gen_region = data_fetch_methods.dynamic_data_compiler('2017/01/01 00:05:00', '2018/01/01 00:05:00', - 'DUDETAILSUMMARY', defaults.raw_data_cache, - select_columns=['START_DATE', 'END_DATE', 'DUID', 'REGIONID']) - scada = data_fetch_methods.dynamic_data_compiler('2017/01/01 00:05:00', '2018/01/01 00:05:00', - 'DISPATCH_UNIT_SCADA', defaults.raw_data_cache) - - ix = pd.date_range(start=datetime.strptime('2017/01/01 00:00:00', '%Y/%m/%d %H:%M:%S'), - end=datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S'), - freq='5T') + gen_region = data_fetch_methods.dynamic_data_compiler( + "2017/01/01 00:05:00", + "2018/01/01 00:05:00", + "DUDETAILSUMMARY", + defaults.raw_data_cache, + select_columns=["START_DATE", "END_DATE", "DUID", "REGIONID"], + ) + scada = data_fetch_methods.dynamic_data_compiler( + "2017/01/01 00:05:00", + "2018/01/01 00:05:00", + "DISPATCH_UNIT_SCADA", + defaults.raw_data_cache, + ) + + ix = pd.date_range( + start=datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"), + end=datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"), + freq="5T", + ) timeseries_df = pd.DataFrame(index=ix) timeseries_df.reset_index(inplace=True) - timeseries_df.columns = ['SETTLEMENTDATE'] + timeseries_df.columns = ["SETTLEMENTDATE"] scada_list = [] - for gen in scada.groupby(['DUID'], as_index=False): - temp = pd.merge(timeseries_df, gen[1], 'left', on='SETTLEMENTDATE') - temp['SCADAVALUE'] = np.where( - temp['SCADAVALUE'].isnull(), 0.0, temp['SCADAVALUE']) - temp['DUID'] = np.where(temp['DUID'].isnull(), gen[0], temp['DUID']) + for gen in scada.groupby(["DUID"], as_index=False): + temp = pd.merge(timeseries_df, gen[1], "left", on="SETTLEMENTDATE") + temp["SCADAVALUE"] = np.where( + temp["SCADAVALUE"].isnull(), 0.0, temp["SCADAVALUE"] + ) + 
temp["DUID"] = np.where(temp["DUID"].isnull(), gen[0], temp["DUID"]) scada_list.append(temp) scada = pd.concat(scada_list) - dispatch_price = data_fetch_methods.dynamic_data_compiler('2017/01/01 00:00:00', '2018/01/01 00:05:00', - 'DISPATCHPRICE', defaults.raw_data_cache) - dispatch_price = select_intervention_if_present(dispatch_price, defaults.table_primary_keys['DISPATCHPRICE']) - gen_region = gen_region.sort_values('START_DATE') - scada = scada.sort_values('SETTLEMENTDATE') + dispatch_price = data_fetch_methods.dynamic_data_compiler( + "2017/01/01 00:00:00", + "2018/01/01 00:05:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + ) + dispatch_price = select_intervention_if_present( + dispatch_price, defaults.table_primary_keys["DISPATCHPRICE"] + ) + gen_region = gen_region.sort_values("START_DATE") + scada = scada.sort_values("SETTLEMENTDATE") scada_and_regions = pd.merge_asof( - scada, gen_region, left_on='SETTLEMENTDATE', right_on='START_DATE', by='DUID') - scada_and_regions = scada_and_regions[~scada_and_regions['REGIONID'].isnull( - )] - scada_and_regions = pd.merge(scada_and_regions, dispatch_price, 'inner', left_on=['REGIONID', 'SETTLEMENTDATE'], - right_on=['REGIONID', 'SETTLEMENTDATE']) - - scada_and_regions['SCADAVALUE'] = pd.to_numeric( - scada_and_regions['SCADAVALUE']) - scada_and_regions['RRP'] = pd.to_numeric(scada_and_regions['RRP']) - - scada_and_regions['DISPATCHCOST'] = ( - scada_and_regions['SCADAVALUE'] * scada_and_regions['RRP']) / 12 - - scada_and_regions = scada_and_regions.set_index('SETTLEMENTDATE') - scada_and_regions = scada_and_regions.groupby('DUID').resample('30T', label='right', closed='right').aggregate( - {'SCADAVALUE': 'mean', 'RRP': 'mean', 'DISPATCHCOST': 'sum'}) + scada, gen_region, left_on="SETTLEMENTDATE", right_on="START_DATE", by="DUID" + ) + scada_and_regions = scada_and_regions[~scada_and_regions["REGIONID"].isnull()] + scada_and_regions = pd.merge( + scada_and_regions, + dispatch_price, + "inner", + left_on=["REGIONID", "SETTLEMENTDATE"], + right_on=["REGIONID", "SETTLEMENTDATE"], + ) + + scada_and_regions["SCADAVALUE"] = pd.to_numeric(scada_and_regions["SCADAVALUE"]) + scada_and_regions["RRP"] = pd.to_numeric(scada_and_regions["RRP"]) + + scada_and_regions["DISPATCHCOST"] = ( + scada_and_regions["SCADAVALUE"] * scada_and_regions["RRP"] + ) / 12 + + scada_and_regions = scada_and_regions.set_index("SETTLEMENTDATE") + scada_and_regions = ( + scada_and_regions.groupby("DUID") + .resample("30T", label="right", closed="right") + .aggregate({"SCADAVALUE": "mean", "RRP": "mean", "DISPATCHCOST": "sum"}) + ) scada_and_regions.reset_index(inplace=True) - scada_and_regions['TRADINGCOST'] = ( - scada_and_regions['SCADAVALUE'] * scada_and_regions['RRP']) / 2 + scada_and_regions["TRADINGCOST"] = ( + scada_and_regions["SCADAVALUE"] * scada_and_regions["RRP"] + ) / 2 - scada_and_regions['SCADAVALUE'] = scada_and_regions['SCADAVALUE'] / 2 + scada_and_regions["SCADAVALUE"] = scada_and_regions["SCADAVALUE"] / 2 - scada_and_regions = scada_and_regions.groupby('DUID').sum() + scada_and_regions = scada_and_regions.groupby("DUID").sum() - scada_and_regions.to_csv( - 'C:/Users/user/Documents/dispatch_trading_cost.csv') + scada_and_regions.to_csv("C:/Users/user/Documents/dispatch_trading_cost.csv") diff --git a/nemosis/defaults.py b/nemosis/defaults.py index 7deb3f2..bf32d1a 100644 --- a/nemosis/defaults.py +++ b/nemosis/defaults.py @@ -1,348 +1,760 @@ -names = {'FCAS Providers': 'NEM Registration and Exemption List.xls', - 'DISPATCHLOAD': 'PUBLIC_DVD_DISPATCHLOAD', 
- 'DUDETAILSUMMARY': 'PUBLIC_DVD_DUDETAILSUMMARY', - 'DUDETAIL': 'PUBLIC_DVD_DUDETAIL', - 'DISPATCHCONSTRAINT': 'PUBLIC_DVD_DISPATCHCONSTRAINT', - 'GENCONDATA': 'PUBLIC_DVD_GENCONDATA', - 'DISPATCH_UNIT_SCADA': 'PUBLIC_DVD_DISPATCH_UNIT_SCADA', - 'DISPATCHPRICE': 'PUBLIC_DVD_DISPATCHPRICE', - 'SPDREGIONCONSTRAINT': 'PUBLIC_DVD_SPDREGIONCONSTRAINT', - 'SPDCONNECTIONPOINTCONSTRAINT': 'PUBLIC_DVD_SPDCONNECTIONPOINTCONSTRAINT', - 'SPDINTERCONNECTORCONSTRAINT': 'PUBLIC_DVD_SPDINTERCONNECTORCONSTRAINT', - 'BIDPEROFFER_D': 'PUBLIC_DVD_BIDPEROFFER_D', - 'DISPATCHINTERCONNECTORRES': 'PUBLIC_DVD_DISPATCHINTERCONNECTORRES', - 'BIDDAYOFFER_D': 'PUBLIC_DVD_BIDDAYOFFER_D', - 'DISPATCHREGIONSUM': 'PUBLIC_DVD_DISPATCHREGIONSUM', - 'FCAS_4_SECOND': 'FCAS', - 'ELEMENTS_FCAS_4_SECOND': 'Elements_FCAS.csv', - 'VARIABLES_FCAS_4_SECOND': 'Ancillary Services Market Causer Pays Variables File.csv', - 'Generators and Scheduled Loads': 'NEM Registration and Exemption List.xls', - 'MNSP_INTERCONNECTOR': 'PUBLIC_DVD_MNSP_INTERCONNECTOR', - 'MNSP_PEROFFER': 'PUBLIC_DVD_MNSP_PEROFFER', - 'INTERCONNECTOR': 'PUBLIC_DVD_INTERCONNECTOR', - 'INTERCONNECTORCONSTRAINT': 'PUBLIC_DVD_INTERCONNECTORCONSTRAINT', - 'MNSP_DAYOFFER': 'PUBLIC_DVD_MNSP_DAYOFFER', - 'LOSSMODEL': 'PUBLIC_DVD_LOSSMODEL', - 'LOSSFACTORMODEL': 'PUBLIC_DVD_LOSSFACTORMODEL', - 'FCAS_4s_SCADA_MAP': '', - 'PLANTSTATS': '', - 'TRADINGLOAD': 'PUBLIC_DVD_TRADINGLOAD', - 'TRADINGPRICE': 'PUBLIC_DVD_TRADINGPRICE', - 'TRADINGREGIONSUM': 'PUBLIC_DVD_TRADINGREGIONSUM', - 'TRADINGINTERCONNECT': 'PUBLIC_DVD_TRADINGINTERCONNECT', - 'MARKET_PRICE_THRESHOLDS': 'PUBLIC_DVD_MARKET_PRICE_THRESHOLDS'} - -table_types = {'FCAS Providers': 'STATICXL', - 'DISPATCHLOAD': 'MMS', - 'DUDETAILSUMMARY': 'MMS', - 'DUDETAIL': 'MMS', - 'DISPATCHCONSTRAINT': 'MMS', - 'GENCONDATA': 'MMS', - 'DISPATCH_UNIT_SCADA': 'MMS', - 'DISPATCHPRICE': 'MMS', - 'SPDREGIONCONSTRAINT': 'MMS', - 'SPDCONNECTIONPOINTCONSTRAINT': 'MMS', - 'SPDINTERCONNECTORCONSTRAINT': 'MMS', - 'BIDPEROFFER_D': 'MMS_AND_ARCHIVE', - 'DISPATCHINTERCONNECTORRES': 'MMS', - 'BIDDAYOFFER_D': 'MMS_AND_ARCHIVE', - 'DISPATCHREGIONSUM': 'MMS', - 'FCAS_4_SECOND': 'FCAS', - 'ELEMENTS_FCAS_4_SECOND': 'STATIC', - 'VARIABLES_FCAS_4_SECOND': 'STATIC', - 'Generators and Scheduled Loads': 'STATICXL', - 'MNSP_INTERCONNECTOR': 'MMS', - 'MNSP_PEROFFER': 'MMS', - 'INTERCONNECTOR': 'MMS', - 'INTERCONNECTORCONSTRAINT': 'MMS', - 'MNSP_DAYOFFER': 'MMS', - 'LOSSMODEL': 'MMS', - 'LOSSFACTORMODEL': 'MMS', - 'FCAS_4s_SCADA_MAP': 'CUSTOM', - 'TRADINGLOAD': 'MMS', - 'TRADINGPRICE': 'MMS', - 'TRADINGREGIONSUM': 'MMS', - 'TRADINGINTERCONNECT': 'MMS', - 'MARKET_PRICE_THRESHOLDS': 'MMS' - } - -dynamic_tables = [table for table, type in table_types.items() if type in ['MMS', 'MMS_AND_ARCHIVE', 'FCAS']] +names = { + "FCAS Providers": "NEM Registration and Exemption List.xls", + "DISPATCHLOAD": "PUBLIC_DVD_DISPATCHLOAD", + "DUDETAILSUMMARY": "PUBLIC_DVD_DUDETAILSUMMARY", + "DUDETAIL": "PUBLIC_DVD_DUDETAIL", + "DISPATCHCONSTRAINT": "PUBLIC_DVD_DISPATCHCONSTRAINT", + "GENCONDATA": "PUBLIC_DVD_GENCONDATA", + "DISPATCH_UNIT_SCADA": "PUBLIC_DVD_DISPATCH_UNIT_SCADA", + "DISPATCHPRICE": "PUBLIC_DVD_DISPATCHPRICE", + "SPDREGIONCONSTRAINT": "PUBLIC_DVD_SPDREGIONCONSTRAINT", + "SPDCONNECTIONPOINTCONSTRAINT": "PUBLIC_DVD_SPDCONNECTIONPOINTCONSTRAINT", + "SPDINTERCONNECTORCONSTRAINT": "PUBLIC_DVD_SPDINTERCONNECTORCONSTRAINT", + "BIDPEROFFER_D": "PUBLIC_DVD_BIDPEROFFER_D", + "DISPATCHINTERCONNECTORRES": "PUBLIC_DVD_DISPATCHINTERCONNECTORRES", + "BIDDAYOFFER_D": 
"PUBLIC_DVD_BIDDAYOFFER_D", + "DISPATCHREGIONSUM": "PUBLIC_DVD_DISPATCHREGIONSUM", + "FCAS_4_SECOND": "FCAS", + "ELEMENTS_FCAS_4_SECOND": "Elements_FCAS.csv", + "VARIABLES_FCAS_4_SECOND": "Ancillary Services Market Causer Pays Variables File.csv", + "Generators and Scheduled Loads": "NEM Registration and Exemption List.xls", + "MNSP_INTERCONNECTOR": "PUBLIC_DVD_MNSP_INTERCONNECTOR", + "MNSP_PEROFFER": "PUBLIC_DVD_MNSP_PEROFFER", + "INTERCONNECTOR": "PUBLIC_DVD_INTERCONNECTOR", + "INTERCONNECTORCONSTRAINT": "PUBLIC_DVD_INTERCONNECTORCONSTRAINT", + "MNSP_DAYOFFER": "PUBLIC_DVD_MNSP_DAYOFFER", + "LOSSMODEL": "PUBLIC_DVD_LOSSMODEL", + "LOSSFACTORMODEL": "PUBLIC_DVD_LOSSFACTORMODEL", + "FCAS_4s_SCADA_MAP": "", + "PLANTSTATS": "", + "TRADINGLOAD": "PUBLIC_DVD_TRADINGLOAD", + "TRADINGPRICE": "PUBLIC_DVD_TRADINGPRICE", + "TRADINGREGIONSUM": "PUBLIC_DVD_TRADINGREGIONSUM", + "TRADINGINTERCONNECT": "PUBLIC_DVD_TRADINGINTERCONNECT", + "MARKET_PRICE_THRESHOLDS": "PUBLIC_DVD_MARKET_PRICE_THRESHOLDS", +} + +table_types = { + "FCAS Providers": "STATICXL", + "DISPATCHLOAD": "MMS", + "DUDETAILSUMMARY": "MMS", + "DUDETAIL": "MMS", + "DISPATCHCONSTRAINT": "MMS", + "GENCONDATA": "MMS", + "DISPATCH_UNIT_SCADA": "MMS", + "DISPATCHPRICE": "MMS", + "SPDREGIONCONSTRAINT": "MMS", + "SPDCONNECTIONPOINTCONSTRAINT": "MMS", + "SPDINTERCONNECTORCONSTRAINT": "MMS", + "BIDPEROFFER_D": "MMS_AND_ARCHIVE", + "DISPATCHINTERCONNECTORRES": "MMS", + "BIDDAYOFFER_D": "MMS_AND_ARCHIVE", + "DISPATCHREGIONSUM": "MMS", + "FCAS_4_SECOND": "FCAS", + "ELEMENTS_FCAS_4_SECOND": "STATIC", + "VARIABLES_FCAS_4_SECOND": "STATIC", + "Generators and Scheduled Loads": "STATICXL", + "MNSP_INTERCONNECTOR": "MMS", + "MNSP_PEROFFER": "MMS", + "INTERCONNECTOR": "MMS", + "INTERCONNECTORCONSTRAINT": "MMS", + "MNSP_DAYOFFER": "MMS", + "LOSSMODEL": "MMS", + "LOSSFACTORMODEL": "MMS", + "FCAS_4s_SCADA_MAP": "CUSTOM", + "TRADINGLOAD": "MMS", + "TRADINGPRICE": "MMS", + "TRADINGREGIONSUM": "MMS", + "TRADINGINTERCONNECT": "MMS", + "MARKET_PRICE_THRESHOLDS": "MMS", +} + +dynamic_tables = [ + table + for table, type in table_types.items() + if type in ["MMS", "MMS_AND_ARCHIVE", "FCAS"] +] return_tables = list(names.keys()) -display_as_AMEO = ['FCAS Providers', 'DISPATCHLOAD', 'DUDETAILSUMMARY', 'DUDETAIL', 'DISPATCHCONSTRAINT', 'GENCONDATA', 'DISPATCH_UNIT_SCADA', - 'DISPATCHPRICE', 'SPDREGIONCONSTRAINT', 'SPDCONNECTIONPOINTCONSTRAINT', 'SPDINTERCONNECTORCONSTRAINT', - 'BIDPEROFFER_D', 'DISPATCHINTERCONNECTORRES', 'BIDDAYOFFER_D', 'DISPATCHREGIONSUM', 'FCAS_4_SECOND', - 'ELEMENTS_FCAS_4_SECOND', 'VARIABLES_FCAS_4_SECOND', 'Generators and Scheduled Loads', 'TRADINGLOAD', - 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT'] - -display_as_Custom = ['FCAS_4s_SCADA_MAP', 'PLANTSTATS'] - -static_tables = ['ELEMENTS_FCAS_4_SECOND', - 'VARIABLES_FCAS_4_SECOND', - 'Generators and Scheduled Loads', - 'FCAS Providers'] +display_as_AMEO = [ + "FCAS Providers", + "DISPATCHLOAD", + "DUDETAILSUMMARY", + "DUDETAIL", + "DISPATCHCONSTRAINT", + "GENCONDATA", + "DISPATCH_UNIT_SCADA", + "DISPATCHPRICE", + "SPDREGIONCONSTRAINT", + "SPDCONNECTIONPOINTCONSTRAINT", + "SPDINTERCONNECTORCONSTRAINT", + "BIDPEROFFER_D", + "DISPATCHINTERCONNECTORRES", + "BIDDAYOFFER_D", + "DISPATCHREGIONSUM", + "FCAS_4_SECOND", + "ELEMENTS_FCAS_4_SECOND", + "VARIABLES_FCAS_4_SECOND", + "Generators and Scheduled Loads", + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", +] + +display_as_Custom = ["FCAS_4s_SCADA_MAP", "PLANTSTATS"] + +static_tables = [ + 
"ELEMENTS_FCAS_4_SECOND", + "VARIABLES_FCAS_4_SECOND", + "Generators and Scheduled Loads", + "FCAS Providers", +] static_table_url = { - 'ELEMENTS_FCAS_4_SECOND': 'https://www.nemweb.com.au/Reports/Current/Causer_Pays_Elements/', - 'VARIABLES_FCAS_4_SECOND': 'https://aemo.com.au/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv', - 'Generators and Scheduled Loads': 'https://www.aemo.com.au/-/media/Files/Electricity/NEM/Participant_Information/NEM-Registration-and-Exemption-List.xls', - '_downloader.download_xl': 'https://www.aemo.com.au/-/media/Files/Electricity/NEM/Participant_Information/NEM-Registration-and-Exemption-List.xls'} - -aemo_data_url = 'http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/{}/MMSDM_{}_{}/MMSDM_Historical_Data_SQLLoader/DATA/{}.zip' - -fcas_4_url = 'http://www.nemweb.com.au/Reports/Current/Causer_Pays/FCAS_{}{}{}{}.zip' - -fcas_4_url_hist = 'http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/FCAS_Causer_Pays/{}/FCAS_Causer_Pays_{}_{}/FCAS_{}{}{}{}.zip' - -data_url = {'DISPATCHLOAD': 'aemo_data_url', - 'DUDETAILSUMMARY': 'aemo_data_url', - 'DUDETAIL': 'aemo_data_url', - 'DISPATCHCONSTRAINT': 'aemo_data_url', - 'GENCONDATA': 'aemo_data_url', - 'DISPATCH_UNIT_SCADA': 'aemo_data_url', - 'DISPATCHPRICE': 'aemo_data_url', - 'SPDREGIONCONSTRAINT': 'aemo_data_url', - 'SPDCONNECTIONPOINTCONSTRAINT': 'aemo_data_url', - 'SPDINTERCONNECTORCONSTRAINT': 'aemo_data_url', - 'BIDPEROFFER_D': 'aemo_data_url', - 'DISPATCHINTERCONNECTORRES': 'aemo_data_url', - 'INTERCONNECTOR': 'aemo_data_url', - 'INTERCONNECTORCONSTRAINT': 'aemo_data_url', - 'MNSP_INTERCONNECTOR': 'aemo_data_url', - 'BIDDAYOFFER_D': 'aemo_data_url', - 'DISPATCHREGIONSUM': 'aemo_data_url', - 'MNSP_DAYOFFER': 'aemo_data_url', - 'MNSP_PEROFFER': 'aemo_data_url', - 'LOSSMODEL': 'aemo_data_url', - 'LOSSFACTORMODEL': 'aemo_data_url', - 'DISPATCHCASESOLUTION': 'aemo_data_url', - 'FCAS': 'fcas_4_url', - 'TRADINGLOAD': 'aemo_data_url', - 'TRADINGPRICE': 'aemo_data_url', - 'TRADINGREGIONSUM': 'aemo_data_url', - 'TRADINGINTERCONNECT': 'aemo_data_url', - 'MARKET_PRICE_THRESHOLDS': 'aemo_data_url' - } - -filterable_cols = ['DUID', 'REGIONID', 'STATIONID', 'PARTICIPANTID', 'STARTTYPE', 'SCHEDULE_TYPE', 'GENCONID', - 'BIDTYPE', 'VARIABLEID', 'INTERVENTION', 'DISPATCHMODE', 'STARTTYPE', 'CONNECTIONPOINTID', - 'DISPATCHTYPE', 'CONSTRAINTID', 'PREDISPATCH', 'STPASA', 'MTPASA', 'LIMITTYPE', 'STATIONNAME', - 'AGCFLAG', 'INTERCONNECTORID', 'NAME', 'Fuel Source - Primary', 'Fuel Source - Descriptor', - 'Technology Type - Primary', 'Technology Type - Descriptor', 'ELEMENTNUMBER', 'MARKETNAME', - 'VARIABLENUMBER', 'VARIABLETYPE', 'MMSDESCRIPTOR', 'ELEMENTTYPE', 'Region', 'Max Cap (MW)', - 'Min Enablement Level', 'Max Enablement Level', 'Max Lower Angle', 'Max Upper Angle', - 'Bid Type'] + "ELEMENTS_FCAS_4_SECOND": "https://www.nemweb.com.au/Reports/Current/Causer_Pays_Elements/", + "VARIABLES_FCAS_4_SECOND": "https://aemo.com.au/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv", + "Generators and Scheduled Loads": "https://www.aemo.com.au/-/media/Files/Electricity/NEM/Participant_Information/NEM-Registration-and-Exemption-List.xls", + "_downloader.download_xl": "https://www.aemo.com.au/-/media/Files/Electricity/NEM/Participant_Information/NEM-Registration-and-Exemption-List.xls", +} + +aemo_data_url = 
"http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/{}/MMSDM_{}_{}/MMSDM_Historical_Data_SQLLoader/DATA/{}.zip" + +fcas_4_url = "http://www.nemweb.com.au/Reports/Current/Causer_Pays/FCAS_{}{}{}{}.zip" + +fcas_4_url_hist = "http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/FCAS_Causer_Pays/{}/FCAS_Causer_Pays_{}_{}/FCAS_{}{}{}{}.zip" + +data_url = { + "DISPATCHLOAD": "aemo_data_url", + "DUDETAILSUMMARY": "aemo_data_url", + "DUDETAIL": "aemo_data_url", + "DISPATCHCONSTRAINT": "aemo_data_url", + "GENCONDATA": "aemo_data_url", + "DISPATCH_UNIT_SCADA": "aemo_data_url", + "DISPATCHPRICE": "aemo_data_url", + "SPDREGIONCONSTRAINT": "aemo_data_url", + "SPDCONNECTIONPOINTCONSTRAINT": "aemo_data_url", + "SPDINTERCONNECTORCONSTRAINT": "aemo_data_url", + "BIDPEROFFER_D": "aemo_data_url", + "DISPATCHINTERCONNECTORRES": "aemo_data_url", + "INTERCONNECTOR": "aemo_data_url", + "INTERCONNECTORCONSTRAINT": "aemo_data_url", + "MNSP_INTERCONNECTOR": "aemo_data_url", + "BIDDAYOFFER_D": "aemo_data_url", + "DISPATCHREGIONSUM": "aemo_data_url", + "MNSP_DAYOFFER": "aemo_data_url", + "MNSP_PEROFFER": "aemo_data_url", + "LOSSMODEL": "aemo_data_url", + "LOSSFACTORMODEL": "aemo_data_url", + "DISPATCHCASESOLUTION": "aemo_data_url", + "FCAS": "fcas_4_url", + "TRADINGLOAD": "aemo_data_url", + "TRADINGPRICE": "aemo_data_url", + "TRADINGREGIONSUM": "aemo_data_url", + "TRADINGINTERCONNECT": "aemo_data_url", + "MARKET_PRICE_THRESHOLDS": "aemo_data_url", +} + +filterable_cols = [ + "DUID", + "REGIONID", + "STATIONID", + "PARTICIPANTID", + "STARTTYPE", + "SCHEDULE_TYPE", + "GENCONID", + "BIDTYPE", + "VARIABLEID", + "INTERVENTION", + "DISPATCHMODE", + "STARTTYPE", + "CONNECTIONPOINTID", + "DISPATCHTYPE", + "CONSTRAINTID", + "PREDISPATCH", + "STPASA", + "MTPASA", + "LIMITTYPE", + "STATIONNAME", + "AGCFLAG", + "INTERCONNECTORID", + "NAME", + "Fuel Source - Primary", + "Fuel Source - Descriptor", + "Technology Type - Primary", + "Technology Type - Descriptor", + "ELEMENTNUMBER", + "MARKETNAME", + "VARIABLENUMBER", + "VARIABLETYPE", + "MMSDESCRIPTOR", + "ELEMENTTYPE", + "Region", + "Max Cap (MW)", + "Min Enablement Level", + "Max Enablement Level", + "Max Lower Angle", + "Max Upper Angle", + "Bid Type", +] table_columns = { - - 'DISPATCHLOAD': ['SETTLEMENTDATE', 'DUID', 'INTERVENTION', 'DISPATCHMODE', 'AGCSTATUS', 'INITIALMW', - 'TOTALCLEARED', 'RAMPDOWNRATE', 'RAMPUPRATE', 'LOWER5MIN', 'LOWER60SEC', - 'LOWER6SEC', 'RAISE5MIN', 'RAISE60SEC', 'RAISE6SEC', 'LOWERREG', 'RAISEREG', - 'SEMIDISPATCHCAP', 'AVAILABILITY', 'RAISEREGENABLEMENTMAX', 'RAISEREGENABLEMENTMIN', - 'LOWERREGENABLEMENTMAX', 'LOWERREGENABLEMENTMIN'], - - 'TRADINGLOAD': ['SETTLEMENTDATE', 'DUID', 'INITIALMW', 'TOTALCLEARED', 'RAMPDOWNRATE', 'RAMPUPRATE', 'LOWER5MIN', - 'LOWER60SEC', 'LOWER6SEC', 'RAISE5MIN', 'RAISE60SEC', 'RAISE6SEC', 'LOWERREG', 'RAISEREG', - 'SEMIDISPATCHCAP', 'AVAILABILITY'], - - 'TRADINGPRICE': ['SETTLEMENTDATE', 'REGIONID', 'RRP', 'RAISE6SECRRP', 'RAISE60SECRRP', - 'RAISE5MINRRP', 'RAISEREGRRP', 'LOWER6SECRRP', 'LOWER60SECRRP', 'LOWER5MINRRP', 'LOWERREGRRP', 'PRICE_STATUS'], - - 'TRADINGREGIONSUM': ['SETTLEMENTDATE', 'REGIONID', 'TOTALDEMAND', - 'AVAILABLEGENERATION', 'AVAILABLELOAD', 'DEMANDFORECAST', 'DISPATCHABLEGENERATION', - 'DISPATCHABLELOAD', 'NETINTERCHANGE', 'EXCESSGENERATION', 'LOWER5MINLOCALDISPATCH', - 'LOWER60SECLOCALDISPATCH', 'LOWER6SECLOCALDISPATCH', 'RAISE5MINLOCALDISPATCH', - 'RAISE60SECLOCALDISPATCH', 'RAISE6SECLOCALDISPATCH', 'LOWERREGLOCALDISPATCH', - 'RAISEREGLOCALDISPATCH', 'INITIALSUPPLY', 
'CLEAREDSUPPLY', 'TOTALINTERMITTENTGENERATION', - 'DEMAND_AND_NONSCHEDGEN', 'UIGF'], - - 'TRADINGINTERCONNECT': ['SETTLEMENTDATE', 'INTERCONNECTORID', 'MWFLOW', 'METEREDMWFLOW', 'MWLOSSES'], - - 'DUDETAILSUMMARY': ['DUID', 'START_DATE', 'END_DATE', 'DISPATCHTYPE', 'CONNECTIONPOINTID', 'REGIONID', 'STATIONID', - 'PARTICIPANTID', 'LASTCHANGED', 'TRANSMISSIONLOSSFACTOR', 'STARTTYPE', 'DISTRIBUTIONLOSSFACTOR', - 'SCHEDULE_TYPE', 'MAX_RAMP_RATE_UP', 'MAX_RAMP_RATE_DOWN'], - - 'DISPATCHCONSTRAINT': ['SETTLEMENTDATE', 'RUNNO', 'CONSTRAINTID', 'INTERVENTION', 'RHS', 'MARGINALVALUE', - 'VIOLATIONDEGREE', 'LASTCHANGED', 'GENCONID_EFFECTIVEDATE', 'GENCONID_VERSIONNO', 'LHS', - 'DISPATCHINTERVAL'], - - 'GENCONDATA': ['GENCONID', 'EFFECTIVEDATE', 'VERSIONNO', 'CONSTRAINTTYPE', 'CONSTRAINTVALUE', 'DESCRIPTION', - 'GENERICCONSTRAINTWEIGHT', 'LASTCHANGED', 'DISPATCH', 'PREDISPATCH', 'STPASA', 'MTPASA', - 'LIMITTYPE', 'REASON'], - - 'DISPATCH_UNIT_SCADA': ['SETTLEMENTDATE', 'DUID', 'SCADAVALUE'], - - 'DUDETAIL': ['EFFECTIVEDATE', 'DUID', 'VERSIONNO', 'CONNECTIONPOINTID', 'REGISTEREDCAPACITY', 'AGCCAPABILITY', - 'DISPATCHTYPE', 'MAXCAPACITY', 'STARTTYPE', 'NORMALLYONFLAG', 'LASTCHANGED'], - - 'DISPATCHPRICE': ['SETTLEMENTDATE', 'REGIONID', 'INTERVENTION', 'RRP', 'RAISE6SECRRP', 'RAISE60SECRRP', - 'RAISE5MINRRP', 'RAISEREGRRP', 'LOWER6SECRRP', 'LOWER60SECRRP', 'LOWER5MINRRP', 'LOWERREGRRP', - 'PRICE_STATUS'], - - 'SPDREGIONCONSTRAINT': ['REGIONID', 'EFFECTIVEDATE', 'VERSIONNO', 'GENCONID', 'FACTOR', 'LASTCHANGED', 'BIDTYPE'], - - 'SPDCONNECTIONPOINTCONSTRAINT': ['CONNECTIONPOINTID', 'EFFECTIVEDATE', 'VERSIONNO', 'GENCONID', 'FACTOR', 'BIDTYPE', - 'LASTCHANGED'], - - 'SPDINTERCONNECTORCONSTRAINT': ['INTERCONNECTORID', 'EFFECTIVEDATE', 'VERSIONNO', 'GENCONID', 'FACTOR', - 'LASTCHANGED'], - - 'BIDPEROFFER_D': ['DUID', 'BANDAVAIL1', 'BANDAVAIL2', 'BANDAVAIL3', 'BANDAVAIL4', 'BANDAVAIL5', 'BANDAVAIL6', - 'BANDAVAIL7', 'BANDAVAIL8', 'BANDAVAIL9', 'BANDAVAIL10', 'MAXAVAIL', 'BIDTYPE', 'SETTLEMENTDATE', - 'ENABLEMENTMIN', 'ENABLEMENTMAX', 'LOWBREAKPOINT', - 'HIGHBREAKPOINT', 'INTERVAL_DATETIME', 'OFFERDATE'], - - 'DISPATCHINTERCONNECTORRES': ['SETTLEMENTDATE', 'INTERCONNECTORID', 'DISPATCHINTERVAL', 'INTERVENTION', 'MWFLOW', - 'METEREDMWFLOW', 'MWLOSSES'], - - 'INTERCONNECTOR': ['INTERCONNECTORID', 'REGIONFROM', 'REGIONTO', 'LASTCHANGED'], - - 'INTERCONNECTORCONSTRAINT': ['INTERCONNECTORID', 'FROMREGIONLOSSSHARE', 'EFFECTIVEDATE', 'VERSIONNO', - 'LOSSCONSTANT', 'LOSSFLOWCOEFFICIENT', 'ICTYPE'], - - 'MNSP_INTERCONNECTOR': ['INTERCONNECTORID', 'LINKID', 'FROMREGION', 'TOREGION', 'MAXCAPACITY', 'FROM_REGION_TLF', - 'TO_REGION_TLF', 'LHSFACTOR', 'EFFECTIVEDATE', 'VERSIONNO'], - - 'BIDDAYOFFER_D': ['SETTLEMENTDATE', 'DUID', 'BIDTYPE', 'OFFERDATE', 'VERSIONNO', 'PRICEBAND1', 'PRICEBAND2', - 'PRICEBAND3', 'PRICEBAND4', 'PRICEBAND5', 'PRICEBAND6', 'PRICEBAND7', 'PRICEBAND8', - 'PRICEBAND9', 'PRICEBAND10', 'T1', 'T2', 'T3', 'T4', 'MINIMUMLOAD'], - - - 'DISPATCHREGIONSUM': ['SETTLEMENTDATE', 'REGIONID', 'DISPATCHINTERVAL', 'INTERVENTION', 'TOTALDEMAND', - 'AVAILABLEGENERATION', 'AVAILABLELOAD', 'DEMANDFORECAST', 'DISPATCHABLEGENERATION', - 'DISPATCHABLELOAD', 'NETINTERCHANGE', 'EXCESSGENERATION', 'LOWER5MINLOCALDISPATCH', - 'LOWER60SECLOCALDISPATCH', 'LOWER6SECLOCALDISPATCH', 'RAISE5MINLOCALDISPATCH', - 'RAISE60SECLOCALDISPATCH', 'RAISE6SECLOCALDISPATCH', 'LOWERREGLOCALDISPATCH', - 'RAISEREGLOCALDISPATCH', 'INITIALSUPPLY', 'CLEAREDSUPPLY', 'TOTALINTERMITTENTGENERATION', - 'DEMAND_AND_NONSCHEDGEN', 'UIGF', 
'SEMISCHEDULE_CLEAREDMW', 'SEMISCHEDULE_COMPLIANCEMW'], - - 'MNSP_PEROFFER': ['SETTLEMENTDATE', 'OFFERDATE', 'VERSIONNO', 'PARTICIPANTID', 'LINKID', 'PERIODID', - 'BANDAVAIL1', 'BANDAVAIL2', 'BANDAVAIL3', 'BANDAVAIL4', 'BANDAVAIL5', 'BANDAVAIL6', - 'BANDAVAIL7', 'BANDAVAIL8', 'BANDAVAIL9', 'BANDAVAIL10'], - - 'MNSP_DAYOFFER': ['SETTLEMENTDATE', 'OFFERDATE', 'VERSIONNO', 'PARTICIPANTID', 'LINKID', 'PERIODID', - 'PRICEBAND1', 'PRICEBAND2', 'PRICEBAND3', 'PRICEBAND4', 'PRICEBAND5', 'PRICEBAND6', - 'PRICEBAND7', 'PRICEBAND8', 'PRICEBAND9', 'PRICEBAND10'], - - 'LOSSMODEL': ['EFFECTIVEDATE', 'VERSIONNO', 'INTERCONNECTORID', 'LOSSSEGMENT', 'MWBREAKPOINT'], - - 'LOSSFACTORMODEL': ['EFFECTIVEDATE', 'VERSIONNO', 'INTERCONNECTORID', 'REGIONID', 'DEMANDCOEFFICIENT'], - - 'DISPATCHCASESOLUTION': ['SETTLEMENTDATE', 'TOTALOBJECTIVE'], - - 'FCAS_4_SECOND': ['TIMESTAMP', 'ELEMENTNUMBER', 'VARIABLENUMBER', 'VALUE', 'VALUEQUALITY'], - - 'ELEMENTS_FCAS_4_SECOND': ['ELEMENTNUMBER', 'EMSNAME', 'ELEMENTTYPE', 'MMSDESCRIPTOR'], - - 'VARIABLES_FCAS_4_SECOND': ['VARIABLENUMBER', 'VARIABLETYPE'], - - 'Generators and Scheduled Loads': ['Participant', 'Station Name', 'Region', 'Dispatch Type', 'Category', 'Classification', - 'Fuel Source - Primary', 'Fuel Source - Descriptor', 'Technology Type - Primary', - 'Technology Type - Descriptor', 'Aggregation', 'DUID', 'Reg Cap (MW)'], - - 'FCAS Providers': ['Participant', 'Station Name', 'Region', 'DUID', 'Bid Type', 'Max Cap (MW)', - 'Min Enablement Level', 'Max Enablement Level', 'Max Lower Angle', 'Max Upper Angle'], - - 'FCAS_4s_SCADA_MAP': ['ELEMENTNUMBER', 'MARKETNAME', 'ERROR'], - - 'PLANTSTATS': ['Month', 'DUID', 'CapacityFactor', 'Volume', 'TRADING_VWAP', 'DISPATCH_VWAP', - 'NodalPeakCapacityFactor', 'Nodal90thPercentileCapacityFactor'], - - 'MARKET_PRICE_THRESHOLDS': ['EFFECTIVEDATE', 'VERSIONNO', 'VOLL', 'MARKETPRICEFLOOR']} - -table_primary_keys = {'DISPATCHCONSTRAINT': ['CONSTRAINTID', 'GENCONID_EFFECTIVEDATE', 'GENCONID_VERSIONNO', - 'SETTLEMENTDATE', 'INTERVENTION'], - 'DUDETAILSUMMARY': ['DUID', 'START_DATE', 'END_DATE'], 'STATION': ['STATIONID'], - 'DUDETAIL': ['EFFECTIVEDATE', 'DUID', 'VERSIONNO'], - 'SPDREGIONCONSTRAINT': ['EFFECTIVEDATE', 'GENCONID', 'REGIONID', 'VERSIONNO', 'BIDTYPE'], - 'SPDCONNECTIONPOINTCONSTRAINT': ['EFFECTIVEDATE', 'GENCONID', 'CONNECTIONPOINTID', 'VERSIONNO', - 'BIDTYPE'], - 'SPDINTERCONNECTORCONSTRAINT': ['EFFECTIVEDATE', 'GENCONID', 'INTERCONNECTORID', 'VERSIONNO'], - 'GENCONDATA': ['GENCONID', 'EFFECTIVEDATE', 'VERSIONNO'], - 'MNSP_PEROFFER': ['SETTLEMENTDATE', 'OFFERDATE', 'VERSIONNO', 'PARTICIPANTID', 'LINKID', 'PERIODID'], - 'MNSP_DAYOFFER': ['SETTLEMENTDATE', 'OFFERDATE', 'VERSIONNO', 'PARTICIPANTID', 'LINKID'], - 'INTERCONNECTORCONSTRAINT': ['EFFECTIVEDATE', 'INTERCONNECTORID', 'VERSIONNO'], - 'MNSP_INTERCONNECTOR': ['EFFECTIVEDATE', 'LINKID', 'VERSIONNO'], - 'LOSSMODEL': ['EFFECTIVEDATE', 'INTERCONNECTORID', 'LOSSSEGMENT', 'VERSIONNO'], - 'LOSSFACTORMODEL': ['EFFECTIVEDATE', 'INTERCONNECTORID', 'REGIONID', 'VERSIONNO'], - 'BIDPEROFFER_D': ['BIDTYPE', 'DUID', 'OFFERDATE', 'INTERVAL_DATETIME', 'SETTLEMENTDATE'], - 'DISPATCHINTERCONNECTORRES': ['DISPATCHINTERVAL', 'INTERCONNECTORID', 'INTERVENTION', - 'SETTLEMENTDATE'], - 'INTERCONNECTOR': ['INTERCONNECTORID'], - 'DISPATCHPRICE': ['INTERVENTION', 'REGIONID', 'SETTLEMENTDATE'], - 'BIDDAYOFFER_D': ['BIDTYPE', 'DUID', 'SETTLEMENTDATE'], - 'DISPATCHREGIONSUM': ['DISPATCHINTERVAL', 'INTERVENTION', 'REGIONID', 'SETTLEMENTDATE'], - 'DISPATCHLOAD': ['SETTLEMENTDATE', 'INTERVENTION', 
'DUID'], - 'DISPATCH_UNIT_SCADA': ['SETTLEMENTDATE', 'DUID'], - 'FCAS_4_SECOND': ['TIMESTAMP', 'ELEMENTNUMBER', 'VARIABLENUMBER'], - 'ELEMENTS_FCAS_4_SECOND': ['ELEMENTNUMBER'], - 'VARIABLES_FCAS_4_SECOND': ['VARIABLENUMBER', 'VARIABLETYPE'], - 'Generators and Scheduled Loads': ['DUID'], - 'FCAS Providers': ['DUID', 'Bid Type'], - 'FCAS_4s_SCADA_MAP': ['ELEMENTNUMBER', 'MARKETNAME'], - 'TRADINGLOAD': ['SETTLEMENTDATE', 'DUID'], - 'TRADINGPRICE': ['SETTLEMENTDATE', 'REGIONID'], - 'TRADINGREGIONSUM': ['SETTLEMENTDATE', 'REGIONID'], - 'TRADINGINTERCONNECT': ['SETTLEMENTDATE', 'INTERCONNECTORID'], - 'PLANTSTATS': ['Month', 'DUID'], - 'MARKET_PRICE_THRESHOLDS': ['EFFECTIVEDATE', 'VERSIONNO']} - -effective_date_group_col = {'SPDREGIONCONSTRAINT': ['GENCONID'], - 'SPDCONNECTIONPOINTCONSTRAINT': ['GENCONID'], - 'SPDINTERCONNECTORCONSTRAINT': ['GENCONID'], - 'GENCONDATA': ['GENCONID'], - 'MNSP_INTERCONNECTOR': ['INTERCONNECTORID'], - 'INTERCONNECTORCONSTRAINT': ['INTERCONNECTORID'], - 'INTERCONNECTOR': ['INTERCONNECTORID'], - 'LOSSMODEL': ['INTERCONNECTORID'], - 'LOSSFACTORMODEL': ['INTERCONNECTORID'], - 'DUDETAILSUMMARY': ['DUID'], - 'MNSP_PEROFFER': ['LINKID'], - 'MNSP_DAYOFFER': ['LINKID'], - 'DUDETAIL': ['DUID'], - 'MARKET_PRICE_THRESHOLDS': []} - -primary_date_columns = {'DISPATCHLOAD': 'SETTLEMENTDATE', - 'TRADINGLOAD': 'SETTLEMENTDATE', - 'TRADINGPRICE': 'SETTLEMENTDATE', - 'TRADINGREGIONSUM': 'SETTLEMENTDATE', - 'TRADINGINTERCONNECT': 'SETTLEMENTDATE', - 'DUDETAILSUMMARY': 'START_DATE', - 'DUDETAIL': 'EFFECTIVEDATE', - 'DISPATCHCONSTRAINT': 'SETTLEMENTDATE', - 'GENCONDATA': 'EFFECTIVEDATE', - 'DISPATCH_UNIT_SCADA': 'SETTLEMENTDATE', - 'DISPATCHPRICE': 'SETTLEMENTDATE', - 'SPDREGIONCONSTRAINT': 'EFFECTIVEDATE', - 'SPDCONNECTIONPOINTCONSTRAINT': 'EFFECTIVEDATE', - 'SPDINTERCONNECTORCONSTRAINT': 'EFFECTIVEDATE', - 'BIDPEROFFER_D': 'INTERVAL_DATETIME', - 'DISPATCHINTERCONNECTORRES': 'SETTLEMENTDATE', - 'BIDDAYOFFER_D': 'SETTLEMENTDATE', - 'DISPATCHREGIONSUM': 'SETTLEMENTDATE', - 'FCAS_4_SECOND': 'TIMESTAMP', - 'ELEMENTS_FCAS_4_SECOND': None, - 'VARIABLES_FCAS_4_SECOND': None, - 'Generators and Scheduled Loads': None, - 'FCAS Providers': None, - 'MNSP_INTERCONNECTOR': 'EFFECTIVEDATE', - 'MNSP_PEROFFER': 'SETTLEMENTDATE', - 'INTERCONNECTOR': 'LASTCHANGED', - 'INTERCONNECTORCONSTRAINT': 'EFFECTIVEDATE', - 'MNSP_DAYOFFER': 'SETTLEMENTDATE', - 'LOSSMODEL': 'EFFECTIVEDATE', - 'LOSSFACTORMODEL': 'EFFECTIVEDATE', - 'FCAS_4s_SCADA_MAP': None, - 'MARKET_PRICE_THRESHOLDS': 'EFFECTIVEDATE'} - -reg_exemption_list_tabs = {'Generators and Scheduled Loads': 'Generators and Scheduled Loads', - 'FCAS Providers': 'Ancillary Services'} - -months = ['01', '02', '03', '04', '05', - '06', '07', '08', '09', '10', '11', '12'] - -nem_data_model_start_time = '2009/07/01 00:00:00' + "DISPATCHLOAD": [ + "SETTLEMENTDATE", + "DUID", + "INTERVENTION", + "DISPATCHMODE", + "AGCSTATUS", + "INITIALMW", + "TOTALCLEARED", + "RAMPDOWNRATE", + "RAMPUPRATE", + "LOWER5MIN", + "LOWER60SEC", + "LOWER6SEC", + "RAISE5MIN", + "RAISE60SEC", + "RAISE6SEC", + "LOWERREG", + "RAISEREG", + "SEMIDISPATCHCAP", + "AVAILABILITY", + "RAISEREGENABLEMENTMAX", + "RAISEREGENABLEMENTMIN", + "LOWERREGENABLEMENTMAX", + "LOWERREGENABLEMENTMIN", + ], + "TRADINGLOAD": [ + "SETTLEMENTDATE", + "DUID", + "INITIALMW", + "TOTALCLEARED", + "RAMPDOWNRATE", + "RAMPUPRATE", + "LOWER5MIN", + "LOWER60SEC", + "LOWER6SEC", + "RAISE5MIN", + "RAISE60SEC", + "RAISE6SEC", + "LOWERREG", + "RAISEREG", + "SEMIDISPATCHCAP", + "AVAILABILITY", + ], + "TRADINGPRICE": [ + 
"SETTLEMENTDATE", + "REGIONID", + "RRP", + "RAISE6SECRRP", + "RAISE60SECRRP", + "RAISE5MINRRP", + "RAISEREGRRP", + "LOWER6SECRRP", + "LOWER60SECRRP", + "LOWER5MINRRP", + "LOWERREGRRP", + "PRICE_STATUS", + ], + "TRADINGREGIONSUM": [ + "SETTLEMENTDATE", + "REGIONID", + "TOTALDEMAND", + "AVAILABLEGENERATION", + "AVAILABLELOAD", + "DEMANDFORECAST", + "DISPATCHABLEGENERATION", + "DISPATCHABLELOAD", + "NETINTERCHANGE", + "EXCESSGENERATION", + "LOWER5MINLOCALDISPATCH", + "LOWER60SECLOCALDISPATCH", + "LOWER6SECLOCALDISPATCH", + "RAISE5MINLOCALDISPATCH", + "RAISE60SECLOCALDISPATCH", + "RAISE6SECLOCALDISPATCH", + "LOWERREGLOCALDISPATCH", + "RAISEREGLOCALDISPATCH", + "INITIALSUPPLY", + "CLEAREDSUPPLY", + "TOTALINTERMITTENTGENERATION", + "DEMAND_AND_NONSCHEDGEN", + "UIGF", + ], + "TRADINGINTERCONNECT": [ + "SETTLEMENTDATE", + "INTERCONNECTORID", + "MWFLOW", + "METEREDMWFLOW", + "MWLOSSES", + ], + "DUDETAILSUMMARY": [ + "DUID", + "START_DATE", + "END_DATE", + "DISPATCHTYPE", + "CONNECTIONPOINTID", + "REGIONID", + "STATIONID", + "PARTICIPANTID", + "LASTCHANGED", + "TRANSMISSIONLOSSFACTOR", + "STARTTYPE", + "DISTRIBUTIONLOSSFACTOR", + "SCHEDULE_TYPE", + "MAX_RAMP_RATE_UP", + "MAX_RAMP_RATE_DOWN", + ], + "DISPATCHCONSTRAINT": [ + "SETTLEMENTDATE", + "RUNNO", + "CONSTRAINTID", + "INTERVENTION", + "RHS", + "MARGINALVALUE", + "VIOLATIONDEGREE", + "LASTCHANGED", + "GENCONID_EFFECTIVEDATE", + "GENCONID_VERSIONNO", + "LHS", + "DISPATCHINTERVAL", + ], + "GENCONDATA": [ + "GENCONID", + "EFFECTIVEDATE", + "VERSIONNO", + "CONSTRAINTTYPE", + "CONSTRAINTVALUE", + "DESCRIPTION", + "GENERICCONSTRAINTWEIGHT", + "LASTCHANGED", + "DISPATCH", + "PREDISPATCH", + "STPASA", + "MTPASA", + "LIMITTYPE", + "REASON", + ], + "DISPATCH_UNIT_SCADA": ["SETTLEMENTDATE", "DUID", "SCADAVALUE"], + "DUDETAIL": [ + "EFFECTIVEDATE", + "DUID", + "VERSIONNO", + "CONNECTIONPOINTID", + "REGISTEREDCAPACITY", + "AGCCAPABILITY", + "DISPATCHTYPE", + "MAXCAPACITY", + "STARTTYPE", + "NORMALLYONFLAG", + "LASTCHANGED", + ], + "DISPATCHPRICE": [ + "SETTLEMENTDATE", + "REGIONID", + "INTERVENTION", + "RRP", + "RAISE6SECRRP", + "RAISE60SECRRP", + "RAISE5MINRRP", + "RAISEREGRRP", + "LOWER6SECRRP", + "LOWER60SECRRP", + "LOWER5MINRRP", + "LOWERREGRRP", + "PRICE_STATUS", + ], + "SPDREGIONCONSTRAINT": [ + "REGIONID", + "EFFECTIVEDATE", + "VERSIONNO", + "GENCONID", + "FACTOR", + "LASTCHANGED", + "BIDTYPE", + ], + "SPDCONNECTIONPOINTCONSTRAINT": [ + "CONNECTIONPOINTID", + "EFFECTIVEDATE", + "VERSIONNO", + "GENCONID", + "FACTOR", + "BIDTYPE", + "LASTCHANGED", + ], + "SPDINTERCONNECTORCONSTRAINT": [ + "INTERCONNECTORID", + "EFFECTIVEDATE", + "VERSIONNO", + "GENCONID", + "FACTOR", + "LASTCHANGED", + ], + "BIDPEROFFER_D": [ + "DUID", + "BANDAVAIL1", + "BANDAVAIL2", + "BANDAVAIL3", + "BANDAVAIL4", + "BANDAVAIL5", + "BANDAVAIL6", + "BANDAVAIL7", + "BANDAVAIL8", + "BANDAVAIL9", + "BANDAVAIL10", + "MAXAVAIL", + "BIDTYPE", + "SETTLEMENTDATE", + "ENABLEMENTMIN", + "ENABLEMENTMAX", + "LOWBREAKPOINT", + "HIGHBREAKPOINT", + "INTERVAL_DATETIME", + "OFFERDATE", + ], + "DISPATCHINTERCONNECTORRES": [ + "SETTLEMENTDATE", + "INTERCONNECTORID", + "DISPATCHINTERVAL", + "INTERVENTION", + "MWFLOW", + "METEREDMWFLOW", + "MWLOSSES", + ], + "INTERCONNECTOR": ["INTERCONNECTORID", "REGIONFROM", "REGIONTO", "LASTCHANGED"], + "INTERCONNECTORCONSTRAINT": [ + "INTERCONNECTORID", + "FROMREGIONLOSSSHARE", + "EFFECTIVEDATE", + "VERSIONNO", + "LOSSCONSTANT", + "LOSSFLOWCOEFFICIENT", + "ICTYPE", + ], + "MNSP_INTERCONNECTOR": [ + "INTERCONNECTORID", + "LINKID", + "FROMREGION", + "TOREGION", + 
"MAXCAPACITY", + "FROM_REGION_TLF", + "TO_REGION_TLF", + "LHSFACTOR", + "EFFECTIVEDATE", + "VERSIONNO", + ], + "BIDDAYOFFER_D": [ + "SETTLEMENTDATE", + "DUID", + "BIDTYPE", + "OFFERDATE", + "VERSIONNO", + "PRICEBAND1", + "PRICEBAND2", + "PRICEBAND3", + "PRICEBAND4", + "PRICEBAND5", + "PRICEBAND6", + "PRICEBAND7", + "PRICEBAND8", + "PRICEBAND9", + "PRICEBAND10", + "T1", + "T2", + "T3", + "T4", + "MINIMUMLOAD", + ], + "DISPATCHREGIONSUM": [ + "SETTLEMENTDATE", + "REGIONID", + "DISPATCHINTERVAL", + "INTERVENTION", + "TOTALDEMAND", + "AVAILABLEGENERATION", + "AVAILABLELOAD", + "DEMANDFORECAST", + "DISPATCHABLEGENERATION", + "DISPATCHABLELOAD", + "NETINTERCHANGE", + "EXCESSGENERATION", + "LOWER5MINLOCALDISPATCH", + "LOWER60SECLOCALDISPATCH", + "LOWER6SECLOCALDISPATCH", + "RAISE5MINLOCALDISPATCH", + "RAISE60SECLOCALDISPATCH", + "RAISE6SECLOCALDISPATCH", + "LOWERREGLOCALDISPATCH", + "RAISEREGLOCALDISPATCH", + "INITIALSUPPLY", + "CLEAREDSUPPLY", + "TOTALINTERMITTENTGENERATION", + "DEMAND_AND_NONSCHEDGEN", + "UIGF", + "SEMISCHEDULE_CLEAREDMW", + "SEMISCHEDULE_COMPLIANCEMW", + ], + "MNSP_PEROFFER": [ + "SETTLEMENTDATE", + "OFFERDATE", + "VERSIONNO", + "PARTICIPANTID", + "LINKID", + "PERIODID", + "BANDAVAIL1", + "BANDAVAIL2", + "BANDAVAIL3", + "BANDAVAIL4", + "BANDAVAIL5", + "BANDAVAIL6", + "BANDAVAIL7", + "BANDAVAIL8", + "BANDAVAIL9", + "BANDAVAIL10", + ], + "MNSP_DAYOFFER": [ + "SETTLEMENTDATE", + "OFFERDATE", + "VERSIONNO", + "PARTICIPANTID", + "LINKID", + "PERIODID", + "PRICEBAND1", + "PRICEBAND2", + "PRICEBAND3", + "PRICEBAND4", + "PRICEBAND5", + "PRICEBAND6", + "PRICEBAND7", + "PRICEBAND8", + "PRICEBAND9", + "PRICEBAND10", + ], + "LOSSMODEL": [ + "EFFECTIVEDATE", + "VERSIONNO", + "INTERCONNECTORID", + "LOSSSEGMENT", + "MWBREAKPOINT", + ], + "LOSSFACTORMODEL": [ + "EFFECTIVEDATE", + "VERSIONNO", + "INTERCONNECTORID", + "REGIONID", + "DEMANDCOEFFICIENT", + ], + "DISPATCHCASESOLUTION": ["SETTLEMENTDATE", "TOTALOBJECTIVE"], + "FCAS_4_SECOND": [ + "TIMESTAMP", + "ELEMENTNUMBER", + "VARIABLENUMBER", + "VALUE", + "VALUEQUALITY", + ], + "ELEMENTS_FCAS_4_SECOND": [ + "ELEMENTNUMBER", + "EMSNAME", + "ELEMENTTYPE", + "MMSDESCRIPTOR", + ], + "VARIABLES_FCAS_4_SECOND": ["VARIABLENUMBER", "VARIABLETYPE"], + "Generators and Scheduled Loads": [ + "Participant", + "Station Name", + "Region", + "Dispatch Type", + "Category", + "Classification", + "Fuel Source - Primary", + "Fuel Source - Descriptor", + "Technology Type - Primary", + "Technology Type - Descriptor", + "Aggregation", + "DUID", + "Reg Cap (MW)", + ], + "FCAS Providers": [ + "Participant", + "Station Name", + "Region", + "DUID", + "Bid Type", + "Max Cap (MW)", + "Min Enablement Level", + "Max Enablement Level", + "Max Lower Angle", + "Max Upper Angle", + ], + "FCAS_4s_SCADA_MAP": ["ELEMENTNUMBER", "MARKETNAME", "ERROR"], + "PLANTSTATS": [ + "Month", + "DUID", + "CapacityFactor", + "Volume", + "TRADING_VWAP", + "DISPATCH_VWAP", + "NodalPeakCapacityFactor", + "Nodal90thPercentileCapacityFactor", + ], + "MARKET_PRICE_THRESHOLDS": [ + "EFFECTIVEDATE", + "VERSIONNO", + "VOLL", + "MARKETPRICEFLOOR", + ], +} + +table_primary_keys = { + "DISPATCHCONSTRAINT": [ + "CONSTRAINTID", + "GENCONID_EFFECTIVEDATE", + "GENCONID_VERSIONNO", + "SETTLEMENTDATE", + "INTERVENTION", + ], + "DUDETAILSUMMARY": ["DUID", "START_DATE", "END_DATE"], + "STATION": ["STATIONID"], + "DUDETAIL": ["EFFECTIVEDATE", "DUID", "VERSIONNO"], + "SPDREGIONCONSTRAINT": [ + "EFFECTIVEDATE", + "GENCONID", + "REGIONID", + "VERSIONNO", + "BIDTYPE", + ], + "SPDCONNECTIONPOINTCONSTRAINT": [ + 
"EFFECTIVEDATE", + "GENCONID", + "CONNECTIONPOINTID", + "VERSIONNO", + "BIDTYPE", + ], + "SPDINTERCONNECTORCONSTRAINT": [ + "EFFECTIVEDATE", + "GENCONID", + "INTERCONNECTORID", + "VERSIONNO", + ], + "GENCONDATA": ["GENCONID", "EFFECTIVEDATE", "VERSIONNO"], + "MNSP_PEROFFER": [ + "SETTLEMENTDATE", + "OFFERDATE", + "VERSIONNO", + "PARTICIPANTID", + "LINKID", + "PERIODID", + ], + "MNSP_DAYOFFER": [ + "SETTLEMENTDATE", + "OFFERDATE", + "VERSIONNO", + "PARTICIPANTID", + "LINKID", + ], + "INTERCONNECTORCONSTRAINT": ["EFFECTIVEDATE", "INTERCONNECTORID", "VERSIONNO"], + "MNSP_INTERCONNECTOR": ["EFFECTIVEDATE", "LINKID", "VERSIONNO"], + "LOSSMODEL": ["EFFECTIVEDATE", "INTERCONNECTORID", "LOSSSEGMENT", "VERSIONNO"], + "LOSSFACTORMODEL": ["EFFECTIVEDATE", "INTERCONNECTORID", "REGIONID", "VERSIONNO"], + "BIDPEROFFER_D": [ + "BIDTYPE", + "DUID", + "OFFERDATE", + "INTERVAL_DATETIME", + "SETTLEMENTDATE", + ], + "DISPATCHINTERCONNECTORRES": [ + "DISPATCHINTERVAL", + "INTERCONNECTORID", + "INTERVENTION", + "SETTLEMENTDATE", + ], + "INTERCONNECTOR": ["INTERCONNECTORID"], + "DISPATCHPRICE": ["INTERVENTION", "REGIONID", "SETTLEMENTDATE"], + "BIDDAYOFFER_D": ["BIDTYPE", "DUID", "SETTLEMENTDATE"], + "DISPATCHREGIONSUM": [ + "DISPATCHINTERVAL", + "INTERVENTION", + "REGIONID", + "SETTLEMENTDATE", + ], + "DISPATCHLOAD": ["SETTLEMENTDATE", "INTERVENTION", "DUID"], + "DISPATCH_UNIT_SCADA": ["SETTLEMENTDATE", "DUID"], + "FCAS_4_SECOND": ["TIMESTAMP", "ELEMENTNUMBER", "VARIABLENUMBER"], + "ELEMENTS_FCAS_4_SECOND": ["ELEMENTNUMBER"], + "VARIABLES_FCAS_4_SECOND": ["VARIABLENUMBER", "VARIABLETYPE"], + "Generators and Scheduled Loads": ["DUID"], + "FCAS Providers": ["DUID", "Bid Type"], + "FCAS_4s_SCADA_MAP": ["ELEMENTNUMBER", "MARKETNAME"], + "TRADINGLOAD": ["SETTLEMENTDATE", "DUID"], + "TRADINGPRICE": ["SETTLEMENTDATE", "REGIONID"], + "TRADINGREGIONSUM": ["SETTLEMENTDATE", "REGIONID"], + "TRADINGINTERCONNECT": ["SETTLEMENTDATE", "INTERCONNECTORID"], + "PLANTSTATS": ["Month", "DUID"], + "MARKET_PRICE_THRESHOLDS": ["EFFECTIVEDATE", "VERSIONNO"], +} + +effective_date_group_col = { + "SPDREGIONCONSTRAINT": ["GENCONID"], + "SPDCONNECTIONPOINTCONSTRAINT": ["GENCONID"], + "SPDINTERCONNECTORCONSTRAINT": ["GENCONID"], + "GENCONDATA": ["GENCONID"], + "MNSP_INTERCONNECTOR": ["INTERCONNECTORID"], + "INTERCONNECTORCONSTRAINT": ["INTERCONNECTORID"], + "INTERCONNECTOR": ["INTERCONNECTORID"], + "LOSSMODEL": ["INTERCONNECTORID"], + "LOSSFACTORMODEL": ["INTERCONNECTORID"], + "DUDETAILSUMMARY": ["DUID"], + "MNSP_PEROFFER": ["LINKID"], + "MNSP_DAYOFFER": ["LINKID"], + "DUDETAIL": ["DUID"], + "MARKET_PRICE_THRESHOLDS": [], +} + +primary_date_columns = { + "DISPATCHLOAD": "SETTLEMENTDATE", + "TRADINGLOAD": "SETTLEMENTDATE", + "TRADINGPRICE": "SETTLEMENTDATE", + "TRADINGREGIONSUM": "SETTLEMENTDATE", + "TRADINGINTERCONNECT": "SETTLEMENTDATE", + "DUDETAILSUMMARY": "START_DATE", + "DUDETAIL": "EFFECTIVEDATE", + "DISPATCHCONSTRAINT": "SETTLEMENTDATE", + "GENCONDATA": "EFFECTIVEDATE", + "DISPATCH_UNIT_SCADA": "SETTLEMENTDATE", + "DISPATCHPRICE": "SETTLEMENTDATE", + "SPDREGIONCONSTRAINT": "EFFECTIVEDATE", + "SPDCONNECTIONPOINTCONSTRAINT": "EFFECTIVEDATE", + "SPDINTERCONNECTORCONSTRAINT": "EFFECTIVEDATE", + "BIDPEROFFER_D": "INTERVAL_DATETIME", + "DISPATCHINTERCONNECTORRES": "SETTLEMENTDATE", + "BIDDAYOFFER_D": "SETTLEMENTDATE", + "DISPATCHREGIONSUM": "SETTLEMENTDATE", + "FCAS_4_SECOND": "TIMESTAMP", + "ELEMENTS_FCAS_4_SECOND": None, + "VARIABLES_FCAS_4_SECOND": None, + "Generators and Scheduled Loads": None, + "FCAS Providers": None, + 
"MNSP_INTERCONNECTOR": "EFFECTIVEDATE", + "MNSP_PEROFFER": "SETTLEMENTDATE", + "INTERCONNECTOR": "LASTCHANGED", + "INTERCONNECTORCONSTRAINT": "EFFECTIVEDATE", + "MNSP_DAYOFFER": "SETTLEMENTDATE", + "LOSSMODEL": "EFFECTIVEDATE", + "LOSSFACTORMODEL": "EFFECTIVEDATE", + "FCAS_4s_SCADA_MAP": None, + "MARKET_PRICE_THRESHOLDS": "EFFECTIVEDATE", +} + +reg_exemption_list_tabs = { + "Generators and Scheduled Loads": "Generators and Scheduled Loads", + "FCAS Providers": "Ancillary Services", +} + +months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] + +nem_data_model_start_time = "2009/07/01 00:00:00" # GUI settings. @@ -366,7 +778,7 @@ internal_filter_row = 2 delete_button_internal_row = 5 last_column = 100 -join_type = ['inner', 'left', 'right'] +join_type = ["inner", "left", "right"] # Testing settings -raw_data_cache = 'D:/nemosis_test_cache' +raw_data_cache = "D:/nemosis_test_cache" diff --git a/nemosis/gui.py b/nemosis/gui.py index 86f4781..460dc68 100644 --- a/nemosis/gui.py +++ b/nemosis/gui.py @@ -19,19 +19,29 @@ class VerticalScrollFrame(ttk.Frame): """ def __init__(self, parent, *args, **options): - mainborderwidth = options.pop('mainborderwidth', 0) - interiorborderwidth = options.pop('interiorborderwidth', 0) - mainrelief = options.pop('mainrelief', 'flat') - interiorrelief = options.pop('interiorrelief', 'flat') - ttk.Frame.__init__(self, parent, style='main.TFrame', borderwidth=mainborderwidth, relief=mainrelief) + mainborderwidth = options.pop("mainborderwidth", 0) + interiorborderwidth = options.pop("interiorborderwidth", 0) + mainrelief = options.pop("mainrelief", "flat") + interiorrelief = options.pop("interiorrelief", "flat") + ttk.Frame.__init__( + self, + parent, + style="main.TFrame", + borderwidth=mainborderwidth, + relief=mainrelief, + ) self.__createWidgets(interiorborderwidth, interiorrelief) - self.canvas.bind('', self.update_scrollbar) + self.canvas.bind("", self.update_scrollbar) def __createWidgets(self, interiorborderwidth, interiorrelief): - self.vscrollbar = ttk.Scrollbar(self, orient='vertical', style='canvas.Vertical.TScrollbar') - self.vscrollbar.pack(side='right', fill='y', expand='false') - self.canvas = tk.Canvas(self, yscrollcommand=self.vscrollbar.set, highlightthickness=0) - self.canvas.pack(side='left', fill='both', expand='true') + self.vscrollbar = ttk.Scrollbar( + self, orient="vertical", style="canvas.Vertical.TScrollbar" + ) + self.vscrollbar.pack(side="right", fill="y", expand="false") + self.canvas = tk.Canvas( + self, yscrollcommand=self.vscrollbar.set, highlightthickness=0 + ) + self.canvas.pack(side="left", fill="both", expand="true") self.vscrollbar.config(command=self.canvas.yview) # reset the view @@ -39,11 +49,18 @@ def __createWidgets(self, interiorborderwidth, interiorrelief): self.canvas.yview_moveto(0) # create a frame inside the canvas which will be scrolled with it - self.interior = ttk.Frame(self.canvas, borderwidth=interiorborderwidth, relief=interiorrelief) - self.interior_id = self.canvas.create_window(0, 0, window=self.interior, anchor='nw',) + self.interior = ttk.Frame( + self.canvas, borderwidth=interiorborderwidth, relief=interiorrelief + ) + self.interior_id = self.canvas.create_window( + 0, + 0, + window=self.interior, + anchor="nw", + ) def update_scrollbar(self, event): - '''Configure the interior frame size and the canvas scrollregion''' + """Configure the interior frame size and the canvas scrollregion""" # Force the update of .winfo_width() and winfo_height() self.canvas.update_idletasks() @@ 
-58,22 +75,25 @@ def update_scrollbar(self, event): # Set interior frame height and canvas scrollregion if canvasHeight > interiorReqHeight: self.canvas.itemconfigure(self.interior_id, height=canvasHeight) - self.canvas.config(scrollregion="0 0 {0} {1}". - format(canvasWidth, canvasHeight)) + self.canvas.config( + scrollregion="0 0 {0} {1}".format(canvasWidth, canvasHeight) + ) else: self.canvas.itemconfigure(self.interior_id, height=interiorReqHeight) - self.canvas.config(scrollregion="0 0 {0} {1}". - format(canvasWidth, interiorReqHeight)) + self.canvas.config( + scrollregion="0 0 {0} {1}".format(canvasWidth, interiorReqHeight) + ) class App(ttk.Frame): - def __init__(self, parent, *args, **kwargs): - ttk.Frame.__init__(self, parent=None, style='App.TFrame', borderwidth=0, width=890, height=590) + ttk.Frame.__init__( + self, parent=None, style="App.TFrame", borderwidth=0, width=890, height=590 + ) self.parent = parent - self.parent.title('NEMOSIS') - self.parent.geometry('1000x600') + self.parent.title("NEMOSIS") + self.parent.geometry("1000x600") self.setStyle() self.createWidgets() self.rowconfigure(0, weight=1) @@ -81,13 +101,19 @@ def __init__(self, parent, *args, **kwargs): def setStyle(self): style = ttk.Style() - style.configure('App.TFrame', background='pink') + style.configure("App.TFrame", background="pink") def createWidgets(self): - self.frame = VerticalScrollFrame(self, arrowcolor='white', mainborderwidth=10, interiorborderwidth=10, - mainrelief='raised', interiorrelief='sunken') - - self.frame.grid(row=0, column=0, sticky='nsew') + self.frame = VerticalScrollFrame( + self, + arrowcolor="white", + mainborderwidth=10, + interiorborderwidth=10, + mainrelief="raised", + interiorrelief="sunken", + ) + + self.frame.grid(row=0, column=0, sticky="nsew") self.rows = [] self.add_header() self.add_plus() @@ -96,53 +122,65 @@ def createWidgets(self): def add_header(self): # Create the default starting widgets that appear at the top of the gui. self.header = ttk.Frame(self.frame.interior) - self.header.grid(row=0, column=0, columnspan=50, sticky='w') + self.header.grid(row=0, column=0, columnspan=50, sticky="w") self.header.update() # Label for save location entry box. - self.save_label = tk.Label(self.header, text=' Output data to:', anchor='w') + self.save_label = tk.Label(self.header, text=" Output data to:", anchor="w") self.save_label.grid(row=0, column=1) self.save_label.config(width=15) self.save_label.update() # Text entry that specifies the location to save query results. self.save_location = ttk.Entry(self.header) - self.save_location.grid(row=0, column=2, columnspan=defaults.save_field_column_span) + self.save_location.grid( + row=0, column=2, columnspan=defaults.save_field_column_span + ) self.save_location.config(width=50) self.save_location.update() # Button set save location. - self.output_location = ttk.Button(self.header, text='...', command=self.set_save_location) + self.output_location = ttk.Button( + self.header, text="...", command=self.set_save_location + ) self.output_location.grid(row=0, column=5) self.output_location.config(width=4) self.output_location.update() # Label for the raw data location entry box. - self.raw_data_label = ttk.Label(self.header, text='Raw data cache:', anchor='w') + self.raw_data_label = ttk.Label(self.header, text="Raw data cache:", anchor="w") self.raw_data_label.grid(row=1, column=1) self.raw_data_label.config(width=15) self.raw_data_label.update() # Text entry that specifies the location of the raw aemo data cache. 
self.raw_data_location = ttk.Entry(self.header) - self.raw_data_location.grid(row=1, column=2, columnspan=defaults.save_field_column_span) + self.raw_data_location.grid( + row=1, column=2, columnspan=defaults.save_field_column_span + ) self.raw_data_location.config(width=50) self.raw_data_location.update() # Button set save location. - self.output_location = ttk.Button(self.header, text='...', command=self.set_cache_location) + self.output_location = ttk.Button( + self.header, text="...", command=self.set_cache_location + ) self.output_location.grid(row=1, column=5) self.output_location.config(width=4) self.output_location.update() # Button to save current state of the gui. - self.save = ttk.Button(self.header, text='Save session', command=self.save_session) + self.save = ttk.Button( + self.header, text="Save session", command=self.save_session + ) self.save.grid(row=0, column=6, padx=20) self.save.config(width=20) self.save.update() # Button to load a previous state of the gui. - self.load = ttk.Button(self.header, text='Load session', command=self.load_session) + self.load = ttk.Button( + self.header, text="Load session", command=self.load_session + ) self.load.grid(row=1, column=6, padx=20) self.load.config(width=20) self.load.update() @@ -150,53 +188,88 @@ def add_header(self): def add_plus(self): # Add the button that added extra query and merge rows to the gui. self.row_adder = ttk.Frame(self.frame.interior) - self.row_adder.grid(row=defaults.query_row_offset + len(self.rows) * defaults.row_height - + defaults.plus_internal_row, column=0, padx=defaults.standard_x_pad, sticky='w', - columnspan=50, pady=10) + self.row_adder.grid( + row=defaults.query_row_offset + + len(self.rows) * defaults.row_height + + defaults.plus_internal_row, + column=0, + padx=defaults.standard_x_pad, + sticky="w", + columnspan=50, + pady=10, + ) self.row_adder.update() # Button to add extra queries. - self.plus_AEMO_query = ttk.Button(self.row_adder, text=u"\u2795" + ' AEMO table', - command=self.add_AEMO_query) + self.plus_AEMO_query = ttk.Button( + self.row_adder, text="\u2795" + " AEMO table", command=self.add_AEMO_query + ) self.plus_AEMO_query.grid(row=0, column=0) self.plus_AEMO_query.update() # Button to add extra queries. - self.plus_custom_query = ttk.Button(self.row_adder, text=u"\u2795" + ' Custom table', - command=self.add_Custom_query) + self.plus_custom_query = ttk.Button( + self.row_adder, + text="\u2795" + " Custom table", + command=self.add_Custom_query, + ) self.plus_custom_query.grid(row=0, column=1) self.plus_custom_query.update() # Button to add extra merge. - self.plus_merge = ttk.Button(self.row_adder, text=u"\u2795" + ' Merge', command=self.add_merge) + self.plus_merge = ttk.Button( + self.row_adder, text="\u2795" + " Merge", command=self.add_merge + ) self.plus_merge.grid(row=0, column=3) self.plus_merge.update() # Button to add extra merge. - self.plus_merge_as_of = ttk.Button(self.row_adder, text=u"\u2795" + ' Merge on most recent ', - command=self.add_merge_as_of) + self.plus_merge_as_of = ttk.Button( + self.row_adder, + text="\u2795" + " Merge on most recent ", + command=self.add_merge_as_of, + ) self.plus_merge_as_of.grid(row=0, column=4) self.plus_merge_as_of.update() # Button to add extra filter version no. - self.plus_filter_version_no = ttk.Button(self.row_adder, text=u"\u2795" + ' Highest version No. ', - command=self.add_filter_version_no) + self.plus_filter_version_no = ttk.Button( + self.row_adder, + text="\u2795" + " Highest version No. 
", + command=self.add_filter_version_no, + ) self.plus_filter_version_no.grid(row=0, column=5) self.plus_filter_version_no.update() # Button to run the app. - self.run = ttk.Button(self.row_adder, text=u"\u25B6" + ' Run queries ', command=self.run_queries) + self.run = ttk.Button( + self.row_adder, text="\u25B6" + " Run queries ", command=self.run_queries + ) self.run.grid(row=0, column=6) self.run.update() def add_AEMO_query(self): # Function to add extra query. - self.rows.append(rows.Query(self.frame.interior, len(self.rows), self, table_options=defaults.display_as_AMEO)) + self.rows.append( + rows.Query( + self.frame.interior, + len(self.rows), + self, + table_options=defaults.display_as_AMEO, + ) + ) self.replace_plus() def add_Custom_query(self): # Function to add extra query. - self.rows.append(rows.Query(self.frame.interior, len(self.rows), self, table_options=defaults.display_as_Custom)) + self.rows.append( + rows.Query( + self.frame.interior, + len(self.rows), + self, + table_options=defaults.display_as_Custom, + ) + ) self.replace_plus() def add_merge(self): @@ -211,7 +284,9 @@ def add_merge_as_of(self): def add_filter_version_no(self): # Function to add extra merge. - self.rows.append(rows.FilterVersionNo(self.frame.interior, len(self.rows), self)) + self.rows.append( + rows.FilterVersionNo(self.frame.interior, len(self.rows), self) + ) self.replace_plus() def delete_row(self, row_number): @@ -235,22 +310,28 @@ def run_queries(self): Path(raw_data_location).mkdir(parents=False, exist_ok=True) for row in self.rows: save_name = row.name.get() - if type(row).__name__ == 'Query': + if type(row).__name__ == "Query": results[save_name] = self.run_query(row, raw_data_location) - elif type(row).__name__ == 'Merge': + elif type(row).__name__ == "Merge": results[save_name] = self.run_merge(row, results) - elif type(row).__name__ == 'Merge_as_of': + elif type(row).__name__ == "Merge_as_of": results[save_name] = self.run_merge_as_of(row, results) - elif type(row).__name__ == 'FilterVersionNo': + elif type(row).__name__ == "FilterVersionNo": results[save_name] = self.run_filter_version_no(row, results) - results[save_name].to_csv(Path(save_location) / (save_name + '.csv'), - index=False, date_format='%Y/%m/%d %H:%M:%S') - messagebox.showinfo('Finished', 'Your query has finished!') + results[save_name].to_csv( + Path(save_location) / (save_name + ".csv"), + index=False, + date_format="%Y/%m/%d %H:%M:%S", + ) + messagebox.showinfo("Finished", "Your query has finished!") except Exception: - traceback.print_exc() - messagebox.showerror('Error', 'Your query executed with an error. ' - '\nReview the console for detailed information') + traceback.print_exc() + messagebox.showerror( + "Error", + "Your query executed with an error. " + "\nReview the console for detailed information", + ) return @@ -260,24 +341,38 @@ def run_query(self, row, raw_data_location): # Find the table name from the row. table = row.table_options[row.tables.curselection()[0]] # Find the select columns. - columns = tuple([row.col_list.get(0, tk.END)[index] for index in row.col_list.curselection()]) + columns = tuple( + [ + row.col_list.get(0, tk.END)[index] + for index in row.col_list.curselection() + ] + ) # Find the columns that could be filtered on. potential_filter_cols = list(row.filter_label.keys()) filter_cols = () filter_values = () # Build a list of filter columns and filter values if the filters list have any values in them. 
for column in potential_filter_cols: - selection = [row.filter_list[column].get(0, tk.END)[index] for index in - row.filter_list[column].curselection()] + selection = [ + row.filter_list[column].get(0, tk.END)[index] + for index in row.filter_list[column].curselection() + ] if len(selection) > 0: - filter_values = filter_values + (selection,) - filter_cols = filter_cols + (column,) + filter_values = filter_values + (selection,) + filter_cols = filter_cols + (column,) start_time = row.start_time.get() end_time = row.end_time.get() # Call the query using the tables predefined wraper function. - result = data_fetch_methods._method_map[table](start_time, end_time, table, raw_data_location, columns, - filter_cols, filter_values) + result = data_fetch_methods._method_map[table]( + start_time, + end_time, + table, + raw_data_location, + columns, + filter_cols, + filter_values, + ) return result def run_merge(self, row, results): @@ -287,17 +382,25 @@ def run_merge(self, row, results): # Get the result to put on the left of the merge. left_table = results[left_table_name] # Get the keys to use on the right result. - left_keys = [row.left_key_list.get(0, tk.END)[index] for index in row.left_key_list.curselection()] + left_keys = [ + row.left_key_list.get(0, tk.END)[index] + for index in row.left_key_list.curselection() + ] # Get the name of the result to put on the left of the merge. right_table_name = row.right_table.get() # Get the result to put on the right of the merge. right_table = results[right_table_name] # Get the keys to use on the right result. - right_keys = [row.right_key_list.get(0, tk.END)[index] for index in row.right_key_list.curselection()] + right_keys = [ + row.right_key_list.get(0, tk.END)[index] + for index in row.right_key_list.curselection() + ] # Get the join type to use. join_type = defaults.join_type[row.join_types.curselection()[0]] # Merge the results. - result = pd.merge(left_table, right_table, join_type, left_on=left_keys, right_on=right_keys) + result = pd.merge( + left_table, right_table, join_type, left_on=left_keys, right_on=right_keys + ) return result def run_merge_as_of(self, row, results): @@ -307,17 +410,29 @@ def run_merge_as_of(self, row, results): # Get the result to put on the left of the merge. left_table = results[left_table_name] # Get the keys to use on the right result. - left_keys = [row.left_key_list.get(0, tk.END)[index] for index in row.left_key_list.curselection()] + left_keys = [ + row.left_key_list.get(0, tk.END)[index] + for index in row.left_key_list.curselection() + ] # Get the keys to use on the right result. - left_time_keys = [row.left_time_key_list.get(0, tk.END)[index] for index in row.left_time_key_list.curselection()] + left_time_keys = [ + row.left_time_key_list.get(0, tk.END)[index] + for index in row.left_time_key_list.curselection() + ] # Get the name of the result to put on the left of the merge. right_table_name = row.right_table.get() # Get the result to put on the right of the merge. right_table = results[right_table_name] # Get the keys to use on the right result. - right_keys = [row.right_key_list.get(0, tk.END)[index] for index in row.right_key_list.curselection()] + right_keys = [ + row.right_key_list.get(0, tk.END)[index] + for index in row.right_key_list.curselection() + ] # Get the keys to use on the right result. 
- right_time_keys = [row.right_time_key_list.get(0, tk.END)[index] for index in row.right_time_key_list.curselection()] + right_time_keys = [ + row.right_time_key_list.get(0, tk.END)[index] + for index in row.right_time_key_list.curselection() + ] # Get the join type to use. join_type = defaults.join_type[row.join_types.curselection()[0]] # Merge the results. @@ -327,8 +442,14 @@ def run_merge_as_of(self, row, results): right_table[right_time_key] = pd.to_datetime(right_table[right_time_key]) left_table = left_table.sort_values(left_time_key) right_table = right_table.sort_values(right_time_key) - result = pd.merge_asof(left_table, right_table, left_on=left_time_key, right_on=right_time_key, - left_by=left_keys, right_by=right_keys) + result = pd.merge_asof( + left_table, + right_table, + left_on=left_time_key, + right_on=right_time_key, + left_by=left_keys, + right_by=right_keys, + ) return result def run_filter_version_no(self, row, results): @@ -336,9 +457,16 @@ def run_filter_version_no(self, row, results): input_table = results[input_name] group_cols = [] for key_set in defaults.table_primary_keys.values(): - group_cols += [col for col in key_set - if ((col in input_table.columns) & (col != 'VERSIONNO') & (col not in group_cols))] - input_table = input_table.sort_values('VERSIONNO') + group_cols += [ + col + for col in key_set + if ( + (col in input_table.columns) + & (col != "VERSIONNO") + & (col not in group_cols) + ) + ] + input_table = input_table.sort_values("VERSIONNO") result = input_table.groupby(by=group_cols, as_index=False).last() return result @@ -355,18 +483,18 @@ def set_cache_location(self): def save_session(self): # Save the current state of the gui and pickle the result. session_state = {} - session_state['raw_data_location'] = self.raw_data_location.get() - session_state['save_location'] = self.save_location.get() - session_state['rows'] = [] + session_state["raw_data_location"] = self.raw_data_location.get() + session_state["save_location"] = self.save_location.get() + session_state["rows"] = [] for row in self.rows: - session_state['rows'].append(row.state()) + session_state["rows"].append(row.state()) save_name = filedialog.asksaveasfilename() # If a user provides a save name with .pkl already in it do not add another .pkl - if save_name[-4:] != '.pkl': - save_name = save_name + '.pkl' + if save_name[-4:] != ".pkl": + save_name = save_name + ".pkl" - with open(save_name, 'wb') as f: + with open(save_name, "wb") as f: pickle.dump(session_state, f, pickle.HIGHEST_PROTOCOL) def load_session(self, session=None): @@ -376,16 +504,16 @@ def load_session(self, session=None): # without a session argument then a popup asks the user to provide one. if session is None: save_name = filedialog.askopenfilename() - with open(save_name, 'rb') as f: + with open(save_name, "rb") as f: session_state = pickle.load(f) else: session_state = session # Load in raw data and save locations. - self.raw_data_location.delete(0, 'end') - self.raw_data_location.insert(0, session_state['raw_data_location']) - self.save_location.delete(0, 'end') - self.save_location.insert(0, session_state['save_location']) + self.raw_data_location.delete(0, "end") + self.raw_data_location.insert(0, session_state["raw_data_location"]) + self.save_location.delete(0, "end") + self.save_location.insert(0, session_state["save_location"]) # Empty any existing rows. for row in self.rows: @@ -394,23 +522,39 @@ def load_session(self, session=None): self.rows = [] # Create rows based on the saved session. 
- for row in session_state['rows']: + for row in session_state["rows"]: # Create the right type of row. - if row['type'] == 'query': - if len(row['table']) != 0: - if row['table'] in defaults.display_as_AMEO: - self.rows.append(rows.Query(self.frame.interior, len(self.rows), - self, defaults.display_as_AMEO)) - if row['table'] in defaults.display_as_Custom: - self.rows.append(rows.Query(self.frame.interior, len(self.rows), - self, defaults.display_as_Custom)) - - elif row['type'] == 'merge': + if row["type"] == "query": + if len(row["table"]) != 0: + if row["table"] in defaults.display_as_AMEO: + self.rows.append( + rows.Query( + self.frame.interior, + len(self.rows), + self, + defaults.display_as_AMEO, + ) + ) + if row["table"] in defaults.display_as_Custom: + self.rows.append( + rows.Query( + self.frame.interior, + len(self.rows), + self, + defaults.display_as_Custom, + ) + ) + + elif row["type"] == "merge": self.rows.append(rows.Merge(self.frame.interior, len(self.rows), self)) - elif row['type'] == 'merge_as_of': - self.rows.append(rows.Merge_as_of(self.frame.interior, len(self.rows), self)) - elif row['type'] == 'filter_version_no': - self.rows.append(rows.FilterVersionNo(self.frame.interior, len(self.rows), self)) + elif row["type"] == "merge_as_of": + self.rows.append( + rows.Merge_as_of(self.frame.interior, len(self.rows), self) + ) + elif row["type"] == "filter_version_no": + self.rows.append( + rows.FilterVersionNo(self.frame.interior, len(self.rows), self) + ) # Load the row state. self.rows[-1].load_state(row) @@ -419,13 +563,17 @@ def load_session(self, session=None): def replace_plus(self): # Move the plus buttons to below all existing rows. - self.row_adder.grid(row=defaults.query_row_offset + (len(self.rows)) * defaults.row_height - + defaults.plus_internal_row) + self.row_adder.grid( + row=defaults.query_row_offset + + (len(self.rows)) * defaults.row_height + + defaults.plus_internal_row + ) self.row_adder.update() self.frame.update_scrollbar(None) + def resource_path(relative_path): - """ Get absolute path to resource, works for dev and for PyInstaller """ + """Get absolute path to resource, works for dev and for PyInstaller""" try: # PyInstaller creates a temp folder and stores path in _MEIPASS base_path = sys._MEIPASS @@ -435,23 +583,28 @@ def resource_path(relative_path): return os.path.join(base_path, relative_path) -if __name__ == '__main__': +if __name__ == "__main__": import tempfile - ICON = (b'\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x08\x00h\x05\x00\x00' - b'\x16\x00\x00\x00(\x00\x00\x00\x10\x00\x00\x00 \x00\x00\x00\x01\x00' - b'\x08\x00\x00\x00\x00\x00@\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - b'\x00\x01\x00\x00\x00\x01') + b'\x00' * 1282 + b'\xff' * 64 + ICON = ( + ( + b"\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x08\x00h\x05\x00\x00" + b"\x16\x00\x00\x00(\x00\x00\x00\x10\x00\x00\x00 \x00\x00\x00\x01\x00" + b"\x08\x00\x00\x00\x00\x00@\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x00\x01\x00\x00\x00\x01" + ) + + b"\x00" * 1282 + + b"\xff" * 64 + ) _, ICON_PATH = tempfile.mkstemp() - with open(ICON_PATH, 'wb') as icon_file: + with open(ICON_PATH, "wb") as icon_file: icon_file.write(ICON) - root = tk.Tk() app = App(root) - app.grid(row=0, column=0, sticky='nsew') + app.grid(row=0, column=0, sticky="nsew") root.rowconfigure(0, weight=1) root.columnconfigure(0, weight=1) - root.iconbitmap(resource_path('favicon.ico')) - root.mainloop() \ No newline at end of file + root.iconbitmap(resource_path("favicon.ico")) + root.mainloop() diff --git 
a/nemosis/hook-pandas.py b/nemosis/hook-pandas.py
index 652a52f..4df7a5f 100644
--- a/nemosis/hook-pandas.py
+++ b/nemosis/hook-pandas.py
@@ -1 +1 @@
-hiddenimports = ['pandas._libs.tslibs.timedeltas']
\ No newline at end of file
+hiddenimports = ["pandas._libs.tslibs.timedeltas"]
diff --git a/nemosis/processing_info_maps.py b/nemosis/processing_info_maps.py
index ec7cfc0..1046e1c 100644
--- a/nemosis/processing_info_maps.py
+++ b/nemosis/processing_info_maps.py
@@ -1,188 +1,236 @@
 import os
-from nemosis import filters, downloader, query_wrapers, write_file_names, date_generators
+from nemosis import (
+    filters,
+    downloader,
+    query_wrapers,
+    write_file_names,
+    date_generators,
+)
-setup = {'DISPATCHLOAD': None,
-         'TRADINGLOAD': None,
-         'TRADINGPRICE': None,
-         'TRADINGREGIONSUM': None,
-         'TRADINGINTERCONNECT': None,
-         'DISPATCHPRICE': None,
-         'DISPATCH_UNIT_SCADA': None,
-         'DISPATCHCONSTRAINT': None,
-         'DUDETAILSUMMARY': None,
-         'DUDETAIL': None,
-         'GENCONDATA': None,
-         'SPDREGIONCONSTRAINT': None,
-         'SPDCONNECTIONPOINTCONSTRAINT': None,
-         'SPDINTERCONNECTORCONSTRAINT': None,
-         'FCAS_4_SECOND': None,
-         'ELEMENTS_FCAS_4_SECOND': None,
-         'VARIABLES_FCAS_4_SECOND': None,
-         'Generators and Scheduled Loads': None,
-         'BIDDAYOFFER_D': query_wrapers.dispatch_date_setup,
-         'BIDPEROFFER_D': None,
-         'FCAS_4s_SCADA_MAP': None,
-         'DISPATCHINTERCONNECTORRES': None,
-         'DISPATCHREGIONSUM': None,
-         'LOSSMODEL': None,
-         'LOSSFACTORMODEL': None,
-         'MNSP_DAYOFFER': query_wrapers.dispatch_date_setup,
-         'MNSP_PEROFFER': query_wrapers.dispatch_half_hour_setup,
-         'MNSP_INTERCONNECTOR': None,
-         'INTERCONNECTOR': None,
-         'INTERCONNECTORCONSTRAINT': None,
-         'MARKET_PRICE_THRESHOLDS': None}
+setup = {
+    "DISPATCHLOAD": None,
+    "TRADINGLOAD": None,
+    "TRADINGPRICE": None,
+    "TRADINGREGIONSUM": None,
+    "TRADINGINTERCONNECT": None,
+    "DISPATCHPRICE": None,
+    "DISPATCH_UNIT_SCADA": None,
+    "DISPATCHCONSTRAINT": None,
+    "DUDETAILSUMMARY": None,
+    "DUDETAIL": None,
+    "GENCONDATA": None,
+    "SPDREGIONCONSTRAINT": None,
+    "SPDCONNECTIONPOINTCONSTRAINT": None,
+    "SPDINTERCONNECTORCONSTRAINT": None,
+    "FCAS_4_SECOND": None,
+    "ELEMENTS_FCAS_4_SECOND": None,
+    "VARIABLES_FCAS_4_SECOND": None,
+    "Generators and Scheduled Loads": None,
+    "BIDDAYOFFER_D": query_wrapers.dispatch_date_setup,
+    "BIDPEROFFER_D": None,
+    "FCAS_4s_SCADA_MAP": None,
+    "DISPATCHINTERCONNECTORRES": None,
+    "DISPATCHREGIONSUM": None,
+    "LOSSMODEL": None,
+    "LOSSFACTORMODEL": None,
+    "MNSP_DAYOFFER": query_wrapers.dispatch_date_setup,
+    "MNSP_PEROFFER": query_wrapers.dispatch_half_hour_setup,
+    "MNSP_INTERCONNECTOR": None,
+    "INTERCONNECTOR": None,
+    "INTERCONNECTORCONSTRAINT": None,
+    "MARKET_PRICE_THRESHOLDS": None,
+}
-search_type = {'DISPATCHLOAD': 'start_to_end',
-               'TRADINGLOAD': 'start_to_end',
-               'TRADINGPRICE': 'start_to_end',
-               'TRADINGREGIONSUM': 'start_to_end',
-               'TRADINGINTERCONNECT': 'start_to_end',
-               'DISPATCHPRICE': 'start_to_end',
-               'DISPATCH_UNIT_SCADA': 'start_to_end',
-               'DISPATCHCONSTRAINT': 'start_to_end',
-               'DUDETAILSUMMARY': 'end',
-               'DUDETAIL': 'all',
-               'GENCONDATA': 'all',
-               'SPDREGIONCONSTRAINT': 'all',
-               'SPDCONNECTIONPOINTCONSTRAINT': 'all',
-               'SPDINTERCONNECTORCONSTRAINT': 'all',
-               'FCAS_4_SECOND': 'start_to_end',
-               'ELEMENTS_FCAS_4_SECOND': None,
-               'VARIABLES_FCAS_4_SECOND': None,
-               'Generators and Scheduled Loads': None,
-               'BIDDAYOFFER_D': 'start_to_end',
-               'BIDPEROFFER_D': 'start_to_end',
-               'FCAS_4s_SCADA_MAP': None,
-               'DISPATCHINTERCONNECTORRES': 'start_to_end',
-               'DISPATCHREGIONSUM': 'start_to_end',
-               'LOSSMODEL': 'all',
-
'LOSSFACTORMODEL': 'all', - 'MNSP_DAYOFFER': 'start_to_end', - 'MNSP_PEROFFER': 'start_to_end', - 'MNSP_INTERCONNECTOR': 'all', - 'INTERCONNECTOR': 'all', - 'INTERCONNECTORCONSTRAINT': 'all', - 'MARKET_PRICE_THRESHOLDS': 'all'} +search_type = { + "DISPATCHLOAD": "start_to_end", + "TRADINGLOAD": "start_to_end", + "TRADINGPRICE": "start_to_end", + "TRADINGREGIONSUM": "start_to_end", + "TRADINGINTERCONNECT": "start_to_end", + "DISPATCHPRICE": "start_to_end", + "DISPATCH_UNIT_SCADA": "start_to_end", + "DISPATCHCONSTRAINT": "start_to_end", + "DUDETAILSUMMARY": "end", + "DUDETAIL": "all", + "GENCONDATA": "all", + "SPDREGIONCONSTRAINT": "all", + "SPDCONNECTIONPOINTCONSTRAINT": "all", + "SPDINTERCONNECTORCONSTRAINT": "all", + "FCAS_4_SECOND": "start_to_end", + "ELEMENTS_FCAS_4_SECOND": None, + "VARIABLES_FCAS_4_SECOND": None, + "Generators and Scheduled Loads": None, + "BIDDAYOFFER_D": "start_to_end", + "BIDPEROFFER_D": "start_to_end", + "FCAS_4s_SCADA_MAP": None, + "DISPATCHINTERCONNECTORRES": "start_to_end", + "DISPATCHREGIONSUM": "start_to_end", + "LOSSMODEL": "all", + "LOSSFACTORMODEL": "all", + "MNSP_DAYOFFER": "start_to_end", + "MNSP_PEROFFER": "start_to_end", + "MNSP_INTERCONNECTOR": "all", + "INTERCONNECTOR": "all", + "INTERCONNECTORCONSTRAINT": "all", + "MARKET_PRICE_THRESHOLDS": "all", +} -date_cols = {'DISPATCHLOAD': ['SETTLEMENTDATE'], - 'TRADINGLOAD': ['SETTLEMENTDATE'], - 'TRADINGPRICE': ['SETTLEMENTDATE'], - 'TRADINGREGIONSUM': ['SETTLEMENTDATE'], - 'TRADINGINTERCONNECT': ['SETTLEMENTDATE'], - 'DISPATCHPRICE': ['SETTLEMENTDATE'], - 'DISPATCH_UNIT_SCADA': ['SETTLEMENTDATE'], - 'DISPATCHCONSTRAINT': ['SETTLEMENTDATE'], - 'DUDETAILSUMMARY': ['START_DATE', 'END_DATE'], - 'DUDETAIL': ['EFFECTIVEDATE'], - 'GENCONDATA': ['EFFECTIVEDATE'], - 'SPDREGIONCONSTRAINT': ['EFFECTIVEDATE'], - 'SPDCONNECTIONPOINTCONSTRAINT': ['EFFECTIVEDATE'], - 'SPDINTERCONNECTORCONSTRAINT': ['EFFECTIVEDATE'], - 'FCAS_4_SECOND': ['TIMESTAMP'], - 'ELEMENTS_FCAS_4_SECOND': None, - 'VARIABLES_FCAS_4_SECOND': None, - 'Generators and Scheduled Loads': None, - 'BIDDAYOFFER_D': ['SETTLEMENTDATE'], - 'BIDPEROFFER_D': ['INTERVAL_DATETIME'], - 'FCAS_4s_SCADA_MAP': None, - 'DISPATCHINTERCONNECTORRES': ['SETTLEMENTDATE'], - 'DISPATCHREGIONSUM': ['SETTLEMENTDATE'], - 'LOSSMODEL': ['EFFECTIVEDATE'], - 'LOSSFACTORMODEL': ['EFFECTIVEDATE'], - 'MNSP_DAYOFFER': ['SETTLEMENTDATE'], - 'MNSP_PEROFFER': ['SETTLEMENTDATE', 'PERIODID'], - 'MNSP_INTERCONNECTOR': ['EFFECTIVEDATE'], - 'INTERCONNECTOR': ['LASTCHANGED'], - 'INTERCONNECTORCONSTRAINT': ['EFFECTIVEDATE'], - 'MARKET_PRICE_THRESHOLDS': ['EFFECTIVEDATE']} +date_cols = { + "DISPATCHLOAD": ["SETTLEMENTDATE"], + "TRADINGLOAD": ["SETTLEMENTDATE"], + "TRADINGPRICE": ["SETTLEMENTDATE"], + "TRADINGREGIONSUM": ["SETTLEMENTDATE"], + "TRADINGINTERCONNECT": ["SETTLEMENTDATE"], + "DISPATCHPRICE": ["SETTLEMENTDATE"], + "DISPATCH_UNIT_SCADA": ["SETTLEMENTDATE"], + "DISPATCHCONSTRAINT": ["SETTLEMENTDATE"], + "DUDETAILSUMMARY": ["START_DATE", "END_DATE"], + "DUDETAIL": ["EFFECTIVEDATE"], + "GENCONDATA": ["EFFECTIVEDATE"], + "SPDREGIONCONSTRAINT": ["EFFECTIVEDATE"], + "SPDCONNECTIONPOINTCONSTRAINT": ["EFFECTIVEDATE"], + "SPDINTERCONNECTORCONSTRAINT": ["EFFECTIVEDATE"], + "FCAS_4_SECOND": ["TIMESTAMP"], + "ELEMENTS_FCAS_4_SECOND": None, + "VARIABLES_FCAS_4_SECOND": None, + "Generators and Scheduled Loads": None, + "BIDDAYOFFER_D": ["SETTLEMENTDATE"], + "BIDPEROFFER_D": ["INTERVAL_DATETIME"], + "FCAS_4s_SCADA_MAP": None, + "DISPATCHINTERCONNECTORRES": ["SETTLEMENTDATE"], + "DISPATCHREGIONSUM": 
["SETTLEMENTDATE"], + "LOSSMODEL": ["EFFECTIVEDATE"], + "LOSSFACTORMODEL": ["EFFECTIVEDATE"], + "MNSP_DAYOFFER": ["SETTLEMENTDATE"], + "MNSP_PEROFFER": ["SETTLEMENTDATE", "PERIODID"], + "MNSP_INTERCONNECTOR": ["EFFECTIVEDATE"], + "INTERCONNECTOR": ["LASTCHANGED"], + "INTERCONNECTORCONSTRAINT": ["EFFECTIVEDATE"], + "MARKET_PRICE_THRESHOLDS": ["EFFECTIVEDATE"], +} -filter = {'DISPATCHLOAD': filters.filter_on_settlementdate, - 'TRADINGLOAD': filters.filter_on_settlementdate, - 'TRADINGPRICE': filters.filter_on_settlementdate, - 'TRADINGREGIONSUM': filters.filter_on_settlementdate, - 'TRADINGINTERCONNECT': filters.filter_on_settlementdate, - 'DISPATCHPRICE': filters.filter_on_settlementdate, - 'DISPATCH_UNIT_SCADA': filters.filter_on_settlementdate, - 'DISPATCHCONSTRAINT': filters.filter_on_settlementdate, - 'DUDETAILSUMMARY': filters.filter_on_start_and_end_date, - 'DUDETAIL': filters.filter_on_effective_date, - 'GENCONDATA': filters.filter_on_effective_date, - 'SPDREGIONCONSTRAINT': filters.filter_on_effective_date, - 'SPDCONNECTIONPOINTCONSTRAINT': filters.filter_on_effective_date, - 'SPDINTERCONNECTORCONSTRAINT': filters.filter_on_effective_date, - 'FCAS_4_SECOND': filters.filter_on_timestamp, - 'ELEMENTS_FCAS_4_SECOND': None, - 'VARIABLES_FCAS_4_SECOND': None, - 'Generators and Scheduled Loads': None, - 'BIDDAYOFFER_D': filters.filter_on_settlementdate, - 'BIDPEROFFER_D': filters.filter_on_interval_datetime, - 'FCAS_4s_SCADA_MAP': None, - 'DISPATCHINTERCONNECTORRES': filters.filter_on_settlementdate, - 'DISPATCHREGIONSUM': filters.filter_on_settlementdate, - 'LOSSMODEL': filters.filter_on_effective_date, - 'LOSSFACTORMODEL': filters.filter_on_effective_date, - 'MNSP_DAYOFFER': filters.filter_on_settlementdate, - 'MNSP_PEROFFER': filters.filter_on_date_and_peroid, - 'MNSP_INTERCONNECTOR': filters.filter_on_effective_date, - 'INTERCONNECTOR': filters.filter_on_last_changed, - 'INTERCONNECTORCONSTRAINT': filters.filter_on_effective_date, - 'MARKET_PRICE_THRESHOLDS': filters.filter_on_effective_date} +filter = { + "DISPATCHLOAD": filters.filter_on_settlementdate, + "TRADINGLOAD": filters.filter_on_settlementdate, + "TRADINGPRICE": filters.filter_on_settlementdate, + "TRADINGREGIONSUM": filters.filter_on_settlementdate, + "TRADINGINTERCONNECT": filters.filter_on_settlementdate, + "DISPATCHPRICE": filters.filter_on_settlementdate, + "DISPATCH_UNIT_SCADA": filters.filter_on_settlementdate, + "DISPATCHCONSTRAINT": filters.filter_on_settlementdate, + "DUDETAILSUMMARY": filters.filter_on_start_and_end_date, + "DUDETAIL": filters.filter_on_effective_date, + "GENCONDATA": filters.filter_on_effective_date, + "SPDREGIONCONSTRAINT": filters.filter_on_effective_date, + "SPDCONNECTIONPOINTCONSTRAINT": filters.filter_on_effective_date, + "SPDINTERCONNECTORCONSTRAINT": filters.filter_on_effective_date, + "FCAS_4_SECOND": filters.filter_on_timestamp, + "ELEMENTS_FCAS_4_SECOND": None, + "VARIABLES_FCAS_4_SECOND": None, + "Generators and Scheduled Loads": None, + "BIDDAYOFFER_D": filters.filter_on_settlementdate, + "BIDPEROFFER_D": filters.filter_on_interval_datetime, + "FCAS_4s_SCADA_MAP": None, + "DISPATCHINTERCONNECTORRES": filters.filter_on_settlementdate, + "DISPATCHREGIONSUM": filters.filter_on_settlementdate, + "LOSSMODEL": filters.filter_on_effective_date, + "LOSSFACTORMODEL": filters.filter_on_effective_date, + "MNSP_DAYOFFER": filters.filter_on_settlementdate, + "MNSP_PEROFFER": filters.filter_on_date_and_peroid, + "MNSP_INTERCONNECTOR": filters.filter_on_effective_date, + "INTERCONNECTOR": 
filters.filter_on_last_changed, + "INTERCONNECTORCONSTRAINT": filters.filter_on_effective_date, + "MARKET_PRICE_THRESHOLDS": filters.filter_on_effective_date, +} -finalise = {'DISPATCHLOAD': None, - 'TRADINGLOAD': None, - 'TRADINGPRICE': None, - 'TRADINGREGIONSUM': None, - 'TRADINGINTERCONNECT': None, - 'DISPATCHPRICE': None, - 'DISPATCH_UNIT_SCADA': None, - 'DISPATCHCONSTRAINT': [query_wrapers.convert_genconid_effectivedate_to_datetime_format], - 'DUDETAILSUMMARY': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'DUDETAIL': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'GENCONDATA': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'SPDREGIONCONSTRAINT': [query_wrapers.most_recent_records_before_start_time], - 'SPDCONNECTIONPOINTCONSTRAINT': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'SPDINTERCONNECTORCONSTRAINT': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'FCAS_4_SECOND': [query_wrapers.fcas4s_finalise], - 'ELEMENTS_FCAS_4_SECOND': None, - 'VARIABLES_FCAS_4_SECOND': None, - 'Generators and Scheduled Loads': None, - 'BIDDAYOFFER_D': None, - 'BIDPEROFFER_D': None, - 'FCAS_4s_SCADA_MAP': None, - 'DISPATCHINTERCONNECTORRES': None, - 'DISPATCHREGIONSUM': None, - 'LOSSMODEL': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'LOSSFACTORMODEL': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'MNSP_DAYOFFER': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'MNSP_PEROFFER': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'MNSP_INTERCONNECTOR': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'INTERCONNECTOR': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'INTERCONNECTORCONSTRAINT': [query_wrapers.most_recent_records_before_start_time, - query_wrapers.drop_duplicates_by_primary_key], - 'MARKET_PRICE_THRESHOLDS': None} +finalise = { + "DISPATCHLOAD": None, + "TRADINGLOAD": None, + "TRADINGPRICE": None, + "TRADINGREGIONSUM": None, + "TRADINGINTERCONNECT": None, + "DISPATCHPRICE": None, + "DISPATCH_UNIT_SCADA": None, + "DISPATCHCONSTRAINT": [ + query_wrapers.convert_genconid_effectivedate_to_datetime_format + ], + "DUDETAILSUMMARY": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "DUDETAIL": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "GENCONDATA": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "SPDREGIONCONSTRAINT": [query_wrapers.most_recent_records_before_start_time], + "SPDCONNECTIONPOINTCONSTRAINT": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "SPDINTERCONNECTORCONSTRAINT": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "FCAS_4_SECOND": [query_wrapers.fcas4s_finalise], + "ELEMENTS_FCAS_4_SECOND": None, + "VARIABLES_FCAS_4_SECOND": None, + 
"Generators and Scheduled Loads": None, + "BIDDAYOFFER_D": None, + "BIDPEROFFER_D": None, + "FCAS_4s_SCADA_MAP": None, + "DISPATCHINTERCONNECTORRES": None, + "DISPATCHREGIONSUM": None, + "LOSSMODEL": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "LOSSFACTORMODEL": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "MNSP_DAYOFFER": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "MNSP_PEROFFER": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "MNSP_INTERCONNECTOR": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "INTERCONNECTOR": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "INTERCONNECTORCONSTRAINT": [ + query_wrapers.most_recent_records_before_start_time, + query_wrapers.drop_duplicates_by_primary_key, + ], + "MARKET_PRICE_THRESHOLDS": None, +} -date_gen = {'MMS': date_generators.year_and_month_gen, - 'MMS_AND_ARCHIVE': date_generators.bid_table_gen, - 'FCAS': date_generators.year_month_day_index_gen} +date_gen = { + "MMS": date_generators.year_and_month_gen, + "MMS_AND_ARCHIVE": date_generators.bid_table_gen, + "FCAS": date_generators.year_month_day_index_gen, +} -write_filename = {'MMS': write_file_names.write_file_names, - 'MMS_AND_ARCHIVE': write_file_names.write_file_names_mms_and_archive, - 'FCAS': write_file_names.write_file_names_fcas} +write_filename = { + "MMS": write_file_names.write_file_names, + "MMS_AND_ARCHIVE": write_file_names.write_file_names_mms_and_archive, + "FCAS": write_file_names.write_file_names_fcas, +} -downloader = {'MMS': downloader.run, - 'MMS_AND_ARCHIVE': downloader.run_bid_tables, - 'FCAS': downloader.run_fcas4s} +downloader = { + "MMS": downloader.run, + "MMS_AND_ARCHIVE": downloader.run_bid_tables, + "FCAS": downloader.run_fcas4s, +} diff --git a/nemosis/query_wrapers.py b/nemosis/query_wrapers.py index cd05fc2..fe7b7f5 100644 --- a/nemosis/query_wrapers.py +++ b/nemosis/query_wrapers.py @@ -3,32 +3,36 @@ from nemosis import defaults - def dispatch_date_setup(start_time, end_time): - start_time = datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") start_time = start_time - timedelta(hours=4) start_time = start_time.replace(hour=0, minute=0) start_time = start_time - timedelta(seconds=1) - start_time = datetime.isoformat(start_time).replace('-', '/').replace('T', ' ') - end_time = datetime.strptime(end_time, '%Y/%m/%d %H:%M:%S') + start_time = datetime.isoformat(start_time).replace("-", "/").replace("T", " ") + end_time = datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") end_time = end_time - timedelta(hours=4, seconds=1) - end_time = datetime.isoformat(end_time).replace('-', '/').replace('T', ' ') + end_time = datetime.isoformat(end_time).replace("-", "/").replace("T", " ") end_time = end_time[:10] - date_padding = ' 00:00:00' + date_padding = " 00:00:00" end_time = end_time + date_padding return start_time, end_time def dispatch_half_hour_setup(start_time, end_time): - start_time = datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S') - start_time = datetime(year=start_time.year, month=start_time.month, day=start_time.day, hour=start_time.hour, - minute=((start_time.minute // 30) * 30)) - start_time = 
start_time.isoformat().replace('T', ' ').replace('-', '/') + start_time = datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") + start_time = datetime( + year=start_time.year, + month=start_time.month, + day=start_time.day, + hour=start_time.hour, + minute=((start_time.minute // 30) * 30), + ) + start_time = start_time.isoformat().replace("T", " ").replace("-", "/") return start_time, end_time def fcas4s_finalise(data, start_time, table_name): - for column in data.select_dtypes(['object']).columns: + for column in data.select_dtypes(["object"]).columns: data[column] = data[column].map(lambda x: x.strip()) return data @@ -40,14 +44,22 @@ def most_recent_records_before_start_time(data, start_time, table_name): records_from_before_start = data[data[date_col] < start_time].copy() records_from_before_start = records_from_before_start.sort_values(date_col) if len(group_cols) > 0: - most_recent_from_before_start = records_from_before_start.groupby(group_cols, as_index=False).last() + most_recent_from_before_start = records_from_before_start.groupby( + group_cols, as_index=False + ).last() group_cols = group_cols + [date_col] - most_recent_from_before_start = pd.merge(most_recent_from_before_start.loc[:, group_cols], - records_from_before_start, 'inner', group_cols) + most_recent_from_before_start = pd.merge( + most_recent_from_before_start.loc[:, group_cols], + records_from_before_start, + "inner", + group_cols, + ) else: most_recent_from_before_start = records_from_before_start.tail(1) - mod_table = pd.concat([records_from_after_start, most_recent_from_before_start], sort=False) + mod_table = pd.concat( + [records_from_after_start, most_recent_from_before_start], sort=False + ) return mod_table @@ -57,6 +69,6 @@ def drop_duplicates_by_primary_key(data, start_time, table_name): def convert_genconid_effectivedate_to_datetime_format(data, start_time, table_name): - if 'GENCONID_EFFECTIVEDATE' in data.columns: - data['GENCONID_EFFECTIVEDATE'] = pd.to_datetime(data['GENCONID_EFFECTIVEDATE']) - return data \ No newline at end of file + if "GENCONID_EFFECTIVEDATE" in data.columns: + data["GENCONID_EFFECTIVEDATE"] = pd.to_datetime(data["GENCONID_EFFECTIVEDATE"]) + return data diff --git a/nemosis/rows.py b/nemosis/rows.py index 01ac902..f072080 100644 --- a/nemosis/rows.py +++ b/nemosis/rows.py @@ -4,29 +4,32 @@ class Query: - def __init__(self, master, row_number, app, table_options=None): # Load in the starting features of a query row. self.master = master self.row_number = row_number # Make an label and entry box for the user to name the query result. - self.query_label = ttk.Label(self.master, text=' Query name:') + self.query_label = ttk.Label(self.master, text=" Query name:") self.name = ttk.Entry(self.master) self.name.config(width=26) # Make labels and entry boxes for the user to provide start and end time to filter the query based on. - self.start_time_label = ttk.Label(self.master, text='Start time:\n(YYYY/MM/DD HH:MM:SS)') + self.start_time_label = ttk.Label( + self.master, text="Start time:\n(YYYY/MM/DD HH:MM:SS)" + ) self.start_time = ttk.Entry(self.master) self.start_time.config(width=26) - self.end_time_label = ttk.Label(self.master, text='End time:\n(YYYY/MM/DD HH:MM:SS)') + self.end_time_label = ttk.Label( + self.master, text="End time:\n(YYYY/MM/DD HH:MM:SS)" + ) self.end_time = ttk.Entry(self.master) self.end_time.config(width=26) # Create a label and a list of tables to choose from. 
- self.tables_label = ttk.Label(self.master, text='Select table:') + self.tables_label = ttk.Label(self.master, text="Select table:") self.tables = tk.Listbox(self.master, exportselection=False, width=35) - self.tables.bind('<>', self.add_column_selection) + self.tables.bind("<>", self.add_column_selection) self.table_options = table_options @@ -34,7 +37,9 @@ def __init__(self, master, row_number, app, table_options=None): self.tables.insert(tk.END, item) # Create a button to delete the row. - self.delete = ttk.Button(self.master, text=u"\u274C", command=lambda: app.delete_row(self.row_number)) + self.delete = ttk.Button( + self.master, text="\u274C", command=lambda: app.delete_row(self.row_number) + ) # Create empty attributes to fill up later on. self.filter_list = {} @@ -52,47 +57,93 @@ def position(self): pady = defaults.query_y_pad padx = defaults.standard_x_pad - self.query_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, - column=first_sub_column, pady=pady, padx=padx, sticky='sw') + self.query_label.grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=first_sub_column, + pady=pady, + padx=padx, + sticky="sw", + ) self.query_label.update() - self.name.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.names_internal_row, column=first_sub_column, padx=padx, sticky='sw') + self.name.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row, + column=first_sub_column, + padx=padx, + sticky="sw", + ) self.name.update() - self.start_time_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_label_internal_row, column=first_sub_column, - padx=defaults.standard_x_pad, sticky='sw') + self.start_time_label.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_label_internal_row, + column=first_sub_column, + padx=defaults.standard_x_pad, + sticky="sw", + ) self.start_time_label.update() - self.start_time.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_internal_row, column=first_sub_column, - padx=padx, sticky='sw') + self.start_time.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_internal_row, + column=first_sub_column, + padx=padx, + sticky="sw", + ) self.start_time.update() - self.end_time_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_label_internal_row, column=first_sub_column, - padx=padx, sticky='sw') + self.end_time_label.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_label_internal_row, + column=first_sub_column, + padx=padx, + sticky="sw", + ) self.end_time_label.update() - self.end_time.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_internal_row, column=first_sub_column, - padx=padx, sticky='sw') + self.end_time.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_internal_row, + column=first_sub_column, + padx=padx, + sticky="sw", + ) self.end_time.update() - self.tables_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, - column=second_sub_column, pady=defaults.query_y_pad, sticky='sw', - padx=padx) + self.tables_label.grid( + row=defaults.query_row_offset + defaults.row_height * 
self.row_number, + column=second_sub_column, + pady=defaults.query_y_pad, + sticky="sw", + padx=padx, + ) self.tables_label.update() - self.tables.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.table_list_internal_row, column=second_sub_column, - rowspan=defaults.list_row_span, columnspan=defaults.list_column_span, - sticky='sw', padx=padx) + self.tables.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.table_list_internal_row, + column=second_sub_column, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="sw", + padx=padx, + ) self.tables.update() - self.delete.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.delete_button_internal_row, column=defaults.last_column, sticky='nw') + self.delete.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.delete_button_internal_row, + column=defaults.last_column, + sticky="nw", + ) self.delete.update() if self.col_list is not None: @@ -110,9 +161,11 @@ def add_column_selection(self, evt): self.cols_label.destroy() # Create a new label and list box. - self.cols_label = ttk.Label(self.master, text='Select columns:') - self.col_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, width=26) - self.col_list.bind('<>', self.add_filters) + self.cols_label = ttk.Label(self.master, text="Select columns:") + self.col_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, width=26 + ) + self.col_list.bind("<>", self.add_filters) self.col_list.delete(0, tk.END) # Populate the list box with column names. @@ -131,15 +184,22 @@ def add_column_selection(self, evt): self.add_filters(None) def position_column_list(self): - self.cols_label.grid(column=self.tables.grid_info()['column'] + defaults.list_column_span, - row=defaults.query_row_offset + self.row_number * defaults.row_height, - sticky='sw') + self.cols_label.grid( + column=self.tables.grid_info()["column"] + defaults.list_column_span, + row=defaults.query_row_offset + self.row_number * defaults.row_height, + sticky="sw", + ) self.cols_label.update() - self.col_list.grid(column=self.tables.grid_info()['column'] + defaults.list_column_span, - row=defaults.query_row_offset + self.row_number * defaults.row_height - + defaults.table_list_internal_row, - rowspan=defaults.list_row_span, columnspan=defaults.list_column_span, - padx=defaults.standard_x_pad, sticky='sw') + self.col_list.grid( + column=self.tables.grid_info()["column"] + defaults.list_column_span, + row=defaults.query_row_offset + + self.row_number * defaults.row_height + + defaults.table_list_internal_row, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + padx=defaults.standard_x_pad, + sticky="sw", + ) self.col_list.update() def add_filters(self, evt): @@ -149,17 +209,30 @@ def add_filters(self, evt): self.remove_filters_unselected() # Find which columns are currently selected. - select_cols = [self.col_list.get(0, tk.END)[index] for index in self.col_list.curselection()] + select_cols = [ + self.col_list.get(0, tk.END)[index] + for index in self.col_list.curselection() + ] # If a column is selected, and is filterable, but does not have a filter then add a filter for that column. 
for column in select_cols: - if column in defaults.filterable_cols and column not in self.filter_label.keys(): - self.filter_label[column] = ttk.Label(self.master, text='Select {}s:'.format(str(column))) + if ( + column in defaults.filterable_cols + and column not in self.filter_label.keys() + ): + self.filter_label[column] = ttk.Label( + self.master, text="Select {}s:".format(str(column)) + ) self.filter_entry[column] = ttk.Entry(self.master, width=25) - self.filter_entry[column].bind('', self.add_to_list) + self.filter_entry[column].bind("", self.add_to_list) self.filter_entry[column].name = column - self.filter_list[column] = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, - height=8, width=25) + self.filter_list[column] = tk.Listbox( + self.master, + selectmode=tk.MULTIPLE, + exportselection=False, + height=8, + width=25, + ) # Position all the filters so there are no gaps in between them. self.position_filter_list() @@ -170,33 +243,52 @@ def position_filter_list(self): for column in self.filter_label.keys(): if last_filter is None: # Place the first filter next to the column list. - col = self.col_list.grid_info()['column'] + defaults.list_column_span + col = self.col_list.grid_info()["column"] + defaults.list_column_span else: # Place the next filter next to the last filter. - col = self.filter_label[last_filter].grid_info()['column'] + defaults.list_column_span - - self.filter_label[column].grid(row=defaults.query_row_offset+ defaults.row_height * self.row_number, - column=col, padx=defaults.standard_x_pad, sticky='sw') + col = ( + self.filter_label[last_filter].grid_info()["column"] + + defaults.list_column_span + ) + + self.filter_label[column].grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=col, + padx=defaults.standard_x_pad, + sticky="sw", + ) self.filter_label[column].update() - self.filter_entry[column].grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.names_internal_row, - column=col, padx=defaults.standard_x_pad) + self.filter_entry[column].grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row, + column=col, + padx=defaults.standard_x_pad, + ) self.filter_entry[column].update() - self.filter_list[column].grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.internal_filter_row, column=col, - columnspan=defaults.list_column_span, rowspan=defaults.list_filter_row_span, - padx=defaults.standard_x_pad) + self.filter_list[column].grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.internal_filter_row, + column=col, + columnspan=defaults.list_column_span, + rowspan=defaults.list_filter_row_span, + padx=defaults.standard_x_pad, + ) self.filter_list[column].update() last_filter = column def add_to_list(self, evt): # Add the item in the entry box of a filter to the list box below. self.filter_list[evt.widget.name].insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def remove_filters_unselected(self): # Delete filter whoes columns are not selected. 
- select_cols = [self.col_list.get(0, tk.END)[index] for index in self.col_list.curselection()] + select_cols = [ + self.col_list.get(0, tk.END)[index] + for index in self.col_list.curselection() + ] existing_filters = list(self.filter_label.keys()) for column in existing_filters: if column not in select_cols: @@ -237,7 +329,7 @@ def remove_initial_features(self): self.end_time.destroy() del self.end_time self.tables_label.destroy() - del self.tables_label + del self.tables_label self.tables.destroy() del self.tables self.delete.destroy() @@ -247,180 +339,324 @@ def remove_filters(self): # Remove any filter widgets that exist. existing_filters = list(self.filter_label.keys()) for column in existing_filters: - self.filter_label[column].destroy() - del self.filter_label[column] - self.filter_entry[column].destroy() - del self.filter_entry[column] - self.filter_list[column].destroy() - del self.filter_list[column] + self.filter_label[column].destroy() + del self.filter_label[column] + self.filter_entry[column].destroy() + del self.filter_entry[column] + self.filter_list[column].destroy() + del self.filter_list[column] def state(self): # Return the current state of the row as a dictionary. state = {} - state['type'] = 'query' - state['name'] = self.name.get() - state['start_time'] = self.start_time.get() - state['end_time'] = self.end_time.get() + state["type"] = "query" + state["name"] = self.name.get() + state["start_time"] = self.start_time.get() + state["end_time"] = self.end_time.get() if len(self.tables.curselection()) != 0: - state['table'] = self.table_options[self.tables.curselection()[0]] + state["table"] = self.table_options[self.tables.curselection()[0]] if self.col_list is not None: - state['columns'] = [self.col_list.get(0, tk.END)[index] for index in self.col_list.curselection()] - state['filters_contents'] = {} - state['filters_selection'] = {} + state["columns"] = [ + self.col_list.get(0, tk.END)[index] + for index in self.col_list.curselection() + ] + state["filters_contents"] = {} + state["filters_selection"] = {} for column, filter_list in self.filter_list.items(): - state['filters_contents'][column] = self.filter_list[column].get(0, tk.END) - state['filters_selection'][column] = self.filter_list[column].curselection() + state["filters_contents"][column] = self.filter_list[column].get(0, tk.END) + state["filters_selection"][column] = self.filter_list[column].curselection() return state def load_state(self, state): # Update the row to match the state provided. 
- self.name.insert(0, state['name']) - self.start_time.insert(0, state['start_time']) - self.end_time.insert(0, state['end_time']) - if len(state['table']) != 0: - table_index = list(self.tables.get(0, "end")).index(state['table']) + self.name.insert(0, state["name"]) + self.start_time.insert(0, state["start_time"]) + self.end_time.insert(0, state["end_time"]) + if len(state["table"]) != 0: + table_index = list(self.tables.get(0, "end")).index(state["table"]) self.tables.selection_set(table_index) self.add_column_selection(None) - for col in state['columns']: + for col in state["columns"]: col_index = list(self.col_list.get(0, "end")).index(col) self.col_list.selection_set(col_index) self.add_filters(None) - for column, filter_contents in state['filters_contents'].items(): + for column, filter_contents in state["filters_contents"].items(): self.filter_list[column].insert(0, *filter_contents) - for column, filter_selection in state['filters_selection'].items(): + for column, filter_selection in state["filters_selection"].items(): for index in filter_selection: self.filter_list[column].selection_set(index) -class Merge_as_of: +class Merge_as_of: def __init__(self, master, row_number, app): # Create all the widgets of a merge row. self.master = master self.row_number = row_number # Create a label and entry box to name the result of the merge - self.merge_label = ttk.Label(self.master, text='Merge name') + self.merge_label = ttk.Label(self.master, text="Merge name") self.name = ttk.Entry(self.master) self.name.config(width=26) # Create entry box to provide the name of the left result to merge. - self.left_table_label = ttk.Label(self.master, text='Left table') + self.left_table_label = ttk.Label(self.master, text="Left table") self.left_table = ttk.Entry(self.master) self.left_table.config(width=26) # Create an entry box to provide the name of the right result to merge. - self.right_table_label = ttk.Label(self.master, text='Right table') + self.right_table_label = ttk.Label(self.master, text="Right table") self.right_table = ttk.Entry(self.master) self.right_table.config(width=26) # Create a list to select the merge type from. - self.join_types_label = ttk.Label(self.master, text='Select join type') + self.join_types_label = ttk.Label(self.master, text="Select join type") self.join_types = tk.Listbox(self.master, exportselection=False, width=28) for item in defaults.join_type: self.join_types.insert(tk.END, item) # Create a button that deletes the row. - self.delete = ttk.Button(self.master, text=u"\u274C", command=lambda: app.delete_row(self.row_number)) + self.delete = ttk.Button( + self.master, text="\u274C", command=lambda: app.delete_row(self.row_number) + ) # Create a entry box and list to provide the keys to the left result. - self.left_time_keys_label = ttk.Label(self.master, text='Left time keys') + self.left_time_keys_label = ttk.Label(self.master, text="Left time keys") self.left_time_keys_entry = ttk.Entry(self.master) - self.left_time_keys_entry.bind('', self.add_to_list_left_time) - self.left_time_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8) + self.left_time_keys_entry.bind("", self.add_to_list_left_time) + self.left_time_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Create a entry box and list to provide the keys to the right result. 
- self.right_time_keys_label = ttk.Label(self.master, text='Right time keys') + self.right_time_keys_label = ttk.Label(self.master, text="Right time keys") self.right_time_keys_entry = ttk.Entry(self.master) - self.right_time_keys_entry.bind('', self.add_to_list_right_time) - self.right_time_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8) + self.right_time_keys_entry.bind("", self.add_to_list_right_time) + self.right_time_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Create a entry box and list to provide the keys to the left result. - self.left_keys_label = ttk.Label(self.master, text='Left keys') + self.left_keys_label = ttk.Label(self.master, text="Left keys") self.left_keys_entry = ttk.Entry(self.master) - self.left_keys_entry.bind('', self.add_to_list_left) - self.left_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8) + self.left_keys_entry.bind("", self.add_to_list_left) + self.left_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Create a entry box and list to provide the keys to the right result. - self.right_keys_label = ttk.Label(self.master, text='Right keys') + self.right_keys_label = ttk.Label(self.master, text="Right keys") self.right_keys_entry = ttk.Entry(self.master) - self.right_keys_entry.bind('', self.add_to_list_right) - self.right_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8) + self.right_keys_entry.bind("", self.add_to_list_right) + self.right_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Position all the widgets. self.position() def position(self): - self.merge_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, column=0, - pady=defaults.query_y_pad, padx=defaults.standard_x_pad, sticky='ws') + self.merge_label.grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=0, + pady=defaults.query_y_pad, + padx=defaults.standard_x_pad, + sticky="ws", + ) self.merge_label.update() - self.name.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.names_internal_row, column=0, padx=defaults.standard_x_pad) + self.name.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.name.update() - self.left_table_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_label_internal_row, column=0, padx=defaults.standard_x_pad) + self.left_table_label.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_label_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.left_table_label.update() - self.left_table.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_internal_row, column=0, padx=defaults.standard_x_pad) + self.left_table.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.left_table.update() - self.right_table_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_label_internal_row, column=0, padx=defaults.standard_x_pad) + self.right_table_label.grid( + 
row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_label_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.right_table_label.update() - self.right_table.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_internal_row, column=0, padx=defaults.standard_x_pad) + self.right_table.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.right_table.update() - self.join_types_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, column=1, - pady = defaults.query_y_pad, sticky = 'sw', padx=defaults.standard_x_pad) + self.join_types_label.grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=1, + pady=defaults.query_y_pad, + sticky="sw", + padx=defaults.standard_x_pad, + ) self.join_types_label.update() - self.join_types.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.table_list_internal_row, column=1, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', padx=defaults.standard_x_pad) + self.join_types.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.table_list_internal_row, + column=1, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) self.join_types.update() - self.delete.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.delete_button_internal_row, column=defaults.last_column, sticky='nw') + self.delete.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.delete_button_internal_row, + column=defaults.last_column, + sticky="nw", + ) self.delete.update() label_row = defaults.query_row_offset + defaults.row_height * self.row_number - label_sticky = 'w' - entry_row = defaults.query_row_offset + defaults.row_height * self.row_number + defaults.names_internal_row - custom_list_row = defaults.query_row_offset + defaults.row_height * self.row_number + defaults.internal_filter_row - - left_time_keys_col = self.join_types.grid_info()['column'] + defaults.list_column_span - self.left_time_keys_label.grid(row=label_row, column=left_time_keys_col, sticky=label_sticky, padx=defaults.standard_x_pad) - self.left_time_keys_entry.grid(row=entry_row, column=left_time_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', padx=defaults.standard_x_pad) - self.left_time_key_list.grid(row=custom_list_row, column=left_time_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span,sticky='nw', padx=defaults.standard_x_pad) - - right_time_keys_col = self.left_time_key_list.grid_info()['column'] + defaults.list_column_span - self.right_time_keys_label.grid(row=label_row, column=right_time_keys_col, sticky=label_sticky, padx=defaults.standard_x_pad) - self.right_time_keys_entry.grid(row=entry_row, column=right_time_keys_col, rowspan=defaults.list_row_span, columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) - self.right_time_key_list.grid(row=custom_list_row, column=right_time_keys_col, rowspan=defaults.list_row_span,columnspan=defaults.list_column_span, - sticky='nw', padx=defaults.standard_x_pad) - - left_keys_col = self.right_time_key_list.grid_info()['column'] + defaults.list_column_span - 
self.left_keys_label.grid(row=label_row, column=left_keys_col, sticky=label_sticky, padx=defaults.standard_x_pad) - self.left_keys_entry.grid(row=entry_row, column=left_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', padx=defaults.standard_x_pad) - self.left_key_list.grid(row=custom_list_row, column=left_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span,sticky='nw', padx=defaults.standard_x_pad) - - right_keys_col = self.left_key_list.grid_info()['column'] + defaults.list_column_span - self.right_keys_label.grid(row=label_row, column=right_keys_col, sticky=label_sticky, padx=defaults.standard_x_pad) - self.right_keys_entry.grid(row=entry_row, column=right_keys_col, rowspan=defaults.list_row_span, columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) - self.right_key_list.grid(row=custom_list_row, column=right_keys_col, rowspan=defaults.list_row_span,columnspan=defaults.list_column_span, - sticky='nw', padx=defaults.standard_x_pad) + label_sticky = "w" + entry_row = ( + defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row + ) + custom_list_row = ( + defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.internal_filter_row + ) + + left_time_keys_col = ( + self.join_types.grid_info()["column"] + defaults.list_column_span + ) + self.left_time_keys_label.grid( + row=label_row, + column=left_time_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.left_time_keys_entry.grid( + row=entry_row, + column=left_time_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.left_time_key_list.grid( + row=custom_list_row, + column=left_time_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + + right_time_keys_col = ( + self.left_time_key_list.grid_info()["column"] + defaults.list_column_span + ) + self.right_time_keys_label.grid( + row=label_row, + column=right_time_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.right_time_keys_entry.grid( + row=entry_row, + column=right_time_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.right_time_key_list.grid( + row=custom_list_row, + column=right_time_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + + left_keys_col = ( + self.right_time_key_list.grid_info()["column"] + defaults.list_column_span + ) + self.left_keys_label.grid( + row=label_row, + column=left_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.left_keys_entry.grid( + row=entry_row, + column=left_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.left_key_list.grid( + row=custom_list_row, + column=left_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + + right_keys_col = ( + self.left_key_list.grid_info()["column"] + defaults.list_column_span + ) + self.right_keys_label.grid( + row=label_row, + column=right_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.right_keys_entry.grid( + row=entry_row, + 
column=right_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.right_key_list.grid( + row=custom_list_row, + column=right_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) def add_to_list_left_time(self, evt): # Add key from entry box to list. self.left_time_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def add_to_list_right_time(self, evt): # Add key from entry box to list. self.right_time_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def add_to_list_left(self, evt): # Add key from entry box to list. self.left_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def add_to_list_right(self, evt): # Add key from entry box to list. self.right_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def empty(self): # Delete the widgets of the merge row. @@ -470,87 +706,117 @@ def empty(self): def state(self): # Return the state of the row as a dictionary. state = {} - state['type'] = 'merge_as_of' - state['name'] = self.name.get() - state['left_table'] = self.left_table.get() - state['right_table'] = self.right_table.get() - state['join_types'] = self.join_types.curselection() - state['left_time_key_list'] = {} - state['left_time_key_list']['contents'] = self.left_time_key_list.get(0, tk.END) - state['left_time_key_list']['selection'] = self.left_time_key_list.curselection() - state['right_time_key_list'] = {} - state['right_time_key_list']['contents'] = self.right_time_key_list.get(0, tk.END) - state['right_time_key_list']['selection'] = self.right_time_key_list.curselection() - state['left_key_list'] = {} - state['left_key_list']['contents'] = self.left_key_list.get(0, tk.END) - state['left_key_list']['selection'] = self.left_key_list.curselection() - state['right_key_list'] = {} - state['right_key_list']['contents'] = self.right_key_list.get(0, tk.END) - state['right_key_list']['selection'] = self.right_key_list.curselection() + state["type"] = "merge_as_of" + state["name"] = self.name.get() + state["left_table"] = self.left_table.get() + state["right_table"] = self.right_table.get() + state["join_types"] = self.join_types.curselection() + state["left_time_key_list"] = {} + state["left_time_key_list"]["contents"] = self.left_time_key_list.get(0, tk.END) + state["left_time_key_list"][ + "selection" + ] = self.left_time_key_list.curselection() + state["right_time_key_list"] = {} + state["right_time_key_list"]["contents"] = self.right_time_key_list.get( + 0, tk.END + ) + state["right_time_key_list"][ + "selection" + ] = self.right_time_key_list.curselection() + state["left_key_list"] = {} + state["left_key_list"]["contents"] = self.left_key_list.get(0, tk.END) + state["left_key_list"]["selection"] = self.left_key_list.curselection() + state["right_key_list"] = {} + state["right_key_list"]["contents"] = self.right_key_list.get(0, tk.END) + state["right_key_list"]["selection"] = self.right_key_list.curselection() return state def load_state(self, state): # Update the row to match the state provided. 
- self.name.insert(0, state['name']) - self.left_table.insert(0, state['left_table']) - self.right_table.insert(0, state['right_table']) - if len(state['join_types']) != 0: - self.join_types.selection_set(state['join_types'][0]) - - self.left_time_key_list.insert(0, *state['left_time_key_list']['contents']) - for index in state['left_time_key_list']['selection']: + self.name.insert(0, state["name"]) + self.left_table.insert(0, state["left_table"]) + self.right_table.insert(0, state["right_table"]) + if len(state["join_types"]) != 0: + self.join_types.selection_set(state["join_types"][0]) + + self.left_time_key_list.insert(0, *state["left_time_key_list"]["contents"]) + for index in state["left_time_key_list"]["selection"]: self.left_time_key_list.selection_set(index) - self.right_time_key_list.insert(0, *state['right_time_key_list']['contents']) - for index in state['right_time_key_list']['selection']: + self.right_time_key_list.insert(0, *state["right_time_key_list"]["contents"]) + for index in state["right_time_key_list"]["selection"]: self.right_time_key_list.selection_set(index) - self.left_key_list.insert(0, *state['left_key_list']['contents']) - for index in state['left_key_list']['selection']: + self.left_key_list.insert(0, *state["left_key_list"]["contents"]) + for index in state["left_key_list"]["selection"]: self.left_key_list.selection_set(index) - self.right_key_list.insert(0, *state['right_key_list']['contents']) - for index in state['right_key_list']['selection']: + self.right_key_list.insert(0, *state["right_key_list"]["contents"]) + for index in state["right_key_list"]["selection"]: self.right_key_list.selection_set(index) class FilterVersionNo: - def __init__(self, master, row_number, app): # Create all the widgets of a merge row. self.master = master self.row_number = row_number # Create a label and entry box to name the result of filter - self.output_label = ttk.Label(self.master, text='Output table') + self.output_label = ttk.Label(self.master, text="Output table") self.name = ttk.Entry(self.master) self.name.config(width=26) # Create entry box to provide the name of the input. - self.input_label = ttk.Label(self.master, text='Input table') + self.input_label = ttk.Label(self.master, text="Input table") self.input = ttk.Entry(self.master) self.input.config(width=26) # Create a button that deletes the row. 
- self.delete = ttk.Button(self.master, text=u"\u274C", command=lambda: app.delete_row(self.row_number)) + self.delete = ttk.Button( + self.master, text="\u274C", command=lambda: app.delete_row(self.row_number) + ) self.position() def position(self): - self.output_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, - column=0, - pady=defaults.query_y_pad, padx=defaults.standard_x_pad, sticky='ws') + self.output_label.grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=0, + pady=defaults.query_y_pad, + padx=defaults.standard_x_pad, + sticky="ws", + ) self.output_label.update() - self.name.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.names_internal_row, column=0, padx=defaults.standard_x_pad) + self.name.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.name.update() - self.input_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_label_internal_row, column=0, - padx=defaults.standard_x_pad, sticky='ws') + self.input_label.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_label_internal_row, + column=0, + padx=defaults.standard_x_pad, + sticky="ws", + ) self.input_label.update() - self.input.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_internal_row, column=0, padx=defaults.standard_x_pad) + self.input.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.input.update() - self.delete.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.delete_button_internal_row, column=defaults.last_column, - sticky='nw') + self.delete.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.delete_button_internal_row, + column=defaults.last_column, + sticky="nw", + ) self.delete.update() def empty(self): @@ -569,135 +835,211 @@ def empty(self): def state(self): # Return the state of the row as a dictionary. state = {} - state['type'] = 'filter_version_no' - state['name'] = self.name.get() - state['input'] = self.input.get() + state["type"] = "filter_version_no" + state["name"] = self.name.get() + state["input"] = self.input.get() return state def load_state(self, state): # Update the row to match the state provided. - self.name.insert(0, state['name']) - self.input.insert(0, state['input']) + self.name.insert(0, state["name"]) + self.input.insert(0, state["input"]) class Merge: - def __init__(self, master, row_number, app): # Create all the widgets of a merge row. self.master = master self.row_number = row_number # Create a label and entry box to name the result of the merge - self.merge_label = ttk.Label(self.master, text='Merge name') + self.merge_label = ttk.Label(self.master, text="Merge name") self.name = ttk.Entry(self.master) self.name.config(width=26) # Create entry box to provide the name of the left result to merge. - self.left_table_label = ttk.Label(self.master, text='Left table') + self.left_table_label = ttk.Label(self.master, text="Left table") self.left_table = ttk.Entry(self.master) self.left_table.config(width=26) # Create an entry box to provide the name of the right result to merge. 
- self.right_table_label = ttk.Label(self.master, text='Right table') + self.right_table_label = ttk.Label(self.master, text="Right table") self.right_table = ttk.Entry(self.master) self.right_table.config(width=26) # Create a list to select the merge type from. - self.join_types_label = ttk.Label(self.master, text='Select join type') + self.join_types_label = ttk.Label(self.master, text="Select join type") self.join_types = tk.Listbox(self.master, exportselection=False, width=28) for item in defaults.join_type: self.join_types.insert(tk.END, item) # Create a button that deletes the row. - self.delete = ttk.Button(self.master, text=u"\u274C", - command=lambda: app.delete_row(self.row_number)) + self.delete = ttk.Button( + self.master, text="\u274C", command=lambda: app.delete_row(self.row_number) + ) # Create a entry box and list to provide the keys to the left result. - self.left_keys_label = ttk.Label(self.master, text='Left keys') + self.left_keys_label = ttk.Label(self.master, text="Left keys") self.left_keys_entry = ttk.Entry(self.master) - self.left_keys_entry.bind('', self.add_to_list_left) - self.left_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, - height=8) + self.left_keys_entry.bind("", self.add_to_list_left) + self.left_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Create a entry box and list to provide the keys to the right result. - self.right_keys_label = ttk.Label(self.master, text='Right keys') + self.right_keys_label = ttk.Label(self.master, text="Right keys") self.right_keys_entry = ttk.Entry(self.master) - self.right_keys_entry.bind('', self.add_to_list_right) - self.right_key_list = tk.Listbox(self.master, selectmode=tk.MULTIPLE, exportselection=False, - height=8) + self.right_keys_entry.bind("", self.add_to_list_right) + self.right_key_list = tk.Listbox( + self.master, selectmode=tk.MULTIPLE, exportselection=False, height=8 + ) # Position all the widgets. 
self.position() def position(self): - self.merge_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number, - column=0, - pady=defaults.query_y_pad, padx=defaults.standard_x_pad, sticky='ws') + self.merge_label.grid( + row=defaults.query_row_offset + defaults.row_height * self.row_number, + column=0, + pady=defaults.query_y_pad, + padx=defaults.standard_x_pad, + sticky="ws", + ) self.merge_label.update() - self.name.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.names_internal_row, column=0, padx=defaults.standard_x_pad) + self.name.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.name.update() - self.left_table_label.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_label_internal_row, column=0, - padx=defaults.standard_x_pad) + self.left_table_label.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_label_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.left_table_label.update() - self.left_table.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.start_time_internal_row, column=0, - padx=defaults.standard_x_pad) + self.left_table.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.start_time_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.left_table.update() self.right_table_label.grid( - row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_label_internal_row, column=0, - padx=defaults.standard_x_pad) + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_label_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.right_table_label.update() - self.right_table.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.end_time_internal_row, column=0, - padx=defaults.standard_x_pad) + self.right_table.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.end_time_internal_row, + column=0, + padx=defaults.standard_x_pad, + ) self.right_table.update() self.join_types_label.grid( row=defaults.query_row_offset + defaults.row_height * self.row_number, column=1, - pady=defaults.query_y_pad, sticky='sw', padx=defaults.standard_x_pad) + pady=defaults.query_y_pad, + sticky="sw", + padx=defaults.standard_x_pad, + ) self.join_types_label.update() - self.join_types.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.table_list_internal_row, column=1, - rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) + self.join_types.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.table_list_internal_row, + column=1, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) self.join_types.update() - self.delete.grid(row=defaults.query_row_offset + defaults.row_height * self.row_number - + defaults.delete_button_internal_row, column=defaults.last_column, - sticky='nw') + self.delete.grid( + row=defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.delete_button_internal_row, + column=defaults.last_column, + sticky="nw", + ) 
self.delete.update() label_row = defaults.query_row_offset + defaults.row_height * self.row_number - label_sticky = 'w' - entry_row = defaults.query_row_offset + defaults.row_height * self.row_number + defaults.names_internal_row - custom_list_row = defaults.query_row_offset + defaults.row_height * self.row_number + defaults.internal_filter_row - - left_keys_col = self.join_types.grid_info()['column'] + defaults.list_column_span - self.left_keys_label.grid(row=label_row, column=left_keys_col, sticky=label_sticky, - padx=defaults.standard_x_pad) - self.left_keys_entry.grid(row=entry_row, column=left_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) - self.left_key_list.grid(row=custom_list_row, column=left_keys_col, - rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) - - right_keys_col = self.left_key_list.grid_info()['column'] + defaults.list_column_span - self.right_keys_label.grid(row=label_row, column=right_keys_col, sticky=label_sticky, - padx=defaults.standard_x_pad) - self.right_keys_entry.grid(row=entry_row, column=right_keys_col, rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, sticky='nw', - padx=defaults.standard_x_pad) - self.right_key_list.grid(row=custom_list_row, column=right_keys_col, - rowspan=defaults.list_row_span, - columnspan=defaults.list_column_span, - sticky='nw', padx=defaults.standard_x_pad) + label_sticky = "w" + entry_row = ( + defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.names_internal_row + ) + custom_list_row = ( + defaults.query_row_offset + + defaults.row_height * self.row_number + + defaults.internal_filter_row + ) + + left_keys_col = ( + self.join_types.grid_info()["column"] + defaults.list_column_span + ) + self.left_keys_label.grid( + row=label_row, + column=left_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.left_keys_entry.grid( + row=entry_row, + column=left_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.left_key_list.grid( + row=custom_list_row, + column=left_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + + right_keys_col = ( + self.left_key_list.grid_info()["column"] + defaults.list_column_span + ) + self.right_keys_label.grid( + row=label_row, + column=right_keys_col, + sticky=label_sticky, + padx=defaults.standard_x_pad, + ) + self.right_keys_entry.grid( + row=entry_row, + column=right_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) + self.right_key_list.grid( + row=custom_list_row, + column=right_keys_col, + rowspan=defaults.list_row_span, + columnspan=defaults.list_column_span, + sticky="nw", + padx=defaults.standard_x_pad, + ) def add_to_list_left(self, evt): # Add key from entry box to list. self.left_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def add_to_list_right(self, evt): # Add key from entry box to list. self.right_key_list.insert(tk.END, evt.widget.get()) - evt.widget.delete(0, 'end') + evt.widget.delete(0, "end") def empty(self): # Delete the widgets of the merge row. @@ -735,31 +1077,31 @@ def empty(self): def state(self): # Return the state of the row as a dictionary. 
state = {} - state['type'] = 'merge' - state['name'] = self.name.get() - state['left_table'] = self.left_table.get() - state['right_table'] = self.right_table.get() - state['join_types'] = self.join_types.curselection() - state['left_key_list'] = {} - state['left_key_list']['contents'] = self.left_key_list.get(0, tk.END) - state['left_key_list']['selection'] = self.left_key_list.curselection() - state['right_key_list'] = {} - state['right_key_list']['contents'] = self.right_key_list.get(0, tk.END) - state['right_key_list']['selection'] = self.right_key_list.curselection() + state["type"] = "merge" + state["name"] = self.name.get() + state["left_table"] = self.left_table.get() + state["right_table"] = self.right_table.get() + state["join_types"] = self.join_types.curselection() + state["left_key_list"] = {} + state["left_key_list"]["contents"] = self.left_key_list.get(0, tk.END) + state["left_key_list"]["selection"] = self.left_key_list.curselection() + state["right_key_list"] = {} + state["right_key_list"]["contents"] = self.right_key_list.get(0, tk.END) + state["right_key_list"]["selection"] = self.right_key_list.curselection() return state def load_state(self, state): # Update the row to match the state provided. - self.name.insert(0, state['name']) - self.left_table.insert(0, state['left_table']) - self.right_table.insert(0, state['right_table']) - if len(state['join_types']) != 0: - self.join_types.selection_set(state['join_types'][0]) - - self.left_key_list.insert(0, *state['left_key_list']['contents']) - for index in state['left_key_list']['selection']: + self.name.insert(0, state["name"]) + self.left_table.insert(0, state["left_table"]) + self.right_table.insert(0, state["right_table"]) + if len(state["join_types"]) != 0: + self.join_types.selection_set(state["join_types"][0]) + + self.left_key_list.insert(0, *state["left_key_list"]["contents"]) + for index in state["left_key_list"]["selection"]: self.left_key_list.selection_set(index) - self.right_key_list.insert(0, *state['right_key_list']['contents']) - for index in state['right_key_list']['selection']: - self.right_key_list.selection_set(index) \ No newline at end of file + self.right_key_list.insert(0, *state["right_key_list"]["contents"]) + for index in state["right_key_list"]["selection"]: + self.right_key_list.selection_set(index) diff --git a/nemosis/test_data_fetch_methods.py b/nemosis/test_data_fetch_methods.py index 8e568ab..dd753af 100644 --- a/nemosis/test_data_fetch_methods.py +++ b/nemosis/test_data_fetch_methods.py @@ -9,513 +9,629 @@ class TestDynamicDataCompilerWithSettlementDateFiltering(unittest.TestCase): def setUp(self): - self.table_names = ['BIDDAYOFFER_D', 'BIDPEROFFER_D', 'DISPATCHLOAD', - 'DISPATCHCONSTRAINT', 'DISPATCH_UNIT_SCADA', - 'DISPATCHPRICE', 'DISPATCHINTERCONNECTORRES', - 'DISPATCHREGIONSUM', 'TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT'] - - self.table_types = {'DISPATCHLOAD': 'DUID', - 'DISPATCHCONSTRAINT': 'CONSTRAINTID', - 'DISPATCH_UNIT_SCADA': 'DUIDONLY', - 'DISPATCHPRICE': 'REGIONID', - 'DISPATCHINTERCONNECTORRES': 'INTERCONNECTORID', - 'DISPATCHREGIONSUM': 'REGIONID', - 'BIDPEROFFER_D': 'DUID-BIDTYPE', - 'BIDDAYOFFER_D': 'DUID-BIDTYPE', - 'TRADINGLOAD': 'DUIDONLY', - 'TRADINGPRICE': 'REGIONIDONLY', - 'TRADINGREGIONSUM': 'REGIONIDONLY', - 'TRADINGINTERCONNECT': 'INTERCONNECTORIDONLY'} - - self.table_filters = {'DISPATCHLOAD': ['DUID', 'INTERVENTION'], - 'DISPATCHCONSTRAINT': ['CONSTRAINTID', - 'INTERVENTION'], - 'DISPATCH_UNIT_SCADA': ['DUID'], - 
'DISPATCHPRICE': ['REGIONID', 'INTERVENTION'], - 'DISPATCHINTERCONNECTORRES': ['INTERCONNECTORID', - 'INTERVENTION'], - 'DISPATCHREGIONSUM': ['REGIONID', - 'INTERVENTION'], - 'BIDPEROFFER_D': ['DUID', 'BIDTYPE'], - 'BIDDAYOFFER_D': ['DUID', 'BIDTYPE'], - 'TRADINGLOAD': ['DUID'], - 'TRADINGPRICE': ['REGIONID'], - 'TRADINGREGIONSUM': ['REGIONID'], - 'TRADINGINTERCONNECT': ['INTERCONNECTORID']} - - self.filter_values = {'DUID': (['AGLHAL'], [0]), - 'DUIDONLY': (['AGLHAL'],), - 'REGIONID': (['SA1'], [0]), - 'REGIONIDONLY': (['SA1'],), - 'INTERCONNECTORID': (['VIC1-NSW1'], [0]), - 'INTERCONNECTORIDONLY': (['VIC1-NSW1'],), - 'CONSTRAINTID': (['DATASNAP_DFS_Q_CLST'], [0]), - 'DUID-BIDTYPE': (['AGLHAL'], ['ENERGY'])} + self.table_names = [ + "BIDDAYOFFER_D", + "BIDPEROFFER_D", + "DISPATCHLOAD", + "DISPATCHCONSTRAINT", + "DISPATCH_UNIT_SCADA", + "DISPATCHPRICE", + "DISPATCHINTERCONNECTORRES", + "DISPATCHREGIONSUM", + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ] + + self.table_types = { + "DISPATCHLOAD": "DUID", + "DISPATCHCONSTRAINT": "CONSTRAINTID", + "DISPATCH_UNIT_SCADA": "DUIDONLY", + "DISPATCHPRICE": "REGIONID", + "DISPATCHINTERCONNECTORRES": "INTERCONNECTORID", + "DISPATCHREGIONSUM": "REGIONID", + "BIDPEROFFER_D": "DUID-BIDTYPE", + "BIDDAYOFFER_D": "DUID-BIDTYPE", + "TRADINGLOAD": "DUIDONLY", + "TRADINGPRICE": "REGIONIDONLY", + "TRADINGREGIONSUM": "REGIONIDONLY", + "TRADINGINTERCONNECT": "INTERCONNECTORIDONLY", + } + + self.table_filters = { + "DISPATCHLOAD": ["DUID", "INTERVENTION"], + "DISPATCHCONSTRAINT": ["CONSTRAINTID", "INTERVENTION"], + "DISPATCH_UNIT_SCADA": ["DUID"], + "DISPATCHPRICE": ["REGIONID", "INTERVENTION"], + "DISPATCHINTERCONNECTORRES": ["INTERCONNECTORID", "INTERVENTION"], + "DISPATCHREGIONSUM": ["REGIONID", "INTERVENTION"], + "BIDPEROFFER_D": ["DUID", "BIDTYPE"], + "BIDDAYOFFER_D": ["DUID", "BIDTYPE"], + "TRADINGLOAD": ["DUID"], + "TRADINGPRICE": ["REGIONID"], + "TRADINGREGIONSUM": ["REGIONID"], + "TRADINGINTERCONNECT": ["INTERCONNECTORID"], + } + + self.filter_values = { + "DUID": (["AGLHAL"], [0]), + "DUIDONLY": (["AGLHAL"],), + "REGIONID": (["SA1"], [0]), + "REGIONIDONLY": (["SA1"],), + "INTERCONNECTORID": (["VIC1-NSW1"], [0]), + "INTERCONNECTORIDONLY": (["VIC1-NSW1"],), + "CONSTRAINTID": (["DATASNAP_DFS_Q_CLST"], [0]), + "DUID-BIDTYPE": (["AGLHAL"], ["ENERGY"]), + } def test_dispatch_tables_start_of_month(self): - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print(f'Testing {table} returning values at start of month.') + print(f"Testing {table} returning values at start of month.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 63 expected_number_of_columns = 2 - expected_first_time = \ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + - timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, - format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', - 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 
00:30:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time =\ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = \ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_end_of_month(self): - start_time = '2018/01/31 21:00:00' - end_time = '2018/02/01 00:00:00' + start_time = "2018/01/31 21:00:00" + end_time = "2018/02/01 00:00:00" for table in self.table_names: - print('Testing {} returing values at end of month.'.format(table)) + print("Testing {} returing values at end of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 36 expected_number_of_columns = 2 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + - timedelta(minutes=5)) - expected_last_time =\ - pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', - 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 6 - expected_first_time = '2018/01/31 21:30:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2018/01/31 21:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 1 - expected_last_time =\ - 
expected_first_time.replace(hour=0, minute=0) - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_last_time = expected_first_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.sort_values(dat_col) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_straddle_2_months(self): - start_time = '2018/02/28 21:00:00' - end_time = '2018/03/01 21:00:00' + start_time = "2018/02/28 21:00:00" + end_time = "2018/03/01 21:00:00" for table in self.table_names: - print(f'Testing {table} returing values from adjacent months.') + print(f"Testing {table} returing values from adjacent months.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 288 expected_number_of_columns = 2 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, - format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 48 - expected_first_time = '2018/02/28 21:30:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2018/02/28 21:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 2 - expected_last_time =\ - expected_last_time.replace(hour=0, minute=0) - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_last_time = expected_last_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.sort_values(dat_col) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, 
data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_start_of_year(self): - start_time = '2018/01/01 00:00:00' - end_time = '2018/01/01 01:00:00' + start_time = "2018/01/01 00:00:00" + end_time = "2018/01/01 01:00:00" for table in self.table_names: - print('Testing {} returing values at start of year.'.format(table)) + print("Testing {} returing values at start of year.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 12 expected_number_of_columns = 2 - expected_first_time = \ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, - format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 2 - expected_first_time = '2018/01/01 00:30:00' - expected_first_time = \ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2018/01/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 1 - expected_last_time =\ - (expected_last_time.replace(hour=0, minute=0) - - timedelta(days=1)) - expected_first_time =\ - (expected_first_time.replace(hour=0, minute=0) - - timedelta(days=1)) + expected_last_time = expected_last_time.replace( + hour=0, minute=0 + ) - timedelta(days=1) + expected_first_time = expected_first_time.replace( + hour=0, minute=0 + ) - timedelta(days=1) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.sort_values(dat_col) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_end_of_year(self): - start_time = '2017/12/31 23:00:00' - end_time = '2018/01/01 00:00:00' + start_time = "2017/12/31 23:00:00" + end_time = "2018/01/01 00:00:00" for table in self.table_names: - print('Testing {} returing values at end of year.'.format(table)) + print("Testing {} returing values at end of year.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 12 expected_number_of_columns = 2 - expected_first_time = \ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time =\ - 
pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 2 - expected_first_time = '2017/12/31 23:30:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2017/12/31 23:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 1 - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) expected_last_time = expected_first_time data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.sort_values(dat_col) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_straddle_years(self): - start_time = '2017/12/31 23:00:00' - end_time = '2018/01/01 01:00:00' + start_time = "2017/12/31 23:00:00" + end_time = "2018/01/01 01:00:00" for table in self.table_names: - print(f'Testing {table} returning values from adjacent years.') + print(f"Testing {table} returning values from adjacent years.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 24 expected_number_of_columns = 2 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time =\ - pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 4 - expected_first_time = '2017/12/31 23:30:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_first_time = "2017/12/31 23:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) if "ONLY" not in table_type: expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': + if table == "BIDDAYOFFER_D": expected_length = 1 - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) 
expected_last_time = expected_first_time data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = data.sort_values(dat_col) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") -class TestDynamicDataCompilerWithSettlementDateFiltering2021OfferData(unittest.TestCase): +class TestDynamicDataCompilerWithSettlementDateFiltering2021OfferData( + unittest.TestCase +): def setUp(self): - self.table_names = ['BIDDAYOFFER_D', 'BIDPEROFFER_D'] + self.table_names = ["BIDDAYOFFER_D", "BIDPEROFFER_D"] - self.table_types = {'BIDPEROFFER_D': 'DUID-BIDTYPE', - 'BIDDAYOFFER_D': 'DUID-BIDTYPE'} + self.table_types = { + "BIDPEROFFER_D": "DUID-BIDTYPE", + "BIDDAYOFFER_D": "DUID-BIDTYPE", + } - self.table_filters = {'BIDPEROFFER_D': ['DUID', 'BIDTYPE'], - 'BIDDAYOFFER_D': ['DUID', 'BIDTYPE']} + self.table_filters = { + "BIDPEROFFER_D": ["DUID", "BIDTYPE"], + "BIDDAYOFFER_D": ["DUID", "BIDTYPE"], + } # Filter for bids at the start of the 2021-06-01 file and the end of the 2021-05-31, to make sure that we arn't # skipping any of the data file rows. - self.filter_values = {'DUID-BIDTYPE': (['ADPBA1G', 'ARWF1', 'YWPS4', 'YWPS4'], ['ENERGY', 'RAISEREG', 'RAISE60SEC'])} + self.filter_values = { + "DUID-BIDTYPE": ( + ["ADPBA1G", "ARWF1", "YWPS4", "YWPS4"], + ["ENERGY", "RAISEREG", "RAISE60SEC"], + ) + } @staticmethod def restrictive_filter(data): - data = data[(data['DUID'] == 'ADPBA1G') & (data['BIDTYPE'] == 'ENERGY') | - (data['DUID'] == 'ARWF1') & (data['BIDTYPE'] == 'ENERGY') | - (data['DUID'] == 'YWPS4') & (data['BIDTYPE'] == 'RAISEREG') | - (data['DUID'] == 'YWPS4') & (data['BIDTYPE'] == 'RAISE60SEC')] + data = data[ + (data["DUID"] == "ADPBA1G") & (data["BIDTYPE"] == "ENERGY") + | (data["DUID"] == "ARWF1") & (data["BIDTYPE"] == "ENERGY") + | (data["DUID"] == "YWPS4") & (data["BIDTYPE"] == "RAISEREG") + | (data["DUID"] == "YWPS4") & (data["BIDTYPE"] == "RAISE60SEC") + ] return data def test_dispatch_tables_start_of_month(self): - start_time = '2021/06/01 00:00:00' - end_time = '2021/06/01 05:15:00' + start_time = "2021/06/01 00:00:00" + end_time = "2021/06/01 05:15:00" for table in self.table_names: - print(f'Testing {table} returning values at start of month one.') + print(f"Testing {table} returning values at start of month one.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 63 * 4 expected_number_of_columns = 3 - expected_first_time = (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 2 * 4 - 
expected_last_time = '2021/06/01 00:00:00' - expected_last_time = \ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2021/05/31 00:00:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2021/06/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2021/05/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") - def test_dispatch_tables_start_of_month_previous_market_day_but_not_start_calendar_month(self): - start_time = '2021/06/05 03:00:00' - end_time = '2021/06/05 03:15:00' + def test_dispatch_tables_start_of_month_previous_market_day_but_not_start_calendar_month( + self, + ): + start_time = "2021/06/05 03:00:00" + end_time = "2021/06/05 03:15:00" for table in self.table_names: - print(f'Testing {table} returning values at start of month two.') + print(f"Testing {table} returning values at start of month two.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 3 * 4 expected_number_of_columns = 3 - expected_first_time = (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 4 - expected_last_time = '2021/06/04 00:00:00' - expected_last_time = \ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2021/06/04 00:00:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2021/06/04 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2021/06/04 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = 
data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") - def test_dispatch_tables_start_of_month_previous_market_day_and_first_market_day_but_not_start_calendar_month(self): - start_time = '2021/06/01 03:00:00' - end_time = '2021/06/01 05:00:00' + def test_dispatch_tables_start_of_month_previous_market_day_and_first_market_day_but_not_start_calendar_month( + self, + ): + start_time = "2021/06/01 03:00:00" + end_time = "2021/06/01 05:00:00" for table in self.table_names: - print(f'Testing {table} returning values at start of month two.') + print(f"Testing {table} returning values at start of month two.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 24 * 4 expected_number_of_columns = 3 - expected_first_time = (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 2 * 4 - expected_last_time = '2021/06/01 00:00:00' - expected_last_time = \ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2021/05/31 00:00:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2021/06/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2021/05/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") - def test_dispatch_tables_start_of_month_first_market_day_but_not_start_calendar_month(self): - start_time = '2021/06/01 04:00:00' - end_time = '2021/06/01 05:00:00' + def test_dispatch_tables_start_of_month_first_market_day_but_not_start_calendar_month( + self, + ): + start_time = "2021/06/01 04:00:00" + end_time = "2021/06/01 05:00:00" for table in self.table_names: - print(f'Testing {table} returning values at start of month two.') + print(f"Testing {table} returning values at start of month two.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 12 * 4 expected_number_of_columns = 3 
- expected_first_time = (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 4 - expected_last_time = '2021/06/01 00:00:00' - expected_last_time = \ - pd.to_datetime(expected_last_time, - format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2021/06/01 00:00:00' - expected_first_time =\ - pd.to_datetime(expected_first_time, - format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2021/06/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2021/06/01 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.reset_index(drop=True) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_end_of_month(self): - start_time = '2021/06/30 21:00:00' - end_time = '2021/07/01 00:00:00' + start_time = "2021/06/30 21:00:00" + end_time = "2021/07/01 00:00:00" for table in self.table_names: - print('Testing {} returing values at end of month.'.format(table)) + print("Testing {} returing values at end of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 36 * 4 expected_number_of_columns = 3 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + - timedelta(minutes=5)) - expected_last_time =\ - pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 4 expected_last_time = expected_first_time.replace(hour=0, minute=0) expected_first_time = expected_first_time.replace(hour=0, minute=0) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.sort_values(dat_col) data = data.reset_index(drop=True) @@ -523,35 +639,38 @@ def test_dispatch_tables_end_of_month(self): 
self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_straddle_2_months(self): - start_time = '2021/06/30 21:00:00' - end_time = '2021/07/01 21:00:00' + start_time = "2021/06/30 21:00:00" + end_time = "2021/07/01 21:00:00" for table in self.table_names: - print(f'Testing {table} returing values from adjacent months.') + print(f"Testing {table} returing values from adjacent months.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 288 * 4 expected_number_of_columns = 3 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, - format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 2 * 4 - expected_last_time =\ - expected_last_time.replace(hour=0, minute=0) - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_last_time = expected_last_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.sort_values(dat_col) data = data.reset_index(drop=True) @@ -559,37 +678,42 @@ def test_dispatch_tables_straddle_2_months(self): self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_start_of_year(self): - start_time = '2021/01/01 00:00:00' - end_time = '2021/01/01 01:00:00' + start_time = "2021/01/01 00:00:00" + end_time = "2021/01/01 01:00:00" for table in self.table_names: - print('Testing {} returing values at start of year.'.format(table)) + print("Testing {} returing values at start of year.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 12 * 3 expected_number_of_columns = 3 - expected_first_time = \ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time = pd.to_datetime(end_time, - format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 3 - expected_last_time =\ - (expected_last_time.replace(hour=0, minute=0) - - timedelta(days=1)) - expected_first_time =\ - (expected_first_time.replace(hour=0, minute=0) - - 
timedelta(days=1)) + expected_last_time = expected_last_time.replace( + hour=0, minute=0 + ) - timedelta(days=1) + expected_first_time = expected_first_time.replace( + hour=0, minute=0 + ) - timedelta(days=1) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.sort_values(dat_col) data = data.reset_index(drop=True) @@ -597,34 +721,38 @@ def test_dispatch_tables_start_of_year(self): self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_end_of_year(self): - start_time = '2021/12/31 23:00:00' - end_time = '2022/01/01 00:00:00' + start_time = "2021/12/31 23:00:00" + end_time = "2022/01/01 00:00:00" for table in self.table_names: - print('Testing {} returing values at end of year.'.format(table)) + print("Testing {} returing values at end of year.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = [dat_col, *filter_cols] expected_length = 12 * 4 expected_number_of_columns = 3 - expected_first_time = \ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time =\ - pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 4 - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) expected_last_time = expected_first_time data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.sort_values(dat_col) data = data.reset_index(drop=True) @@ -632,34 +760,38 @@ def test_dispatch_tables_end_of_year(self): self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_straddle_years(self): - start_time = '2021/12/31 23:00:00' - end_time = '2022/01/01 01:00:00' + start_time = "2021/12/31 23:00:00" + end_time = "2022/01/01 01:00:00" for table in self.table_names: - print(f'Testing {table} returning values from adjacent years.') + print(f"Testing {table} returning values from adjacent years.") dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] filter_cols = self.table_filters[table] cols = 
[dat_col, *filter_cols] expected_length = 24 * 4 expected_number_of_columns = 3 - expected_first_time =\ - (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') - + timedelta(minutes=5)) - expected_last_time =\ - pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDDAYOFFER_D': + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table == "BIDDAYOFFER_D": expected_length = 1 * 4 - expected_first_time =\ - expected_first_time.replace(hour=0, minute=0) + expected_first_time = expected_first_time.replace(hour=0, minute=0) expected_last_time = expected_first_time data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False, - filter_cols=filter_cols, - filter_values=self.filter_values[table_type]) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + ) data = self.restrictive_filter(data) data = data.sort_values(dat_col) data = data.reset_index(drop=True) @@ -667,67 +799,87 @@ def test_dispatch_tables_straddle_years(self): self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") class TestDynamicDataCompilerWithEffectiveDateFiltering(unittest.TestCase): def setUp(self): - self.table_names = ['GENCONDATA', 'SPDREGIONCONSTRAINT', - 'SPDCONNECTIONPOINTCONSTRAINT', - 'SPDINTERCONNECTORCONSTRAINT'] + self.table_names = [ + "GENCONDATA", + "SPDREGIONCONSTRAINT", + "SPDCONNECTIONPOINTCONSTRAINT", + "SPDINTERCONNECTORCONSTRAINT", + ] def test_filtering_for_one_interval_returns(self): - start_time = '2018/02/20 23:00:00' - end_time = '2018/02/20 23:05:00' + start_time = "2018/02/20 23:00:00" + end_time = "2018/02/20 23:05:00" for table in self.table_names: - print('Testing {} returing values for 1 interval.'.format(table)) + print("Testing {} returing values for 1 interval.".format(table)) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - fformat="feather", keep_csv=False, - select_columns=defaults.table_primary_keys[table]) - group_cols = [col for col in defaults.table_primary_keys[table] - if col != 'EFFECTIVEDATE'] + start_time, + end_time, + table, + defaults.raw_data_cache, + fformat="feather", + keep_csv=False, + select_columns=defaults.table_primary_keys[table], + ) + group_cols = [ + col + for col in defaults.table_primary_keys[table] + if col != "EFFECTIVEDATE" + ] contains_duplicates = data.duplicated(group_cols).any() self.assertEqual(False, contains_duplicates) not_empty = data.shape[0] > 0 self.assertEqual(True, not_empty) - print('Passed') + print("Passed") class TestCacheCompiler(unittest.TestCase): def setUp(self): - self.table_names = ['BIDDAYOFFER_D', 'BIDPEROFFER_D', 'DISPATCHLOAD', - 'DISPATCHCONSTRAINT', 'DISPATCH_UNIT_SCADA', - 'DISPATCHPRICE', 'DISPATCHINTERCONNECTORRES', - 'DISPATCHREGIONSUM', 'TRADINGLOAD', 'TRADINGPRICE', - 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT'] - self.id_cols = {'DISPATCHLOAD': 'DUID', - 'DISPATCHCONSTRAINT': 'CONSTRAINTID', - 'DISPATCH_UNIT_SCADA': 'DUID', - 'DISPATCHPRICE': 'REGIONID', - 'DISPATCHINTERCONNECTORRES': 'INTERCONNECTORID', - 
'DISPATCHREGIONSUM': 'REGIONID', - 'BIDPEROFFER_D': 'DUID', - 'BIDDAYOFFER_D': 'DUID', - 'TRADINGLOAD': 'DUID', - 'TRADINGPRICE': 'REGIONID', - 'TRADINGREGIONSUM': 'REGIONID', - 'TRADINGINTERCONNECT': 'INTERCONNECTORID'} + self.table_names = [ + "BIDDAYOFFER_D", + "BIDPEROFFER_D", + "DISPATCHLOAD", + "DISPATCHCONSTRAINT", + "DISPATCH_UNIT_SCADA", + "DISPATCHPRICE", + "DISPATCHINTERCONNECTORRES", + "DISPATCHREGIONSUM", + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ] + self.id_cols = { + "DISPATCHLOAD": "DUID", + "DISPATCHCONSTRAINT": "CONSTRAINTID", + "DISPATCH_UNIT_SCADA": "DUID", + "DISPATCHPRICE": "REGIONID", + "DISPATCHINTERCONNECTORRES": "INTERCONNECTORID", + "DISPATCHREGIONSUM": "REGIONID", + "BIDPEROFFER_D": "DUID", + "BIDDAYOFFER_D": "DUID", + "TRADINGLOAD": "DUID", + "TRADINGPRICE": "REGIONID", + "TRADINGREGIONSUM": "REGIONID", + "TRADINGINTERCONNECT": "INTERCONNECTORID", + } def test_caching_and_typing_works_feather(self): - start_time = '2018/02/20 23:00:00' - end_time = '2018/02/20 23:30:00' + start_time = "2018/02/20 23:00:00" + end_time = "2018/02/20 23:30:00" for table in self.table_names: dat_col = defaults.primary_date_columns[table] id_col = self.id_cols[table] - print(f'Testing {table} returing values for 1 interval.') + print(f"Testing {table} returing values for 1 interval.") data_fetch_methods.cache_compiler( - start_time, end_time, table, defaults.raw_data_cache, - fformat="feather" - ) + start_time, end_time, table, defaults.raw_data_cache, fformat="feather" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - fformat="feather" + start_time, end_time, table, defaults.raw_data_cache, fformat="feather" ) dat_col_type = data[dat_col].dtype id_col_type = data[id_col].dtype @@ -737,22 +889,20 @@ def test_caching_and_typing_works_feather(self): self.assertFalse(not_typed) self.assertEqual(dat_col_type, " 0 self.assertEqual(True, not_empty) - print('Passed') + print("Passed") class TestFCAS4SecondData(unittest.TestCase): @@ -825,23 +997,29 @@ def setUp(self): pass def test_dispatch_tables_start_of_month(self): - table = 'FCAS_4_SECOND' + table = "FCAS_4_SECOND" start_time = self.start_day minute_offset = 5 end_time = start_time + timedelta(minutes=minute_offset) start_str = start_time.strftime("%Y/%m/%d %H:%M:%S") end_str = end_time.strftime("%Y/%m/%d %H:%M:%S") - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] - cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER'] + cols = [dat_col, "ELEMENTNUMBER", "VARIABLENUMBER"] # expected length assumes first data point is at 00:00:00 expected_length = 15 * minute_offset length_check = False expected_number_of_columns = 3 data = data_fetch_methods.dynamic_data_compiler( - start_str, end_str, table, defaults.raw_data_cache, - fformat="feather", keep_csv=False, select_columns=cols) + start_str, + end_str, + table, + defaults.raw_data_cache, + fformat="feather", + keep_csv=False, + select_columns=cols, + ) length_array = data[dat_col].drop_duplicates() if length_array.shape[0] == expected_length: length_check = True @@ -850,26 +1028,32 @@ def test_dispatch_tables_start_of_month(self): length_check = True self.assertTrue(length_check) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") def test_fcas_tables_end_of_month(self): - table = 'FCAS_4_SECOND' + table = 
"FCAS_4_SECOND" minute_offset = 5 start_time = self.start_month - timedelta(minutes=minute_offset) end_time = start_time + timedelta(minutes=minute_offset * 2) start_str = start_time.strftime("%Y/%m/%d %H:%M:%S") end_str = end_time.strftime("%Y/%m/%d %H:%M:%S") - print('Testing {} returing values at end of month.'.format(table)) + print("Testing {} returing values at end of month.".format(table)) dat_col = defaults.primary_date_columns[table] - cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER'] + cols = [dat_col, "ELEMENTNUMBER", "VARIABLENUMBER"] # expected length assumes first data point is at 00:00:00 expected_length = 15 * (minute_offset * 2) length_check = False expected_number_of_columns = 3 data = data_fetch_methods.dynamic_data_compiler( - start_str, end_str, table, defaults.raw_data_cache, - fformat="feather", keep_csv=False, select_columns=cols) + start_str, + end_str, + table, + defaults.raw_data_cache, + fformat="feather", + keep_csv=False, + select_columns=cols, + ) length_array = data[dat_col].drop_duplicates() if length_array.shape[0] == expected_length: length_check = True @@ -879,28 +1063,35 @@ def test_fcas_tables_end_of_month(self): print(length_array.shape[0]) self.assertTrue(length_check) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") - @unittest.skipIf((datetime.now() - datetime(year=datetime.now().year, - month=1, day=1)).days > 60, - "start of year data not available: > 60 days ago") + @unittest.skipIf( + (datetime.now() - datetime(year=datetime.now().year, month=1, day=1)).days > 60, + "start of year data not available: > 60 days ago", + ) def test_fcas_tables_end_of_year(self): - table = 'FCAS_4_SECOND' + table = "FCAS_4_SECOND" minute_offset = 5 start_time = self.start_year - timedelta(minutes=minute_offset) end_time = start_time + timedelta(minutes=minute_offset * 2) start_str = start_time.strftime("%Y/%m/%d %H:%M:%S") end_str = end_time.strftime("%Y/%m/%d %H:%M:%S") - print('Testing {} returing values at end of year.'.format(table)) + print("Testing {} returing values at end of year.".format(table)) dat_col = defaults.primary_date_columns[table] - cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER'] + cols = [dat_col, "ELEMENTNUMBER", "VARIABLENUMBER"] expected_length = 15 * (minute_offset * 2) length_check = False expected_number_of_columns = 3 data = data_fetch_methods.dynamic_data_compiler( - start_str, end_str, table, defaults.raw_data_cache, - select_columns=cols, fformat="feather", keep_csv=False) + start_str, + end_str, + table, + defaults.raw_data_cache, + select_columns=cols, + fformat="feather", + keep_csv=False, + ) length_array = data[dat_col].drop_duplicates() if length_array.shape[0] == expected_length: length_check = True @@ -909,7 +1100,7 @@ def test_fcas_tables_end_of_year(self): length_check = True self.assertTrue(length_check) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") class TestStaticTables(unittest.TestCase): @@ -917,76 +1108,90 @@ def setUp(self): pass def test_fcas_elements_table(self): - start_time = '2017/12/31 23:55:04' - end_time = '2018/01/01 00:05:00' - table = 'ELEMENTS_FCAS_4_SECOND' - cols = ['ELEMENTNUMBER', 'EMSNAME'] - filter_cols = ('ELEMENTNUMBER',) - func = \ - data_fetch_methods._static_table_wrapper_for_gui - data = func(start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, filter_cols=filter_cols, - filter_values=(['1'],)) + start_time = "2017/12/31 23:55:04" + end_time = "2018/01/01 00:05:00" + 
table = "ELEMENTS_FCAS_4_SECOND" + cols = ["ELEMENTNUMBER", "EMSNAME"] + filter_cols = ("ELEMENTNUMBER",) + func = data_fetch_methods._static_table_wrapper_for_gui + data = func( + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=(["1"],), + ) expected_length = 1 expected_number_of_columns = 2 self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") def test_fcas_variable_table(self): - start_time = '2018/12/31 23:55:04' - end_time = '2018/01/01 00:05:00' - table = 'VARIABLES_FCAS_4_SECOND' - cols = ['VARIABLENUMBER', 'VARIABLETYPE'] - filter_cols = ('VARIABLENUMBER',) + start_time = "2018/12/31 23:55:04" + end_time = "2018/01/01 00:05:00" + table = "VARIABLES_FCAS_4_SECOND" + cols = ["VARIABLENUMBER", "VARIABLETYPE"] + filter_cols = ("VARIABLENUMBER",) func = data_fetch_methods._static_table_wrapper_for_gui - data = func(start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, filter_cols=filter_cols, - filter_values=(['2'],)) + data = func( + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=(["2"],), + ) expected_length = 1 expected_number_of_columns = 2 self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") def test_registration_list(self): - table = 'Generators and Scheduled Loads' - cols = ['DUID', 'Technology Type - Primary'] - filter_cols = ('DUID',) - data = data_fetch_methods.static_table_xl(table, - defaults.raw_data_cache, - select_columns=cols, - filter_cols=filter_cols, - filter_values=(['AGLHAL'],)) + table = "Generators and Scheduled Loads" + cols = ["DUID", "Technology Type - Primary"] + filter_cols = ("DUID",) + data = data_fetch_methods.static_table_xl( + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=(["AGLHAL"],), + ) expected_length = 1 expected_number_of_columns = 2 self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) - print('Passed') + print("Passed") class TestCustomTables(unittest.TestCase): def setUp(self): pass - @unittest.skipIf((datetime.now() - - datetime(year=datetime.now().year, - month=1, day=1)).days > 60, - "start of year data not available: > 60 days ago") + @unittest.skipIf( + (datetime.now() - datetime(year=datetime.now().year, month=1, day=1)).days > 60, + "start of year data not available: > 60 days ago", + ) def test_dispatch_tables_straddle_years(self): - table = 'FCAS_4_SECOND' + table = "FCAS_4_SECOND" minute_offset = 5 start_time = self.start_year - timedelta(minutes=minute_offset) end_time = start_time + timedelta(minutes=minute_offset * 2) - print('Testing custom table {}.'.format(table)) - data = custom_tables.fcas4s_scada_match(start_time, end_time, - table, defaults.raw_data_cache) + print("Testing custom table {}.".format(table)) + data = custom_tables.fcas4s_scada_match( + start_time, end_time, table, defaults.raw_data_cache + ) data = data.reset_index(drop=True) - contains_duplicates = data.duplicated(['MARKETNAME']).any() + contains_duplicates = data.duplicated(["MARKETNAME"]).any() self.assertEqual(False, contains_duplicates) - contains_duplicates = data.duplicated(['ELEMENTNUMBER']).any() + contains_duplicates = data.duplicated(["ELEMENTNUMBER"]).any() self.assertEqual(False, 
contains_duplicates) not_empty = data.shape[0] > 0 self.assertEqual(True, not_empty) - print('Passed') + print("Passed") diff --git a/nemosis/test_date_generators.py b/nemosis/test_date_generators.py index c2437ae..aef9dd3 100644 --- a/nemosis/test_date_generators.py +++ b/nemosis/test_date_generators.py @@ -8,82 +8,85 @@ def setUp(self): pass def test_two_times_not_at_edge_of_month_return_one_month(self): - start_time = datetime.strptime('2017/01/02 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/03 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/02 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) - - def test_two_times_first_at_edge_of_month_return_month_before_and_month_of_times(self): - start_time = datetime.strptime('2017/02/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/02/03 00:00:00', '%Y/%m/%d %H:%M:%S') + def test_two_times_first_at_edge_of_month_return_month_before_and_month_of_times( + self, + ): + start_time = datetime.strptime("2017/02/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/02/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '02') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "02") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) - def test_two_times_first_at_edge_of_year_return_month_before_and_month_of_times(self): - start_time = datetime.strptime('2017/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/03 00:00:00', '%Y/%m/%d %H:%M:%S') + def test_two_times_first_at_edge_of_year_return_month_before_and_month_of_times( + self, + ): + start_time = datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2016') - self.assertEqual(times[0][1], '12') + self.assertEqual(times[0][0], "2016") + self.assertEqual(times[0][1], "12") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '01') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "01") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) def test_two_times_second_at_edge_of_month_returns_one_month(self): - start_time = datetime.strptime('2017/01/05 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/31 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/05 00:00:00", 
"%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/31 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) def test_two_times_second_at_edge_of_year_returns_one_month(self): - start_time = datetime.strptime('2017/12/02 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/12/31 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/12/02 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/12/31 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '12') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "12") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) def test_two_times_in_middle_of_jan_and_march_return_3_months(self): - start_time = datetime.strptime('2017/01/05 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/03/24 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/05 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/03/24 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_and_month_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '02') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "02") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) - self.assertEqual(times[2][0], '2017') - self.assertEqual(times[2][1], '03') + self.assertEqual(times[2][0], "2017") + self.assertEqual(times[2][1], "03") self.assertEqual(times[2][2], None) self.assertEqual(times[2][3], None) self.assertEqual(len(times), 3) @@ -94,187 +97,192 @@ def setUp(self): pass def test_two_times_not_at_edge_of_month_return_one_month(self): - start_time = datetime.strptime('2017/01/02 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/03 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/02 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) - - def test_two_times_first_at_edge_of_month_return_month_before_and_month_of_times(self): - start_time = datetime.strptime('2017/02/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/02/03 00:00:00', '%Y/%m/%d %H:%M:%S') + def 
test_two_times_first_at_edge_of_month_return_month_before_and_month_of_times( + self, + ): + start_time = datetime.strptime("2017/02/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/02/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '02') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "02") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) - def test_two_times_first_at_edge_of_year_return_month_before_and_month_of_times(self): - start_time = datetime.strptime('2017/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/03 00:00:00', '%Y/%m/%d %H:%M:%S') + def test_two_times_first_at_edge_of_year_return_month_before_and_month_of_times( + self, + ): + start_time = datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2016') - self.assertEqual(times[0][1], '12') + self.assertEqual(times[0][0], "2016") + self.assertEqual(times[0][1], "12") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '01') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "01") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) def test_two_times_second_at_edge_of_month_returns_one_month(self): - start_time = datetime.strptime('2017/01/05 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/01/31 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/05 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/01/31 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) def test_two_times_second_at_edge_of_year_returns_one_month(self): - start_time = datetime.strptime('2017/12/02 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/12/31 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/12/02 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/12/31 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '12') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "12") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) self.assertEqual(len(times), 1) def test_two_times_in_middle_of_jan_and_march_return_3_months(self): - start_time = datetime.strptime('2017/01/05 00:00:00', 
'%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2017/03/24 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2017/01/05 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2017/03/24 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2017') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2017") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2017') - self.assertEqual(times[1][1], '02') + self.assertEqual(times[1][0], "2017") + self.assertEqual(times[1][1], "02") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) - self.assertEqual(times[2][0], '2017') - self.assertEqual(times[2][1], '03') + self.assertEqual(times[2][0], "2017") + self.assertEqual(times[2][1], "03") self.assertEqual(times[2][2], None) self.assertEqual(times[2][3], None) self.assertEqual(len(times), 3) def test_change_from_months_to_days(self): - start_time = datetime.strptime('2021/02/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2021/04/03 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2021/02/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2021/04/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] # Note we expect the 1st of april to be skipped - self.assertEqual(times[0][0], '2021') - self.assertEqual(times[0][1], '01') + self.assertEqual(times[0][0], "2021") + self.assertEqual(times[0][1], "01") self.assertEqual(times[0][2], None) self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2021') - self.assertEqual(times[1][1], '02') + self.assertEqual(times[1][0], "2021") + self.assertEqual(times[1][1], "02") self.assertEqual(times[1][2], None) self.assertEqual(times[1][3], None) # Data for march and the first of april is missing from the AEMO website so we don't generate the dates # for these times. 
- self.assertEqual(times[2][0], '2021') - self.assertEqual(times[2][1], '04') - self.assertEqual(times[2][2], '02') + self.assertEqual(times[2][0], "2021") + self.assertEqual(times[2][1], "04") + self.assertEqual(times[2][2], "02") self.assertEqual(times[2][3], None) - self.assertEqual(times[3][0], '2021') - self.assertEqual(times[3][1], '04') - self.assertEqual(times[3][2], '03') + self.assertEqual(times[3][0], "2021") + self.assertEqual(times[3][1], "04") + self.assertEqual(times[3][2], "03") self.assertEqual(times[3][3], None) self.assertEqual(len(times), 4) def test_day_given_in_april_2021(self): - start_time = datetime.strptime('2021/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2021/04/03 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2021/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2021/04/03 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] # Note we expect the 1st of april to be skipped - self.assertEqual(times[0][0], '2021') - self.assertEqual(times[0][1], '04') - self.assertEqual(times[0][2], '02') + self.assertEqual(times[0][0], "2021") + self.assertEqual(times[0][1], "04") + self.assertEqual(times[0][2], "02") self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2021') - self.assertEqual(times[1][1], '04') - self.assertEqual(times[1][2], '03') + self.assertEqual(times[1][0], "2021") + self.assertEqual(times[1][1], "04") + self.assertEqual(times[1][2], "03") self.assertEqual(times[1][3], None) self.assertEqual(len(times), 2) def test_include_previous_market_day(self): - start_time = datetime.strptime('2021/05/10 01:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2021/05/10 05:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2021/05/10 01:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2021/05/10 05:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] # Note we expect the 1st of april to be skipped - self.assertEqual(times[0][0], '2021') - self.assertEqual(times[0][1], '05') - self.assertEqual(times[0][2], '09') + self.assertEqual(times[0][0], "2021") + self.assertEqual(times[0][1], "05") + self.assertEqual(times[0][2], "09") self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2021') - self.assertEqual(times[1][1], '05') - self.assertEqual(times[1][2], '10') + self.assertEqual(times[1][0], "2021") + self.assertEqual(times[1][1], "05") + self.assertEqual(times[1][2], "10") self.assertEqual(times[1][3], None) self.assertEqual(len(times), 2) def test_include_previous_month_if_1st_market_day_of_month(self): - start_time = datetime.strptime('2021/05/01 05:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2021/05/03 05:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2021/05/01 05:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2021/05/03 05:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2021') - self.assertEqual(times[0][1], '04') - self.assertEqual(times[0][2], '30') + self.assertEqual(times[0][0], "2021") + self.assertEqual(times[0][1], "04") + self.assertEqual(times[0][2], "30") self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], 
'2021') - self.assertEqual(times[1][1], '05') - self.assertEqual(times[1][2], '01') + self.assertEqual(times[1][0], "2021") + self.assertEqual(times[1][1], "05") + self.assertEqual(times[1][2], "01") self.assertEqual(times[1][3], None) - self.assertEqual(times[2][0], '2021') - self.assertEqual(times[2][1], '05') - self.assertEqual(times[2][2], '02') + self.assertEqual(times[2][0], "2021") + self.assertEqual(times[2][1], "05") + self.assertEqual(times[2][2], "02") self.assertEqual(times[2][3], None) - self.assertEqual(times[3][0], '2021') - self.assertEqual(times[3][1], '05') - self.assertEqual(times[3][2], '03') + self.assertEqual(times[3][0], "2021") + self.assertEqual(times[3][1], "05") + self.assertEqual(times[3][2], "03") self.assertEqual(times[3][3], None) self.assertEqual(len(times), 4) - def test_include_previous_month_if_1st_market_day_of_month_but_2nd_calendar_day(self): - start_time = datetime.strptime('2021/05/02 04:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2021/05/03 05:00:00', '%Y/%m/%d %H:%M:%S') + def test_include_previous_month_if_1st_market_day_of_month_but_2nd_calendar_day( + self, + ): + start_time = datetime.strptime("2021/05/02 04:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2021/05/03 05:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.bid_table_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2021') - self.assertEqual(times[0][1], '04') - self.assertEqual(times[0][2], '30') + self.assertEqual(times[0][0], "2021") + self.assertEqual(times[0][1], "04") + self.assertEqual(times[0][2], "30") self.assertEqual(times[0][3], None) - self.assertEqual(times[1][0], '2021') - self.assertEqual(times[1][1], '05') - self.assertEqual(times[1][2], '01') + self.assertEqual(times[1][0], "2021") + self.assertEqual(times[1][1], "05") + self.assertEqual(times[1][2], "01") self.assertEqual(times[1][3], None) - self.assertEqual(times[2][0], '2021') - self.assertEqual(times[2][1], '05') - self.assertEqual(times[2][2], '02') + self.assertEqual(times[2][0], "2021") + self.assertEqual(times[2][1], "05") + self.assertEqual(times[2][2], "02") self.assertEqual(times[2][3], None) - self.assertEqual(times[3][0], '2021') - self.assertEqual(times[3][1], '05') - self.assertEqual(times[3][2], '03') + self.assertEqual(times[3][0], "2021") + self.assertEqual(times[3][1], "05") + self.assertEqual(times[3][2], "03") self.assertEqual(times[3][3], None) self.assertEqual(len(times), 4) @@ -285,62 +293,63 @@ def setUp(self): def test_two_times_in_middle_of_adjacent_hours(self): # Whole of each hour should be returned. 
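# That is, the 12:20-13:45 request below should be padded out to the whole hours it touches
# (12:00 to 13:55), yielding one four-digit HHMM index per 5-minute interval, most recent
# first: ("2013", "01", "05", "1355") down to ("2013", "01", "05", "1200"), 24 tuples in all.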
- start_time = datetime.strptime('2013/01/05 12:20:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2013/01/05 13:45:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2013/01/05 12:20:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2013/01/05 13:45:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_month_day_index_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2013') - self.assertEqual(times[0][1], '01') - self.assertEqual(times[0][2], '05') - self.assertEqual(times[0][3], '1355') - self.assertEqual(times[-1][0], '2013') - self.assertEqual(times[-1][1], '01') - self.assertEqual(times[-1][2], '05') - self.assertEqual(times[-1][3], '1200') + self.assertEqual(times[0][0], "2013") + self.assertEqual(times[0][1], "01") + self.assertEqual(times[0][2], "05") + self.assertEqual(times[0][3], "1355") + self.assertEqual(times[-1][0], "2013") + self.assertEqual(times[-1][1], "01") + self.assertEqual(times[-1][2], "05") + self.assertEqual(times[-1][3], "1200") self.assertEqual(len(times), 24) def test_two_times_one_at_start_of_year_should_not_over_flow_to_previous_year(self): - start_time = datetime.strptime('2013/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2013/01/01 01:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2013/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2013/01/01 01:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_month_day_index_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2013') - self.assertEqual(times[0][1], '01') - self.assertEqual(times[0][2], '01') - self.assertEqual(times[0][3], '0155') - self.assertEqual(times[-1][0], '2013') - self.assertEqual(times[-1][1], '01') - self.assertEqual(times[-1][2], '01') - self.assertEqual(times[-1][3], '0000') + self.assertEqual(times[0][0], "2013") + self.assertEqual(times[0][1], "01") + self.assertEqual(times[0][2], "01") + self.assertEqual(times[0][3], "0155") + self.assertEqual(times[-1][0], "2013") + self.assertEqual(times[-1][1], "01") + self.assertEqual(times[-1][2], "01") + self.assertEqual(times[-1][3], "0000") self.assertEqual(len(times), 24) - def test_two_times_one_at_start_of_month_should_not_over_flow_to_previous_month(self): - start_time = datetime.strptime('2013/02/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2013/02/01 01:00:00', '%Y/%m/%d %H:%M:%S') + def test_two_times_one_at_start_of_month_should_not_over_flow_to_previous_month( + self, + ): + start_time = datetime.strptime("2013/02/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2013/02/01 01:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_month_day_index_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2013') - self.assertEqual(times[0][1], '02') - self.assertEqual(times[0][2], '01') - self.assertEqual(times[0][3], '0155') - self.assertEqual(times[-1][0], '2013') - self.assertEqual(times[-1][1], '02') - self.assertEqual(times[-1][2], '01') - self.assertEqual(times[-1][3], '0000') + self.assertEqual(times[0][0], "2013") + self.assertEqual(times[0][1], "02") + self.assertEqual(times[0][2], "01") + self.assertEqual(times[0][3], "0155") + self.assertEqual(times[-1][0], "2013") + self.assertEqual(times[-1][1], "02") + self.assertEqual(times[-1][2], "01") + 
self.assertEqual(times[-1][3], "0000") self.assertEqual(len(times), 24) def test_no_missing_values_in_a_week(self): - start_time = datetime.strptime('2013/02/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2013/02/07 00:00:00', '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2013/02/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2013/02/07 00:00:00", "%Y/%m/%d %H:%M:%S") gen = date_generators.year_month_day_index_gen(start_time, end_time) times = [(year, month, day, index) for year, month, day, index in gen] - self.assertEqual(times[0][0], '2013') - self.assertEqual(times[0][1], '02') - self.assertEqual(times[0][2], '01') - self.assertEqual(times[0][3], '2355') - self.assertEqual(times[-1][0], '2013') - self.assertEqual(times[-1][1], '02') - self.assertEqual(times[-1][2], '07') - self.assertEqual(times[-1][3], '0000') + self.assertEqual(times[0][0], "2013") + self.assertEqual(times[0][1], "02") + self.assertEqual(times[0][2], "01") + self.assertEqual(times[0][3], "2355") + self.assertEqual(times[-1][0], "2013") + self.assertEqual(times[-1][1], "02") + self.assertEqual(times[-1][2], "07") + self.assertEqual(times[-1][3], "0000") self.assertEqual(len(times), 1740) - diff --git a/nemosis/test_errors_and_warnings.py b/nemosis/test_errors_and_warnings.py index 530f299..62f4a0c 100644 --- a/nemosis/test_errors_and_warnings.py +++ b/nemosis/test_errors_and_warnings.py @@ -6,156 +6,307 @@ class TestDynamicDataCompilerRaisesExpectedErrors(unittest.TestCase): def test_raise_error_for_incorrect_table_name(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2000/01/01 00:00:00', '2000/02/01 00:00:00', 'NOTATABLE', - defaults.raw_data_cache) - self.assertTrue("Table name provided is not a dynamic table." - in str(context.exception)) + dynamic_data_compiler( + "2000/01/01 00:00:00", + "2000/02/01 00:00:00", + "NOTATABLE", + defaults.raw_data_cache, + ) + self.assertTrue( + "Table name provided is not a dynamic table." in str(context.exception) + ) def test_raise_error_for_no_data_returned(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2000/01/01 00:00:00', '2000/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache) - self.assertTrue((f'Compiling data for table DISPATCHPRICE failed. ' + - 'This probably because none of the requested data ' + - 'could be download from AEMO. Check your internet ' + - 'connection and that the requested data is archived on: ' + - 'https://nemweb.com.au see nemosis.defaults for table specific urls.') - in str(context.exception)) + dynamic_data_compiler( + "2000/01/01 00:00:00", + "2000/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + ) + self.assertTrue( + ( + f"Compiling data for table DISPATCHPRICE failed. " + + "This probably because none of the requested data " + + "could be download from AEMO. Check your internet " + + "connection and that the requested data is archived on: " + + "https://nemweb.com.au see nemosis.defaults for table specific urls." + ) + in str(context.exception) + ) def test_raise_error_for_filter_column_not_in_select_columns(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, select_columns=['REGIONID', 'SETTLEMENTDATE', 'RRP'], - filter_cols=['INTERVENTION'], filter_values=(['0'],)) - self.assertTrue(('Filter columns not valid. 
They must be a part of ' + - 'select_columns or the table defaults.') - in str(context.exception)) + dynamic_data_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + select_columns=["REGIONID", "SETTLEMENTDATE", "RRP"], + filter_cols=["INTERVENTION"], + filter_values=(["0"],), + ) + self.assertTrue( + ( + "Filter columns not valid. They must be a part of " + + "select_columns or the table defaults." + ) + in str(context.exception) + ) def test_raise_error_for_filter_column_not_in_default_columns(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, select_columns=['REGIONID', 'SETTLEMENTDATE', 'RRP'], - filter_cols=['NOTACOLUMN'], filter_values=(['0'],)) - self.assertTrue(('Filter columns not valid. They must be a part of ' + - 'select_columns or the table defaults.') - in str(context.exception)) + dynamic_data_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + select_columns=["REGIONID", "SETTLEMENTDATE", "RRP"], + filter_cols=["NOTACOLUMN"], + filter_values=(["0"],), + ) + self.assertTrue( + ( + "Filter columns not valid. They must be a part of " + + "select_columns or the table defaults." + ) + in str(context.exception) + ) def test_raise_error_if_fformat_not_in_expected_set(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, fformat='db') - self.assertTrue("Argument fformat must be 'csv', 'feather' or 'parquet'" - in str(context.exception)) + dynamic_data_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + fformat="db", + ) + self.assertTrue( + "Argument fformat must be 'csv', 'feather' or 'parquet'" + in str(context.exception) + ) def test_raise_error_if_select_columns_not_in_data(self): with self.assertRaises(Exception) as context: - dynamic_data_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, select_columns=['NOTACOLUMN']) - self.assertTrue((f'None of columns [\'NOTACOLUMN\'] are in D:/nemosis_test_cache\\PUBLIC_DVD_DISPATCHPRICE_201812010000.feather. ' - "This may be caused by user input if the \'select_columns\' " - "argument is being used, or by changed AEMO data formats. " - "This error can be avoided by using the argument select_columns=\'all\'.") - in str(context.exception)) + dynamic_data_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + select_columns=["NOTACOLUMN"], + ) + self.assertTrue( + ( + f"None of columns ['NOTACOLUMN'] are in D:/nemosis_test_cache\\PUBLIC_DVD_DISPATCHPRICE_201812010000.feather. " + "This may be caused by user input if the 'select_columns' " + "argument is being used, or by changed AEMO data formats. " + "This error can be avoided by using the argument select_columns='all'." 
+ ) + in str(context.exception) + ) def test_using_select_columns_all_does_not_raise_error(self): - price_data = dynamic_data_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, select_columns='all', fformat='csv') - expected_columns = ['I', 'DISPATCH', 'PRICE', '1', 'SETTLEMENTDATE', 'RUNNO', 'REGIONID', 'DISPATCHINTERVAL', - 'INTERVENTION', 'RRP', 'EEP', 'ROP', 'APCFLAG', 'MARKETSUSPENDEDFLAG', - 'LASTCHANGED', 'RAISE6SECRRP', 'RAISE6SECROP', 'RAISE6SECAPCFLAG', - 'RAISE60SECRRP', 'RAISE60SECROP', 'RAISE60SECAPCFLAG', 'RAISE5MINRRP', - 'RAISE5MINROP', 'RAISE5MINAPCFLAG', 'RAISEREGRRP', 'RAISEREGROP', - 'RAISEREGAPCFLAG', 'LOWER6SECRRP', 'LOWER6SECROP', 'LOWER6SECAPCFLAG', - 'LOWER60SECRRP', 'LOWER60SECROP', 'LOWER60SECAPCFLAG', 'LOWER5MINRRP', - 'LOWER5MINROP', 'LOWER5MINAPCFLAG', 'LOWERREGRRP', 'LOWERREGROP', - 'LOWERREGAPCFLAG', 'PRICE_STATUS', 'PRE_AP_ENERGY_PRICE', - 'PRE_AP_RAISE6_PRICE', 'PRE_AP_RAISE60_PRICE', 'PRE_AP_RAISE5MIN_PRICE', - 'PRE_AP_RAISEREG_PRICE', 'PRE_AP_LOWER6_PRICE', 'PRE_AP_LOWER60_PRICE', - 'PRE_AP_LOWER5MIN_PRICE', 'PRE_AP_LOWERREG_PRICE', - 'CUMUL_PRE_AP_ENERGY_PRICE', 'CUMUL_PRE_AP_RAISE6_PRICE', - 'CUMUL_PRE_AP_RAISE60_PRICE', 'CUMUL_PRE_AP_RAISE5MIN_PRICE', - 'CUMUL_PRE_AP_RAISEREG_PRICE', 'CUMUL_PRE_AP_LOWER6_PRICE', - 'CUMUL_PRE_AP_LOWER60_PRICE', 'CUMUL_PRE_AP_LOWER5MIN_PRICE', - 'CUMUL_PRE_AP_LOWERREG_PRICE'] + price_data = dynamic_data_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + select_columns="all", + fformat="csv", + ) + expected_columns = [ + "I", + "DISPATCH", + "PRICE", + "1", + "SETTLEMENTDATE", + "RUNNO", + "REGIONID", + "DISPATCHINTERVAL", + "INTERVENTION", + "RRP", + "EEP", + "ROP", + "APCFLAG", + "MARKETSUSPENDEDFLAG", + "LASTCHANGED", + "RAISE6SECRRP", + "RAISE6SECROP", + "RAISE6SECAPCFLAG", + "RAISE60SECRRP", + "RAISE60SECROP", + "RAISE60SECAPCFLAG", + "RAISE5MINRRP", + "RAISE5MINROP", + "RAISE5MINAPCFLAG", + "RAISEREGRRP", + "RAISEREGROP", + "RAISEREGAPCFLAG", + "LOWER6SECRRP", + "LOWER6SECROP", + "LOWER6SECAPCFLAG", + "LOWER60SECRRP", + "LOWER60SECROP", + "LOWER60SECAPCFLAG", + "LOWER5MINRRP", + "LOWER5MINROP", + "LOWER5MINAPCFLAG", + "LOWERREGRRP", + "LOWERREGROP", + "LOWERREGAPCFLAG", + "PRICE_STATUS", + "PRE_AP_ENERGY_PRICE", + "PRE_AP_RAISE6_PRICE", + "PRE_AP_RAISE60_PRICE", + "PRE_AP_RAISE5MIN_PRICE", + "PRE_AP_RAISEREG_PRICE", + "PRE_AP_LOWER6_PRICE", + "PRE_AP_LOWER60_PRICE", + "PRE_AP_LOWER5MIN_PRICE", + "PRE_AP_LOWERREG_PRICE", + "CUMUL_PRE_AP_ENERGY_PRICE", + "CUMUL_PRE_AP_RAISE6_PRICE", + "CUMUL_PRE_AP_RAISE60_PRICE", + "CUMUL_PRE_AP_RAISE5MIN_PRICE", + "CUMUL_PRE_AP_RAISEREG_PRICE", + "CUMUL_PRE_AP_LOWER6_PRICE", + "CUMUL_PRE_AP_LOWER60_PRICE", + "CUMUL_PRE_AP_LOWER5MIN_PRICE", + "CUMUL_PRE_AP_LOWERREG_PRICE", + ] self.assertSequenceEqual(list(price_data.columns), expected_columns) class TestCacheCompilerRaisesExpectedErrors(unittest.TestCase): def test_raise_error_for_incorrect_table_name(self): with self.assertRaises(Exception) as context: - cache_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'NOTATABLE', - defaults.raw_data_cache, fformat='db') - self.assertTrue("Table name provided is not a dynamic table." - in str(context.exception)) + cache_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "NOTATABLE", + defaults.raw_data_cache, + fformat="db", + ) + self.assertTrue( + "Table name provided is not a dynamic table." 
in str(context.exception) + ) def test_raise_error_if_fformat_not_in_expected_set(self): with self.assertRaises(Exception) as context: - cache_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, fformat='db') - self.assertTrue("Argument fformat must be 'feather' or 'parquet'" - in str(context.exception)) + cache_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + fformat="db", + ) + self.assertTrue( + "Argument fformat must be 'feather' or 'parquet'" in str(context.exception) + ) def test_raise_error_if_select_columns_used_without_rebuild_true(self): with self.assertRaises(Exception) as context: - cache_compiler('2019/01/01 00:00:00', '2019/02/01 00:00:00', 'DISPATCHPRICE', - defaults.raw_data_cache, select_columns='all') - self.assertTrue(("The select_columns argument must be used with rebuild=True " + - "to ensure the cache is built with the correct columns.") - in str(context.exception)) + cache_compiler( + "2019/01/01 00:00:00", + "2019/02/01 00:00:00", + "DISPATCHPRICE", + defaults.raw_data_cache, + select_columns="all", + ) + self.assertTrue( + ( + "The select_columns argument must be used with rebuild=True " + + "to ensure the cache is built with the correct columns." + ) + in str(context.exception) + ) class TestStaticTableRaisesExpectedErrors(unittest.TestCase): def test_raise_error_for_incorrect_table_name(self): with self.assertRaises(Exception) as context: - static_table('NOTATABLE', defaults.raw_data_cache) - self.assertTrue("Table name provided is not a static table." - in str(context.exception)) + static_table("NOTATABLE", defaults.raw_data_cache) + self.assertTrue( + "Table name provided is not a static table." in str(context.exception) + ) def test_raise_error_for_no_data_returned(self): - good_url = defaults.static_table_url['VARIABLES_FCAS_4_SECOND'] - defaults.static_table_url['VARIABLES_FCAS_4_SECOND'] = 'bad_url' - path_and_name = defaults.raw_data_cache + '/' + defaults.names['VARIABLES_FCAS_4_SECOND'] + good_url = defaults.static_table_url["VARIABLES_FCAS_4_SECOND"] + defaults.static_table_url["VARIABLES_FCAS_4_SECOND"] = "bad_url" + path_and_name = ( + defaults.raw_data_cache + "/" + defaults.names["VARIABLES_FCAS_4_SECOND"] + ) if os.path.isfile(path_and_name): os.remove(path_and_name) with self.assertRaises(Exception) as context: - static_table('VARIABLES_FCAS_4_SECOND', defaults.raw_data_cache) - self.assertTrue((f'Compiling data for table VARIABLES_FCAS_4_SECOND failed. ' + - 'This probably because none of the requested data ' + - 'could be download from AEMO. Check your internet ' + - 'connection and that the requested data is archived on: ' + - 'https://nemweb.com.au see nemosis.defaults for table specific urls.') - in str(context.exception)) - defaults.static_table_url['VARIABLES_FCAS_4_SECOND'] = good_url + static_table("VARIABLES_FCAS_4_SECOND", defaults.raw_data_cache) + self.assertTrue( + ( + f"Compiling data for table VARIABLES_FCAS_4_SECOND failed. " + + "This probably because none of the requested data " + + "could be download from AEMO. Check your internet " + + "connection and that the requested data is archived on: " + + "https://nemweb.com.au see nemosis.defaults for table specific urls." 
+ ) + in str(context.exception) + ) + defaults.static_table_url["VARIABLES_FCAS_4_SECOND"] = good_url def test_raise_error_for_filter_column_not_in_select_columns(self): with self.assertRaises(Exception) as context: - static_table('VARIABLES_FCAS_4_SECOND', defaults.raw_data_cache, - select_columns=['VARIABLENUMBER'], filter_cols=['VARIABLETYPE'], - filter_values=(['0'],)) - self.assertTrue(('Filter columns not valid. They must be a part of ' + - 'select_columns or the table defaults.') - in str(context.exception)) + static_table( + "VARIABLES_FCAS_4_SECOND", + defaults.raw_data_cache, + select_columns=["VARIABLENUMBER"], + filter_cols=["VARIABLETYPE"], + filter_values=(["0"],), + ) + self.assertTrue( + ( + "Filter columns not valid. They must be a part of " + + "select_columns or the table defaults." + ) + in str(context.exception) + ) def test_raise_error_for_filter_column_not_in_default_columns(self): with self.assertRaises(Exception) as context: - static_table('VARIABLES_FCAS_4_SECOND', defaults.raw_data_cache, - select_columns=['VARIABLENUMBER'], filter_cols=['NOTACOLUMN'], - filter_values=(['0'],)) - self.assertTrue(('Filter columns not valid. They must be a part of ' + - 'select_columns or the table defaults.') - in str(context.exception)) + static_table( + "VARIABLES_FCAS_4_SECOND", + defaults.raw_data_cache, + select_columns=["VARIABLENUMBER"], + filter_cols=["NOTACOLUMN"], + filter_values=(["0"],), + ) + self.assertTrue( + ( + "Filter columns not valid. They must be a part of " + + "select_columns or the table defaults." + ) + in str(context.exception) + ) def test_raise_error_if_select_columns_not_in_data(self): with self.assertRaises(Exception) as context: - static_table('VARIABLES_FCAS_4_SECOND', defaults.raw_data_cache, - select_columns=['NOTACOLUMN']) - self.assertTrue((f'None of columns [\'NOTACOLUMN\'] are in D:/nemosis_test_cache\\Ancillary Services Market Causer Pays Variables File.csv. ' - "This may be caused by user input if the \'select_columns\' " - "argument is being used, or by changed AEMO data formats. " - "This error can be avoided by using the argument select_columns=\'all\'.") - in str(context.exception)) + static_table( + "VARIABLES_FCAS_4_SECOND", + defaults.raw_data_cache, + select_columns=["NOTACOLUMN"], + ) + self.assertTrue( + ( + f"None of columns ['NOTACOLUMN'] are in D:/nemosis_test_cache\\Ancillary Services Market Causer Pays Variables File.csv. " + "This may be caused by user input if the 'select_columns' " + "argument is being used, or by changed AEMO data formats. " + "This error can be avoided by using the argument select_columns='all'." 
+ ) + in str(context.exception) + ) def test_using_select_columns_all_does_not_raise_error(self): - price_data = static_table('VARIABLES_FCAS_4_SECOND', defaults.raw_data_cache, select_columns='all') - expected_columns = ['VARIABLENUMBER', 'VARIABLETYPE'] + price_data = static_table( + "VARIABLES_FCAS_4_SECOND", defaults.raw_data_cache, select_columns="all" + ) + expected_columns = ["VARIABLENUMBER", "VARIABLETYPE"] self.assertSequenceEqual(list(price_data.columns), expected_columns) diff --git a/nemosis/test_filters.py b/nemosis/test_filters.py index a05405f..af4399d 100644 --- a/nemosis/test_filters.py +++ b/nemosis/test_filters.py @@ -8,348 +8,458 @@ class TestFiltersStartDate(unittest.TestCase): def setUp(self): - self.start_date_data = \ - pd.DataFrame({'START_DATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00'], - 'END_DATE': ['2015/01/01 00:00:00', '2015/07/01 00:12:00']}) + self.start_date_data = pd.DataFrame( + { + "START_DATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"], + "END_DATE": ["2015/01/01 00:00:00", "2015/07/01 00:12:00"], + } + ) def test_start_date_pick_first_of_two(self): - start_time = datetime.strptime('2014/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2014/09/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_start_and_end_date(self.start_date_data, start_time=start_time, end_time=end_time) - aim = pd.DataFrame({'START_DATE': ['2011/01/01 00:00:00'], 'END_DATE': ['2015/01/01 00:00:00']}) - aim['START_DATE'] = pd.to_datetime(aim['START_DATE'], format='%Y/%m/%d %H:%M:%S') - aim['END_DATE'] = pd.to_datetime(aim['END_DATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2014/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2014/09/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_start_and_end_date( + self.start_date_data, start_time=start_time, end_time=end_time + ) + aim = pd.DataFrame( + {"START_DATE": ["2011/01/01 00:00:00"], "END_DATE": ["2015/01/01 00:00:00"]} + ) + aim["START_DATE"] = pd.to_datetime( + aim["START_DATE"], format="%Y/%m/%d %H:%M:%S" + ) + aim["END_DATE"] = pd.to_datetime(aim["END_DATE"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_start_date_pick_second_of_two(self): - start_time = datetime.strptime('2015/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_start_and_end_date(self.start_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'START_DATE': ['2015/01/01 00:00:00'], 'END_DATE': ['2015/07/01 00:12:00']}) - aim['START_DATE'] = pd.to_datetime(aim['START_DATE'], format='%Y/%m/%d %H:%M:%S') - aim['END_DATE'] = pd.to_datetime(aim['END_DATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2015/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_start_and_end_date( + self.start_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame( + {"START_DATE": ["2015/01/01 00:00:00"], "END_DATE": ["2015/07/01 00:12:00"]} + ) + aim["START_DATE"] = pd.to_datetime( + aim["START_DATE"], format="%Y/%m/%d %H:%M:%S" + ) + aim["END_DATE"] = pd.to_datetime(aim["END_DATE"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_start_date_pick_two_of_two_by_overlaping_interval(self): - start_time = datetime.strptime('2011/04/01 00:00:00', '%Y/%m/%d 
%H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_start_and_end_date(self.start_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'START_DATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00'], - 'END_DATE': ['2015/01/01 00:00:00', '2015/07/01 00:12:00']}) - aim['START_DATE'] = pd.to_datetime(aim['START_DATE'], format='%Y/%m/%d %H:%M:%S') - aim['END_DATE'] = pd.to_datetime(aim['END_DATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2011/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_start_and_end_date( + self.start_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame( + { + "START_DATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"], + "END_DATE": ["2015/01/01 00:00:00", "2015/07/01 00:12:00"], + } + ) + aim["START_DATE"] = pd.to_datetime( + aim["START_DATE"], format="%Y/%m/%d %H:%M:%S" + ) + aim["END_DATE"] = pd.to_datetime(aim["END_DATE"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_start_date_pick_none_of_two_by_overshooting_date(self): - start_time = datetime.strptime('2018/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2019/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_start_and_end_date(self.start_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'START_DATE': [], - 'END_DATE': []}) - aim['START_DATE'] = pd.to_datetime(aim['START_DATE'], format='%Y/%m/%d %H:%M:%S') - aim['END_DATE'] = pd.to_datetime(aim['END_DATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2018/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2019/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_start_and_end_date( + self.start_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"START_DATE": [], "END_DATE": []}) + aim["START_DATE"] = pd.to_datetime( + aim["START_DATE"], format="%Y/%m/%d %H:%M:%S" + ) + aim["END_DATE"] = pd.to_datetime(aim["END_DATE"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_start_date_pick_none_of_two_by_undershooting_date(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_start_and_end_date(self.start_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'START_DATE': [], - 'END_DATE': []}) - aim['START_DATE'] = pd.to_datetime(aim['START_DATE'], format='%Y/%m/%d %H:%M:%S') - aim['END_DATE'] = pd.to_datetime(aim['END_DATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_start_and_end_date( + self.start_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"START_DATE": [], "END_DATE": []}) + aim["START_DATE"] = pd.to_datetime( + aim["START_DATE"], format="%Y/%m/%d %H:%M:%S" + ) + aim["END_DATE"] = pd.to_datetime(aim["END_DATE"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) class TestFiltersEffectiveDate(unittest.TestCase): def setUp(self): - self.last_changed_data = 
pd.DataFrame({'EFFECTIVEDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) + self.last_changed_data = pd.DataFrame( + {"EFFECTIVEDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) def test_start_date_pick_first_of_two(self): - start_time = datetime.strptime('2014/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2014/09/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_effective_date(self.last_changed_data, start_time=start_time, end_time=end_time) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2011/01/01 00:00:00']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2014/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2014/09/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_effective_date( + self.last_changed_data, start_time=start_time, end_time=end_time + ) + aim = pd.DataFrame({"EFFECTIVEDATE": ["2011/01/01 00:00:00"]}) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_start_date_two_of_two_with_date_window_on_second(self): - start_time = datetime.strptime('2015/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_effective_date(self.last_changed_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2015/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_effective_date( + self.last_changed_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame( + {"EFFECTIVEDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_start_date_pick_two_of_two_by_overlaping_interval(self): - start_time = datetime.strptime('2011/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_effective_date(self.last_changed_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2011/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_effective_date( + self.last_changed_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame( + {"EFFECTIVEDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_start_date_pick_two_of_two_by_overshooting_date(self): - start_time = datetime.strptime('2018/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2019/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_effective_date(self.last_changed_data, start_time=start_time, end_time=end_time)\ - 
.reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2018/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2019/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_effective_date( + self.last_changed_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame( + {"EFFECTIVEDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_start_date_pick_none_of_two_by_undershooting_date(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_effective_date(self.last_changed_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': []}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_effective_date( + self.last_changed_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"EFFECTIVEDATE": []}) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) class TestFiltersSettlementDate(unittest.TestCase): def setUp(self): - self.settlement_date_data = \ - pd.DataFrame({'SETTLEMENTDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) + self.settlement_date_data = pd.DataFrame( + {"SETTLEMENTDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) def test_settlement_date_pick_first_of_two(self): - start_time = datetime.strptime('2010/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2011/09/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time) - aim = pd.DataFrame({'SETTLEMENTDATE': ['2011/01/01 00:00:00']}) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2011/09/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ) + aim = pd.DataFrame({"SETTLEMENTDATE": ["2011/01/01 00:00:00"]}) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_settlement_date_pick_second_of_two(self): - start_time = datetime.strptime('2014/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'SETTLEMENTDATE': ['2015/01/01 00:00:00']}) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2014/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d 
%H:%M:%S") + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"SETTLEMENTDATE": ["2015/01/01 00:00:00"]}) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_settlement_date_pick_two_of_two_by_overlaping_interval(self): - start_time = datetime.strptime('2010/12/31 23:59:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:00:00', '%Y/%m/%d %H:%M:%S') - aim = pd.DataFrame({'SETTLEMENTDATE': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:00:00", "%Y/%m/%d %H:%M:%S") + aim = pd.DataFrame( + {"SETTLEMENTDATE": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_settlement_date_pick_none_of_two_by_overshooting_date(self): - start_time = datetime.strptime('2018/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2019/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'SETTLEMENTDATE': []}) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2018/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2019/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"SETTLEMENTDATE": []}) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_settlement_date_pick_none_of_two_by_undershooting_date(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'SETTLEMENTDATE': []}) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"SETTLEMENTDATE": []}) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_settlement_date_end_date_exclusive_by_undershooting(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/12/31 23:59:00', 
'%Y/%m/%d %H:%M:%S') - result = filters.filter_on_settlementdate(self.settlement_date_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'SETTLEMENTDATE': []}) - aim['SETTLEMENTDATE'] = pd.to_datetime(aim['SETTLEMENTDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_settlementdate( + self.settlement_date_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"SETTLEMENTDATE": []}) + aim["SETTLEMENTDATE"] = pd.to_datetime( + aim["SETTLEMENTDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) class TestFiltersTimeStamp(unittest.TestCase): def setUp(self): - self.time_stamp_data = \ - pd.DataFrame({'TIMESTAMP': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) + self.time_stamp_data = pd.DataFrame( + {"TIMESTAMP": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) def test_time_stamp_pick_first_of_two(self): - start_time = datetime.strptime('2010/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2011/09/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time) - aim = pd.DataFrame({'TIMESTAMP': ['2011/01/01 00:00:00']}) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2011/09/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ) + aim = pd.DataFrame({"TIMESTAMP": ["2011/01/01 00:00:00"]}) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_time_stamp_pick_second_of_two(self): - start_time = datetime.strptime('2014/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'TIMESTAMP': ['2015/01/01 00:00:00']}) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2014/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"TIMESTAMP": ["2015/01/01 00:00:00"]}) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_time_stamp_pick_two_of_two_by_overlaping_interval(self): - start_time = datetime.strptime('2010/12/31 23:59:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:00:00', '%Y/%m/%d %H:%M:%S') - aim = pd.DataFrame({'TIMESTAMP': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:00:00", "%Y/%m/%d %H:%M:%S") + aim = pd.DataFrame( + 
{"TIMESTAMP": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_time_stamp_pick_none_of_two_by_overshooting_date(self): - start_time = datetime.strptime('2018/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2019/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'TIMESTAMP': []}) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2018/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2019/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"TIMESTAMP": []}) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_time_stamp_pick_none_of_two_by_undershooting_date(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'TIMESTAMP': []}) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"TIMESTAMP": []}) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) def test_time_stamp_end_date_exclusive_by_undershooting(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/12/31 23:59:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_timestamp(self.time_stamp_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'TIMESTAMP': []}) - aim['TIMESTAMP'] = pd.to_datetime(aim['TIMESTAMP'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_timestamp( + self.time_stamp_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"TIMESTAMP": []}) + aim["TIMESTAMP"] = pd.to_datetime(aim["TIMESTAMP"], format="%Y/%m/%d %H:%M:%S") assert_frame_equal(aim, result) class TestFiltersIntervalDatetime(unittest.TestCase): def setUp(self): - self.interval_datetime_data = \ - pd.DataFrame({'INTERVAL_DATETIME': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) + self.interval_datetime_data = pd.DataFrame( + {"INTERVAL_DATETIME": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) def test_interval_datetime_pick_first_of_two(self): - start_time = datetime.strptime('2010/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2011/09/01 00:13:00', '%Y/%m/%d %H:%M:%S') - 
result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time) - aim = pd.DataFrame({'INTERVAL_DATETIME': ['2011/01/01 00:00:00']}) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2011/09/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ) + aim = pd.DataFrame({"INTERVAL_DATETIME": ["2011/01/01 00:00:00"]}) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_interval_datetime_pick_second_of_two(self): - start_time = datetime.strptime('2014/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'INTERVAL_DATETIME': ['2015/01/01 00:00:00']}) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2014/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"INTERVAL_DATETIME": ["2015/01/01 00:00:00"]}) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_interval_datetime_pick_two_of_two_by_overlaping_interval(self): - start_time = datetime.strptime('2010/12/31 23:59:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2015/05/01 00:00:00', '%Y/%m/%d %H:%M:%S') - aim = pd.DataFrame({'INTERVAL_DATETIME': ['2011/01/01 00:00:00', '2015/01/01 00:00:00']}) - result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2015/05/01 00:00:00", "%Y/%m/%d %H:%M:%S") + aim = pd.DataFrame( + {"INTERVAL_DATETIME": ["2011/01/01 00:00:00", "2015/01/01 00:00:00"]} + ) + result = filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_interval_datetime_pick_none_of_two_by_overshooting_date(self): - start_time = datetime.strptime('2018/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2019/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'INTERVAL_DATETIME': []}) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2018/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2019/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result 
= filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"INTERVAL_DATETIME": []}) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_interval_datetime_stamp_pick_none_of_two_by_undershooting_date(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/05/01 00:13:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'INTERVAL_DATETIME': []}) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/05/01 00:13:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"INTERVAL_DATETIME": []}) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_interval_datetime_end_date_exclusive_by_undershooting(self): - start_time = datetime.strptime('2010/04/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2010/12/31 23:59:00', '%Y/%m/%d %H:%M:%S') - result = filters.filter_on_interval_datetime(self.interval_datetime_data, start_time=start_time, end_time=end_time)\ - .reset_index(drop=True) - aim = pd.DataFrame({'INTERVAL_DATETIME': []}) - aim['INTERVAL_DATETIME'] = pd.to_datetime(aim['INTERVAL_DATETIME'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2010/04/01 00:00:00", "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2010/12/31 23:59:00", "%Y/%m/%d %H:%M:%S") + result = filters.filter_on_interval_datetime( + self.interval_datetime_data, start_time=start_time, end_time=end_time + ).reset_index(drop=True) + aim = pd.DataFrame({"INTERVAL_DATETIME": []}) + aim["INTERVAL_DATETIME"] = pd.to_datetime( + aim["INTERVAL_DATETIME"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) class TestFiltersColumnValue(unittest.TestCase): def setUp(self): - self.int_filter_data = pd.DataFrame({'INTCOL': [1, 10, 100, -5, 0, 10, 10]}) - self.int_and_string_filter_data = pd.DataFrame({'INTCOL': [1, 10, 100, -5, 0, 10, 10], - 'STRINGCOL': ['1', '10', '100', '-5', '0', '10', '10']}) + self.int_filter_data = pd.DataFrame({"INTCOL": [1, 10, 100, -5, 0, 10, 10]}) + self.int_and_string_filter_data = pd.DataFrame( + { + "INTCOL": [1, 10, 100, -5, 0, 10, 10], + "STRINGCOL": ["1", "10", "100", "-5", "0", "10", "10"], + } + ) def test_filter_one_col_one_value_positive_ints(self): - filter_cols = ('INTCOL', ) + filter_cols = ("INTCOL",) filter_values = ([10],) - result = \ - filters.filter_on_column_value(self.int_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [10, 10, 10]}) + result = filters.filter_on_column_value( + self.int_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [10, 10, 10]}) assert_frame_equal(aim, result) def test_filter_one_col_two_values_positive_and_negative_ints(self): - filter_cols = ('INTCOL', ) + filter_cols = ("INTCOL",) filter_values = ([10, -5],) - result = \ - 
filters.filter_on_column_value(self.int_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [10, -5, 10, 10]}) + result = filters.filter_on_column_value( + self.int_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [10, -5, 10, 10]}) assert_frame_equal(aim, result) def test_filter_two_cols_one_value_each_not_matching(self): - filter_cols = ('INTCOL', 'STRINGCOL' ) - filter_values = ([10], ['100']) - result = \ - filters.filter_on_column_value(self.int_and_string_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [], 'STRINGCOL': []}) - aim = aim.astype(dtype={'INTCOL': np.int64, 'STRINGCOL': str}) + filter_cols = ("INTCOL", "STRINGCOL") + filter_values = ([10], ["100"]) + result = filters.filter_on_column_value( + self.int_and_string_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [], "STRINGCOL": []}) + aim = aim.astype(dtype={"INTCOL": np.int64, "STRINGCOL": str}) assert_frame_equal(aim, result) def test_filter_two_cols_one_value_each_matching(self): - filter_cols = ('INTCOL', 'STRINGCOL' ) - filter_values = ([10], ['10']) - result = \ - filters.filter_on_column_value(self.int_and_string_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [10, 10, 10], 'STRINGCOL': ['10', '10', '10']}) + filter_cols = ("INTCOL", "STRINGCOL") + filter_values = ([10], ["10"]) + result = filters.filter_on_column_value( + self.int_and_string_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [10, 10, 10], "STRINGCOL": ["10", "10", "10"]}) assert_frame_equal(aim, result) def test_filter_just_one_of_two_cols(self): - filter_cols = ('INTCOL', ) + filter_cols = ("INTCOL",) filter_values = ([10],) - result = \ - filters.filter_on_column_value(self.int_and_string_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [10, 10, 10], 'STRINGCOL': ['10', '10', '10']}) + result = filters.filter_on_column_value( + self.int_and_string_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [10, 10, 10], "STRINGCOL": ["10", "10", "10"]}) assert_frame_equal(aim, result) def test_filter_one_empty_values_returns_empty_data_frame(self): - filter_cols = ('INTCOL', ) + filter_cols = ("INTCOL",) filter_values = ([],) - result = \ - filters.filter_on_column_value(self.int_and_string_filter_data, filter_cols, filter_values).reset_index(drop=True) - aim = pd.DataFrame({'INTCOL': [], 'STRINGCOL': []}) - aim = aim.astype(dtype={'INTCOL': np.int64, 'STRINGCOL': str}) + result = filters.filter_on_column_value( + self.int_and_string_filter_data, filter_cols, filter_values + ).reset_index(drop=True) + aim = pd.DataFrame({"INTCOL": [], "STRINGCOL": []}) + aim = aim.astype(dtype={"INTCOL": np.int64, "STRINGCOL": str}) assert_frame_equal(aim, result) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - - - diff --git a/nemosis/test_format_options.py b/nemosis/test_format_options.py index 2d6a448..d1f009f 100644 --- a/nemosis/test_format_options.py +++ b/nemosis/test_format_options.py @@ -9,58 +9,93 @@ class TestFormatOptions(unittest.TestCase): def setUp(self): # TODO: Clean tests since only one table - BIDDAYOFFER_D is tested - self.table_names = ['BIDDAYOFFER_D'] + self.table_names = ["BIDDAYOFFER_D"] - self.table_types = {'DISPATCHLOAD': 'DUID', 
'DISPATCHCONSTRAINT': 'CONSTRAINTID', 'DISPATCH_UNIT_SCADA': 'DUID', - 'DISPATCHPRICE': 'REGIONID', 'DISPATCHINTERCONNECTORRES': 'INTERCONNECTORID', - 'DISPATCHREGIONSUM': 'REGIONID', 'BIDPEROFFER_D': 'DUID-BIDTYPE', - 'BIDDAYOFFER_D': 'DUID-BIDTYPE', 'TRADINGLOAD': 'DUID', 'TRADINGPRICE': 'REGIONID', - 'TRADINGREGIONSUM': 'REGIONID', 'TRADINGINTERCONNECT': 'INTERCONNECTORID'} + self.table_types = { + "DISPATCHLOAD": "DUID", + "DISPATCHCONSTRAINT": "CONSTRAINTID", + "DISPATCH_UNIT_SCADA": "DUID", + "DISPATCHPRICE": "REGIONID", + "DISPATCHINTERCONNECTORRES": "INTERCONNECTORID", + "DISPATCHREGIONSUM": "REGIONID", + "BIDPEROFFER_D": "DUID-BIDTYPE", + "BIDDAYOFFER_D": "DUID-BIDTYPE", + "TRADINGLOAD": "DUID", + "TRADINGPRICE": "REGIONID", + "TRADINGREGIONSUM": "REGIONID", + "TRADINGINTERCONNECT": "INTERCONNECTORID", + } - self.filter_values = {'DUID': (['AGLHAL'],), 'REGIONID': (['SA1'],), 'INTERCONNECTORID': (['VIC1-NSW1'],), - 'CONSTRAINTID': (['DATASNAP_DFS_Q_CLST'],), 'DUID-BIDTYPE': (['AGLHAL', 'ENERGY'],)} + self.filter_values = { + "DUID": (["AGLHAL"],), + "REGIONID": (["SA1"],), + "INTERCONNECTORID": (["VIC1-NSW1"],), + "CONSTRAINTID": (["DATASNAP_DFS_Q_CLST"],), + "DUID-BIDTYPE": (["AGLHAL", "ENERGY"],), + } def test_dispatch_tables_start_of_month_just_csv_format_dont_keep(self): # Empty cache. for f in os.listdir(defaults.raw_data_cache): os.remove(os.path.join(defaults.raw_data_cache, f)) - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] cols = [dat_col, self.table_types[table]] filter_cols = (self.table_types[table],) expected_length = 63 expected_number_of_columns = 2 - expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 00:30:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDPEROFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + if table == "BIDPEROFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + if table == "BIDDAYOFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 
expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, - filter_cols=filter_cols, filter_values=self.filter_values[table_type], - fformat='csv', keep_csv=False) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + fformat="csv", + keep_csv=False, + ) data = data.reset_index(drop=True) print(table) self.assertEqual(expected_length, data.shape[0]) @@ -68,49 +103,69 @@ def test_dispatch_tables_start_of_month_just_csv_format_dont_keep(self): self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) self.assertEqual(len(os.listdir(defaults.raw_data_cache)), 0) - print('Passed') + print("Passed") def test_dispatch_tables_start_of_month_just_csv_format(self): # Empty cache. for f in os.listdir(defaults.raw_data_cache): os.remove(os.path.join(defaults.raw_data_cache, f)) - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] cols = [dat_col, self.table_types[table]] filter_cols = (self.table_types[table],) expected_length = 63 expected_number_of_columns = 2 - expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 00:30:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDPEROFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + if table == "BIDPEROFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols 
= ('DUID', 'BIDTYPE') + if table == "BIDDAYOFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, - filter_cols=filter_cols, filter_values=self.filter_values[table_type], - fformat='csv') + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + fformat="csv", + ) data = data.reset_index(drop=True) print(table) self.assertEqual(expected_length, data.shape[0]) @@ -118,46 +173,67 @@ def test_dispatch_tables_start_of_month_just_csv_format(self): self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) self.assertNotEqual(len(os.listdir(defaults.raw_data_cache)), 0) - print('Passed') - + print("Passed") # Test that also works on second pass. - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] cols = [dat_col, self.table_types[table]] filter_cols = (self.table_types[table],) expected_length = 63 expected_number_of_columns = 2 - expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime( + end_time, format="%Y/%m/%d %H:%M:%S" + ) + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 00:30:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDPEROFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + if table == "BIDPEROFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 
'BIDTYPE') + if table == "BIDDAYOFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, + start_time, + end_time, + table, + defaults.raw_data_cache, select_columns=cols, - filter_cols=filter_cols, filter_values=self.filter_values[table_type], - fformat='csv') + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + fformat="csv", + ) data = data.reset_index(drop=True) print(table) self.assertEqual(expected_length, data.shape[0]) @@ -165,91 +241,132 @@ def test_dispatch_tables_start_of_month_just_csv_format(self): self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) self.assertNotEqual(len(os.listdir(defaults.raw_data_cache)), 0) - print('Passed') + print("Passed") def test_dispatch_tables_start_of_month_feather_format(self): - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] cols = [dat_col, self.table_types[table]] filter_cols = (self.table_types[table],) expected_length = 63 expected_number_of_columns = 2 - expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 00:30:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDPEROFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + if table == "BIDPEROFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + if 
table == "BIDDAYOFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) print(table) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, - filter_cols=filter_cols, filter_values=self.filter_values[table_type], - fformat='feather') + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + fformat="feather", + ) data = data.reset_index(drop=True) print(table) self.assertEqual(expected_length, data.shape[0]) self.assertEqual(expected_number_of_columns, data.shape[1]) self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) - print('Passed') + print("Passed") def test_dispatch_tables_start_of_month_parquet_format(self): - start_time = '2018/02/01 00:00:00' - end_time = '2018/02/01 05:15:00' + start_time = "2018/02/01 00:00:00" + end_time = "2018/02/01 05:15:00" for table in self.table_names: - print('Testing {} returing values at start of month.'.format(table)) + print("Testing {} returing values at start of month.".format(table)) dat_col = defaults.primary_date_columns[table] table_type = self.table_types[table] cols = [dat_col, self.table_types[table]] filter_cols = (self.table_types[table],) expected_length = 63 expected_number_of_columns = 2 - expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5) - expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S') - if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']: + expected_first_time = pd.to_datetime( + start_time, format="%Y/%m/%d %H:%M:%S" + ) + timedelta(minutes=5) + expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S") + if table in [ + "TRADINGLOAD", + "TRADINGPRICE", + "TRADINGREGIONSUM", + "TRADINGINTERCONNECT", + ]: expected_length = 10 - expected_first_time = '2018/02/01 00:30:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') - expected_last_time = '2018/02/01 05:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - if table == 'BIDPEROFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + expected_first_time = "2018/02/01 00:30:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_last_time = "2018/02/01 05:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + if table == "BIDPEROFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] + filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 - if table == 'BIDDAYOFFER_D': - cols = [dat_col, 'DUID', 'BIDTYPE'] - filter_cols = ('DUID', 'BIDTYPE') + if table == "BIDDAYOFFER_D": + cols = [dat_col, "DUID", "BIDTYPE"] 
+ filter_cols = ("DUID", "BIDTYPE") expected_number_of_columns = 3 expected_length = 2 - expected_last_time = '2018/02/01 00:00:00' - expected_last_time = pd.to_datetime(expected_last_time, format='%Y/%m/%d %H:%M:%S') - expected_first_time = '2018/01/31 00:00:00' - expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S') + expected_last_time = "2018/02/01 00:00:00" + expected_last_time = pd.to_datetime( + expected_last_time, format="%Y/%m/%d %H:%M:%S" + ) + expected_first_time = "2018/01/31 00:00:00" + expected_first_time = pd.to_datetime( + expected_first_time, format="%Y/%m/%d %H:%M:%S" + ) data = data_fetch_methods.dynamic_data_compiler( - start_time, end_time, table, defaults.raw_data_cache, - select_columns=cols, - filter_cols=filter_cols, filter_values=self.filter_values[table_type], - fformat='parquet', parse_data_types=True) + start_time, + end_time, + table, + defaults.raw_data_cache, + select_columns=cols, + filter_cols=filter_cols, + filter_values=self.filter_values[table_type], + fformat="parquet", + parse_data_types=True, + ) data = data.reset_index(drop=True) print(table) self.assertEqual(expected_length, data.shape[0]) @@ -257,4 +374,4 @@ def test_dispatch_tables_start_of_month_parquet_format(self): self.assertEqual(expected_first_time, data[dat_col][0]) self.assertEqual(expected_last_time, data[dat_col].iloc[-1]) self.assertFalse(all(object == data.dtypes)) - print('Passed') \ No newline at end of file + print("Passed") diff --git a/nemosis/test_performance_stats.py b/nemosis/test_performance_stats.py index fa6a656..70ad340 100644 --- a/nemosis/test_performance_stats.py +++ b/nemosis/test_performance_stats.py @@ -14,11 +14,21 @@ class TestBaseVolumeWeightAveragePriceFunction(unittest.TestCase): def setUp(self): self.volume = pd.Series([55, 0, math.nan, 60, 40]) self.price = pd.Series([88, 90, -100, -100, 50]) - self.pricing_data = pd.DataFrame({ - 'SCADAVALUE': [8, 9, 50, 11, 10, 0], - 'TRADING_TOTALCLEARED': [20, math.nan, math.nan, 80, math.nan, math.nan], - 'TRADING_RRP': [80, math.nan, math.nan, 100, math.nan, math.nan], - 'DISPATCH_RRP': [80, 90, 89, 111, 110, 75]}) + self.pricing_data = pd.DataFrame( + { + "SCADAVALUE": [8, 9, 50, 11, 10, 0], + "TRADING_TOTALCLEARED": [ + 20, + math.nan, + math.nan, + 80, + math.nan, + math.nan, + ], + "TRADING_RRP": [80, math.nan, math.nan, 100, math.nan, math.nan], + "DISPATCH_RRP": [80, 90, 89, 111, 110, 75], + } + ) pass def test_volume_weighted_average_price(self): @@ -37,27 +47,49 @@ def test_spot_price(self): class TestPerformanceAtNodalPeak(unittest.TestCase): def setUp(self): self.cap_and_output = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, 50, 11, 10, 0], - 'TOTALDEMAND': [1000, 1100, 13000, 900, 800, 1000]}) - self.cap_and_output['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68], + "SCADAVALUE": [8, 9, 50, 11, 10, 0], + "TOTALDEMAND": [1000, 1100, 13000, 900, 800, 1000], + } + ) + self.cap_and_output["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output["SETTLEMENTDATE"] 
+ ) self.cap_and_output_nans = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, math.nan, 11, 10, 0], - 'TOTALDEMAND': [1000, 1100, 13000, 900, 800, 1000]}) - self.cap_and_output_nans['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68], + "SCADAVALUE": [8, 9, math.nan, 11, 10, 0], + "TOTALDEMAND": [1000, 1100, 13000, 900, 800, 1000], + } + ) + self.cap_and_output_nans["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output["SETTLEMENTDATE"] + ) pass def test_performance_at_nodal_peak(self): peak = custom_tables.performance_at_nodal_peak(self.cap_and_output) - self.assertAlmostEqual(peak, 50/68, 4) + self.assertAlmostEqual(peak, 50 / 68, 4) def test_performance_at_nodal_peak_nans(self): peak = custom_tables.performance_at_nodal_peak(self.cap_and_output_nans) @@ -67,65 +99,196 @@ def test_performance_at_nodal_peak_nans(self): class TestCapacityFactor90(unittest.TestCase): def setUp(self): self.cap_and_output = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, 50, 11, 10, 0], - 'TOTALDEMAND': [1000, 1100, 13000, 900, 800, 1000]}) - self.cap_and_output['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68], + "SCADAVALUE": [8, 9, 50, 11, 10, 0], + "TOTALDEMAND": [1000, 1100, 13000, 900, 800, 1000], + } + ) + self.cap_and_output["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output["SETTLEMENTDATE"] + ) self.cap_and_output2 = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00', - '2015/01/02 00:00:00', '2015/01/02 00:05:00', '2015/01/02 00:10:00', - '2015/01/02 12:00:00', '2015/01/02 12:10:00', '2015/01/02 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, 50, 11, 10, 0, 8, 9, 50, 11, 10, 90], - 'TOTALDEMAND': [10000, 11000, 13000, 900, 800, 1000, 1000, 11000, 13000, 900, 800, 18000]}) - self.cap_and_output2['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output2['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + "2015/01/02 00:00:00", + "2015/01/02 00:05:00", + "2015/01/02 00:10:00", + "2015/01/02 12:00:00", + "2015/01/02 12:10:00", + "2015/01/02 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68, 68, 68, 
68, 68, 68, 68], + "SCADAVALUE": [8, 9, 50, 11, 10, 0, 8, 9, 50, 11, 10, 90], + "TOTALDEMAND": [ + 10000, + 11000, + 13000, + 900, + 800, + 1000, + 1000, + 11000, + 13000, + 900, + 800, + 18000, + ], + } + ) + self.cap_and_output2["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output2["SETTLEMENTDATE"] + ) self.cap_and_output2_nans = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00', - '2015/01/02 00:00:00', '2015/01/02 00:05:00', '2015/01/02 00:10:00', - '2015/01/02 12:00:00', '2015/01/02 12:10:00', '2015/01/02 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, 50, 11, 10, 0, 8, 9, 50, 11, 10, math.nan], - 'TOTALDEMAND': [10000, 11000, 13000, 900, 800, 1000, 1000, 11000, 13000, 900, 800, 18000]}) - self.cap_and_output2_nans['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output2['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + "2015/01/02 00:00:00", + "2015/01/02 00:05:00", + "2015/01/02 00:10:00", + "2015/01/02 12:00:00", + "2015/01/02 12:10:00", + "2015/01/02 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68], + "SCADAVALUE": [8, 9, 50, 11, 10, 0, 8, 9, 50, 11, 10, math.nan], + "TOTALDEMAND": [ + 10000, + 11000, + 13000, + 900, + 800, + 1000, + 1000, + 11000, + 13000, + 900, + 800, + 18000, + ], + } + ) + self.cap_and_output2_nans["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output2["SETTLEMENTDATE"] + ) self.cap_and_output2_nans2 = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/01/01 12:00:00', '2015/01/01 12:10:00', '2015/01/01 22:10:00', - '2015/01/02 00:00:00', '2015/01/02 00:05:00', '2015/01/02 00:10:00', - '2015/01/02 12:00:00', '2015/01/02 12:10:00', '2015/01/02 22:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [math.nan, math.nan, math.nan, 68, 68, 68, 68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [math.nan, math.nan, math.nan, 11, 10, 0, 8, 9, 50, 11, 10, 90], - 'TOTALDEMAND': [10000, 11000, 13000, 900, 800, 1000, 1000, 11000, 13000, 900, 800, 18000]}) - self.cap_and_output2_nans2['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output2['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/01/01 12:00:00", + "2015/01/01 12:10:00", + "2015/01/01 22:10:00", + "2015/01/02 00:00:00", + "2015/01/02 00:05:00", + "2015/01/02 00:10:00", + "2015/01/02 12:00:00", + "2015/01/02 12:10:00", + "2015/01/02 22:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [ + math.nan, + math.nan, + math.nan, + 68, + 68, + 68, + 68, + 68, + 68, + 68, + 68, + 68, + ], + "SCADAVALUE": [ + math.nan, + math.nan, + math.nan, + 11, + 10, + 0, + 8, + 9, + 50, + 11, + 10, + 90, + ], + "TOTALDEMAND": [ + 10000, + 11000, + 13000, + 900, + 800, + 1000, + 1000, + 11000, + 13000, + 900, + 800, + 18000, + ], + } + ) + self.cap_and_output2_nans2["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output2["SETTLEMENTDATE"] + ) pass def test_capacity_factor(self): - peak = 
custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand(self.cap_and_output) - self.assertAlmostEqual(peak, 50/68, 4) + peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand( + self.cap_and_output + ) + self.assertAlmostEqual(peak, 50 / 68, 4) def test_capacity_factor_2_intervals(self): - peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand(self.cap_and_output2) - self.assertAlmostEqual(peak, (50/68 + 90/68) / 2, 4) + peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand( + self.cap_and_output2 + ) + self.assertAlmostEqual(peak, (50 / 68 + 90 / 68) / 2, 4) def test_capacity_factor_2_intervals_one_nan(self): - peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand(self.cap_and_output2_nans) - self.assertAlmostEqual(peak, (50/68 + 0.0/68) / 2, 4) + peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand( + self.cap_and_output2_nans + ) + self.assertAlmostEqual(peak, (50 / 68 + 0.0 / 68) / 2, 4) def test_capacity_factor_2_intervals_plant_built_after_first(self): - peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand(self.cap_and_output2_nans2) + peak = custom_tables.capacity_factor_over_90th_percentile_of_nodal_demand( + self.cap_and_output2_nans2 + ) self.assertAlmostEqual(peak, (90 / 68), 4) class TestCapacityScadaBasedStats(unittest.TestCase): def setUp(self): - self.cap_and_output = pd.DataFrame({'MAXCAPACITY': [68, 68, 68, 68, 68], 'SCADAVALUE': [8, 9, 50, 11, 10]}) + self.cap_and_output = pd.DataFrame( + {"MAXCAPACITY": [68, 68, 68, 68, 68], "SCADAVALUE": [8, 9, 50, 11, 10]} + ) pass def test_capacity_factor(self): @@ -140,144 +303,279 @@ def test_volume(self): class TestMonthlyGroupingForStats(unittest.TestCase): def setUp(self): self.cap_and_output = pd.DataFrame( - {'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:10:00', - '2015/02/01 00:00:00', '2015/02/01 00:05:00', '2015/02/01 00:10:00'], - 'DUID': ['A', 'A', 'A', 'A', 'A', 'A'], - 'MAXCAPACITY': [68, 68, 68, 68, 68, 68], - 'SCADAVALUE': [8, 9, 50, 11, 10, 0], - 'TRADING_TOTALCLEARED': [111, math.nan, math.nan, 250, math.nan, math.nan], - 'TRADING_RRP': [115, math.nan, math.nan, 250, math.nan, math.nan], - 'DISPATCH_RRP': [112, 97, 102, 81, 85, 91], - 'TOTALDEMAND': [1000, 1020, 1100, 990, 1100, 897]}) - self.cap_and_output['SETTLEMENTDATE'] = pd.to_datetime(self.cap_and_output['SETTLEMENTDATE']) + { + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:10:00", + "2015/02/01 00:00:00", + "2015/02/01 00:05:00", + "2015/02/01 00:10:00", + ], + "DUID": ["A", "A", "A", "A", "A", "A"], + "MAXCAPACITY": [68, 68, 68, 68, 68, 68], + "SCADAVALUE": [8, 9, 50, 11, 10, 0], + "TRADING_TOTALCLEARED": [ + 111, + math.nan, + math.nan, + 250, + math.nan, + math.nan, + ], + "TRADING_RRP": [115, math.nan, math.nan, 250, math.nan, math.nan], + "DISPATCH_RRP": [112, 97, 102, 81, 85, 91], + "TOTALDEMAND": [1000, 1020, 1100, 990, 1100, 897], + } + ) + self.cap_and_output["SETTLEMENTDATE"] = pd.to_datetime( + self.cap_and_output["SETTLEMENTDATE"] + ) self.cf_by_month = pd.DataFrame( - {'Month': ['2015-01', '2015-02'], - 'DUID': ['A', 'A'], - 'CapacityFactor': [0.328431373, 0.102941176], - 'Volume': [67.0/12, 21.0/12], - 'TRADING_VWAP': [115.0, 250.0], - 'DISPATCH_VWAP': [102.5223881, 82.9047619], - 'NodalPeakCapacityFactor': [0.735294118, 0.147058824], - 'Nodal90thPercentileCapacityFactor': [0.735294118, 0.147058824]}) + { + "Month": ["2015-01", "2015-02"], + 
"DUID": ["A", "A"], + "CapacityFactor": [0.328431373, 0.102941176], + "Volume": [67.0 / 12, 21.0 / 12], + "TRADING_VWAP": [115.0, 250.0], + "DISPATCH_VWAP": [102.5223881, 82.9047619], + "NodalPeakCapacityFactor": [0.735294118, 0.147058824], + "Nodal90thPercentileCapacityFactor": [0.735294118, 0.147058824], + } + ) pass def test_one_duid_two_months_example(self): - cf_by_month_and_duid = custom_tables.stats_by_month_and_plant(self.cap_and_output) + cf_by_month_and_duid = custom_tables.stats_by_month_and_plant( + self.cap_and_output + ) cf_by_month_and_duid = cf_by_month_and_duid.reset_index(drop=True) pd.testing.assert_frame_equal(cf_by_month_and_duid, self.cf_by_month) class TestMergeTables(unittest.TestCase): def setUp(self): - self.gen_info = pd.DataFrame({ - 'EFFECTIVEDATE': ['2014/01/01 00:00:00', '2017/01/01 00:00:00', '2014/01/01 00:00:00'], - 'DUID': ['A', 'A', 'B'], - 'MAXCAPACITY': [333, 400, 250]}) - self.gen_info['EFFECTIVEDATE'] = pd.to_datetime(self.gen_info['EFFECTIVEDATE']) - self.gen_info2 = pd.DataFrame({ - 'START_DATE': ['2014/01/01 00:00:00', '2017/01/01 00:00:00', '2014/01/01 00:00:00'], - 'DUID': ['A', 'A', 'B'], - 'REGIONID': ['NSW1', 'NSW1', 'VIC1']}) - self.gen_info2['START_DATE'] = pd.to_datetime(self.gen_info2['START_DATE']) - self.scada = pd.DataFrame({ - 'DUID': ['A', 'A', 'B', 'B'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'SCADAVALUE': [150, 150, 200, 220]}) - self.scada['SETTLEMENTDATE'] = pd.to_datetime(self.scada['SETTLEMENTDATE']) - self.trading_load = pd.DataFrame({ - 'DUID': ['A', 'B'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:00:00'], - 'TOTALCLEARED': [150, 200]}) - self.trading_load['SETTLEMENTDATE'] = pd.to_datetime(self.trading_load['SETTLEMENTDATE']) - self.dispatch_price = pd.DataFrame({ - 'REGIONID': ['NSW1', 'NSW1', 'VIC1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'RRP': [99, 110, 300, 500]}) - self.dispatch_price['SETTLEMENTDATE'] = pd.to_datetime(self.dispatch_price['SETTLEMENTDATE']) - self.trading_price = pd.DataFrame({ - 'REGIONID': ['NSW1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:00:00'], - 'RRP': [99, 300]}) - self.trading_price['SETTLEMENTDATE'] = pd.to_datetime(self.trading_price['SETTLEMENTDATE']) - self.region_summary = pd.DataFrame({ - 'REGIONID': ['NSW1', 'NSW1', 'VIC1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'TOTALDEMAND': [5000, 5010, 8000, 8700]}) - self.region_summary['SETTLEMENTDATE'] = pd.to_datetime(self.region_summary['SETTLEMENTDATE']) - - self.expected_combined_df = pd.DataFrame({ - 'DUID': ['A', 'A', 'B', 'B'], - 'DUDETAIL_EFFECTIVEDATE': ['2014/01/01 00:00:00', '2014/01/01 00:00:00', - '2014/01/01 00:00:00', '2014/01/01 00:00:00'], - 'REGIONID': ['NSW1', 'NSW1', 'VIC1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'MAXCAPACITY': [333, 333, 250, 250], - 'SCADAVALUE': [150, 150, 200, 220], - 'TRADING_TOTALCLEARED': [150, math.nan, 200, math.nan], - 'DISPATCH_RRP': [99, 110, 300, 500], - 'TRADING_RRP': [99, math.nan, 300, math.nan], - 'TOTALDEMAND': [5000, 5010, 8000, 8700]}) - ix = pd.date_range(start=datetime.strptime('2015/01/01 00:00:00', '%Y/%m/%d %H:%M:%S'), - end=datetime.strptime('2015/01/01 00:10:00', '%Y/%m/%d %H:%M:%S') - 
timedelta(minutes=5), - freq='5T') + self.gen_info = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2014/01/01 00:00:00", + "2017/01/01 00:00:00", + "2014/01/01 00:00:00", + ], + "DUID": ["A", "A", "B"], + "MAXCAPACITY": [333, 400, 250], + } + ) + self.gen_info["EFFECTIVEDATE"] = pd.to_datetime(self.gen_info["EFFECTIVEDATE"]) + self.gen_info2 = pd.DataFrame( + { + "START_DATE": [ + "2014/01/01 00:00:00", + "2017/01/01 00:00:00", + "2014/01/01 00:00:00", + ], + "DUID": ["A", "A", "B"], + "REGIONID": ["NSW1", "NSW1", "VIC1"], + } + ) + self.gen_info2["START_DATE"] = pd.to_datetime(self.gen_info2["START_DATE"]) + self.scada = pd.DataFrame( + { + "DUID": ["A", "A", "B", "B"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "SCADAVALUE": [150, 150, 200, 220], + } + ) + self.scada["SETTLEMENTDATE"] = pd.to_datetime(self.scada["SETTLEMENTDATE"]) + self.trading_load = pd.DataFrame( + { + "DUID": ["A", "B"], + "SETTLEMENTDATE": ["2015/01/01 00:00:00", "2015/01/01 00:00:00"], + "TOTALCLEARED": [150, 200], + } + ) + self.trading_load["SETTLEMENTDATE"] = pd.to_datetime( + self.trading_load["SETTLEMENTDATE"] + ) + self.dispatch_price = pd.DataFrame( + { + "REGIONID": ["NSW1", "NSW1", "VIC1", "VIC1"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "RRP": [99, 110, 300, 500], + } + ) + self.dispatch_price["SETTLEMENTDATE"] = pd.to_datetime( + self.dispatch_price["SETTLEMENTDATE"] + ) + self.trading_price = pd.DataFrame( + { + "REGIONID": ["NSW1", "VIC1"], + "SETTLEMENTDATE": ["2015/01/01 00:00:00", "2015/01/01 00:00:00"], + "RRP": [99, 300], + } + ) + self.trading_price["SETTLEMENTDATE"] = pd.to_datetime( + self.trading_price["SETTLEMENTDATE"] + ) + self.region_summary = pd.DataFrame( + { + "REGIONID": ["NSW1", "NSW1", "VIC1", "VIC1"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "TOTALDEMAND": [5000, 5010, 8000, 8700], + } + ) + self.region_summary["SETTLEMENTDATE"] = pd.to_datetime( + self.region_summary["SETTLEMENTDATE"] + ) + + self.expected_combined_df = pd.DataFrame( + { + "DUID": ["A", "A", "B", "B"], + "DUDETAIL_EFFECTIVEDATE": [ + "2014/01/01 00:00:00", + "2014/01/01 00:00:00", + "2014/01/01 00:00:00", + "2014/01/01 00:00:00", + ], + "REGIONID": ["NSW1", "NSW1", "VIC1", "VIC1"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "MAXCAPACITY": [333, 333, 250, 250], + "SCADAVALUE": [150, 150, 200, 220], + "TRADING_TOTALCLEARED": [150, math.nan, 200, math.nan], + "DISPATCH_RRP": [99, 110, 300, 500], + "TRADING_RRP": [99, math.nan, 300, math.nan], + "TOTALDEMAND": [5000, 5010, 8000, 8700], + } + ) + ix = pd.date_range( + start=datetime.strptime("2015/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"), + end=datetime.strptime("2015/01/01 00:10:00", "%Y/%m/%d %H:%M:%S") + - timedelta(minutes=5), + freq="5T", + ) self.timeseries_df = pd.DataFrame(index=ix) self.timeseries_df.reset_index(inplace=True) - self.timeseries_df.columns = ['SETTLEMENTDATE'] - self.expected_combined_df['SETTLEMENTDATE'] = pd.to_datetime(self.expected_combined_df['SETTLEMENTDATE']) - self.expected_combined_df = self.expected_combined_df.sort_values('SETTLEMENTDATE') + self.timeseries_df.columns = ["SETTLEMENTDATE"] + self.expected_combined_df["SETTLEMENTDATE"] = pd.to_datetime( + self.expected_combined_df["SETTLEMENTDATE"] + ) 
+ self.expected_combined_df = self.expected_combined_df.sort_values( + "SETTLEMENTDATE" + ) def test_merge_tables(self): - merged_table = custom_tables.merge_tables_for_plant_stats(self.timeseries_df, self.gen_info, self.gen_info2, - self.scada, self.trading_load, self.dispatch_price, - self.trading_price, self.region_summary) + merged_table = custom_tables.merge_tables_for_plant_stats( + self.timeseries_df, + self.gen_info, + self.gen_info2, + self.scada, + self.trading_load, + self.dispatch_price, + self.trading_price, + self.region_summary, + ) np.array_equal(merged_table, self.expected_combined_df) class TestSelectHighestVersionNumber(unittest.TestCase): def setUp(self): - self.gen_info = pd.DataFrame({ - 'EFFECTIVEDATE': ['2014/01/01 00:00:00', '2017/01/01 00:00:00', '2014/01/01 00:00:00', - '2014/01/01 00:00:00'], - 'VERSIONNO': ['1', '1', '1', '2'], - 'DUID': ['A', 'A', 'B', 'B'], - 'MAXCAPACITY': [333, 400, 250, 800]}) - self.gen_info['EFFECTIVEDATE'] = pd.to_datetime(self.gen_info['EFFECTIVEDATE']) - self.expected_result = pd.DataFrame({ - 'EFFECTIVEDATE': ['2014/01/01 00:00:00', '2017/01/01 00:00:00', '2014/01/01 00:00:00'], - 'VERSIONNO': ['1', '1', '2'], - 'DUID': ['A', 'A', 'B'], - 'MAXCAPACITY': [333, 400, 800]}) - self.expected_result['EFFECTIVEDATE'] = pd.to_datetime(self.expected_result['EFFECTIVEDATE']) + self.gen_info = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2014/01/01 00:00:00", + "2017/01/01 00:00:00", + "2014/01/01 00:00:00", + "2014/01/01 00:00:00", + ], + "VERSIONNO": ["1", "1", "1", "2"], + "DUID": ["A", "A", "B", "B"], + "MAXCAPACITY": [333, 400, 250, 800], + } + ) + self.gen_info["EFFECTIVEDATE"] = pd.to_datetime(self.gen_info["EFFECTIVEDATE"]) + self.expected_result = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2014/01/01 00:00:00", + "2017/01/01 00:00:00", + "2014/01/01 00:00:00", + ], + "VERSIONNO": ["1", "1", "2"], + "DUID": ["A", "A", "B"], + "MAXCAPACITY": [333, 400, 800], + } + ) + self.expected_result["EFFECTIVEDATE"] = pd.to_datetime( + self.expected_result["EFFECTIVEDATE"] + ) def test_select_highest_version_no(self): - result = custom_tables.select_highest_version_number(self.gen_info, defaults.table_primary_keys['DUDETAIL']) + result = custom_tables.select_highest_version_number( + self.gen_info, defaults.table_primary_keys["DUDETAIL"] + ) np.array_equal(result, self.expected_result) class TestSelectInterventionIfPresent(unittest.TestCase): def setUp(self): - self.dispatch_price = pd.DataFrame({ - 'REGIONID': ['NSW1', 'NSW1', 'NSW1', 'VIC1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'RRP': [99, 110, 112, 300, 500], - 'INTERVENTION': ['0', '0', '1', '0', '1']}) - self.dispatch_price['SETTLEMENTDATE'] = pd.to_datetime(self.dispatch_price['SETTLEMENTDATE']) - self.expected_result = pd.DataFrame({ - 'REGIONID': ['NSW1', 'NSW1', 'VIC1', 'VIC1'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'RRP': [99, 112, 300, 500], - 'INTERVENTION': ['0', '1', '0', '1']}) - self.expected_result['SETTLEMENTDATE'] = pd.to_datetime(self.expected_result['SETTLEMENTDATE']) + self.dispatch_price = pd.DataFrame( + { + "REGIONID": ["NSW1", "NSW1", "NSW1", "VIC1", "VIC1"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "RRP": [99, 110, 112, 300, 500], + "INTERVENTION": ["0", "0", "1", "0", "1"], 
+ } + ) + self.dispatch_price["SETTLEMENTDATE"] = pd.to_datetime( + self.dispatch_price["SETTLEMENTDATE"] + ) + self.expected_result = pd.DataFrame( + { + "REGIONID": ["NSW1", "NSW1", "VIC1", "VIC1"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "RRP": [99, 112, 300, 500], + "INTERVENTION": ["0", "1", "0", "1"], + } + ) + self.expected_result["SETTLEMENTDATE"] = pd.to_datetime( + self.expected_result["SETTLEMENTDATE"] + ) def test_select_highest_version_no(self): - result = custom_tables.select_intervention_if_present(self.dispatch_price, - defaults.table_primary_keys['DISPATCHPRICE']) + result = custom_tables.select_intervention_if_present( + self.dispatch_price, defaults.table_primary_keys["DISPATCHPRICE"] + ) np.array_equal(result, self.expected_result) @@ -286,18 +584,34 @@ def setUp(self): pass def test_plant_stats(self): - if os.path.isfile('C:/Users/user/Documents/plant_stats.csv'): + if os.path.isfile("C:/Users/user/Documents/plant_stats.csv"): t0 = time.time() - plant_types = data_fetch_methods.static_table_xl('', '', 'Generators and Scheduled Loads', - defaults.raw_data_cache, - select_columns=['DUID', 'Fuel Source - Primary', - 'Region', 'Participant']) - plant_stats = custom_tables.plant_stats('2017/07/01 00:05:00', '2018/07/01 00:05:00', '', - defaults.raw_data_cache) - plant_stats = pd.merge(plant_stats, plant_types, 'left', 'DUID') - plant_stats['TRADING_COST'] = plant_stats['Volume'] * plant_stats['TRADING_VWAP'] - plant_stats['DISPATCH_COST'] = plant_stats['Volume'] * plant_stats['DISPATCH_VWAP'] - plant_stats.to_csv('C:/Users/user/Documents/plant_stats_tp.csv') + plant_types = data_fetch_methods.static_table_xl( + "", + "", + "Generators and Scheduled Loads", + defaults.raw_data_cache, + select_columns=[ + "DUID", + "Fuel Source - Primary", + "Region", + "Participant", + ], + ) + plant_stats = custom_tables.plant_stats( + "2017/07/01 00:05:00", + "2018/07/01 00:05:00", + "", + defaults.raw_data_cache, + ) + plant_stats = pd.merge(plant_stats, plant_types, "left", "DUID") + plant_stats["TRADING_COST"] = ( + plant_stats["Volume"] * plant_stats["TRADING_VWAP"] + ) + plant_stats["DISPATCH_COST"] = ( + plant_stats["Volume"] * plant_stats["DISPATCH_VWAP"] + ) + plant_stats.to_csv("C:/Users/user/Documents/plant_stats_tp.csv") print(time.time() - t0) @@ -306,23 +620,35 @@ def setUp(self): pass def test_nyngan1(self): - if os.path.isfile('E:/plants_stats_test_data/NYNGAN1/NYNGAN1_test.xlsx'): - xls = pd.ExcelFile('E:/plants_stats_test_data/NYNGAN1/NYNGAN1_test.xlsx') - table = pd.read_excel(xls, 'Plant_stats', dtype=str) - results = custom_tables.plant_stats('2017/01/01 00:00:00', '2017/02/01 00:00:00', '', defaults.raw_data_cache, - filter_cols=['DUID'], filter_values=[('NYNGAN1',)]) - for col in [col for col in table.columns if col not in ['Month', 'DUID']]: + if os.path.isfile("E:/plants_stats_test_data/NYNGAN1/NYNGAN1_test.xlsx"): + xls = pd.ExcelFile("E:/plants_stats_test_data/NYNGAN1/NYNGAN1_test.xlsx") + table = pd.read_excel(xls, "Plant_stats", dtype=str) + results = custom_tables.plant_stats( + "2017/01/01 00:00:00", + "2017/02/01 00:00:00", + "", + defaults.raw_data_cache, + filter_cols=["DUID"], + filter_values=[("NYNGAN1",)], + ) + for col in [col for col in table.columns if col not in ["Month", "DUID"]]: table[col] = table[col].astype(float) results.reset_index(drop=True, inplace=True) pd.testing.assert_frame_equal(results, table) def test_eildon2(self): - if 
os.path.isfile('E:/plants_stats_test_data/EILDON2/EILDON2_test.xlsx'): - xls = pd.ExcelFile('E:/plants_stats_test_data/EILDON2/EILDON2_test.xlsx') - table = pd.read_excel(xls, 'Plant_stats', dtype=str) - results = custom_tables.plant_stats('2018/01/01 00:00:00', '2018/02/01 00:00:00', '', defaults.raw_data_cache, - filter_cols=['DUID'], filter_values=[('EILDON2',)]) - for col in [col for col in table.columns if col not in ['Month', 'DUID']]: + if os.path.isfile("E:/plants_stats_test_data/EILDON2/EILDON2_test.xlsx"): + xls = pd.ExcelFile("E:/plants_stats_test_data/EILDON2/EILDON2_test.xlsx") + table = pd.read_excel(xls, "Plant_stats", dtype=str) + results = custom_tables.plant_stats( + "2018/01/01 00:00:00", + "2018/02/01 00:00:00", + "", + defaults.raw_data_cache, + filter_cols=["DUID"], + filter_values=[("EILDON2",)], + ) + for col in [col for col in table.columns if col not in ["Month", "DUID"]]: table[col] = table[col].astype(float) results.reset_index(drop=True, inplace=True) pd.testing.assert_frame_equal(results, table) @@ -330,19 +656,32 @@ def test_eildon2(self): class TestCalcTradingLoad(unittest.TestCase): def setUp(self): - self.scada = pd.DataFrame({ - 'DUID': ['A', 'A', 'B', 'B'], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'], - 'SCADAVALUE': [150, 150, 200, 220]}) - self.scada['SETTLEMENTDATE'] = pd.to_datetime(self.scada['SETTLEMENTDATE']) - self.result = pd.DataFrame({ - 'DUID': ['A', 'A', 'B', 'B'], - 'TOTALCLEARED': [150, 150, 200, 220], - 'SETTLEMENTDATE': ['2015/01/01 00:00:00', '2015/01/01 00:05:00', - '2015/01/01 00:00:00', '2015/01/01 00:05:00'] - }) - self.result['SETTLEMENTDATE'] = pd.to_datetime(self.result['SETTLEMENTDATE']) + self.scada = pd.DataFrame( + { + "DUID": ["A", "A", "B", "B"], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + "SCADAVALUE": [150, 150, 200, 220], + } + ) + self.scada["SETTLEMENTDATE"] = pd.to_datetime(self.scada["SETTLEMENTDATE"]) + self.result = pd.DataFrame( + { + "DUID": ["A", "A", "B", "B"], + "TOTALCLEARED": [150, 150, 200, 220], + "SETTLEMENTDATE": [ + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + "2015/01/01 00:00:00", + "2015/01/01 00:05:00", + ], + } + ) + self.result["SETTLEMENTDATE"] = pd.to_datetime(self.result["SETTLEMENTDATE"]) def test_calc_trading_load_simple(self): trading_load = custom_tables.calc_trading_load(self.scada) @@ -355,5 +694,4 @@ def setUp(self): def test_calc_trading_load_simple(self): pass - #custom_tables.trading_and_dispatch_cost() - + # custom_tables.trading_and_dispatch_cost() diff --git a/nemosis/test_processing_info_maps.py b/nemosis/test_processing_info_maps.py index 34e9c60..4bc68f6 100644 --- a/nemosis/test_processing_info_maps.py +++ b/nemosis/test_processing_info_maps.py @@ -18,121 +18,199 @@ def setUp(self): def test_start_to_end_no_duplication_between_batches(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'start_to_end': - print('Validating start_to_end type for table {}'.format(table_name)) - start_time = datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2018/03/01 00:00:00', '%Y/%m/%d %H:%M:%S') - if table_name == 'FCAS_4_SECOND': + if processing_info_maps.search_type[table_name] == "start_to_end": + print("Validating start_to_end type for table {}".format(table_name)) + start_time = datetime.strptime( + "2018/01/01 00:00:00", 
"%Y/%m/%d %H:%M:%S" + ) + end_time = datetime.strptime("2018/03/01 00:00:00", "%Y/%m/%d %H:%M:%S") + if table_name == "FCAS_4_SECOND": start_test_window = self.start_day start_time = self.start_day end_time = self.start_day + timedelta(days=1) data_tables = data_fetch_methods._dynamic_data_fetch_loop( - start_search=start_time, start_time=start_time, - end_time=end_time, table_name=table_name, raw_data_location=defaults.raw_data_cache, + start_search=start_time, + start_time=start_time, + end_time=end_time, + table_name=table_name, + raw_data_location=defaults.raw_data_cache, select_columns=defaults.table_primary_keys[table_name], date_filter=None, - keep_csv=False) + keep_csv=False, + ) all_data = pd.concat(data_tables, sort=False) contains_duplicates = all_data.duplicated().any() - self.assertEqual(False, contains_duplicates, 'table {}'.format(table_name)) - print('Type valid, no duplicates found.') + self.assertEqual( + False, contains_duplicates, "table {}".format(table_name) + ) + print("Type valid, no duplicates found.") def test_start_to_end_has_settlement_or_interval_col(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'start_to_end': - has_settlement_date_col = 'SETTLEMENTDATE' in defaults.table_columns[table_name] - has_interval_datetime_col = 'INTERVAL_DATETIME' in defaults.table_columns[table_name] - has_interval_timestamp_col = 'TIMESTAMP' in defaults.table_columns[table_name] - has_either = has_interval_datetime_col or has_settlement_date_col or has_interval_timestamp_col + if processing_info_maps.search_type[table_name] == "start_to_end": + has_settlement_date_col = ( + "SETTLEMENTDATE" in defaults.table_columns[table_name] + ) + has_interval_datetime_col = ( + "INTERVAL_DATETIME" in defaults.table_columns[table_name] + ) + has_interval_timestamp_col = ( + "TIMESTAMP" in defaults.table_columns[table_name] + ) + has_either = ( + has_interval_datetime_col + or has_settlement_date_col + or has_interval_timestamp_col + ) self.assertEqual(True, has_either) - print('{} is valid candidate for type start_to_end as there is a SETTLEMENTDATE, ' - 'INTERVAL_DATETIME or TIMESTAMP column to filter on.' - .format(table_name)) + print( + "{} is valid candidate for type start_to_end as there is a SETTLEMENTDATE, " + "INTERVAL_DATETIME or TIMESTAMP column to filter on.".format( + table_name + ) + ) def test_all_no_duplication_between_batches(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'all': - print('Validating all type for table {}'.format(table_name)) - if table_name in ['GENCONDATA', 'SPDCONNECTIONPOINTCONSTRAINT', 'SPDINTERCONNECTORCONSTRAINT', - 'DUDETAILSUMMARY', 'LOSSMODEL', 'LOSSFACTORMODEL', 'MNSP_DAYOFFER', - 'MNSP_PEROFFER', 'MNSP_INTERCONNECTOR', 'INTERCONNECTOR', 'INTERCONNECTORCONSTRAINT', - 'DUDETAIL', 'MARKET_PRICE_THRESHOLDS']: - print('{} is known to contain duplicate entries and is exempted from this test, a finalise ' - 'data processing step is included in dynamic data fetch to clean up these duplicates.' 
- .format(table_name)) + if processing_info_maps.search_type[table_name] == "all": + print("Validating all type for table {}".format(table_name)) + if table_name in [ + "GENCONDATA", + "SPDCONNECTIONPOINTCONSTRAINT", + "SPDINTERCONNECTORCONSTRAINT", + "DUDETAILSUMMARY", + "LOSSMODEL", + "LOSSFACTORMODEL", + "MNSP_DAYOFFER", + "MNSP_PEROFFER", + "MNSP_INTERCONNECTOR", + "INTERCONNECTOR", + "INTERCONNECTORCONSTRAINT", + "DUDETAIL", + "MARKET_PRICE_THRESHOLDS", + ]: + print( + "{} is known to contain duplicate entries and is exempted from this test, a finalise " + "data processing step is included in dynamic data fetch to clean up these duplicates.".format( + table_name + ) + ) continue start_test_window = defaults.nem_data_model_start_time - start_time = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - start_search = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + start_search = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") data_tables = data_fetch_methods._dynamic_data_fetch_loop( - start_search=start_search, start_time=start_time, - end_time=end_time, table_name=table_name, raw_data_location=defaults.raw_data_cache, + start_search=start_search, + start_time=start_time, + end_time=end_time, + table_name=table_name, + raw_data_location=defaults.raw_data_cache, select_columns=defaults.table_primary_keys[table_name], date_filter=None, - keep_csv=False) + keep_csv=False, + ) all_data = pd.concat(data_tables, sort=False) contains_duplicates = all_data.duplicated().any() - self.assertEqual(False, contains_duplicates, 'table {}'.format(table_name)) - print('Type valid, no duplicates found.') + self.assertEqual( + False, contains_duplicates, "table {}".format(table_name) + ) + print("Type valid, no duplicates found.") def test_all_no_duplication_between_batches_with_finalise_step(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'all': - print('Testing duplicate removal for table {}'.format(table_name)) + if processing_info_maps.search_type[table_name] == "all": + print("Testing duplicate removal for table {}".format(table_name)) start_test_window = defaults.nem_data_model_start_time - #start_test_window = '2018/01/01 00:00:00' - start_time = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - start_search = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') + # start_test_window = '2018/01/01 00:00:00' + start_time = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + start_search = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") data_tables = data_fetch_methods._dynamic_data_fetch_loop( - start_search=start_search, start_time=start_time, - end_time=end_time, table_name=table_name, raw_data_location=defaults.raw_data_cache, + start_search=start_search, + start_time=start_time, + end_time=end_time, + table_name=table_name, + raw_data_location=defaults.raw_data_cache, select_columns=defaults.table_primary_keys[table_name], date_filter=None, - keep_csv=False) + keep_csv=False, + ) all_data = pd.concat(data_tables, sort=False) - all_data = query_wrapers.drop_duplicates_by_primary_key(all_data, 
start_time, table_name) + all_data = query_wrapers.drop_duplicates_by_primary_key( + all_data, start_time, table_name + ) contains_duplicates = all_data.duplicated().any() self.assertEqual(False, contains_duplicates) - print('Type valid, no duplicates found.') + print("Type valid, no duplicates found.") def test_start_to_end_has_no_settlement_interval_or_timestamp_col(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'all': - has_settlement_date_col = 'SETTLEMENTDATE' in defaults.table_columns[table_name] - has_interval_datetime_col = 'INTERVAL_DATETIME' in defaults.table_columns[table_name] - has_interval_timestamp_col = 'TIMESTAMP' in defaults.table_columns[table_name] - has_either = has_interval_datetime_col or has_settlement_date_col or has_interval_timestamp_col - self.assertEqual(False, has_either, 'table {}'.format(table_name)) - print('{} is valid candidate for type all as there is not a SETTLEMENTDATE, ' - 'INTERVAL_DATETIME or TIMESTAMP column to filter on' - .format(table_name)) + if processing_info_maps.search_type[table_name] == "all": + has_settlement_date_col = ( + "SETTLEMENTDATE" in defaults.table_columns[table_name] + ) + has_interval_datetime_col = ( + "INTERVAL_DATETIME" in defaults.table_columns[table_name] + ) + has_interval_timestamp_col = ( + "TIMESTAMP" in defaults.table_columns[table_name] + ) + has_either = ( + has_interval_datetime_col + or has_settlement_date_col + or has_interval_timestamp_col + ) + self.assertEqual(False, has_either, "table {}".format(table_name)) + print( + "{} is valid candidate for type all as there is not a SETTLEMENTDATE, " + "INTERVAL_DATETIME or TIMESTAMP column to filter on".format( + table_name + ) + ) def test_last_contains_data_from_first(self): for table_name in processing_info_maps.search_type.keys(): - if processing_info_maps.search_type[table_name] == 'end': + if processing_info_maps.search_type[table_name] == "end": start_test_window = defaults.nem_data_model_start_time - #start_test_window = '2018/01/01 00:00:00' - start_time = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') - end_time = datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S') - start_search = datetime.strptime(start_test_window, '%Y/%m/%d %H:%M:%S') + # start_test_window = '2018/01/01 00:00:00' + start_time = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") + end_time = datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S") + start_search = datetime.strptime(start_test_window, "%Y/%m/%d %H:%M:%S") select_columns = None - _, _, select_columns, _, _ =\ - data_fetch_methods._set_up_dynamic_compilers(table_name, - start_time, - end_time, - select_columns) + ( + _, + _, + select_columns, + _, + _, + ) = data_fetch_methods._set_up_dynamic_compilers( + table_name, start_time, end_time, select_columns + ) data_tables = data_fetch_methods._dynamic_data_fetch_loop( - start_search=start_search, start_time=start_time, - end_time=end_time, table_name=table_name, raw_data_location=defaults.raw_data_cache, + start_search=start_search, + start_time=start_time, + end_time=end_time, + table_name=table_name, + raw_data_location=defaults.raw_data_cache, select_columns=select_columns, date_filter=None, - keep_csv=False) - first_data_table = data_tables[35].loc[:, defaults.table_primary_keys[table_name]] + keep_csv=False, + ) + first_data_table = data_tables[35].loc[ + :, defaults.table_primary_keys[table_name] + ] last_data_table = data_tables[-1] - comp = 
pd.merge(first_data_table, last_data_table, 'left', defaults.table_primary_keys[table_name]) - non_primary_col = [col for col in defaults.table_columns[table_name] - if col not in defaults.table_primary_keys[table_name]][0] + comp = pd.merge( + first_data_table, + last_data_table, + "left", + defaults.table_primary_keys[table_name], + ) + non_primary_col = [ + col + for col in defaults.table_columns[table_name] + if col not in defaults.table_primary_keys[table_name] + ][0] missing_from_last = comp[comp[non_primary_col].isnull()] - self.assertEqual(False, missing_from_last.empty) \ No newline at end of file + self.assertEqual(False, missing_from_last.empty) diff --git a/nemosis/test_query_wrapers.py b/nemosis/test_query_wrapers.py index 1f00197..c3326c4 100644 --- a/nemosis/test_query_wrapers.py +++ b/nemosis/test_query_wrapers.py @@ -8,129 +8,238 @@ class TestDispatchDateSetup(unittest.TestCase): def setUp(self): - self.start_time = '2017/01/01 00:10:00' - self.end_time = '2019/06/03 12:15:00' + self.start_time = "2017/01/01 00:10:00" + self.end_time = "2019/06/03 12:15:00" def test_start_time_and_end_time(self): - start_time, end_time = query_wrapers.dispatch_date_setup(self.start_time, self.end_time) - self.assertEqual(start_time, '2016/12/30 23:59:59') - self.assertEqual(end_time, '2019/06/03 00:00:00') + start_time, end_time = query_wrapers.dispatch_date_setup( + self.start_time, self.end_time + ) + self.assertEqual(start_time, "2016/12/30 23:59:59") + self.assertEqual(end_time, "2019/06/03 00:00:00") class TestFCASFinalise(unittest.TestCase): def setUp(self): - self.testDataFrame1 = pd.DataFrame({'StingsWithSpaces': [' HELLO ', 'foo and baa ', ' at front']}) - self.testDataFrame2 = pd.DataFrame({'StingsWithSpaces': [' HELLO ', 'foo and baa ', ' at front'], - 'INTs2ignore': [1, 2, 3]}) - self.testDataFrame3 = pd.DataFrame({'StingsWithSpaces': [' HELLO ', 'foo and baa ', ' at front'], - 'INTs2ignore': [1, 2, 3], - 'StingsWithSpaces2': [' HELLO ', 'foo and baa ', ' at front']}) + self.testDataFrame1 = pd.DataFrame( + {"StingsWithSpaces": [" HELLO ", "foo and baa ", " at front"]} + ) + self.testDataFrame2 = pd.DataFrame( + { + "StingsWithSpaces": [" HELLO ", "foo and baa ", " at front"], + "INTs2ignore": [1, 2, 3], + } + ) + self.testDataFrame3 = pd.DataFrame( + { + "StingsWithSpaces": [" HELLO ", "foo and baa ", " at front"], + "INTs2ignore": [1, 2, 3], + "StingsWithSpaces2": [" HELLO ", "foo and baa ", " at front"], + } + ) def test_string_cleanup_one_col(self): result = query_wrapers.fcas4s_finalise(self.testDataFrame1, None, None) - aim = pd.DataFrame({'StingsWithSpaces': ['HELLO', 'foo and baa', 'at front']}) + aim = pd.DataFrame({"StingsWithSpaces": ["HELLO", "foo and baa", "at front"]}) assert_frame_equal(aim, result) def test_string_cleanup_one_col_ignore_one_col(self): result = query_wrapers.fcas4s_finalise(self.testDataFrame2, None, None) - aim = pd.DataFrame({'StingsWithSpaces': ['HELLO', 'foo and baa', 'at front'], - 'INTs2ignore': [1, 2, 3]}) + aim = pd.DataFrame( + { + "StingsWithSpaces": ["HELLO", "foo and baa", "at front"], + "INTs2ignore": [1, 2, 3], + } + ) assert_frame_equal(aim, result) def test_cleanup_ignore_cleanup(self): result = query_wrapers.fcas4s_finalise(self.testDataFrame3, None, None) - aim = pd.DataFrame({'StingsWithSpaces': ['HELLO', 'foo and baa', 'at front'], - 'INTs2ignore': [1, 2, 3], - 'StingsWithSpaces2': ['HELLO', 'foo and baa', 'at front']}) + aim = pd.DataFrame( + { + "StingsWithSpaces": ["HELLO", "foo and baa", "at front"], + "INTs2ignore": [1, 
2, 3], + "StingsWithSpaces2": ["HELLO", "foo and baa", "at front"], + } + ) assert_frame_equal(aim, result) class TestMostRecent(unittest.TestCase): def setUp(self): - self.dummyGenConData = \ - pd.DataFrame({'EFFECTIVEDATE': ['2017/01/01 00:00:00', '2017/01/04 00:15:00', '2018/05/01 00:00:00'], - 'VERSIONNO': ['5', '1', '1'], - 'GENCONID': ['ID1', 'ID1', 'ID1']}) - self.dummyGenConData['EFFECTIVEDATE'] = pd.to_datetime(self.dummyGenConData['EFFECTIVEDATE'], - format='%Y/%m/%d %H:%M:%S') - - self.dummyGenConData2 = \ - pd.DataFrame({'EFFECTIVEDATE': ['2017/01/01 00:00:00', '2017/01/04 00:15:00', '2017/01/04 00:15:00'], - 'VERSIONNO': ['5', '1', '2'], - 'GENCONID': ['ID1', 'ID1', 'ID1']}) - self.dummyGenConData2['EFFECTIVEDATE'] = pd.to_datetime(self.dummyGenConData2['EFFECTIVEDATE'], - format='%Y/%m/%d %H:%M:%S') - - self.dummyGenConData3 = \ - pd.DataFrame({'EFFECTIVEDATE': ['2017/01/01 00:00:00', '2017/01/04 00:15:00', '2017/01/04 00:15:00', - '2017/01/01 00:00:00', '2017/01/04 00:15:00', '2017/01/04 00:15:00'], - 'VERSIONNO': ['5', '1', '2', '5', '1', '2'], - 'GENCONID': ['ID1', 'ID1', 'ID1', 'ID2', 'ID2', 'ID2']}) - self.dummyGenConData3['EFFECTIVEDATE'] = pd.to_datetime(self.dummyGenConData3['EFFECTIVEDATE'], - format='%Y/%m/%d %H:%M:%S') + self.dummyGenConData = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2018/05/01 00:00:00", + ], + "VERSIONNO": ["5", "1", "1"], + "GENCONID": ["ID1", "ID1", "ID1"], + } + ) + self.dummyGenConData["EFFECTIVEDATE"] = pd.to_datetime( + self.dummyGenConData["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) + + self.dummyGenConData2 = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + ], + "VERSIONNO": ["5", "1", "2"], + "GENCONID": ["ID1", "ID1", "ID1"], + } + ) + self.dummyGenConData2["EFFECTIVEDATE"] = pd.to_datetime( + self.dummyGenConData2["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) + + self.dummyGenConData3 = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + ], + "VERSIONNO": ["5", "1", "2", "5", "1", "2"], + "GENCONID": ["ID1", "ID1", "ID1", "ID2", "ID2", "ID2"], + } + ) + self.dummyGenConData3["EFFECTIVEDATE"] = pd.to_datetime( + self.dummyGenConData3["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) def test_one_id_gencondata_start_date_after_all(self): - start_time = datetime.strptime('2019/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - table_name = 'dummy' - defaults.primary_date_columns['dummy'] = 'EFFECTIVEDATE' - defaults.effective_date_group_col['dummy'] = ['GENCONID'] - result = query_wrapers.most_recent_records_before_start_time(self.dummyGenConData, start_time, table_name - ).reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2018/05/01 00:00:00'], 'VERSIONNO': ['1'], 'GENCONID': ['ID1']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2019/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + table_name = "dummy" + defaults.primary_date_columns["dummy"] = "EFFECTIVEDATE" + defaults.effective_date_group_col["dummy"] = ["GENCONID"] + result = query_wrapers.most_recent_records_before_start_time( + self.dummyGenConData, start_time, table_name + ).reset_index(drop=True) + aim = pd.DataFrame( + { + "EFFECTIVEDATE": ["2018/05/01 00:00:00"], + "VERSIONNO": ["1"], + "GENCONID": ["ID1"], + } + ) + aim["EFFECTIVEDATE"] = 
pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_one_id_gencondata_start_date_after_first(self): - start_time = datetime.strptime('2017/01/01 01:00:00', '%Y/%m/%d %H:%M:%S') - table_name = 'dummy' - defaults.primary_date_columns['dummy'] = 'EFFECTIVEDATE' - defaults.effective_date_group_col['dummy'] = ['GENCONID'] - result = query_wrapers.most_recent_records_before_start_time(self.dummyGenConData, start_time, table_name).\ - sort_values('EFFECTIVEDATE', ascending=False)\ + start_time = datetime.strptime("2017/01/01 01:00:00", "%Y/%m/%d %H:%M:%S") + table_name = "dummy" + defaults.primary_date_columns["dummy"] = "EFFECTIVEDATE" + defaults.effective_date_group_col["dummy"] = ["GENCONID"] + result = ( + query_wrapers.most_recent_records_before_start_time( + self.dummyGenConData, start_time, table_name + ) + .sort_values("EFFECTIVEDATE", ascending=False) .reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2017/01/01 00:00:00', '2017/01/04 00:15:00', '2018/05/01 00:00:00'], - 'VERSIONNO': ['5', '1', '1'], 'GENCONID': ['ID1', 'ID1', 'ID1']} - ).sort_values('EFFECTIVEDATE', ascending=False)\ + ) + aim = ( + pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2018/05/01 00:00:00", + ], + "VERSIONNO": ["5", "1", "1"], + "GENCONID": ["ID1", "ID1", "ID1"], + } + ) + .sort_values("EFFECTIVEDATE", ascending=False) .reset_index(drop=True) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_one_id_gencondata_start_date_before_all(self): - start_time = datetime.strptime('2010/01/01 01:00:00', '%Y/%m/%d %H:%M:%S') - table_name = 'dummy' - defaults.primary_date_columns['dummy'] = 'EFFECTIVEDATE' - defaults.effective_date_group_col['dummy'] = ['GENCONID'] - result = query_wrapers.most_recent_records_before_start_time(self.dummyGenConData, start_time, table_name).\ - sort_values('EFFECTIVEDATE', ascending=False).reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2017/01/01 00:00:00', '2017/01/04 00:15:00', '2018/05/01 00:00:00'], - 'VERSIONNO': ['5', '1', '1'], - 'GENCONID': ['ID1', 'ID1', 'ID1']}).sort_values('EFFECTIVEDATE', ascending=False)\ + start_time = datetime.strptime("2010/01/01 01:00:00", "%Y/%m/%d %H:%M:%S") + table_name = "dummy" + defaults.primary_date_columns["dummy"] = "EFFECTIVEDATE" + defaults.effective_date_group_col["dummy"] = ["GENCONID"] + result = ( + query_wrapers.most_recent_records_before_start_time( + self.dummyGenConData, start_time, table_name + ) + .sort_values("EFFECTIVEDATE", ascending=False) .reset_index(drop=True) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + ) + aim = ( + pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/01 00:00:00", + "2017/01/04 00:15:00", + "2018/05/01 00:00:00", + ], + "VERSIONNO": ["5", "1", "1"], + "GENCONID": ["ID1", "ID1", "ID1"], + } + ) + .sort_values("EFFECTIVEDATE", ascending=False) + .reset_index(drop=True) + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_one_id_gencondata_and_repeated_effectivedate_start_date_after_all(self): - start_time = datetime.strptime('2019/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - table_name = 'dummy' - defaults.primary_date_columns['dummy'] = 'EFFECTIVEDATE' - 
defaults.effective_date_group_col['dummy'] = ['GENCONID'] - result = query_wrapers.most_recent_records_before_start_time(self.dummyGenConData2, start_time, table_name - ).reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2017/01/04 00:15:00', '2017/01/04 00:15:00'], 'VERSIONNO': ['1', '2'], - 'GENCONID': ['ID1', 'ID1']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2019/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + table_name = "dummy" + defaults.primary_date_columns["dummy"] = "EFFECTIVEDATE" + defaults.effective_date_group_col["dummy"] = ["GENCONID"] + result = query_wrapers.most_recent_records_before_start_time( + self.dummyGenConData2, start_time, table_name + ).reset_index(drop=True) + aim = pd.DataFrame( + { + "EFFECTIVEDATE": ["2017/01/04 00:15:00", "2017/01/04 00:15:00"], + "VERSIONNO": ["1", "2"], + "GENCONID": ["ID1", "ID1"], + } + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) def test_2_id_and_repeated_effectivedate_start_date_after_all(self): - start_time = datetime.strptime('2019/06/01 00:00:00', '%Y/%m/%d %H:%M:%S') - table_name = 'dummy' - defaults.primary_date_columns['dummy'] = 'EFFECTIVEDATE' - defaults.effective_date_group_col['dummy'] = ['GENCONID'] - result = query_wrapers.most_recent_records_before_start_time(self.dummyGenConData3, start_time, table_name - ).reset_index(drop=True) - aim = pd.DataFrame({'EFFECTIVEDATE': ['2017/01/04 00:15:00', '2017/01/04 00:15:00', - '2017/01/04 00:15:00', '2017/01/04 00:15:00'], - 'VERSIONNO': ['1', '2', '1', '2'], - 'GENCONID': ['ID1', 'ID1', 'ID2', 'ID2']}) - aim['EFFECTIVEDATE'] = pd.to_datetime(aim['EFFECTIVEDATE'], format='%Y/%m/%d %H:%M:%S') + start_time = datetime.strptime("2019/06/01 00:00:00", "%Y/%m/%d %H:%M:%S") + table_name = "dummy" + defaults.primary_date_columns["dummy"] = "EFFECTIVEDATE" + defaults.effective_date_group_col["dummy"] = ["GENCONID"] + result = query_wrapers.most_recent_records_before_start_time( + self.dummyGenConData3, start_time, table_name + ).reset_index(drop=True) + aim = pd.DataFrame( + { + "EFFECTIVEDATE": [ + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + "2017/01/04 00:15:00", + ], + "VERSIONNO": ["1", "2", "1", "2"], + "GENCONID": ["ID1", "ID1", "ID2", "ID2"], + } + ) + aim["EFFECTIVEDATE"] = pd.to_datetime( + aim["EFFECTIVEDATE"], format="%Y/%m/%d %H:%M:%S" + ) assert_frame_equal(aim, result) diff --git a/nemosis/write_file_names.py b/nemosis/write_file_names.py index a6cb4c7..a98bac9 100644 --- a/nemosis/write_file_names.py +++ b/nemosis/write_file_names.py @@ -4,22 +4,24 @@ def write_file_names(name, month, year, day, index, raw_data_location): # Add the year and month information to the generic AEMO file name - filename_stub = (defaults.names[name] + "_" + str(year) + str(month) + "010000") + filename_stub = defaults.names[name] + "_" + str(year) + str(month) + "010000" path_and_name = os.path.join(raw_data_location, filename_stub) return filename_stub, path_and_name def write_file_names_mms_and_archive(name, month, year, day, index, raw_data_location): if day is None: - filename_stub = (defaults.names[name] + "_" + str(year) + str(month) + "010000") + filename_stub = defaults.names[name] + "_" + str(year) + str(month) + "010000" else: - filename_stub = (defaults.names[name] + "_" + str(year) + str(month) + str(day) + "0000") + filename_stub = ( + defaults.names[name] + "_" + str(year) + 
str(month) + str(day) + "0000" + ) path_and_name = os.path.join(raw_data_location, filename_stub) return filename_stub, path_and_name def write_file_names_fcas(name, month, year, day, index, raw_data_location): # Add the year and month information to the generic AEMO file name - filename_stub = (defaults.names[name] + "_" + str(year) + str(month) + day + index) + filename_stub = defaults.names[name] + "_" + str(year) + str(month) + day + index path_and_name = os.path.join(raw_data_location, filename_stub) return filename_stub, path_and_name diff --git a/setup.py b/setup.py index e298383..a71b18f 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,15 @@ long_description_content_type="text/markdown", url="https://github.com/UNSW-CEEM/NEMOSIS", packages=setuptools.find_packages(), - install_requires=['requests', 'joblib', 'pyarrow', 'feather-format', 'pandas', 'xlrd', 'beautifulsoup4'], + install_requires=[ + "requests", + "joblib", + "pyarrow", + "feather-format", + "pandas", + "xlrd", + "beautifulsoup4", + ], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU General Public License (GPL)",