diff --git a/src/nemosis/defaults.py b/src/nemosis/defaults.py
index 8d3c16f..f0fc1ad 100644
--- a/src/nemosis/defaults.py
+++ b/src/nemosis/defaults.py
@@ -135,7 +135,7 @@
     "_downloader.download_xl": "https://www.aemo.com.au/-/media/Files/Electricity/NEM/Participant_Information/NEM-Registration-and-Exemption-List.xls",
 }
 
-aemo_mms_url = "http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/{}/MMSDM_{}_{}/MMSDM_Historical_Data_SQLLoader/DATA/{}.zip"
+aemo_mms_url = "https://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/MMSDM/{}/MMSDM_{}_{}/MMSDM_Historical_Data_SQLLoader/DATA/{}.zip"
 
 current_data_page_urls = {
     "BIDDING": "Reports/Current/Bidmove_Complete/",
@@ -144,9 +144,9 @@
     "INTERMITTENT_GEN_SCADA": "/Reports/Current/Next_Day_Intermittent_Gen_Scada/"
 }
 
-fcas_4_url = "http://www.nemweb.com.au/Reports/Current/Causer_Pays/FCAS_{}{}{}{}.zip"
+fcas_4_url = "https://www.nemweb.com.au/Reports/Current/Causer_Pays/FCAS_{}{}{}{}.zip"
 
-fcas_4_url_hist = "http://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/FCAS_Causer_Pays/{}/FCAS_Causer_Pays_{}_{}/FCAS_{}{}{}{}.zip"
+fcas_4_url_hist = "https://www.nemweb.com.au/Data_Archive/Wholesale_Electricity/FCAS_Causer_Pays/{}/FCAS_Causer_Pays_{}_{}/FCAS_{}{}{}{}.zip"
 
 data_url = {
     "DISPATCHLOAD": "aemo_data_url",
diff --git a/src/nemosis/downloader.py b/src/nemosis/downloader.py
index dac391c..36a4f4d 100644
--- a/src/nemosis/downloader.py
+++ b/src/nemosis/downloader.py
@@ -10,14 +10,16 @@
 
 logger = logging.getLogger(__name__)
 
+session = requests.Session()
+
 # Windows Chrome for User-Agent request headers
-USR_AGENT_HEADER = {
+session.headers.update({
     "User-Agent": (
         "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
         + " AppleWebKit/537.36 (KHTML, like Gecko) "
         + "Chrome/80.0.3987.87 Safari/537.36"
     )
-}
+})
 
 
 def run(year, month, day, chunk, index, filename_stub, down_load_to):
@@ -95,7 +97,7 @@ def _get_current_url(filename_stub, current_page_url):
 def _download_and_unpack_bid_move_complete_files(
     download_url, down_load_to
 ):
-    r = requests.get(download_url, headers=USR_AGENT_HEADER)
+    r = session.get(download_url)
 
     zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
     file_name = zipped_file.namelist()[
@@ -131,7 +133,7 @@ def _download_and_unpack_bid_move_complete_files(
 def _download_and_unpack_next_region_tables(
     download_url, down_load_to
 ):
-    r = requests.get(download_url, headers=USR_AGENT_HEADER)
+    r = session.get(download_url)
 
     zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
     file_name = zipped_file.namelist()[
@@ -160,7 +162,7 @@ def _download_and_unpack_next_region_tables(
 def _download_and_unpack_next_dispatch_load_files_complete_files(
     download_url, down_load_to
 ):
-    r = requests.get(download_url, headers=USR_AGENT_HEADER)
+    r = session.get(download_url)
 
     zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
     file_name = zipped_file.namelist()[
@@ -185,7 +187,7 @@ def _download_and_unpack_next_dispatch_load_files_complete_files(
 def _download_and_unpack_intermittent_gen_scada_file(
     download_url, down_load_to
 ):
-    r = requests.get(download_url, headers=USR_AGENT_HEADER)
+    r = session.get(download_url)
 
     zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
     file_name = zipped_file.namelist()[
@@ -253,7 +255,7 @@ def download_unzip_csv(url, down_load_to):
     extracts the csv and saves it a specified location
     """
     url = url.replace('#', '%23')
-    r = requests.get(url, headers=USR_AGENT_HEADER)
+    r = session.get(url)
     z = zipfile.ZipFile(io.BytesIO(r.content))
     z.extractall(down_load_to)
 
@@ -263,19 +265,19 @@ def download_csv(url, path_and_name):
     This function downloads a zipped csv using a url, extracts
     the csv and saves it a specified location
     """
-    r = requests.get(url, headers=USR_AGENT_HEADER)
+    r = session.get(url)
     with open(path_and_name, "wb") as f:
         f.write(r.content)
 
 
 def download_elements_file(url, path_and_name):
-    page = requests.get(url)
+    page = session.get(url)
     text = page.text
     soup = BeautifulSoup(text, "html.parser")
     links = soup.find_all("a")
     last_file_name = links[-1].text
     link = url + last_file_name
-    r = requests.get(link, headers=USR_AGENT_HEADER)
+    r = session.get(link)
     with open(path_and_name, "wb") as f:
         f.write(r.content)
 
@@ -285,7 +287,7 @@ def download_xl(url, path_and_name):
     This function downloads a zipped csv using a url,
     extracts the csv and saves it a specified location
     """
-    r = requests.get(url, headers=USR_AGENT_HEADER)
+    r = session.get(url)
     with open(path_and_name, "wb") as f:
         f.write(r.content)
 
@@ -301,12 +303,12 @@ def format_aemo_url(url, year, month, filename_stub):
 
 
 def status_code_return(url):
-    r = requests.get(url, headers=USR_AGENT_HEADER)
+    r = session.get(url)
     return r.status_code
 
 
 def _get_matching_link(url, stub_link):
-    r = requests.get(url, headers=USR_AGENT_HEADER)
+    r = session.get(url)
     soup = BeautifulSoup(r.content, "html.parser")
     links = [link.get("href") for link in soup.find_all("a")]
     for link in links:
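
Note on the `downloader.py` change: replacing per-call `requests.get(url, headers=USR_AGENT_HEADER)` with a module-level `requests.Session` sets the spoofed User-Agent once and, as a side effect, pools and reuses TCP connections to nemweb.com.au across downloads. Below is a minimal, standalone sketch of the pattern, not nemosis code; the `fetch` helper and its `raise_for_status()` call are illustrative additions that do not appear in the diff.

```python
import requests

# One shared session: default headers are attached to every request,
# and the underlying connection pool is reused across calls.
session = requests.Session()
session.headers.update({
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        " AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.87 Safari/537.36"
    )
})

def fetch(url: str) -> bytes:
    # No per-request headers needed; the session supplies them.
    r = session.get(url)
    r.raise_for_status()  # illustrative hardening; the diff does not check status
    return r.content
```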