diff --git a/python-pscheduler/pscheduler/pscheduler/limitprocessor/identifier/ipcidrlisturl.py b/python-pscheduler/pscheduler/pscheduler/limitprocessor/identifier/ipcidrlisturl.py index 2d2f1295e..ecfa729f7 100644 --- a/python-pscheduler/pscheduler/pscheduler/limitprocessor/identifier/ipcidrlisturl.py +++ b/python-pscheduler/pscheduler/pscheduler/limitprocessor/identifier/ipcidrlisturl.py @@ -18,6 +18,7 @@ "type": "object", "properties": { "source": { "$ref": "#/pScheduler/URL" }, + "proxy": { "$ref": "#/pScheduler/URL" }, "transform": { "$ref": "#/pScheduler/JQTransformSpecification" }, "bind": { "$ref": "#/pScheduler/Host" }, "transform": { "$ref": "#/pScheduler/JQTransformSpecification" }, @@ -69,7 +70,7 @@ def __populate_cidrs_update__(self): """ status, text = url_get(self.source, bind=self.bind, - json=False, throw=False) + json=False, throw=False, proxy=self.proxy) possible_next_attempt = datetime.datetime.now() + self.retry @@ -146,6 +147,7 @@ def __init__(self, raise ValueError("Invalid data: %s" % message) self.source = data['source'] + self.proxy = data.get('proxy', None) self.bind = data.get('bind', None) self.update = iso8601_as_timedelta(data['update']) self.retry = iso8601_as_timedelta(data['retry']) diff --git a/python-pscheduler/pscheduler/pscheduler/limitprocessor/limit/urlfetch.py b/python-pscheduler/pscheduler/pscheduler/limitprocessor/limit/urlfetch.py index 7f5e90f0a..9dd7d249e 100644 --- a/python-pscheduler/pscheduler/pscheduler/limitprocessor/limit/urlfetch.py +++ b/python-pscheduler/pscheduler/pscheduler/limitprocessor/limit/urlfetch.py @@ -14,6 +14,7 @@ "type": "object", "properties": { "url": { "$ref": "#/pScheduler/URL" }, + "proxy": { "$ref": "#/pScheduler/URL" }, "url-transform": { "$ref": "#/pScheduler/JQTransformSpecification" }, "bind": { "$ref": "#/pScheduler/Host" }, "verify-keys": { "$ref": "#/pScheduler/Boolean" }, @@ -88,6 +89,7 @@ def __init__(self, raise ValueError("Invalid data: %s" % message) self.url = data["url"] + 
self.proxy = data.get('proxy', None) self.url_transform = _jq_filter(data.get("url-transform", None)) self.bind = data.get("bind", None) self.follow = data.get("follow-redirects", True) @@ -174,7 +176,8 @@ def evaluate(self, throw=False, timeout=self.timeout, allow_redirects=self.follow, - verify_keys=self.verify + verify_keys=self.verify, + proxy=self.proxy ) if self.success_only: diff --git a/python-pscheduler/pscheduler/pscheduler/limitprocessor/limitprocessor.py b/python-pscheduler/pscheduler/pscheduler/limitprocessor/limitprocessor.py index 7d3206af3..67adbd78e 100644 --- a/python-pscheduler/pscheduler/pscheduler/limitprocessor/limitprocessor.py +++ b/python-pscheduler/pscheduler/pscheduler/limitprocessor/limitprocessor.py @@ -86,20 +86,45 @@ def __init__(self, # Try to parse it as a URL. If it's got a scheme, fetch it # and replace the contents with that. - url_parsed = None + check_url = False try: - url_parsed = urlparse(limit_file_contents) - except UnicodeDecodeError: - # If it doesn't look like a URL, make the next block skip - # it. 
- pass - - if url_parsed is not None and url_parsed.scheme != '' and url_parsed.scheme != b'': - url = limit_file_contents - status, limit_file_contents = url_get(limit_file_contents, throw=False, json=False) - if status != 200: - raise ValueError("Unable to load limit configuration from %s: Status %d" % (url, status)) - + _ = json_load(limit_file_contents) + except: + # Not valid JSON; may be a URL. NOTE(review): bare 'except:' also catches KeyboardInterrupt/SystemExit — prefer 'except Exception:' + check_url = True + + + if check_url: + stripped_lines = [x.strip() for x in limit_file_contents.split('\n') if x.strip() != ''] + if len(stripped_lines) == 0: + raise ValueError('Limits configuration is empty') + + if len(stripped_lines) <= 2: + # 1 line: URL + # 2 lines: URL and proxy + # > 2 lines: most likely an invalid JSON + url_parsed = None + proxy_parsed = None + try: + url_parsed = urlparse(stripped_lines[0]) + if len(stripped_lines) == 2: + proxy_parsed = urlparse(stripped_lines[1]) + except Exception as ex: + raise ValueError(f'Failed to parse URL or proxy string in limits configuration: {ex}') + + if url_parsed is None or url_parsed.scheme == '' or url_parsed.scheme == b'' or url_parsed.netloc == '': + raise ValueError(f'URL string in limits configuration is invalid: {stripped_lines[0]} . Should be something like: https://example.com...') + + if len(stripped_lines) == 2: + if proxy_parsed is None or proxy_parsed.scheme == '' or proxy_parsed.scheme == b'' or proxy_parsed.netloc == '': + raise ValueError(f'Proxy string in limits configuration is invalid: {stripped_lines[1]} . Should be something like: http://user:pass@example.com:8080...') + + url = stripped_lines[0] + proxy = stripped_lines[1] if len(stripped_lines) == 2 else None + status, limit_file_contents = url_get(url=url, throw=False, json=False, proxy=proxy) + if status != 200: + tmp_msg = '' if proxy is None else f' (using proxy {proxy})' + raise ValueError(f'Unable to load limit configuration from {url}{tmp_msg}: Status {status}') # Parse it. 
diff --git a/python-pscheduler/pscheduler/pscheduler/psurl.py b/python-pscheduler/pscheduler/pscheduler/psurl.py index 7c19cbb6c..57ca82647 100644 --- a/python-pscheduler/pscheduler/pscheduler/psurl.py +++ b/python-pscheduler/pscheduler/pscheduler/psurl.py @@ -27,7 +27,7 @@ class URLException(Exception): class PycURLRunner(object): - def __init__(self, url, params, bind, timeout, allow_redirects, headers, verify_keys): + def __init__(self, url, params, bind, timeout, allow_redirects, headers, verify_keys, proxy=None): """Constructor""" self.curl = pycurl.Curl() @@ -57,6 +57,8 @@ def __init__(self, url, params, bind, timeout, allow_redirects, headers, verify_ self.buf = io.BytesIO() self.curl.setopt(pycurl.WRITEFUNCTION, self.buf.write) + if proxy is not None: + self.curl.setopt(pycurl.PROXY, proxy) def __call__(self, json, throw): @@ -107,13 +109,14 @@ def url_get( url, # GET URL timeout=None, # Seconds before giving up allow_redirects=True, # Allows URL to be redirected headers=None, # Hash of HTTP headers - verify_keys=verify_keys_default # Verify SSL keys + verify_keys=verify_keys_default, # Verify SSL keys + proxy=None # Proxy string ): """ Fetch a URL using GET with parameters, returning whatever came back. """ - curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys) + curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys, proxy) return curl(json, throw) @@ -155,7 +158,8 @@ def url_post( url, # GET URL timeout=None, # Seconds before giving up allow_redirects=True, #Allows URL to be redirected headers={}, # Hash of HTTP headers - verify_keys=verify_keys_default # Verify SSL keys + verify_keys=verify_keys_default, # Verify SSL keys + proxy=None # Proxy string ): """ Post to a URL, returning whatever came back. 
@@ -164,7 +168,7 @@ def url_post( url, # GET URL content_type, data = __content_type_data(content_type, headers, data) headers["Content-Type"] = content_type - curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys) + curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys, proxy) curl.curl.setopt(pycurl.POSTFIELDS, data) @@ -182,7 +186,8 @@ def url_put( url, # GET URL timeout=None, # Seconds before giving up allow_redirects=True, #Allows URL to be redirected headers={}, # Hash of HTTP headers - verify_keys=verify_keys_default # Verify SSL keys + verify_keys=verify_keys_default, # Verify SSL keys + proxy=None # Proxy string ): """ PUT to a URL, returning whatever came back. @@ -191,7 +196,7 @@ def url_put( url, # GET URL content_type, data = __content_type_data(content_type, headers, data) headers["Content-Type"] = content_type - curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys) + curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys, proxy) curl.curl.setopt(pycurl.CUSTOMREQUEST, "PUT") curl.curl.setopt(pycurl.POSTFIELDS, data) @@ -207,13 +212,14 @@ def url_delete( url, # DELETE URL timeout=None, # Seconds before giving up allow_redirects=True, #Allows URL to be redirected headers=None, # Hash of HTTP headers - verify_keys=verify_keys_default # Verify SSL keys + verify_keys=verify_keys_default, # Verify SSL keys + proxy=None # Proxy string ): """ Delete a URL. 
""" - curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys) + curl = PycURLRunner(url, params, bind, timeout, allow_redirects, headers, verify_keys, proxy) curl.curl.setopt(pycurl.CUSTOMREQUEST, "DELETE") @@ -228,7 +234,8 @@ def url_delete_list( timeout=None, # Seconds before giving up allow_redirects=True, #Allows URL to be redirected headers=None, # Hash of HTTP headers - verify_keys=verify_keys_default # Verify SSL keys + verify_keys=verify_keys_default, # Verify SSL keys + proxy=None # Proxy string ): """ Delete a list of URLs and return tuples of the status and error for @@ -237,5 +244,5 @@ def url_delete_list( """ return [ url_delete(url, throw=False, timeout=timeout, params=params, bind=bind, headers=headers, verify_keys=verify_keys, - allow_redirects=allow_redirects) + allow_redirects=allow_redirects, proxy=proxy) for url in urls ]