diff --git a/Dockerfile b/Dockerfile
index dc7e7bc..bde63e3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM docker.io/debian:buster-slim
 
 # install dependencies
 RUN apt update
-RUN apt install -y --no-install-recommends pipenv osmctools rsync
+RUN apt install -y --no-install-recommends pipenv osmctools rsync curl
 
 # add sources
 ADD Pipfile Pipfile.lock /app/src/
diff --git a/Pipfile b/Pipfile
index 043801f..828d172 100644
--- a/Pipfile
+++ b/Pipfile
@@ -7,3 +7,4 @@ verify_ssl = true
 
 [packages]
 mercantile = "*"
+requests = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 91729fa..e854d28 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "7cffd18d7d3f70b619ce481dae52c7120f2d6c1d26694d2d2cfb74dc8587e05e"
+            "sha256": "34791b34ac4e8cad808808442bb8104cfafd98ba2aae431b4fd316444b23579c"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -14,6 +14,20 @@
         ]
     },
     "default": {
+        "certifi": {
+            "hashes": [
+                "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304",
+                "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519"
+            ],
+            "version": "==2020.4.5.1"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "version": "==3.0.4"
+        },
         "click": {
             "hashes": [
                 "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a",
@@ -21,6 +35,13 @@
             ],
             "version": "==7.1.2"
         },
+        "idna": {
+            "hashes": [
+                "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb",
+                "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa"
+            ],
+            "version": "==2.9"
+        },
         "mercantile": {
             "hashes": [
                 "sha256:9d773dd96d68350c8e3871099a39c0df7d0d9938158478aa1c1a9bcce773122d",
@@ -28,6 +49,21 @@
             ],
             "index": "pypi",
             "version": "==1.1.4"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee",
+                "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6"
+            ],
+            "index": "pypi",
+            "version": "==2.23.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527",
+                "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115"
+            ],
+            "version": "==1.25.9"
         }
     },
     "develop": {}
diff --git a/generate_extracts.py b/generate_extracts.py
index 2d52d20..95e429e 100755
--- a/generate_extracts.py
+++ b/generate_extracts.py
@@ -4,8 +4,11 @@
 import argparse
 import subprocess
 import time
-
+import requests
+import json
 import mercantile
+
+from requests.auth import HTTPBasicAuth
 from multiprocessing import Lock
 from concurrent.futures import ThreadPoolExecutor, Future
 from pathlib import Path
@@ -36,6 +39,11 @@ def directory_type(raw: str):
                 raise argparse.ArgumentTypeError(f'Path {raw} is not a directory')
             return p
 
+        def auth_type(raw: str) -> list:
+            if raw != '' and len(raw.split(':')) != 2:
+                raise argparse.ArgumentTypeError('Authentication has incorrect format. Use <username>:<password>')
+            return raw.split(':')
+
         parser = argparse.ArgumentParser('generate_extracts',
                                          description='Extract similarly sized files from the latest OpenStreetMap '
                                                      'Planet dump.')
@@ -54,6 +62,16 @@ def directory_type(raw: str):
                             help='Maximum zoom level above which no further splitting will be performed')
         parser.add_argument('--processes', default=(max(1, os.cpu_count() - 2)), type=int,
                             help='How many concurrent processes to use')
+
+        upload_group = parser.add_argument_group(title='Uploading',
+                                                 description='Finished PBF extracts can be uploaded to a '
+                                                             'tileserver-mapping server. Use these arguments '
+                                                             'to enable and configure uploading.')
+        upload_group.add_argument('--upload-url', dest='upload_url', type=str, default='',
+                                  help='Upload to the tileserver-mapping server located at this URL.')
+        upload_group.add_argument('--upload-auth', dest='upload_auth', type=auth_type, default='',
+                                  help='<username>:<password> combination used to authenticate the upload.')
+
         return parser.parse_args()
 
     def __init__(self):
@@ -67,8 +85,15 @@ def __init__(self):
         self.running_futures = 0
         self.lock_running_futures = Lock()
 
+        self.upload_url = args.upload_url
+        self.upload_auth = args.upload_auth
+
         self.executor = ThreadPoolExecutor(max_workers=args.processes)
 
+    @property
+    def should_upload(self):
+        return self.upload_url != '' and len(self.upload_auth) == 2
+
     def run(self):
         self.download_planet_dump()
         print('Extracting tiles')
@@ -96,6 +121,8 @@ def _generate_tile(self, tile: mercantile.Tile):
         If the tile is smaller than the intended target size it is considered done and moved to the out_dir.
         If not, additional jobs are scheduled to further break it down.
 
+        If uploading is configured, the results are uploaded to tileserver-mapping as well.
+
         :param tile: Target tile which should be generated
         """
 
@@ -132,7 +159,7 @@ def _generate_tile(self, tile: mercantile.Tile):
 
         if target_file.stat().st_size < self.target_size:
             print(f'{Colors.OKGREEN}{tile} has reached target size{Colors.ENDC}')
-            subprocess.run(['rsync', str(target_file.absolute()), str(self.out_dir)], check=True)
+            self.finish_file(target_file, tile)
         else:
             self.extract(tile)
 
@@ -162,6 +189,44 @@ def extract(self, source: mercantile.Tile):
             self.running_futures += 1
             future.add_done_callback(lambda result: self._on_future_done(result))
 
+    def finish_file(self, file: Path, tile: mercantile.Tile):
+        """
+        Perform the finishing steps for the given file.
+        This includes copying the file to the output directory and, if configured, uploading it to a tileserver-mapping server.
+
+        :param file: File which should be processed
+        :param tile: Tile object whose data this file contains
+        """
+        subprocess.run(['rsync', str(file.absolute()), str(self.out_dir)], check=True)
+
+        if self.should_upload:
+            # check if a server object already describes this tile
+            existing_dumps = json.loads(requests.get(f'{self.upload_url}/api/v1/planet_dumps/').content)
+            target_dumps = [i for i in existing_dumps if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z]
+
+            if len(target_dumps) == 0:
+                # if no corresponding dump object exists on the server, we need to create one
+                response = json.loads(requests.post(f'{self.upload_url}/api/v1/planet_dumps/', headers={
+                    'Content-Type': 'application/json'
+                }, data=json.dumps({
+                    'x': tile.x,
+                    'y': tile.y,
+                    'z': tile.z,
+                }), auth=HTTPBasicAuth(username=self.upload_auth[0], password=self.upload_auth[1])).content)
+                dump_id = response['id']
+            else:
+                dump_id = target_dumps[0]['id']
+
+            # update only the file of the existing dump object on the server
+            subprocess.run([
+                'curl',
+                '-u', f'{self.upload_auth[0]}:{self.upload_auth[1]}',
+                '-F', f'file=@{file.absolute()}',
+                '--request', 'PATCH',
+                '--silent',
+                f'{self.upload_url}/api/v1/planet_dumps/{dump_id}/'
+            ], check=True, stdout=subprocess.DEVNULL)
+
 
 if __name__ == '__main__':
     p = Program()
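Note on the curl call in finish_file: one plausible reason to shell out to curl for the multipart PATCH is that curl streams the file from disk, whereas requests' built-in multipart support (files=...) builds the whole body in memory, which matters for multi-gigabyte PBF extracts (requests-toolbelt's MultipartEncoder would be the streaming in-process alternative). For files that fit in RAM, a minimal in-process sketch could look like the following; it reuses the /api/v1/planet_dumps/ endpoint from the diff, and upload_dump_file is a hypothetical helper, not part of this change:

    # Hypothetical in-process alternative to the curl call in finish_file.
    # Caution: requests builds the multipart body in memory, so this only
    # suits files that comfortably fit in RAM.
    from pathlib import Path

    import requests
    from requests.auth import HTTPBasicAuth


    def upload_dump_file(upload_url: str, dump_id: int, file: Path,
                         username: str, password: str) -> None:
        with file.open('rb') as f:
            response = requests.patch(
                f'{upload_url}/api/v1/planet_dumps/{dump_id}/',
                files={'file': (file.name, f)},  # same effect as curl -F file=@...
                auth=HTTPBasicAuth(username, password),
            )
        response.raise_for_status()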
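Usage note: uploading only happens when both new flags are given; otherwise should_upload stays false and finished extracts are merely rsynced to the output directory as before. A hypothetical invocation (host and credentials are placeholders, remaining arguments as usual):

    ./generate_extracts.py --upload-url https://mapping.example.com --upload-auth alice:secret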