Skip to content

Commit

Permalink
Add --append support to Docker process. Improve amenity with subtype.
Browse files Browse the repository at this point in the history
  • Loading branch information
rustprooflabs committed Mar 25, 2022
2 parents 7886275 + 2549288 commit d0c6fbf
Show file tree
Hide file tree
Showing 17 changed files with 566 additions and 178 deletions.
15 changes: 10 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM postgis/postgis:14-3.1
FROM postgis/postgis:14-3.2

LABEL maintainer="PgOSM-Flex - https://github.com/rustprooflabs/pgosm-flex"

ARG OSM2PGSQL_BRANCH=1.6.0
ARG OSM2PGSQL_BRANCH=master

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
Expand All @@ -11,11 +11,16 @@ RUN apt-get update \
libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev libproj-dev lua5.2 liblua5.2-dev \
python3 python3-distutils python3-psycopg2 \
python3 python3-distutils \
postgresql-server-dev-14 \
curl luarocks \
curl unzip \
&& rm -rf /var/lib/apt/lists/*

RUN wget https://luarocks.org/releases/luarocks-3.8.0.tar.gz \
&& tar zxpf luarocks-3.8.0.tar.gz \
&& cd luarocks-3.8.0 \
&& ./configure && make && make install

RUN curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
&& python3 /tmp/get-pip.py \
&& rm /tmp/get-pip.py
Expand All @@ -25,7 +30,7 @@ RUN luarocks install luasql-postgres PGSQL_INCDIR=/usr/include/postgresql/


WORKDIR /tmp
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH git://github.com/openstreetmap/osm2pgsql.git \
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \
&& mkdir osm2pgsql/build \
&& cd osm2pgsql/build \
&& cmake .. \
Expand Down
96 changes: 86 additions & 10 deletions docker/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,18 +173,24 @@ def pg_isready():
return True


def prepare_pgosm_db(data_only, db_path):
def prepare_pgosm_db(data_only, db_path, append):
"""Runs through series of steps to prepare database for PgOSM.
Parameters
--------------------------
data_only : bool
db_path : str
append : bool
"""

if pg_conn_parts()['pg_host'] == 'localhost':
LOGGER.debug('Running standard database prep for in-Docker operation. Includes DROP/CREATE DATABASE')
drop_pgosm_db()
if append:
LOGGER.debug('Skipping DB drop b/c of append mode')
else:
LOGGER.debug('Dropping database')
drop_pgosm_db()

create_pgosm_db()
else:
LOGGER.info('Using external database. Ensure the target database is setup properly for PgOSM Flex with PostGIS, osm schema, and proper permissions.')
Expand Down Expand Up @@ -231,6 +237,10 @@ def drop_pgosm_db():
"""Drops the pgosm database if it exists.
Intentionally hard coded to `pgosm` database for in-Docker use only.
Returns
------------------------
status : bool
"""
if not pg_conn_parts()['pg_host'] == 'localhost':
LOGGER.error('Attempted to drop database external from Docker. Not doing that')
Expand All @@ -244,12 +254,17 @@ def drop_pgosm_db():
conn.execute(sql_raw)
conn.close()
LOGGER.info('Removed pgosm database')
return True


def create_pgosm_db():
"""Creates the pgosm database and prepares with PostGIS and osm schema
Intentionally hard coded to `pgosm` database for in-Docker use only.
Returns
-----------------------
status : bool
"""
if not pg_conn_parts()['pg_host'] == 'localhost':
LOGGER.error('Attempted to create database external from Docker. Not doing that')
Expand All @@ -260,12 +275,16 @@ def create_pgosm_db():

LOGGER.debug('Setting Pg conn to enable autocommit - required for drop/create DB')
conn.autocommit = True
conn.execute(sql_raw)
conn.close()
LOGGER.info('Created pgosm database')
try:
conn.execute(sql_raw)
LOGGER.info('Created pgosm database')
except psycopg.errors.DuplicateDatabase:
LOGGER.info('Database already existed.')
finally:
conn.close()

sql_create_postgis = "CREATE EXTENSION postgis;"
sql_create_schema = "CREATE SCHEMA osm;"
sql_create_postgis = "CREATE EXTENSION IF NOT EXISTS postgis;"
sql_create_schema = "CREATE SCHEMA IF NOT EXISTS osm;"

with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
cur = conn.cursor()
Expand All @@ -274,6 +293,8 @@ def create_pgosm_db():
cur.execute(sql_create_schema)
LOGGER.debug('Created osm schema')

return True


def run_sqitch_prep(db_path):
"""Runs Sqitch to create DB structure and populate helper data.
Expand Down Expand Up @@ -424,10 +445,18 @@ def pgosm_after_import(flex_path):

output = subprocess.run(cmds,
text=True,
capture_output=True,
cwd=flex_path,
check=True)
LOGGER.info(f'Post-processing output: \n {output.stderr}')
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
LOGGER.info(f'Post-processing SQL output: \n {output.stdout}')

if output.returncode != 0:
err_msg = f'Failed to run post-processing SQL. Return code: {output.returncode}'
LOGGER.error(err_msg)
return False

return True


def pgosm_nested_admin_polygons(flex_path):
Expand Down Expand Up @@ -456,6 +485,49 @@ def pgosm_nested_admin_polygons(flex_path):
sys.exit(f'{err_msg} - Check the log output for details.')



def osm2pgsql_replication_start():
    """Runs the pre-replication step ahead of an append/update run.

    Executes the ``osm.append_data_start()`` procedure, which is intended
    to clean out FKs that would otherwise prevent data updates.  The
    connection comes from ``get_db_conn()`` using ``connection_string()``
    and is used as a context manager for the duration of the call.
    """
    prep_sql = 'CALL osm.append_data_start();'
    LOGGER.info('Prep database to allow data updates.')

    with get_db_conn(conn_string=connection_string()) as db_conn:
        db_cursor = db_conn.cursor()
        db_cursor.execute(prep_sql)


def osm2pgsql_replication_finish(skip_nested):
    """Runs the post-replication step after an append/update run.

    Executes the ``osm.append_data_finish()`` procedure through the
    ``psql`` command line client.  The procedure is intended to put FKs
    back and refresh materialized views.  The combined stdout/stderr of
    ``psql`` is written to the log.  A non-zero return code is logged as
    an error and terminates the program via ``sys.exit()``.

    Parameters
    ---------------------
    skip_nested : bool
        When truthy the procedure is called with ``skip_nested := True``,
        otherwise with ``skip_nested := False``.
    """
    # The CALL fails when sent via psycopg, so it is run through psql instead.
    if skip_nested:
        start_msg = 'Finishing Replication, skipping nested polygons'
        finish_sql = 'CALL osm.append_data_finish(skip_nested := True );'
    else:
        start_msg = 'Finishing Replication, including nested polygons'
        finish_sql = 'CALL osm.append_data_finish(skip_nested := False );'
    LOGGER.info(start_msg)

    psql_cmd = ['psql', '-d', os.environ['PGOSM_CONN'], '-c', finish_sql]
    LOGGER.info('Finishing Replication')

    # check=False: the return code is inspected below so the captured
    # output can be logged before deciding whether to exit.
    result = subprocess.run(psql_cmd,
                            text=True,
                            check=False,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    LOGGER.info(f'Finishing replication output: \n {result.stdout}')

    if result.returncode == 0:
        return

    err_msg = f'Failed to finish replication. Return code: {result.returncode}'
    LOGGER.error(err_msg)
    sys.exit(f'{err_msg} - Check the log output for details.')


def rename_schema(schema_name):
"""Renames default schema name "osm" to `schema_name`
Expand Down Expand Up @@ -511,6 +583,10 @@ def fix_pg_dump_create_public(export_path):
"""Using pg_dump with `--schema=public` results in
a .sql script containing `CREATE SCHEMA public;`, nearly always breaks
in target DB. Replaces with `CREATE SCHEMA IF NOT EXISTS public;`
Parameters
----------------------
export_path : str
"""
result = sh.sed('-i',
's/CREATE SCHEMA public;/CREATE SCHEMA IF NOT EXISTS public;/',
Expand Down
32 changes: 14 additions & 18 deletions docker/geofabrik.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,16 @@
import helpers


def get_region_filename(region, subregion):
def get_region_filename():
"""Returns the filename needed to download/manage PBF files.
Parameters
----------------------
region : str
subregion : str
Returns
----------------------
filename : str
"""
region = os.environ.get('PGOSM_REGION')
subregion = os.environ.get('PGOSM_SUBREGION')

base_name = '{}-latest.osm.pbf'
if subregion is None:
filename = base_name.format(region)
Expand All @@ -29,25 +27,26 @@ def get_region_filename(region, subregion):
return filename


def prepare_data(region, subregion, pgosm_date, out_path):
def prepare_data(out_path):
"""Ensures the PBF file is available.
Checks if it already exists locally, download if needed,
and verify MD5 checksum.
Parameters
----------------------
region : str
subregion : str
pgosm_date : str
out_path : str
Returns
----------------------
pbf_file : str
Full path to PBF file
"""
pbf_filename = get_region_filename(region, subregion)
region = os.environ.get('PGOSM_REGION')
subregion = os.environ.get('PGOSM_SUBREGION')
pgosm_date = os.environ.get('PGOSM_DATE')

pbf_filename = get_region_filename()

pbf_file = os.path.join(out_path, pbf_filename)
pbf_file_with_date = pbf_file.replace('latest', pgosm_date)
Expand Down Expand Up @@ -216,23 +215,20 @@ def unarchive_data(pbf_file, md5_file, pbf_file_with_date, md5_file_with_date):
shutil.copy2(md5_file_with_date, md5_file)


def remove_latest_files(region, subregion, paths):
def remove_latest_files(out_path):
"""Removes the PBF and MD5 file with -latest in the name.
Files are archived via prepare_data() before processing starts
Parameters
-------------------------
region : str
subregion : str
paths : dict
out_path : str
"""
pbf_filename = get_region_filename(region, subregion)
pbf_filename = get_region_filename()

pbf_file = os.path.join(paths['out_path'], pbf_filename)
pbf_file = os.path.join(out_path, pbf_filename)
md5_file = f'{pbf_file}.md5'
logging.info(f'Done with {pbf_file}, removing.')
os.remove(pbf_file)
logging.info(f'Done with {md5_file}, removing.')
os.remove(md5_file)

12 changes: 9 additions & 3 deletions docker/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def verify_checksum(md5_file, path):
logger.error(err_msg)
sys.exit(err_msg)

logger.info(f'md5sum validated')
logger.info('md5sum validated')


def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
Expand All @@ -72,13 +72,17 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
unset_env_vars()
logger.debug('Setting environment variables')

os.environ['PGOSM_REGION'] = region

if subregion is None:
pgosm_region = f'{region}'
else:
os.environ['PGOSM_SUBREGION'] = subregion
pgosm_region = f'{region}-{subregion}'

logger.debug(f'PGOSM_REGION: {pgosm_region}')
os.environ['PGOSM_REGION'] = pgosm_region
# Used by helpers.lua
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')
os.environ['PGOSM_REGION_COMBINED'] = pgosm_region

if srid != DEFAULT_SRID:
logger.info(f'SRID set: {srid}')
Expand Down Expand Up @@ -106,6 +110,8 @@ def unset_env_vars():
Does not pop POSTGRES_DB on purpose to allow non-Docker operation.
"""
os.environ.pop('PGOSM_REGION', None)
os.environ.pop('PGOSM_SUBREGION', None)
os.environ.pop('PGOSM_COMBINED', None)
os.environ.pop('PGOSM_SRID', None)
os.environ.pop('PGOSM_LANGUAGE', None)
os.environ.pop('PGOSM_LAYERSET_PATH', None)
Expand Down
6 changes: 3 additions & 3 deletions docker/osm2pgsql_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
LOGGER = logging.getLogger('pgosm-flex')


def osm2pgsql_recommendation(ram, pbf_filename, out_path):
def osm2pgsql_recommendation(ram, pbf_filename, out_path, append):
"""Returns recommended osm2pgsql command.
Recommendation from API at https://osm2pgsql-tuner.com
Expand All @@ -24,6 +24,8 @@ def osm2pgsql_recommendation(ram, pbf_filename, out_path):
out_path : str
append : boolean
Returns
----------------------
osm2pgsql_cmd : str
Expand All @@ -38,8 +40,6 @@ def osm2pgsql_recommendation(ram, pbf_filename, out_path):
osm_pbf_gb = os.path.getsize(pbf_file) / 1024 / 1024 / 1024
LOGGER.debug(f'PBF size (GB): {osm_pbf_gb}')

# PgOSM-Flex currently does not support/test append mode.
append = False
osm2pgsql_cmd = get_recommended_script(system_ram_gb,
osm_pbf_gb,
append,
Expand Down
Loading

0 comments on commit d0c6fbf

Please sign in to comment.