From 97e2c54d536593c5ced0c895d07c4aa28cbc0a12 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Wed, 30 Mar 2022 19:57:16 -0600 Subject: [PATCH 01/11] Install pgBouncer --- Dockerfile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Dockerfile b/Dockerfile index d6e6d09..59ce4c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,7 @@ FROM postgis/postgis:16-3.4 LABEL maintainer="PgOSM Flex - https://github.com/rustprooflabs/pgosm-flex" ARG OSM2PGSQL_BRANCH=master +ARG BOUNCER_VERSION=1.17.0 RUN apt-get update \ # Removed upgrade per https://github.com/rustprooflabs/pgosm-flex/issues/322 @@ -18,6 +19,7 @@ RUN apt-get update \ curl unzip \ postgresql-16-pgrouting \ nlohmann-json3-dev \ + pkg-config libevent-2.1-7 libevent-dev libudns-dev \ && rm -rf /var/lib/apt/lists/* RUN wget https://luarocks.org/releases/luarocks-3.9.2.tar.gz \ @@ -32,6 +34,19 @@ RUN curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \ RUN luarocks install inifile RUN luarocks install luasql-postgres PGSQL_INCDIR=/usr/include/postgresql/ +# pgBouncer implementation based on: https://github.com/edoburu/docker-pgbouncer/blob/master/Dockerfile +RUN curl -o /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz -L https://pgbouncer.github.io/downloads/files/$BOUNCER_VERSION/pgbouncer-$BOUNCER_VERSION.tar.gz \ + && cd /tmp \ + && tar xvfz /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz \ + && cd pgbouncer-$BOUNCER_VERSION \ + && ./configure --prefix=/usr --with-udns \ + && make \ + && cp pgbouncer /usr/bin \ + && mkdir -p /etc/pgbouncer /var/log/pgbouncer /var/run/pgbouncer \ + && cp etc/pgbouncer.ini /etc/pgbouncer/pgbouncer.ini.example \ + && cp etc/userlist.txt /etc/pgbouncer/userlist.txt.example \ + && touch /etc/pgbouncer/userlist.txt \ + && chown -R postgres /var/run/pgbouncer /etc/pgbouncer WORKDIR /tmp RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \ @@ -45,6 +60,7 @@ RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreet libexpat1-dev zlib1g-dev \ libbz2-dev libproj-dev \ curl \ + pkg-config libevent-dev \ && apt autoremove -y \ && cd /tmp && rm -r /tmp/osm2pgsql From b4f8c3c6c7240a45c9cb43fe0021be0398bbf04b Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Tue, 17 Oct 2023 20:50:53 -0600 Subject: [PATCH 02/11] Update pgbouncer version --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 59ce4c1..1671d1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM postgis/postgis:16-3.4 LABEL maintainer="PgOSM Flex - https://github.com/rustprooflabs/pgosm-flex" ARG OSM2PGSQL_BRANCH=master -ARG BOUNCER_VERSION=1.17.0 +ARG BOUNCER_VERSION=1.21.0 RUN apt-get update \ # Removed upgrade per https://github.com/rustprooflabs/pgosm-flex/issues/322 From be431faba2edeeed2d1150002aa2327f6fcb9604 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Wed, 18 Oct 2023 20:55:53 -0600 Subject: [PATCH 03/11] Add pgbouncer module --- Dockerfile | 5 +--- Makefile | 11 +++++++- docker/db.py | 36 ++++++++++++++++-------- docker/helpers.py | 2 ++ docker/pgbouncer.py | 65 ++++++++++++++++++++++++++++++++++++++++++++ docker/pgosm_flex.py | 5 ++++ 6 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 docker/pgbouncer.py diff --git a/Dockerfile b/Dockerfile index 1671d1e..d9c7a92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,10 +43,7 @@ RUN curl -o /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz -L https://pgbouncer.githu && make \ && cp pgbouncer /usr/bin \ && mkdir -p /etc/pgbouncer /var/log/pgbouncer /var/run/pgbouncer \ - && cp etc/pgbouncer.ini /etc/pgbouncer/pgbouncer.ini.example \ - && cp etc/userlist.txt /etc/pgbouncer/userlist.txt.example \ - && touch /etc/pgbouncer/userlist.txt \ - && chown -R postgres /var/run/pgbouncer /etc/pgbouncer + && chown -R postgres /var/run/pgbouncer /etc/pgbouncer /var/log/pgbouncer/ WORKDIR /tmp RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \ diff --git a/Makefile b/Makefile index 6f26579..2428308 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,11 @@ docker-exec-default: build-run-docker docker exec -it pgosm \ chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/ + # Needed for pgbouncer + docker exec -it pgosm \ + chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/ + + # run typical processing using built-in file handling docker exec -it \ -e POSTGRES_PASSWORD=mysecretpassword \ @@ -167,11 +172,15 @@ docker-exec-region: build-run-docker # Needed for unit-tests docker exec -it pgosm \ chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/ + # Needed for pgbouncer + docker exec -it pgosm \ + chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/ + docker exec -it pgosm \ sed -i 's/district-of-columbia/$(REGION_FILE_NAME)/' /app/output/$(REGION_FILE_NAME)-$(TODAY).osm.pbf.md5 - # process DC file, pretending its a region instead of subregion + # process DC file, pretending it's a region instead of subregion docker exec -it \ -e POSTGRES_PASSWORD=mysecretpassword \ -e POSTGRES_USER=postgres \ diff --git a/docker/db.py b/docker/db.py index 97d8d00..98b383d 100644 --- a/docker/db.py +++ b/docker/db.py @@ -13,7 +13,7 @@ LOGGER = logging.getLogger('pgosm-flex') -def connection_string(admin: bool=False) -> str: +def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: """Returns connection string to `db_name`. Env vars for user/password defined by Postgres docker image. @@ -30,6 +30,10 @@ def connection_string(admin: bool=False) -> str: Default False. Set to True to connect to admin database, currently hard-coded to `postgres` + pgbouncer : boolean + Default False. + FIXME: SET DEFAULT TO TRUE??? + Returns -------------------------- conn_string : str @@ -39,9 +43,16 @@ def connection_string(admin: bool=False) -> str: pg_details = pg_conn_parts() pg_user = pg_details['pg_user'] pg_pass = pg_details['pg_pass'] - pg_host = pg_details['pg_host'] + pg_db = pg_details['pg_db'] - pg_port = pg_details['pg_port'] + + + if pgbouncer: + pg_host = pg_details['pg_host_pgbouncer'] + pg_port = pg_details['pg_port_pgbouncer'] + else: + pg_host = pg_details['pg_host'] + pg_port = pg_details['pg_port'] if admin: if pg_host == 'localhost': @@ -121,7 +132,10 @@ def pg_conn_parts() -> dict: 'pg_pass': pg_pass, 'pg_host': pg_host, 'pg_port': pg_port, - 'pg_db': pg_db} + 'pg_db': pg_db, + 'pg_host_pgbouncer': 'localhost', + 'pg_port_pgbouncer': 6432 + } return pg_details @@ -278,7 +292,7 @@ def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info, ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: + with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: cur = conn.cursor() cur.execute(sql_raw, params=params) import_id = cur.fetchone()[0] @@ -439,7 +453,7 @@ def run_deploy_file(db_path: str, sql_filename: str, schema_name: str, deploy_sql = deploy_sql.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: + with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: cur = conn.cursor() cur.execute(deploy_sql) LOGGER.debug(f'Ran SQL in {sql_filename}') @@ -507,7 +521,7 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str): """ sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();' - conn_string = os.environ['PGOSM_CONN'] + conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] cmds = ['psql', '-d', conn_string, '-c', sql_raw] LOGGER.info('Building nested polygons... (this can take a while)') output = subprocess.run(cmds, @@ -552,7 +566,7 @@ def osm2pgsql_replication_finish(skip_nested): LOGGER.info('Finishing Replication, including nested polygons') sql_raw = 'CALL osm.append_data_finish(skip_nested := False );' - conn_string = os.environ['PGOSM_CONN'] + conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] cmds = ['psql', '-d', conn_string, '-c', sql_raw] LOGGER.info('Finishing Replication') output = subprocess.run(cmds, @@ -578,7 +592,7 @@ def run_pg_dump(export_path, skip_qgis_style): skip_qgis_style : bool """ logger = logging.getLogger('pgosm-flex') - conn_string = os.environ['PGOSM_CONN'] + conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] schema_name = 'osm' if skip_qgis_style: @@ -635,7 +649,7 @@ def log_import_message(import_id, msg, schema_name): ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: + with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: params = {'import_id': import_id, 'msg': msg} cur = conn.cursor() cur.execute(sql_raw, params=params) @@ -664,7 +678,7 @@ def get_prior_import(schema_name: str) -> dict: ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: + with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: cur = conn.cursor(row_factory=psycopg.rows.dict_row) results = cur.execute(sql_raw).fetchone() diff --git a/docker/helpers.py b/docker/helpers.py index a8e6dcf..3282117 100644 --- a/docker/helpers.py +++ b/docker/helpers.py @@ -139,6 +139,8 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, os.environ['PGOSM_CONN'] = db.connection_string() # Connection to DB for admin purposes, e.g. drop/create main database os.environ['PGOSM_CONN_PG'] = db.connection_string(admin=True) + # pgBouncer connection + os.environ['PGOSM_CONN_PGBOUNCER'] = db.connection_string(pgbouncer=True) pgosm_region = get_region_combined(region, subregion) logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}') diff --git a/docker/pgbouncer.py b/docker/pgbouncer.py new file mode 100644 index 0000000..60288e4 --- /dev/null +++ b/docker/pgbouncer.py @@ -0,0 +1,65 @@ +"""Module to allow adding pgBouncer functionality. +""" +import subprocess + +import db + + +PGBOUNCER_USER_LIST_PATH = '/etc/pgbouncer/userlist.txt' +PGBOUNCER_INI_PATH = '/etc/pgbouncer/pgbouncer.ini' + +PGBOUNCER_INI_TEMPLATE = """[databases] +{pg_db} = host={pg_host} port={pg_port} dbname={pg_db} + +[pgbouncer] +listen_port = 6432 +listen_addr = localhost +auth_type = md5 +auth_file = /etc/pgbouncer/userlist.txt +logfile = /var/log/pgbouncer/pgbouncer.log +pidfile = /var/run/pgbouncer/pgbouncer.pid +admin_users = postgres +max_client_conn = 300 +default_pool_size = 20 +max_prepared_statements = 500 +""" +"""str : Shell of pgbouncer.ini used to configure pgBouncer.""" + +PGBOUNCER_USER_LIST_TEMPLATE = """ +"{pg_user}" "{pg_pass}" +""" +"""str : Shell of userlist.txt to provide authentication to pgBouncer.""" + + +def setup(): + """Sets up configuration files for pgBouncer. + """ + print('Setting up pgBouncer configuration files') + db_parts = db.pg_conn_parts() + user_list = PGBOUNCER_USER_LIST_TEMPLATE.format(pg_user=db_parts['pg_user'], + pg_pass=db_parts['pg_pass'] + ) + + with open(PGBOUNCER_USER_LIST_PATH, "w") as user_file: + user_file.write(user_list) + + + pgbouncer_ini = PGBOUNCER_INI_TEMPLATE.format(pg_host=db_parts['pg_host'], + pg_port=db_parts['pg_port'], + pg_db=db_parts['pg_db'] + ) + + with open(PGBOUNCER_INI_PATH, "w") as ini_file: + ini_file.write(pgbouncer_ini) + + +def run(): + print('Running pgbouncer as postgres user') + subprocess.run( + ['/usr/bin/pgbouncer', "-d", PGBOUNCER_INI_PATH], + capture_output=True, + text=True, + check=True, + user='postgres' + ) + diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 26dcb85..c02eeee 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -18,6 +18,7 @@ import db import geofabrik import helpers +import pgbouncer from import_mode import ImportMode @@ -97,6 +98,10 @@ def run_pgosm_flex(ram, region, subregion, debug, force, helpers.set_env_vars(region, subregion, srid, language, pgosm_date, layerset, layerset_path, replication, schema_name) + + pgbouncer.setup() + pgbouncer.run() + db.wait_for_postgres() if force and db.pg_conn_parts()['pg_host'] == 'localhost': msg = 'Using --force with the built-in database is unnecessary.' From 1f7f9a629e5f4dd6fdda2a9af554b131e37755ee Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 20 Oct 2023 19:10:50 -0600 Subject: [PATCH 04/11] Print warning about PW being saved in container in plain text --- docker/pgbouncer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/pgbouncer.py b/docker/pgbouncer.py index 60288e4..4a5972a 100644 --- a/docker/pgbouncer.py +++ b/docker/pgbouncer.py @@ -36,14 +36,15 @@ def setup(): """ print('Setting up pgBouncer configuration files') db_parts = db.pg_conn_parts() + user_list = PGBOUNCER_USER_LIST_TEMPLATE.format(pg_user=db_parts['pg_user'], pg_pass=db_parts['pg_pass'] ) + print('WARNING: Saving password in plain text within the container for pgBouncer.') with open(PGBOUNCER_USER_LIST_PATH, "w") as user_file: user_file.write(user_list) - pgbouncer_ini = PGBOUNCER_INI_TEMPLATE.format(pg_host=db_parts['pg_host'], pg_port=db_parts['pg_port'], pg_db=db_parts['pg_db'] From 4ab51e3dcb36b6649c787b54d5ce80767f6f13f5 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 22 Oct 2023 08:49:27 -0600 Subject: [PATCH 05/11] Add switch to enable pgbouncer, disabled by default --- .gitignore | 2 +- docker/db.py | 31 +++++++++++++++++++++++-------- docker/helpers.py | 9 ++++++++- docker/pgosm_flex.py | 13 +++++++++---- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 4d7d0d8..36e297e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ output/ **/__pycache__ pgosm-data/* docs/book/* -.vscode/* \ No newline at end of file +.vscode/* diff --git a/docker/db.py b/docker/db.py index 98b383d..509001f 100644 --- a/docker/db.py +++ b/docker/db.py @@ -292,7 +292,7 @@ def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info, ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: + with get_db_conn(conn_string=get_db_conn_string()) as conn: cur = conn.cursor() cur.execute(sql_raw, params=params) import_id = cur.fetchone()[0] @@ -453,11 +453,26 @@ def run_deploy_file(db_path: str, sql_filename: str, schema_name: str, deploy_sql = deploy_sql.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: + with get_db_conn(conn_string=get_db_conn_string()) as conn: cur = conn.cursor() cur.execute(deploy_sql) LOGGER.debug(f'Ran SQL in {sql_filename}') +def get_db_conn_string() -> str: + """Returns non-admin database connection, either pgBouncer or not depending + on run-time configuration. + + Returns + ---------------------------- + conn_string : str + """ + if os.environ['USE_PGBOUNCER'] == 'true': + conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] + else: + conn_string = os.environ['PGOSM_CONN'] + + return conn_string + def get_db_conn(conn_string): """Establishes psycopg database connection. @@ -521,7 +536,7 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str): """ sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();' - conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] + conn_string = get_db_conn_string() cmds = ['psql', '-d', conn_string, '-c', sql_raw] LOGGER.info('Building nested polygons... (this can take a while)') output = subprocess.run(cmds, @@ -546,7 +561,7 @@ def osm2pgsql_replication_start(): # This use of append applies to both osm2pgsql --append and osm2pgsq-replication, not renaming from "append" sql_raw = 'CALL osm.append_data_start();' - with get_db_conn(conn_string=connection_string()) as conn: + with get_db_conn(conn_string=get_db_conn_string()) as conn: cur = conn.cursor() cur.execute(sql_raw) @@ -566,7 +581,7 @@ def osm2pgsql_replication_finish(skip_nested): LOGGER.info('Finishing Replication, including nested polygons') sql_raw = 'CALL osm.append_data_finish(skip_nested := False );' - conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] + conn_string = get_db_conn_string() cmds = ['psql', '-d', conn_string, '-c', sql_raw] LOGGER.info('Finishing Replication') output = subprocess.run(cmds, @@ -592,7 +607,7 @@ def run_pg_dump(export_path, skip_qgis_style): skip_qgis_style : bool """ logger = logging.getLogger('pgosm-flex') - conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] + conn_string = get_db_conn_string() schema_name = 'osm' if skip_qgis_style: @@ -649,7 +664,7 @@ def log_import_message(import_id, msg, schema_name): ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: + with get_db_conn(conn_string=get_db_conn_string()) as conn: params = {'import_id': import_id, 'msg': msg} cur = conn.cursor() cur.execute(sql_raw, params=params) @@ -678,7 +693,7 @@ def get_prior_import(schema_name: str) -> dict: ; """ sql_raw = sql_raw.format(schema_name=schema_name) - with get_db_conn(conn_string=os.environ['PGOSM_CONN_PGBOUNCER']) as conn: + with get_db_conn(conn_string=get_db_conn_string()) as conn: cur = conn.cursor(row_factory=psycopg.rows.dict_row) results = cur.execute(sql_raw).fetchone() diff --git a/docker/helpers.py b/docker/helpers.py index 3282117..6d745cf 100644 --- a/docker/helpers.py +++ b/docker/helpers.py @@ -94,7 +94,7 @@ def verify_checksum(md5_file: str, path: str): def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, - layerset_path, replication, schema_name): + layerset_path, replication, schema_name, use_pgbouncer): """Sets environment variables needed by PgOSM Flex. Also creates DB record in `osm.pgosm_flex` table. @@ -111,6 +111,8 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, replication : bool Indicates when osm2pgsql-replication is used schema_name : str + use_pgbouncer : bool + Indicates if pgBouncer connection should be setup and used """ logger = logging.getLogger('pgosm-flex') logger.debug('Ensuring env vars are not set from prior run') @@ -145,6 +147,10 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, pgosm_region = get_region_combined(region, subregion) logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}') + if use_pgbouncer: + os.environ['USE_PGBOUNCER'] = 'true' + else: + os.environ['USE_PGBOUNCER'] = 'false' def get_region_combined(region: str, subregion: str) -> str: @@ -225,3 +231,4 @@ def unset_env_vars(): os.environ.pop('PGOSM_CONN', None) os.environ.pop('PGOSM_CONN_PG', None) os.environ.pop('SCHEMA_NAME', None) + os.environ.pop('USE_PGBOUNCER', None) diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index c02eeee..f414c8e 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -48,6 +48,8 @@ @click.option('--language', default=None, envvar="PGOSM_LANGUAGE", help="Set default language in loaded OpenStreetMap data when available. e.g. 'en' or 'kn'.") +@click.option('--use-pgbouncer', default=False, is_flag=True, + help='EXPERIMENTAL FEATURE - Setup and use pgbouncer to reduce connection load on target Postgres database.') @click.option('--pg-dump', default=False, is_flag=True, help='Uses pg_dump after processing is completed to enable easily load OpenStreetMap data into a different database') @click.option('--pgosm-date', required=False, @@ -76,7 +78,8 @@ type=click.Choice(['append', 'create'], case_sensitive=True), help='EXPERIMENTAL - Wrap around osm2pgsql create v. append modes, without using osm2pgsql-replication.') def run_pgosm_flex(ram, region, subregion, debug, force, - input_file, layerset, layerset_path, language, pg_dump, + input_file, layerset, layerset_path, language, + use_pgbouncer, pg_dump, pgosm_date, replication, schema_name, skip_nested, skip_qgis_style, srid, update): """Run PgOSM Flex within Docker to automate osm2pgsql flex processing. @@ -97,10 +100,12 @@ def run_pgosm_flex(ram, region, subregion, debug, force, region = input_file helpers.set_env_vars(region, subregion, srid, language, pgosm_date, - layerset, layerset_path, replication, schema_name) + layerset, layerset_path, replication, schema_name, + use_pgbouncer) - pgbouncer.setup() - pgbouncer.run() + if use_pgbouncer: + pgbouncer.setup() + pgbouncer.run() db.wait_for_postgres() if force and db.pg_conn_parts()['pg_host'] == 'localhost': From adacb94ddc31398cbb7d1884a5211fe1e05c5d97 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 22 Oct 2023 09:05:18 -0600 Subject: [PATCH 06/11] Adjusting tests to pass. No attempts to test with pgBouncer enabled yet --- docker/tests/test_db.py | 18 +++++++++++++++++- docker/tests/test_geofabrik.py | 6 ++++-- docker/tests/test_pgosm_flex.py | 21 ++++++++++++++------- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/docker/tests/test_db.py b/docker/tests/test_db.py index a1ed538..97f444a 100644 --- a/docker/tests/test_db.py +++ b/docker/tests/test_db.py @@ -3,7 +3,7 @@ import unittest from unittest import mock -import db +import db, helpers POSTGRES_USER = 'my_pg_user' POSTGRES_PASSWORD = 'here_for_fun' @@ -19,8 +19,24 @@ 'POSTGRES_USER': POSTGRES_USER, 'POSTGRES_PASSWORD': POSTGRES_PASSWORD} +REGION_US = 'north-america/us' +SUBREGION_DC = 'district-of-columbia' +LAYERSET = 'default' +PGOSM_DATE = '2021-12-02' + class DBTests(unittest.TestCase): + def setUp(self): + helpers.set_env_vars(region=REGION_US, + subregion=SUBREGION_DC, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None, + replication=False, + schema_name='osm', + use_pgbouncer=False) @mock.patch.dict(os.environ, PG_USER_ONLY) def test_pg_conn_parts_user_only_returns_expected_values(self): diff --git a/docker/tests/test_geofabrik.py b/docker/tests/test_geofabrik.py index 73c3349..0c0a8f0 100644 --- a/docker/tests/test_geofabrik.py +++ b/docker/tests/test_geofabrik.py @@ -20,7 +20,8 @@ def setUp(self): layerset=LAYERSET, layerset_path=None, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) def tearDown(self): @@ -42,7 +43,8 @@ def test_get_region_filename_returns_region_when_subregion_None(self): layerset=LAYERSET, layerset_path=None, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) result = geofabrik.get_region_filename() expected = f'{REGION_US}-latest.osm.pbf' diff --git a/docker/tests/test_pgosm_flex.py b/docker/tests/test_pgosm_flex.py index 1d7670f..04c6f5a 100644 --- a/docker/tests/test_pgosm_flex.py +++ b/docker/tests/test_pgosm_flex.py @@ -20,7 +20,8 @@ def setUp(self): layerset=LAYERSET, layerset_path=None, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) def tearDown(self): @@ -92,7 +93,8 @@ def test_get_export_filename_region_only(self): layerset=LAYERSET, layerset_path=None, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) input_file = None result = pgosm_flex.get_export_filename(input_file) @@ -110,7 +112,8 @@ def test_layerset_include_place_returns_boolean(self): layerset=LAYERSET, layerset_path=layerset_path, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) paths = pgosm_flex.get_paths() result = pgosm_flex.layerset_include_place(flex_path=paths['flex_path']) @@ -129,7 +132,8 @@ def test_layerset_include_place_returns_True_with_default_layerset(self): layerset=LAYERSET, layerset_path=layerset_path, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) paths = pgosm_flex.get_paths() actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path']) @@ -148,7 +152,8 @@ def test_layerset_include_place_returns_false_when_place_false_in_ini(self): layerset=layerset, layerset_path=layerset_path, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) paths = pgosm_flex.get_paths() actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path']) @@ -167,7 +172,8 @@ def test_layerset_include_place_returns_false_when_place_missing_in_ini(self): layerset=layerset, layerset_path=layerset_path, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) paths = pgosm_flex.get_paths() actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path']) @@ -186,7 +192,8 @@ def test_layerset_include_place_returns_true_when_place_true_in_ini(self): layerset=layerset, layerset_path=layerset_path, replication=False, - schema_name='osm') + schema_name='osm', + use_pgbouncer=False) paths = pgosm_flex.get_paths() actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path']) From 50c515a18625d1cc6522240eb823c6cbdfe0d634 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 22 Oct 2023 11:58:32 -0600 Subject: [PATCH 07/11] Switch to using pool size as toggle for using pgbouncer. Add error handling, ability to restart pgBouncer and shell of documentation --- docker/db.py | 47 ++++++++++++++++++---------- docker/pgbouncer.py | 71 +++++++++++++++++++++++++++++++++++-------- docker/pgosm_flex.py | 16 +++++++--- docs/src/SUMMARY.md | 1 + docs/src/pgbouncer.md | 28 +++++++++++++++++ 5 files changed, 131 insertions(+), 32 deletions(-) create mode 100644 docs/src/pgbouncer.md diff --git a/docker/db.py b/docker/db.py index 509001f..2f0eacd 100644 --- a/docker/db.py +++ b/docker/db.py @@ -13,7 +13,8 @@ LOGGER = logging.getLogger('pgosm-flex') -def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: +def connection_string(admin: bool=False, pgbouncer: bool=False, + pgbouncer_admin: bool=False) -> str: """Returns connection string to `db_name`. Env vars for user/password defined by Postgres docker image. @@ -34,6 +35,11 @@ def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: Default False. FIXME: SET DEFAULT TO TRUE??? + pgbouncer_admin : boolean + Default False + Connects to pgbouncer database (must be pgbouncer connection) for admin + functionality, e.g. `SHUTDOWN;` + Returns -------------------------- conn_string : str @@ -46,6 +52,8 @@ def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: pg_db = pg_details['pg_db'] + if pgbouncer_admin and not pgbouncer: + raise ValueError('Cannot connect to pgbouncer_admin on non-pgbouncer connection.') if pgbouncer: pg_host = pg_details['pg_host_pgbouncer'] @@ -54,6 +62,7 @@ def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: pg_host = pg_details['pg_host'] pg_port = pg_details['pg_port'] + if admin: if pg_host == 'localhost': db_name = 'postgres' @@ -65,6 +74,11 @@ def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: else: db_name = pg_db + # Just overwriting instead of working into above logic. Probably a good + # sign this logic should be improved... + if pgbouncer_admin: + db_name = 'pgbouncer' + if pg_pass is None: conn_string = f'postgresql://{pg_user}@{pg_host}:{pg_port}/{db_name}{app_str}' else: @@ -73,6 +87,22 @@ def connection_string(admin: bool=False, pgbouncer: bool=False) -> str: return conn_string +def get_db_conn_string() -> str: + """Returns non-admin database connection, either pgBouncer or not depending + on run-time configuration. + + Returns + ---------------------------- + conn_string : str + """ + if os.environ['USE_PGBOUNCER'] == 'true': + conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] + else: + conn_string = os.environ['PGOSM_CONN'] + + return conn_string + + def pg_conn_parts() -> dict: """Returns dictionary of connection parts based on environment variables if they exist. @@ -458,21 +488,6 @@ def run_deploy_file(db_path: str, sql_filename: str, schema_name: str, cur.execute(deploy_sql) LOGGER.debug(f'Ran SQL in {sql_filename}') -def get_db_conn_string() -> str: - """Returns non-admin database connection, either pgBouncer or not depending - on run-time configuration. - - Returns - ---------------------------- - conn_string : str - """ - if os.environ['USE_PGBOUNCER'] == 'true': - conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] - else: - conn_string = os.environ['PGOSM_CONN'] - - return conn_string - def get_db_conn(conn_string): """Establishes psycopg database connection. diff --git a/docker/pgbouncer.py b/docker/pgbouncer.py index 4a5972a..da74af3 100644 --- a/docker/pgbouncer.py +++ b/docker/pgbouncer.py @@ -1,10 +1,14 @@ """Module to allow adding pgBouncer functionality. """ +import logging +import psycopg import subprocess import db +LOGGER = logging.getLogger('pgosm-flex') + PGBOUNCER_USER_LIST_PATH = '/etc/pgbouncer/userlist.txt' PGBOUNCER_INI_PATH = '/etc/pgbouncer/pgbouncer.ini' @@ -20,7 +24,7 @@ pidfile = /var/run/pgbouncer/pgbouncer.pid admin_users = postgres max_client_conn = 300 -default_pool_size = 20 +default_pool_size = {pgbouncer_pool_size} max_prepared_statements = 500 """ """str : Shell of pgbouncer.ini used to configure pgBouncer.""" @@ -31,23 +35,31 @@ """str : Shell of userlist.txt to provide authentication to pgBouncer.""" -def setup(): +def setup(pgbouncer_pool_size: int): """Sets up configuration files for pgBouncer. + + Parameters + --------------------------- + pgbouncer_pool_size : int """ - print('Setting up pgBouncer configuration files') + if pgbouncer_pool_size < 1: + raise ValueError(f'Invalid pgbouncer_pool_size. Must be >= 1. Value {pgbouncer_pool_size}') + db_parts = db.pg_conn_parts() user_list = PGBOUNCER_USER_LIST_TEMPLATE.format(pg_user=db_parts['pg_user'], pg_pass=db_parts['pg_pass'] ) - print('WARNING: Saving password in plain text within the container for pgBouncer.') + LOGGER.warning('Saving password in plain text within the container for pgBouncer.') with open(PGBOUNCER_USER_LIST_PATH, "w") as user_file: user_file.write(user_list) + LOGGER.info(f'Setting up pgBouncer configuration files with pool size {pgbouncer_pool_size}') pgbouncer_ini = PGBOUNCER_INI_TEMPLATE.format(pg_host=db_parts['pg_host'], pg_port=db_parts['pg_port'], - pg_db=db_parts['pg_db'] + pg_db=db_parts['pg_db'], + pgbouncer_pool_size=pgbouncer_pool_size ) with open(PGBOUNCER_INI_PATH, "w") as ini_file: @@ -55,12 +67,47 @@ def setup(): def run(): - print('Running pgbouncer as postgres user') + """Wrapper of the private _run() function. This uses error handling to + stop and re-start the pgBouncer service since the # of connections is + defined at docker exec time, not docker run. + """ + LOGGER.debug('Running pgbouncer as daemon') + try: + _run() + except subprocess.CalledProcessError as err: + LOGGER.debug('pgBouncer was already running.') + stop() + LOGGER.debug('Re-starting pgBouncer daemon') + _run() + + LOGGER.info('pgBouncer started') + + +def _run(): + """Has the logic to run pgbouncer. Not wrapped in error handling here, + letting outer method deal with that. + """ subprocess.run( - ['/usr/bin/pgbouncer', "-d", PGBOUNCER_INI_PATH], - capture_output=True, - text=True, - check=True, - user='postgres' - ) + ['/usr/bin/pgbouncer', "-d", PGBOUNCER_INI_PATH], + capture_output=True, + text=True, + check=True, + user='postgres' + ) + + +def stop(): + """Stops the pgbouncer service. Uses connection to pgbouncer database. + """ + LOGGER.info('Shutting down the pgBouncer service.') + sql_raw = "SHUTDOWN;" + conn_string = db.connection_string(pgbouncer=True, + pgbouncer_admin=True) + + with db.get_db_conn(conn_string=conn_string) as conn: + conn.autocommit = True + try: + conn.execute(sql_raw) + except psycopg.OperationalError: + LOGGER.debug('Disconnected. This is the expected result.') diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index f414c8e..ba3e2d0 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -48,8 +48,8 @@ @click.option('--language', default=None, envvar="PGOSM_LANGUAGE", help="Set default language in loaded OpenStreetMap data when available. e.g. 'en' or 'kn'.") -@click.option('--use-pgbouncer', default=False, is_flag=True, - help='EXPERIMENTAL FEATURE - Setup and use pgbouncer to reduce connection load on target Postgres database.') +@click.option('--pgbouncer-pool-size', default=-1, type=int, required=True, + help='EXPERIMENTAL FEATURE: When set to > 0 the internal pgBouncer is enabled and used for all non-admin database connections. Setting to 10 is a reasonable starting place.') @click.option('--pg-dump', default=False, is_flag=True, help='Uses pg_dump after processing is completed to enable easily load OpenStreetMap data into a different database') @click.option('--pgosm-date', required=False, @@ -79,7 +79,7 @@ help='EXPERIMENTAL - Wrap around osm2pgsql create v. append modes, without using osm2pgsql-replication.') def run_pgosm_flex(ram, region, subregion, debug, force, input_file, layerset, layerset_path, language, - use_pgbouncer, pg_dump, + pgbouncer_pool_size, pg_dump, pgosm_date, replication, schema_name, skip_nested, skip_qgis_style, srid, update): """Run PgOSM Flex within Docker to automate osm2pgsql flex processing. @@ -99,13 +99,21 @@ def run_pgosm_flex(ram, region, subregion, debug, force, if region is None and input_file: region = input_file + if pgbouncer_pool_size > 0: + use_pgbouncer = True + else: + use_pgbouncer = False + helpers.set_env_vars(region, subregion, srid, language, pgosm_date, layerset, layerset_path, replication, schema_name, use_pgbouncer) if use_pgbouncer: - pgbouncer.setup() + pgbouncer.setup(pgbouncer_pool_size=pgbouncer_pool_size) pgbouncer.run() + else: + # Ensuring pgbouncer is not running if ran subsequent times. + pgbouncer.stop() db.wait_for_postgres() if force and db.pg_conn_parts()['pg_host'] == 'localhost': diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 17db1c8..e8c6100 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -20,6 +20,7 @@ - [Postgres Permissions](./postgres-permissions.md) - [Using External Postgres Connection](./postgres-external.md) +- [pgBouncer](./pgbouncer.md) - [Data updates](./data-updates.md) - [Using Replication](./replication.md) - [Relocate Data](./relocate-data.md) diff --git a/docs/src/pgbouncer.md b/docs/src/pgbouncer.md new file mode 100644 index 0000000..bdb173b --- /dev/null +++ b/docs/src/pgbouncer.md @@ -0,0 +1,28 @@ +# pgBouncer + +The PgOSM Flex Docker image now includes pgBouncer. + +> Experimental Feature! pgBouncer was added as an experimental feature in +> PgOSM Flex 0.10.3. + +```bash +docker run --name pgosm -d --rm \ + -v ~/pgosm-data:/app/output \ + -v /etc/localtime:/etc/localtime:ro \ + -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ + -p 5433:6432 \ # Port for pgBouncer instead of base Postgres + -d rustprooflabs/pgosm-flex +``` + +Running + +```bash +time docker exec -it \ + pgosm python3 docker/pgosm_flex.py \ + --ram=8 \ + --region=north-america/us \ + --subregion=district-of-columbia \ + --pgbouncer-pool-size=10 +``` + + From 3bd03e118870af8a5420f3f148d9a0625fe02a60 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 22 Oct 2023 12:05:59 -0600 Subject: [PATCH 08/11] Fix when not using pgbouncer. Passes make again --- docker/pgbouncer.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/docker/pgbouncer.py b/docker/pgbouncer.py index da74af3..fc350b7 100644 --- a/docker/pgbouncer.py +++ b/docker/pgbouncer.py @@ -104,10 +104,17 @@ def stop(): conn_string = db.connection_string(pgbouncer=True, pgbouncer_admin=True) - with db.get_db_conn(conn_string=conn_string) as conn: - conn.autocommit = True - try: - conn.execute(sql_raw) - except psycopg.OperationalError: - LOGGER.debug('Disconnected. This is the expected result.') + # Writing this way instead of as with ... as conn b/c of how function returns + # false. + conn = db.get_db_conn(conn_string=conn_string) + if not conn: + LOGGER.warning('Unable to connect to pgbouncer to shutdown pgBouncer. Probably not a problem.') + return + + conn.autocommit = True + try: + conn.execute(sql_raw) + except psycopg.OperationalError: + LOGGER.debug('Disconnected. This is the expected result.') + \ No newline at end of file From bfa267cc35a058c68d0991c7652bd7954e1fb558 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Mon, 23 Oct 2023 17:04:45 -0600 Subject: [PATCH 09/11] Making changes to have osm2pgsql connections go through pgBouncer --- Dockerfile | 3 +++ docker/db.py | 2 ++ docker/osm2pgsql_recommendation.py | 2 +- docker/pgbouncer.py | 4 +++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d9c7a92..e847307 100644 --- a/Dockerfile +++ b/Dockerfile @@ -74,3 +74,6 @@ WORKDIR /app COPY . ./ RUN pip install --upgrade pip && pip install -r requirements.txt + +# Expose pgBouncer port +EXPOSE 6432 diff --git a/docker/db.py b/docker/db.py index 2f0eacd..5024be1 100644 --- a/docker/db.py +++ b/docker/db.py @@ -96,8 +96,10 @@ def get_db_conn_string() -> str: conn_string : str """ if os.environ['USE_PGBOUNCER'] == 'true': + LOGGER.debug('Using pgBouncer connection string') conn_string = os.environ['PGOSM_CONN_PGBOUNCER'] else: + LOGGER.debug('Using direct to Postgres connection string (non-admin)') conn_string = os.environ['PGOSM_CONN'] return conn_string diff --git a/docker/osm2pgsql_recommendation.py b/docker/osm2pgsql_recommendation.py index 25d53e7..302575e 100644 --- a/docker/osm2pgsql_recommendation.py +++ b/docker/osm2pgsql_recommendation.py @@ -88,7 +88,7 @@ def get_recommended_script(system_ram_gb: float, LOGGER.debug(f'Generic command to run: {osm2pgsql_cmd}') # Replace generic connection string with specific conn string - conn_string = db.connection_string() + conn_string = db.get_db_conn_string() osm2pgsql_cmd = osm2pgsql_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}') # Warning: Do not print() this string any more! Includes password return osm2pgsql_cmd diff --git a/docker/pgbouncer.py b/docker/pgbouncer.py index fc350b7..0826c42 100644 --- a/docker/pgbouncer.py +++ b/docker/pgbouncer.py @@ -22,10 +22,11 @@ auth_file = /etc/pgbouncer/userlist.txt logfile = /var/log/pgbouncer/pgbouncer.log pidfile = /var/run/pgbouncer/pgbouncer.pid -admin_users = postgres +admin_users = {pg_user} max_client_conn = 300 default_pool_size = {pgbouncer_pool_size} max_prepared_statements = 500 +pool_mode = transaction """ """str : Shell of pgbouncer.ini used to configure pgBouncer.""" @@ -59,6 +60,7 @@ def setup(pgbouncer_pool_size: int): pgbouncer_ini = PGBOUNCER_INI_TEMPLATE.format(pg_host=db_parts['pg_host'], pg_port=db_parts['pg_port'], pg_db=db_parts['pg_db'], + pg_user=db_parts['pg_user'], pgbouncer_pool_size=pgbouncer_pool_size ) From c3b9c6d500ec56ba305cac0d5d91201e23575a1f Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Wed, 3 Jan 2024 20:28:35 -0700 Subject: [PATCH 10/11] Resolve rebase conflict --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e847307..30cebe8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ LABEL maintainer="PgOSM Flex - https://github.com/rustprooflabs/pgosm-flex" ARG OSM2PGSQL_BRANCH=master ARG BOUNCER_VERSION=1.21.0 +ARG OSM2PGSQL_REPO=https://github.com/openstreetmap/osm2pgsql.git RUN apt-get update \ # Removed upgrade per https://github.com/rustprooflabs/pgosm-flex/issues/322 @@ -46,7 +47,7 @@ RUN curl -o /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz -L https://pgbouncer.githu && chown -R postgres /var/run/pgbouncer /etc/pgbouncer /var/log/pgbouncer/ WORKDIR /tmp -RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \ +RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH $OSM2PGSQL_REPO \ && mkdir osm2pgsql/build \ && cd osm2pgsql/build \ && cmake .. -D USE_PROJ_LIB=6 \ From 82a137b3a38432bd38f8243b722d1d7716c4b4a4 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 20 Jan 2024 12:58:03 -0700 Subject: [PATCH 11/11] Minor update to about page in docs --- docs/src/readme.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/src/readme.md b/docs/src/readme.md index ebc058a..391e8db 100644 --- a/docs/src/readme.md +++ b/docs/src/readme.md @@ -50,9 +50,10 @@ Minimum versions supported: This project will attempt, but not guarantee, to support PostgreSQL 12 until it reaches it EOL support. -The osm2pgsql version requirement is no longer relevant. Users of the Docker image +The Docker image is pinned to osm2pgsql's `master` branch. Users of the Docker image naturally use the latest version of osm2pgsql at the time the Docker image was created. +This project has not been officially tested on Windows. ## Minimum Hardware @@ -67,3 +68,8 @@ SD, etc), however the [osm2pgsql-tuner](https://github.com/rustprooflabs/osm2pgsql-tuner) package used to determine the best osm2pgsql command assumes fast SSDs. +## RustProof Labs project + +PgOSM Flex is a RustProof Labs project developed and maintained by Ryan Lambert. +See the [RustProof Labs blog](https://blog.rustprooflabs.com/category/pgosm-flex) +for more resources and examples of using PgOSM Flex.