Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pgBouncer to funnel connections #243

Closed
wants to merge 12 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -4,4 +4,4 @@ output/
**/__pycache__
pgosm-data/*
docs/book/*
.vscode/*
.vscode/*
19 changes: 18 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@ FROM postgis/postgis:16-3.4
LABEL maintainer="PgOSM Flex - https://github.com/rustprooflabs/pgosm-flex"

ARG OSM2PGSQL_BRANCH=master
ARG BOUNCER_VERSION=1.21.0
ARG OSM2PGSQL_REPO=https://github.com/openstreetmap/osm2pgsql.git

RUN apt-get update \
# Removed upgrade per https://github.com/rustprooflabs/pgosm-flex/issues/322
@@ -18,6 +20,7 @@ RUN apt-get update \
curl unzip \
postgresql-16-pgrouting \
nlohmann-json3-dev \
pkg-config libevent-2.1-7 libevent-dev libudns-dev \
&& rm -rf /var/lib/apt/lists/*

RUN wget https://luarocks.org/releases/luarocks-3.9.2.tar.gz \
@@ -32,9 +35,19 @@ RUN curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
RUN luarocks install inifile
RUN luarocks install luasql-postgres PGSQL_INCDIR=/usr/include/postgresql/

# pgBouncer implementation based on: https://github.com/edoburu/docker-pgbouncer/blob/master/Dockerfile
RUN curl -o /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz -L https://pgbouncer.github.io/downloads/files/$BOUNCER_VERSION/pgbouncer-$BOUNCER_VERSION.tar.gz \
&& cd /tmp \
&& tar xvfz /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz \
&& cd pgbouncer-$BOUNCER_VERSION \
&& ./configure --prefix=/usr --with-udns \
&& make \
&& cp pgbouncer /usr/bin \
&& mkdir -p /etc/pgbouncer /var/log/pgbouncer /var/run/pgbouncer \
&& chown -R postgres /var/run/pgbouncer /etc/pgbouncer /var/log/pgbouncer/

WORKDIR /tmp
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH $OSM2PGSQL_REPO \
&& mkdir osm2pgsql/build \
&& cd osm2pgsql/build \
&& cmake .. -D USE_PROJ_LIB=6 \
@@ -45,6 +58,7 @@ RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreet
libexpat1-dev zlib1g-dev \
libbz2-dev libproj-dev \
curl \
pkg-config libevent-dev \
&& apt autoremove -y \
&& cd /tmp && rm -r /tmp/osm2pgsql

@@ -61,3 +75,6 @@ WORKDIR /app
COPY . ./

RUN pip install --upgrade pip && pip install -r requirements.txt

# Expose pgBouncer port
EXPOSE 6432
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -74,6 +74,11 @@ docker-exec-default: build-run-docker
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/

# Needed for pgbouncer
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/


# run typical processing using built-in file handling
docker exec -it \
-e POSTGRES_PASSWORD=mysecretpassword \
@@ -167,11 +172,15 @@ docker-exec-region: build-run-docker
# Needed for unit-tests
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/
# Needed for pgbouncer
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/


docker exec -it pgosm \
sed -i 's/district-of-columbia/$(REGION_FILE_NAME)/' /app/output/$(REGION_FILE_NAME)-$(TODAY).osm.pbf.md5

# process DC file, pretending its a region instead of subregion
# process DC file, pretending it's a region instead of subregion
docker exec -it \
-e POSTGRES_PASSWORD=mysecretpassword \
-e POSTGRES_USER=postgres \
70 changes: 58 additions & 12 deletions docker/db.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,8 @@
LOGGER = logging.getLogger('pgosm-flex')


def connection_string(admin: bool=False) -> str:
def connection_string(admin: bool=False, pgbouncer: bool=False,
pgbouncer_admin: bool=False) -> str:
"""Returns connection string to `db_name`.

Env vars for user/password defined by Postgres docker image.
@@ -30,6 +31,15 @@ def connection_string(admin: bool=False) -> str:
Default False. Set to True to connect to admin database, currently
hard-coded to `postgres`

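pgbouncer : boolean
Default False. Set to True to build the connection string using the
pgBouncer host and port (`pg_host_pgbouncer` / `pg_port_pgbouncer`)
instead of the direct Postgres host and port.
FIXME: Decide whether this should default to True.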

pgbouncer_admin : boolean
Default False. Set to True to connect to the `pgbouncer` database for
pgBouncer admin functionality, e.g. `SHUTDOWN;`. Only valid together
with `pgbouncer=True`; otherwise a ValueError is raised.

Returns
--------------------------
conn_string : str
@@ -39,9 +49,19 @@ def connection_string(admin: bool=False) -> str:
pg_details = pg_conn_parts()
pg_user = pg_details['pg_user']
pg_pass = pg_details['pg_pass']
pg_host = pg_details['pg_host']

pg_db = pg_details['pg_db']
pg_port = pg_details['pg_port']

if pgbouncer_admin and not pgbouncer:
raise ValueError('Cannot connect to pgbouncer_admin on non-pgbouncer connection.')

if pgbouncer:
pg_host = pg_details['pg_host_pgbouncer']
pg_port = pg_details['pg_port_pgbouncer']
else:
pg_host = pg_details['pg_host']
pg_port = pg_details['pg_port']


if admin:
if pg_host == 'localhost':
@@ -54,6 +74,11 @@ def connection_string(admin: bool=False) -> str:
else:
db_name = pg_db

# Just overwriting instead of working into above logic. Probably a good
# sign this logic should be improved...
if pgbouncer_admin:
db_name = 'pgbouncer'

if pg_pass is None:
conn_string = f'postgresql://{pg_user}@{pg_host}:{pg_port}/{db_name}{app_str}'
else:
@@ -62,6 +87,24 @@ def connection_string(admin: bool=False) -> str:
return conn_string


def get_db_conn_string() -> str:
    """Returns the non-admin database connection string, either via pgBouncer
    or direct to Postgres, depending on run-time configuration.

    The choice is driven by the `USE_PGBOUNCER` environment variable. Only the
    exact value `'true'` selects the pgBouncer connection string. Any other
    value, or the variable not being set at all, selects the direct
    connection string.

    Returns
    ----------------------------
    conn_string : str
        Value of the `PGOSM_CONN_PGBOUNCER` env var when pgBouncer is enabled,
        otherwise the value of the `PGOSM_CONN` env var.

    Raises
    ----------------------------
    KeyError
        If the env var holding the selected connection string is not set.
    """
    # Treat a missing USE_PGBOUNCER flag as "not using pgBouncer" instead of
    # failing with KeyError; direct-to-Postgres is the baseline behavior.
    use_pgbouncer = os.environ.get('USE_PGBOUNCER', 'false') == 'true'

    if use_pgbouncer:
        LOGGER.debug('Using pgBouncer connection string')
        return os.environ['PGOSM_CONN_PGBOUNCER']

    LOGGER.debug('Using direct to Postgres connection string (non-admin)')
    return os.environ['PGOSM_CONN']


def pg_conn_parts() -> dict:
"""Returns dictionary of connection parts based on environment variables
if they exist.
@@ -121,7 +164,10 @@ def pg_conn_parts() -> dict:
'pg_pass': pg_pass,
'pg_host': pg_host,
'pg_port': pg_port,
'pg_db': pg_db}
'pg_db': pg_db,
'pg_host_pgbouncer': 'localhost',
'pg_port_pgbouncer': 6432
}

return pg_details

@@ -278,7 +324,7 @@ def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info,
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(sql_raw, params=params)
import_id = cur.fetchone()[0]
@@ -439,7 +485,7 @@ def run_deploy_file(db_path: str, sql_filename: str, schema_name: str,

deploy_sql = deploy_sql.format(schema_name=schema_name)

with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(deploy_sql)
LOGGER.debug(f'Ran SQL in {sql_filename}')
@@ -507,7 +553,7 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
"""
sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();'

conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
LOGGER.info('Building nested polygons... (this can take a while)')
output = subprocess.run(cmds,
@@ -532,7 +578,7 @@ def osm2pgsql_replication_start():
# This use of append applies to both osm2pgsql --append and osm2pgsql-replication, not renaming from "append"
sql_raw = 'CALL osm.append_data_start();'

with get_db_conn(conn_string=connection_string()) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(sql_raw)

@@ -552,7 +598,7 @@ def osm2pgsql_replication_finish(skip_nested):
LOGGER.info('Finishing Replication, including nested polygons')
sql_raw = 'CALL osm.append_data_finish(skip_nested := False );'

conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
LOGGER.info('Finishing Replication')
output = subprocess.run(cmds,
@@ -578,7 +624,7 @@ def run_pg_dump(export_path, skip_qgis_style):
skip_qgis_style : bool
"""
logger = logging.getLogger('pgosm-flex')
conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
schema_name = 'osm'

if skip_qgis_style:
@@ -635,7 +681,7 @@ def log_import_message(import_id, msg, schema_name):
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
params = {'import_id': import_id, 'msg': msg}
cur = conn.cursor()
cur.execute(sql_raw, params=params)
@@ -664,7 +710,7 @@ def get_prior_import(schema_name: str) -> dict:
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor(row_factory=psycopg.rows.dict_row)
results = cur.execute(sql_raw).fetchone()

11 changes: 10 additions & 1 deletion docker/helpers.py
Original file line number Diff line number Diff line change
@@ -94,7 +94,7 @@ def verify_checksum(md5_file: str, path: str):


def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
layerset_path, replication, schema_name):
layerset_path, replication, schema_name, use_pgbouncer):
"""Sets environment variables needed by PgOSM Flex. Also creates DB
record in `osm.pgosm_flex` table.

@@ -111,6 +111,8 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
replication : bool
Indicates when osm2pgsql-replication is used
schema_name : str
use_pgbouncer : bool
Indicates if pgBouncer connection should be set up and used
"""
logger = logging.getLogger('pgosm-flex')
logger.debug('Ensuring env vars are not set from prior run')
@@ -139,10 +141,16 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
os.environ['PGOSM_CONN'] = db.connection_string()
# Connection to DB for admin purposes, e.g. drop/create main database
os.environ['PGOSM_CONN_PG'] = db.connection_string(admin=True)
# pgBouncer connection
os.environ['PGOSM_CONN_PGBOUNCER'] = db.connection_string(pgbouncer=True)

pgosm_region = get_region_combined(region, subregion)
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')

if use_pgbouncer:
os.environ['USE_PGBOUNCER'] = 'true'
else:
os.environ['USE_PGBOUNCER'] = 'false'


def get_region_combined(region: str, subregion: str) -> str:
@@ -223,3 +231,4 @@ def unset_env_vars():
os.environ.pop('PGOSM_CONN', None)
os.environ.pop('PGOSM_CONN_PG', None)
os.environ.pop('SCHEMA_NAME', None)
os.environ.pop('USE_PGBOUNCER', None)
7 changes: 3 additions & 4 deletions docker/osm2pgsql_recommendation.py
Original file line number Diff line number Diff line change
@@ -75,21 +75,20 @@ def get_recommended_script(system_ram_gb: float,
"""
LOGGER.debug('Generating recommended osm2pgsql command')

rec = tuner.recommendation(system_ram_gb=system_ram_gb,
rec = tuner.Recommendation(system_ram_gb=system_ram_gb,
osm_pbf_gb=osm_pbf_gb,
slim_no_drop=import_mode.slim_no_drop,
append_first_run=import_mode.append_first_run,
ssd=True)

osm2pgsql_cmd = rec.get_osm2pgsql_command(out_format='api',
pbf_path=pbf_filename)
osm2pgsql_cmd = rec.get_osm2pgsql_command(pbf_path=pbf_filename)

osm2pgsql_cmd = osm2pgsql_cmd.replace('~/pgosm-data', output_path)

LOGGER.debug(f'Generic command to run: {osm2pgsql_cmd}')

# Replace generic connection string with specific conn string
conn_string = db.connection_string()
conn_string = db.get_db_conn_string()
osm2pgsql_cmd = osm2pgsql_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}')
# Warning: Do not print() this string any more! Includes password
return osm2pgsql_cmd
Loading