Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

About docs #374

Closed
wants to merge 11 commits into from
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ output/
**/__pycache__
pgosm-data/*
docs/book/*
.vscode/*
.vscode/*
19 changes: 18 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ FROM postgis/postgis:16-3.4
LABEL maintainer="PgOSM Flex - https://github.com/rustprooflabs/pgosm-flex"

ARG OSM2PGSQL_BRANCH=master
ARG BOUNCER_VERSION=1.21.0
ARG OSM2PGSQL_REPO=https://github.com/openstreetmap/osm2pgsql.git

RUN apt-get update \
# Removed upgrade per https://github.com/rustprooflabs/pgosm-flex/issues/322
Expand All @@ -18,6 +20,7 @@ RUN apt-get update \
curl unzip \
postgresql-16-pgrouting \
nlohmann-json3-dev \
pkg-config libevent-2.1-7 libevent-dev libudns-dev \
&& rm -rf /var/lib/apt/lists/*

RUN wget https://luarocks.org/releases/luarocks-3.9.2.tar.gz \
Expand All @@ -32,9 +35,19 @@ RUN curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
RUN luarocks install inifile
RUN luarocks install luasql-postgres PGSQL_INCDIR=/usr/include/postgresql/

# pgBouncer implementation based on: https://github.com/edoburu/docker-pgbouncer/blob/master/Dockerfile
RUN curl -o /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz -L https://pgbouncer.github.io/downloads/files/$BOUNCER_VERSION/pgbouncer-$BOUNCER_VERSION.tar.gz \
&& cd /tmp \
&& tar xvfz /tmp/pgbouncer-$BOUNCER_VERSION.tar.gz \
&& cd pgbouncer-$BOUNCER_VERSION \
&& ./configure --prefix=/usr --with-udns \
&& make \
&& cp pgbouncer /usr/bin \
&& mkdir -p /etc/pgbouncer /var/log/pgbouncer /var/run/pgbouncer \
&& chown -R postgres /var/run/pgbouncer /etc/pgbouncer /var/log/pgbouncer/

WORKDIR /tmp
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreetmap/osm2pgsql.git \
RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH $OSM2PGSQL_REPO \
&& mkdir osm2pgsql/build \
&& cd osm2pgsql/build \
&& cmake .. -D USE_PROJ_LIB=6 \
Expand All @@ -45,6 +58,7 @@ RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH https://github.com/openstreet
libexpat1-dev zlib1g-dev \
libbz2-dev libproj-dev \
curl \
pkg-config libevent-dev \
&& apt autoremove -y \
&& cd /tmp && rm -r /tmp/osm2pgsql

Expand All @@ -61,3 +75,6 @@ WORKDIR /app
COPY . ./

RUN pip install --upgrade pip && pip install -r requirements.txt

# Expose pgBouncer port
EXPOSE 6432
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ docker-exec-default: build-run-docker
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/

# Needed for pgbouncer
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/


# run typical processing using built-in file handling
docker exec -it \
-e POSTGRES_PASSWORD=mysecretpassword \
Expand Down Expand Up @@ -167,11 +172,15 @@ docker-exec-region: build-run-docker
# Needed for unit-tests
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /app/docker/
# Needed for pgbouncer
docker exec -it pgosm \
chown $(CURRENT_UID):$(CURRENT_GID) /etc/pgbouncer/


docker exec -it pgosm \
sed -i 's/district-of-columbia/$(REGION_FILE_NAME)/' /app/output/$(REGION_FILE_NAME)-$(TODAY).osm.pbf.md5

# process DC file, pretending its a region instead of subregion
# process DC file, pretending it's a region instead of subregion
docker exec -it \
-e POSTGRES_PASSWORD=mysecretpassword \
-e POSTGRES_USER=postgres \
Expand Down
70 changes: 58 additions & 12 deletions docker/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
LOGGER = logging.getLogger('pgosm-flex')


def connection_string(admin: bool=False) -> str:
def connection_string(admin: bool=False, pgbouncer: bool=False,
pgbouncer_admin: bool=False) -> str:
"""Returns connection string to `db_name`.

Env vars for user/password defined by Postgres docker image.
Expand All @@ -30,6 +31,15 @@ def connection_string(admin: bool=False) -> str:
Default False. Set to True to connect to admin database, currently
hard-coded to `postgres`

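pgbouncer : boolean
Default False. Set to True to build the connection string with the
pgBouncer host and port instead of the direct Postgres host and port.
FIXME: Decide whether this should default to True.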

pgbouncer_admin : boolean
Default False
Connects to pgbouncer database (must be pgbouncer connection) for admin
functionality, e.g. `SHUTDOWN;`

Returns
--------------------------
conn_string : str
Expand All @@ -39,9 +49,19 @@ def connection_string(admin: bool=False) -> str:
pg_details = pg_conn_parts()
pg_user = pg_details['pg_user']
pg_pass = pg_details['pg_pass']
pg_host = pg_details['pg_host']

pg_db = pg_details['pg_db']
pg_port = pg_details['pg_port']

if pgbouncer_admin and not pgbouncer:
raise ValueError('Cannot connect to pgbouncer_admin on non-pgbouncer connection.')

if pgbouncer:
pg_host = pg_details['pg_host_pgbouncer']
pg_port = pg_details['pg_port_pgbouncer']
else:
pg_host = pg_details['pg_host']
pg_port = pg_details['pg_port']


if admin:
if pg_host == 'localhost':
Expand All @@ -54,6 +74,11 @@ def connection_string(admin: bool=False) -> str:
else:
db_name = pg_db

# Just overwriting instead of working into above logic. Probably a good
# sign this logic should be improved...
if pgbouncer_admin:
db_name = 'pgbouncer'

if pg_pass is None:
conn_string = f'postgresql://{pg_user}@{pg_host}:{pg_port}/{db_name}{app_str}'
else:
Expand All @@ -62,6 +87,24 @@ def connection_string(admin: bool=False) -> str:
return conn_string


def get_db_conn_string() -> str:
    """Returns non-admin database connection string, either pgBouncer or
    direct to Postgres depending on run-time configuration.

    Reads the `USE_PGBOUNCER` environment variable.  When its value is
    exactly `'true'` the value of `PGOSM_CONN_PGBOUNCER` is returned.  For any
    other value, or when `USE_PGBOUNCER` is not set at all, the value of
    `PGOSM_CONN` is returned.

    Returns
    ----------------------------
    conn_string : str

    Raises
    ----------------------------
    KeyError
        When the environment variable holding the selected connection string
        (`PGOSM_CONN_PGBOUNCER` or `PGOSM_CONN`) is not set.
    """
    # Same named logger the module-level LOGGER points at.
    logger = logging.getLogger('pgosm-flex')

    # A missing flag falls back to the direct connection instead of raising
    # KeyError, so this is safe to call when USE_PGBOUNCER has not been set
    # or has already been removed from the environment.
    use_pgbouncer = os.environ.get('USE_PGBOUNCER', 'false') == 'true'

    if use_pgbouncer:
        logger.debug('Using pgBouncer connection string')
        conn_string = os.environ['PGOSM_CONN_PGBOUNCER']
    else:
        logger.debug('Using direct to Postgres connection string (non-admin)')
        conn_string = os.environ['PGOSM_CONN']

    return conn_string


def pg_conn_parts() -> dict:
"""Returns dictionary of connection parts based on environment variables
if they exist.
Expand Down Expand Up @@ -121,7 +164,10 @@ def pg_conn_parts() -> dict:
'pg_pass': pg_pass,
'pg_host': pg_host,
'pg_port': pg_port,
'pg_db': pg_db}
'pg_db': pg_db,
'pg_host_pgbouncer': 'localhost',
'pg_port_pgbouncer': 6432
}

return pg_details

Expand Down Expand Up @@ -278,7 +324,7 @@ def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info,
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(sql_raw, params=params)
import_id = cur.fetchone()[0]
Expand Down Expand Up @@ -439,7 +485,7 @@ def run_deploy_file(db_path: str, sql_filename: str, schema_name: str,

deploy_sql = deploy_sql.format(schema_name=schema_name)

with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(deploy_sql)
LOGGER.debug(f'Ran SQL in {sql_filename}')
Expand Down Expand Up @@ -507,7 +553,7 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
"""
sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();'

conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
LOGGER.info('Building nested polygons... (this can take a while)')
output = subprocess.run(cmds,
Expand All @@ -532,7 +578,7 @@ def osm2pgsql_replication_start():
# This use of append applies to both osm2pgsql --append and osm2pgsql-replication, not renaming from "append"
sql_raw = 'CALL osm.append_data_start();'

with get_db_conn(conn_string=connection_string()) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor()
cur.execute(sql_raw)

Expand All @@ -552,7 +598,7 @@ def osm2pgsql_replication_finish(skip_nested):
LOGGER.info('Finishing Replication, including nested polygons')
sql_raw = 'CALL osm.append_data_finish(skip_nested := False );'

conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
LOGGER.info('Finishing Replication')
output = subprocess.run(cmds,
Expand All @@ -578,7 +624,7 @@ def run_pg_dump(export_path, skip_qgis_style):
skip_qgis_style : bool
"""
logger = logging.getLogger('pgosm-flex')
conn_string = os.environ['PGOSM_CONN']
conn_string = get_db_conn_string()
schema_name = 'osm'

if skip_qgis_style:
Expand Down Expand Up @@ -635,7 +681,7 @@ def log_import_message(import_id, msg, schema_name):
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
params = {'import_id': import_id, 'msg': msg}
cur = conn.cursor()
cur.execute(sql_raw, params=params)
Expand Down Expand Up @@ -664,7 +710,7 @@ def get_prior_import(schema_name: str) -> dict:
;
"""
sql_raw = sql_raw.format(schema_name=schema_name)
with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
with get_db_conn(conn_string=get_db_conn_string()) as conn:
cur = conn.cursor(row_factory=psycopg.rows.dict_row)
results = cur.execute(sql_raw).fetchone()

Expand Down
11 changes: 10 additions & 1 deletion docker/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def verify_checksum(md5_file: str, path: str):


def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
layerset_path, replication, schema_name):
layerset_path, replication, schema_name, use_pgbouncer):
"""Sets environment variables needed by PgOSM Flex. Also creates DB
record in `osm.pgosm_flex` table.

Expand All @@ -111,6 +111,8 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
replication : bool
Indicates when osm2pgsql-replication is used
schema_name : str
use_pgbouncer : bool
Indicates if pgBouncer connection should be set up and used
"""
logger = logging.getLogger('pgosm-flex')
logger.debug('Ensuring env vars are not set from prior run')
Expand Down Expand Up @@ -139,10 +141,16 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
os.environ['PGOSM_CONN'] = db.connection_string()
# Connection to DB for admin purposes, e.g. drop/create main database
os.environ['PGOSM_CONN_PG'] = db.connection_string(admin=True)
# pgBouncer connection
os.environ['PGOSM_CONN_PGBOUNCER'] = db.connection_string(pgbouncer=True)

pgosm_region = get_region_combined(region, subregion)
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')

if use_pgbouncer:
os.environ['USE_PGBOUNCER'] = 'true'
else:
os.environ['USE_PGBOUNCER'] = 'false'


def get_region_combined(region: str, subregion: str) -> str:
Expand Down Expand Up @@ -223,3 +231,4 @@ def unset_env_vars():
os.environ.pop('PGOSM_CONN', None)
os.environ.pop('PGOSM_CONN_PG', None)
os.environ.pop('SCHEMA_NAME', None)
os.environ.pop('USE_PGBOUNCER', None)
2 changes: 1 addition & 1 deletion docker/osm2pgsql_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_recommended_script(system_ram_gb: float,
LOGGER.debug(f'Generic command to run: {osm2pgsql_cmd}')

# Replace generic connection string with specific conn string
conn_string = db.connection_string()
conn_string = db.get_db_conn_string()
osm2pgsql_cmd = osm2pgsql_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}')
# Warning: Do not print() this string any more! Includes password
return osm2pgsql_cmd
Loading
Loading