Skip to content

Commit

Permalink
Flex: Add support for configuring table indexes in the Lua config
Browse files Browse the repository at this point in the history
The table definitions now have a new (optional) field called "indexes",
which takes a list of index definitions. If the field is not there, we
fall back to what we did before: a GIST index is created on the only/first
geometry column of the table. To disable indexes, set it to an empty array.

See the flex-config/indexes.lua Lua config for some usage examples.

See osm2pgsql-dev#1780
  • Loading branch information
joto committed Dec 5, 2022
1 parent d8dd191 commit d9e9151
Show file tree
Hide file tree
Showing 12 changed files with 1,017 additions and 12 deletions.
155 changes: 155 additions & 0 deletions flex-config/indexes.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
-- This config example file is released into the Public Domain.

-- This file shows some options around index creation.

-- All tables defined by this config, keyed by a short name.
local tables = {}

-- An explicitly empty "indexes" list means that no index is created on this
-- table. (The index on the id column is still built if osm2pgsql needs it
-- for updates.)
tables.pois = osm2pgsql.define_table({
    name = 'pois',
    ids = {
        type = 'node',
        id_column = 'node_id',
    },
    columns = {
        { column = 'tags', type = 'jsonb' },
        { column = 'geom', type = 'point', not_null = true },
    },
    indexes = {},
})

-- If the "indexes" field is not set at all, you get the default: a GIST index
-- on the only (or first) geometry column.
do
    -- No options table is passed here, so nothing is said about indexes.
    local columns = {
        { column = 'tags', type = 'jsonb' },
        { column = 'geom', type = 'linestring', not_null = true },
    }

    tables.ways = osm2pgsql.define_way_table('ways', columns)
end

-- Setting "indexes" explicitly: Two indexes are created, one on the polygon
-- geometry ("geom"), one on the center point geometry ("center"), both use
-- the GIST method.
do
    local columns = {
        { column = 'tags', type = 'jsonb' },
        { column = 'geom', type = 'geometry', not_null = true },
        { column = 'center', type = 'point', not_null = true },
    }

    -- One GIST index per geometry column.
    local indexes = {
        { method = 'gist', column = 'geom' },
        { method = 'gist', column = 'center' },
    }

    tables.polygons = osm2pgsql.define_area_table('polygons', columns, {
        indexes = indexes,
    })
end

-- You can put an index on any column, not just geometry columns, and use any
-- index method available in your PostgreSQL version. To get a list of methods
-- use: "SELECT amname FROM pg_catalog.pg_am WHERE amtype = 'i';"
do
    local columns = {
        { column = 'name', type = 'text' },
        { column = 'geom', type = 'geometry', not_null = true },
    }

    -- A spatial index on the geometry plus a btree index on the text column.
    local indexes = {
        { method = 'gist', column = 'geom' },
        { method = 'btree', column = 'name' },
    }

    tables.pubs = osm2pgsql.define_node_table('pubs', columns, {
        indexes = indexes,
    })
end

-- You can also create indexes using multiple columns by specifying an array
-- as the "column". And you can add a where condition to the index. Note that
-- the content of the where condition is not checked, but given "as is" to
-- the database. You have to make sure it makes sense.
do
    local columns = {
        { column = 'name', type = 'text' },
        { column = 'type', type = 'text' },
        { column = 'ref', type = 'text' },
        { column = 'geom', type = 'linestring', not_null = true },
    }

    local indexes = {
        -- Multi-column index: "column" is an array of column names.
        { method = 'btree', column = { 'name', 'ref' } },
        -- Index with a where condition; the condition string is handed to
        -- the database unchanged.
        { method = 'gist', column = { 'geom' }, where = "type='primary'" },
    }

    tables.roads = osm2pgsql.define_way_table('roads', columns, {
        indexes = indexes,
    })
end

-- Instead of on a column (or columns) you can define an index on an expression.
do
    local columns = {
        { column = 'operator', type = 'text' },
        { column = 'geom', type = 'point', not_null = true },
    }

    -- The index is built on an expression ("expression") instead of on a
    -- column ("column").
    local indexes = {
        { method = 'btree', expression = 'lower(operator)' },
    }

    tables.postboxes = osm2pgsql.define_node_table('postboxes', columns, {
        indexes = indexes,
    })
end

-- Helper function that looks at the tags and decides if this is possibly
-- an area.
--
-- An explicit "area" tag always wins: area=yes returns true and area=no
-- returns false. Otherwise the result is the value of the first of the tags
-- listed below that is set (a truthy value), or nil if none of them is set.
-- Callers should only rely on the truthiness of the result.
--
-- The function is declared "local" so that it does not leak into the global
-- Lua environment; it is only used from within this file.
local function has_area_tags(tags)
    if tags.area == 'yes' then
        return true
    end
    if tags.area == 'no' then
        return false
    end

    return tags.aeroway
        or tags.amenity
        or tags.building
        or tags.harbour
        or tags.historic
        or tags.landuse
        or tags.leisure
        or tags.man_made
        or tags.military
        or tags.natural
        or tags.office
        or tags.place
        or tags.power
        or tags.public_transport
        or tags.shop
        or tags.sport
        or tags.tourism
        or tags.water
        or tags.waterway
        or tags.wetland
        or tags['abandoned:aeroway']
        or tags['abandoned:amenity']
        or tags['abandoned:building']
        or tags['abandoned:landuse']
        or tags['abandoned:power']
        or tags['area:highway']
end

-- Node callback: every node goes into the "pois" table; pubs and post boxes
-- are additionally written to their own tables.
function osm2pgsql.process_node(object)
    local point = object:as_point()
    local tags = object.tags

    tables.pois:insert({ tags = tags, geom = point })

    local amenity = tags.amenity
    if amenity == 'pub' then
        tables.pubs:insert({ name = tags.name, geom = point })
    elseif amenity == 'post_box' then
        tables.postboxes:insert({ operator = tags.operator, geom = point })
    end
end

-- Way callback: closed ways with area tags are stored as polygons (together
-- with their centroid), all other ways as linestrings. Ways with a "highway"
-- tag are additionally written to the "roads" table.
function osm2pgsql.process_way(object)
    local tags = object.tags

    if not (object.is_closed and has_area_tags(tags)) then
        tables.ways:insert({
            tags = tags,
            geom = object:as_linestring(),
        })
    else
        local polygon = object:as_polygon()
        tables.polygons:insert({
            tags = tags,
            geom = polygon,
            center = polygon:centroid(),
        })
    end

    local highway = tags.highway
    if highway then
        tables.roads:insert({
            type = highway,
            name = tags.name,
            ref = tags.ref,
            geom = object:as_linestring(),
        })
    end
end

1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ if (WITH_LUA)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/init.lua" LUA_INIT_CODE)
configure_file(lua-init.cpp.in lua-init.cpp @ONLY)
target_sources(osm2pgsql_lib PRIVATE
flex-index.cpp
flex-table.cpp
flex-table-column.cpp
flex-lua-geom.cpp
Expand Down
66 changes: 66 additions & 0 deletions src/flex-index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* SPDX-License-Identifier: GPL-2.0-or-later
*
* This file is part of osm2pgsql (https://osm2pgsql.org/).
*
* Copyright (C) 2006-2022 by the osm2pgsql developer community.
* For a full list of authors see the git log.
*/

#include "flex-index.hpp"
#include "util.hpp"

/**
 * Render the index columns as a single string by handing them to
 * util::join() with ',' / '"' / '(' / ')' as arguments. The result is used
 * as-is in the CREATE INDEX statement and in log messages.
 */
std::string flex_index_t::columns() const
{
    constexpr char delimiter = ',';
    constexpr char quote = '"';
    constexpr char open = '(';
    constexpr char close = ')';

    return util::join(m_columns, delimiter, quote, open, close);
}

/**
 * Render the INCLUDE columns as a single string by handing them to
 * util::join() with the same arguments that columns() uses.
 */
std::string flex_index_t::include_columns() const
{
    constexpr char delimiter = ',';
    constexpr char quote = '"';
    constexpr char open = '(';
    constexpr char close = ')';

    return util::join(m_include_columns, delimiter, quote, open, close);
}

std::string
flex_index_t::create_index(std::string const &qualified_table_name) const
{
util::string_joiner_t joiner{' '};
joiner.add("CREATE");

if (m_is_unique) {
joiner.add("UNIQUE");
}

joiner.add("INDEX ON");
joiner.add(qualified_table_name);

joiner.add("USING");
joiner.add(m_method);

if (m_expression.empty()) {
joiner.add(columns());
} else {
joiner.add('(' + m_expression + ')');
}

if (!m_include_columns.empty()) {
joiner.add("INCLUDE");
joiner.add(include_columns());
}

if (m_fillfactor != 0) {
joiner.add("WITH");
joiner.add("(fillfactor = {})"_format(m_fillfactor));
}

if (!m_tablespace.empty()) {
joiner.add("TABLESPACE");
joiner.add("\"" + m_tablespace + "\"");
}

if (!m_where_condition.empty()) {
joiner.add("WHERE");
joiner.add(m_where_condition);
}

return joiner();
}
96 changes: 96 additions & 0 deletions src/flex-index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#ifndef OSM2PGSQL_FLEX_INDEX_HPP
#define OSM2PGSQL_FLEX_INDEX_HPP

/**
* SPDX-License-Identifier: GPL-2.0-or-later
*
* This file is part of osm2pgsql (https://osm2pgsql.org/).
*
* Copyright (C) 2006-2022 by the osm2pgsql developer community.
* For a full list of authors see the git log.
*/

#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/**
 * This class represents a database index.
 *
 * An index has an access method and is defined either on a list of columns
 * or on an expression. Optional settings are the INCLUDE columns, a
 * tablespace, a where condition, a fillfactor and the UNIQUE flag.
 * create_index() turns these settings into an SQL command.
 */
class flex_index_t
{
public:
    /// Create an index description using the specified access method.
    explicit flex_index_t(std::string method) : m_method(std::move(method)) {}

    /// The index access method this index was created with.
    std::string const &method() const noexcept { return m_method; }

    /// The index columns formatted for use in SQL.
    std::string columns() const;

    /**
     * Set a single index column.
     *
     * Like the overload taking a vector this replaces any columns set
     * before, so calling it repeatedly does not silently build up a
     * multi-column index.
     */
    void set_columns(std::string const &columns)
    {
        m_columns.assign(1, columns);
    }

    /// Set the index columns, replacing any columns set before.
    void set_columns(std::vector<std::string> const &columns)
    {
        m_columns = columns;
    }

    /// The INCLUDE columns formatted for use in SQL.
    std::string include_columns() const;

    /// Set the INCLUDE columns, replacing any set before.
    void set_include_columns(std::vector<std::string> const &columns)
    {
        m_include_columns = columns;
    }

    /// The index expression (empty if not set).
    std::string const &expression() const noexcept { return m_expression; }

    void set_expression(std::string expression)
    {
        m_expression = std::move(expression);
    }

    /// The tablespace for the index (empty if not set).
    std::string const &tablespace() const noexcept { return m_tablespace; }

    void set_tablespace(std::string tablespace)
    {
        m_tablespace = std::move(tablespace);
    }

    /// The where condition of the index (empty if not set).
    std::string const &where_condition() const noexcept
    {
        return m_where_condition;
    }

    void set_where_condition(std::string where_condition)
    {
        m_where_condition = std::move(where_condition);
    }

    /**
     * Set the fillfactor.
     *
     * \param fillfactor Value between 10 and 100 (inclusive).
     * \throws std::runtime_error if the value is outside that range. The
     *         stored fillfactor is not changed in that case.
     */
    void set_fillfactor(std::uint8_t fillfactor)
    {
        if (fillfactor < 10 || fillfactor > 100) {
            throw std::runtime_error{"Fillfactor must be between 10 and 100."};
        }
        m_fillfactor = fillfactor;
    }

    bool is_unique() const noexcept { return m_is_unique; }

    void set_is_unique(bool unique) noexcept { m_is_unique = unique; }

    /**
     * Build the SQL command creating this index on the table with the
     * specified (qualified) name.
     */
    std::string create_index(std::string const &qualified_table_name) const;

private:
    std::vector<std::string> m_columns;
    std::vector<std::string> m_include_columns;
    std::string m_method;
    std::string m_expression;
    std::string m_tablespace;
    std::string m_where_condition;

    /// The fillfactor, 0 if it was never set.
    std::uint8_t m_fillfactor = 0;

    bool m_is_unique = false;

}; // class flex_index_t

#endif // OSM2PGSQL_FLEX_INDEX_HPP
24 changes: 15 additions & 9 deletions src/flex-table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ std::string flex_table_t::build_sql_create_id_index() const
full_name(), id_column_names(), tablespace_clause(index_tablespace()));
}

/**
 * Add a new index using the specified access method to this table.
 *
 * \returns Reference to the new index so that the caller can configure it.
 *          The indexes are stored in a std::vector, so the reference can be
 *          invalidated when more indexes are added later.
 */
flex_index_t &flex_table_t::add_index(std::string method)
{
    m_indexes.emplace_back(std::move(method));
    return m_indexes.back();
}

void table_connection_t::connect(std::string const &conninfo)
{
assert(!m_db_connection);
Expand Down Expand Up @@ -367,15 +372,16 @@ void table_connection_t::stop(bool updateable, bool append)
}
}

if (table().has_geom_column()) {
log_info("Creating geometry index on table '{}'...", table().name());

// Use fillfactor 100 for un-updateable imports
m_db_connection->exec(
R"(CREATE INDEX ON {} USING GIST ("{}") {} {})"_format(
table().full_name(), table().geom_column().name(),
(updateable ? "" : "WITH (fillfactor = 100)"),
tablespace_clause(table().index_tablespace())));
if (table().indexes().empty()) {
log_info("No indexes to create on table '{}'.", table().name());
} else {
for (auto const &index : table().indexes()) {
log_info("Creating index on table '{}' {}..."_format(
table().name(), index.columns()));
auto const sql = index.create_index(
qualified_name(table().schema(), table().name()));
m_db_connection->exec(sql);
}
}

if (updateable && table().has_id_column()) {
Expand Down
13 changes: 13 additions & 0 deletions src/flex-table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

#include "db-copy-mgr.hpp"
#include "flex-index.hpp"
#include "flex-table-column.hpp"
#include "pgsql.hpp"
#include "reprojection.hpp"
Expand Down Expand Up @@ -146,6 +147,13 @@ class flex_table_t
return m_has_multiple_geom_columns;
}

/// The indexes defined on this table. Does not include the id index.
std::vector<flex_index_t> const &indexes() const noexcept
{
return m_indexes;
}

flex_index_t &add_index(std::string method);

private:
/// The name of the table
std::string m_name;
Expand All @@ -168,6 +176,11 @@ class flex_table_t
*/
std::vector<flex_table_column_t> m_columns;

/**
* The indexes defined on this table. Does not include the id index.
*/
std::vector<flex_index_t> m_indexes;

/**
* Index of the (first) geometry column in m_columns. Default means no
* geometry column.
Expand Down
Loading

0 comments on commit d9e9151

Please sign in to comment.