Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flex: Add support for configuring table indexes in the Lua config #1847

Merged
merged 1 commit into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions flex-config/indexes.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
-- This config example file is released into the Public Domain.

-- This file shows some options around index creation.

local tables = {}

-- An explicitly empty "indexes" table means: create no index on this table.
-- (The index for the id column is still built if osm2pgsql needs that for
-- updates.)
local pois_columns = {
    { column = 'tags', type = 'jsonb' },
    { column = 'geom', type = 'point', not_null = true },
}

tables.pois = osm2pgsql.define_table({
    name = 'pois',
    ids = { type = 'node', id_column = 'node_id' },
    columns = pois_columns,
    indexes = {},
})

-- If the "indexes" field is not set at all, you get the default: a GIST index
-- on the only (or first) geometry column.
local ways_columns = {
    { column = 'tags', type = 'jsonb' },
    { column = 'geom', type = 'linestring', not_null = true },
}

tables.ways = osm2pgsql.define_way_table('ways', ways_columns)

-- Setting "indexes" explicitly: two indexes are created, one on the polygon
-- geometry ("geom") and one on the center point geometry ("center"). Both use
-- the GIST method.
local polygons_columns = {
    { column = 'tags', type = 'jsonb' },
    { column = 'geom', type = 'geometry', not_null = true },
    { column = 'center', type = 'point', not_null = true },
}

local polygons_indexes = {
    { column = 'geom', method = 'gist' },
    { column = 'center', method = 'gist' },
}

tables.polygons = osm2pgsql.define_area_table('polygons', polygons_columns, {
    indexes = polygons_indexes,
})

-- Indexes are not limited to geometry columns: any column can be indexed, and
-- any index method available in your PostgreSQL version can be used. To get a
-- list of methods use:
-- "SELECT amname FROM pg_catalog.pg_am WHERE amtype = 'i';"
local pubs_columns = {
    { column = 'name', type = 'text' },
    { column = 'geom', type = 'geometry', not_null = true },
}

local pubs_indexes = {
    { column = 'geom', method = 'gist' },
    { column = 'name', method = 'btree' },
}

tables.pubs = osm2pgsql.define_node_table('pubs', pubs_columns, {
    indexes = pubs_indexes,
})

-- An index can span multiple columns: specify an array of column names as
-- the "column". An index can also have a where condition. Note that the
-- content of the where condition is not checked but given "as is" to the
-- database, so you have to make sure it makes sense.
local roads_columns = {
    { column = 'name', type = 'text' },
    { column = 'type', type = 'text' },
    { column = 'ref', type = 'text' },
    { column = 'geom', type = 'linestring', not_null = true },
}

local roads_indexes = {
    { column = { 'name', 'ref' }, method = 'btree' },
    { column = { 'geom' }, method = 'gist', where = "type='primary'" },
}

tables.roads = osm2pgsql.define_way_table('roads', roads_columns, {
    indexes = roads_indexes,
})

-- An index does not have to be on a column (or columns); it can also be
-- defined on an expression.
local postboxes_columns = {
    { column = 'operator', type = 'text' },
    { column = 'geom', type = 'point', not_null = true },
}

local postboxes_indexes = {
    { expression = 'lower(operator)', method = 'btree' },
}

tables.postboxes = osm2pgsql.define_node_table('postboxes', postboxes_columns, {
    indexes = postboxes_indexes,
})

-- Tag keys that hint at an area. They are looked up in exactly this order.
local area_tag_keys = {
    'aeroway',
    'amenity',
    'building',
    'harbour',
    'historic',
    'landuse',
    'leisure',
    'man_made',
    'military',
    'natural',
    'office',
    'place',
    'power',
    'public_transport',
    'shop',
    'sport',
    'tourism',
    'water',
    'waterway',
    'wetland',
    'abandoned:aeroway',
    'abandoned:amenity',
    'abandoned:building',
    'abandoned:landuse',
    'abandoned:power',
    'area:highway',
}

-- Helper function that looks at the tags and decides if this is possibly
-- an area.
--
-- An explicit area=yes gives true and an explicit area=no gives false.
-- Otherwise the result is the value of the first key from the list above
-- that is set (which is truthy), or a falsy value (usually nil) if none of
-- them is set.
function has_area_tags(tags)
    local area = tags.area
    if area == 'yes' then
        return true
    elseif area == 'no' then
        return false
    end

    local value
    for _, key in ipairs(area_tag_keys) do
        value = tags[key]
        if value then
            return value
        end
    end

    -- Nothing matched: hand back the result of the last lookup.
    return value
end

-- Node handler: every node is stored in "pois". Nodes tagged amenity=pub are
-- additionally stored in "pubs" and nodes tagged amenity=post_box in
-- "postboxes". All rows share the same point geometry.
function osm2pgsql.process_node(object)
    local point = object:as_point()
    local tags = object.tags

    tables.pois:insert({ tags = tags, geom = point })

    local amenity = tags.amenity
    if amenity == 'pub' then
        tables.pubs:insert({ name = tags.name, geom = point })
    elseif amenity == 'post_box' then
        tables.postboxes:insert({ operator = tags.operator, geom = point })
    end
end

-- Way handler: closed ways with area tags are stored in "polygons" (with the
-- polygon and its centroid), all other ways are stored in "ways" as
-- linestrings. Independently of that, every way with a highway tag is also
-- stored in "roads".
function osm2pgsql.process_way(object)
    local tags = object.tags

    if object.is_closed and has_area_tags(tags) then
        local polygon = object:as_polygon()
        tables.polygons:insert({
            tags = tags,
            geom = polygon,
            center = polygon:centroid(),
        })
    else
        tables.ways:insert({
            tags = tags,
            geom = object:as_linestring(),
        })
    end

    local highway = tags.highway
    if highway then
        tables.roads:insert({
            type = highway,
            name = tags.name,
            ref = tags.ref,
            geom = object:as_linestring(),
        })
    end
end

1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ if (WITH_LUA)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/init.lua" LUA_INIT_CODE)
configure_file(lua-init.cpp.in lua-init.cpp @ONLY)
target_sources(osm2pgsql_lib PRIVATE
flex-index.cpp
flex-table.cpp
flex-table-column.cpp
flex-lua-geom.cpp
Expand Down
66 changes: 66 additions & 0 deletions src/flex-index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* SPDX-License-Identifier: GPL-2.0-or-later
*
* This file is part of osm2pgsql (https://osm2pgsql.org/).
*
* Copyright (C) 2006-2022 by the osm2pgsql developer community.
* For a full list of authors see the git log.
*/

#include "flex-index.hpp"
#include "util.hpp"

/**
 * Render the index columns as a single string.
 *
 * The column names are joined by util::join() called with ',' , '"', '('
 * and ')'. NOTE(review): presumably this yields a double-quoted,
 * comma-separated list in parentheses -- confirm against util::join().
 */
std::string flex_index_t::columns() const
{
    std::string column_list = util::join(m_columns, ',', '"', '(', ')');
    return column_list;
}

/**
 * Render the INCLUDE columns as a single string.
 *
 * Uses the same util::join() arguments as columns(), applied to the
 * include-column list.
 */
std::string flex_index_t::include_columns() const
{
    std::string column_list =
        util::join(m_include_columns, ',', '"', '(', ')');
    return column_list;
}

std::string
flex_index_t::create_index(std::string const &qualified_table_name) const
{
util::string_joiner_t joiner{' '};
joiner.add("CREATE");

if (m_is_unique) {
joiner.add("UNIQUE");
}

joiner.add("INDEX ON");
joiner.add(qualified_table_name);

joiner.add("USING");
joiner.add(m_method);

if (m_expression.empty()) {
joiner.add(columns());
} else {
joiner.add('(' + m_expression + ')');
}

if (!m_include_columns.empty()) {
joiner.add("INCLUDE");
joiner.add(include_columns());
}

if (m_fillfactor != 0) {
joiner.add("WITH");
joiner.add("(fillfactor = {})"_format(m_fillfactor));
}

if (!m_tablespace.empty()) {
joiner.add("TABLESPACE");
joiner.add("\"" + m_tablespace + "\"");
}

if (!m_where_condition.empty()) {
joiner.add("WHERE");
joiner.add(m_where_condition);
}

return joiner();
}
100 changes: 100 additions & 0 deletions src/flex-index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#ifndef OSM2PGSQL_FLEX_INDEX_HPP
#define OSM2PGSQL_FLEX_INDEX_HPP

/**
* SPDX-License-Identifier: GPL-2.0-or-later
*
* This file is part of osm2pgsql (https://osm2pgsql.org/).
*
* Copyright (C) 2006-2022 by the osm2pgsql developer community.
* For a full list of authors see the git log.
*/

#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/**
* This class represents a database index.
*/
class flex_index_t
{
public:
/// Create an index definition that uses the specified index method.
explicit flex_index_t(std::string method)
: m_method(std::move(method))
{}

/// The index method this index was constructed with.
std::string const &method() const noexcept
{
    return m_method;
}

std::string columns() const;

/// Set columns (single-column version)
void set_columns(std::string const &columns)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's rather 'add_column'?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not intended for adding columns one at a time, but an overload of the void set_columns(std::vector<std::string> const &columns) function so that we don't have to create a vector if all we want is to add a single column.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an assert.

{
assert(m_columns.empty());
m_columns.push_back(columns);
}

/**
 * Set columns (multi-column version).
 *
 * Replaces any columns set before with a copy of the specified list.
 */
void set_columns(std::vector<std::string> const &columns)
{
    m_columns = columns;
}

std::string include_columns() const;

/**
 * Set the columns for the INCLUDE clause of the index.
 *
 * Replaces any include columns set before with a copy of the specified
 * list.
 */
void set_include_columns(std::vector<std::string> const &columns)
{
    m_include_columns = columns;
}

/// The index expression (empty if none was set).
std::string const &expression() const noexcept
{
    return m_expression;
}

/**
 * Set the expression to build the index on.
 *
 * If the expression is not empty, create_index() uses it (wrapped in
 * parentheses) instead of the column list.
 */
void set_expression(std::string expression)
{
    m_expression = std::move(expression);
}

/// The tablespace for this index (empty if none was set).
std::string const &tablespace() const noexcept
{
    return m_tablespace;
}

/**
 * Set the tablespace for this index.
 *
 * The name is stored as given; create_index() puts it into double quotes
 * in the TABLESPACE clause.
 */
void set_tablespace(std::string tablespace)
{
    m_tablespace = std::move(tablespace);
}

/// The WHERE condition of this index (empty if none was set).
std::string const &
where_condition() const noexcept
{
    return m_where_condition;
}

/**
 * Set the WHERE condition for a partial index.
 *
 * The condition is stored as given; create_index() adds it verbatim after
 * the WHERE keyword.
 */
void set_where_condition(std::string where_condition)
{
    m_where_condition = std::move(where_condition);
}

/**
 * Set the fillfactor for this index.
 *
 * \param fillfactor Value in the range 10 to 100 (inclusive).
 * \throws std::runtime_error if the value is outside that range.
 */
void set_fillfactor(uint8_t fillfactor)
{
    bool const in_range = fillfactor >= 10 && fillfactor <= 100;
    if (!in_range) {
        throw std::runtime_error{"Fillfactor must be between 10 and 100."};
    }
    m_fillfactor = fillfactor;
}

/// Is this a UNIQUE index?
bool is_unique() const noexcept
{
    return m_is_unique;
}

/// Mark this index as UNIQUE (or not).
void set_is_unique(bool unique) noexcept
{
    m_is_unique = unique;
}

std::string create_index(std::string const &qualified_table_name) const;

private:
std::vector<std::string> m_columns;
std::vector<std::string> m_include_columns;
std::string m_method;
std::string m_expression;
std::string m_tablespace;
std::string m_where_condition;
uint8_t m_fillfactor = 0;
bool m_is_unique = false;

}; // class flex_index_t

#endif // OSM2PGSQL_FLEX_INDEX_HPP
24 changes: 15 additions & 9 deletions src/flex-table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ std::string flex_table_t::build_sql_create_id_index() const
full_name(), id_column_names(), tablespace_clause(index_tablespace()));
}

/**
 * Append a new index using the specified method to this table.
 *
 * \returns Reference to the newly added index so the caller can configure
 *          it further.
 */
flex_index_t &flex_table_t::add_index(std::string method)
{
    m_indexes.emplace_back(std::move(method));
    return m_indexes.back();
}

void table_connection_t::connect(std::string const &conninfo)
{
assert(!m_db_connection);
Expand Down Expand Up @@ -367,15 +372,16 @@ void table_connection_t::stop(bool updateable, bool append)
}
}

if (table().has_geom_column()) {
log_info("Creating geometry index on table '{}'...", table().name());

// Use fillfactor 100 for un-updateable imports
m_db_connection->exec(
R"(CREATE INDEX ON {} USING GIST ("{}") {} {})"_format(
table().full_name(), table().geom_column().name(),
(updateable ? "" : "WITH (fillfactor = 100)"),
tablespace_clause(table().index_tablespace())));
if (table().indexes().empty()) {
log_info("No indexes to create on table '{}'.", table().name());
} else {
for (auto const &index : table().indexes()) {
log_info("Creating index on table '{}' {}..."_format(
table().name(), index.columns()));
auto const sql = index.create_index(
qualified_name(table().schema(), table().name()));
m_db_connection->exec(sql);
}
}

if (updateable && table().has_id_column()) {
Expand Down
Loading