Skip to content

Commit 6e45862

Browse files
committed
Remove support for non-bucket index on middle way table
Only the bucket index is supported now; it has been the default for a long time and works much better than the old one. This commit also removes the command line option --middle-way-node-index-id-shift. The id shift cannot be changed any more; it is hardcoded to 5, which was the default. This is a potentially breaking change: Users with incompatible indexes have to do a reimport (or at least create the new index).
1 parent 0de33f5 commit 6e45862

File tree

5 files changed

+25
-90
lines changed

5 files changed

+25
-90
lines changed

man/osm2pgsql.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,6 @@ mandatory for short options too.
173173
database user. By default the schema set with `--schema` is used, or
174174
`public` if that is not set.
175175

176-
\--middle-way-node-index-id-shift=SHIFT
177-
: Set ID shift for way node bucket index in middle. Experts only. See
178-
documentation for details.
179-
180176
\--middle-with-nodes
181177
: Used together with the **new** middle database format when a flat nodes
182178
file is used to force storing nodes with tags in the database, too.

src/command-line-parser.cpp

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,8 @@ void parse_expire_tiles_param(char const *arg, uint32_t *expire_tiles_zoom_min,
136136
void check_options_non_slim(CLI::App const &app)
137137
{
138138
std::vector<std::string> const slim_options = {
139-
"--cache",
140-
"--middle-schema",
141-
"--middle-with-nodes",
142-
"--middle-way-node-index-id-shift",
143-
"--tablespace-slim-data",
144-
"--tablespace-slim-index"};
139+
"--cache", "--middle-schema", "--middle-with-nodes",
140+
"--tablespace-slim-data", "--tablespace-slim-index"};
145141

146142
for (auto const &opt : slim_options) {
147143
if (app.count(opt) > 0) {
@@ -575,13 +571,6 @@ options_t parse_command_line(int argc, char *argv[])
575571
->description("Disable concurrent index creation.")
576572
->group("Advanced options");
577573

578-
// --middle-way-node-index-id-shift
579-
app.add_option("--middle-way-node-index-id-shift",
580-
options.way_node_index_id_shift)
581-
->description("Set ID shift for bucket index.")
582-
->type_name("N")
583-
->group("Advanced options");
584-
585574
// --number-processes
586575
app.add_option("--number-processes", options.num_procs)
587576
// The threads will open up database connections which will

src/middle-pgsql.cpp

Lines changed: 23 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,6 @@
4646

4747
namespace {
4848

49-
bool check_bucket_index(pg_conn_t const *db_connection,
50-
std::string const &prefix)
51-
{
52-
auto const res =
53-
db_connection->exec("SELECT relname FROM pg_class"
54-
" WHERE relkind='i'"
55-
" AND relname = '{}_ways_nodes_bucket_idx'",
56-
prefix);
57-
return res.num_tuples() > 0;
58-
}
59-
6049
void send_id_list(pg_conn_t const &db_connection,
6150
std::string const &table, idlist_t const &ids)
6251
{
@@ -97,7 +86,7 @@ std::string build_sql(options_t const &options, std::string const &templ)
9786
fmt::arg("using_tablespace", using_tablespace),
9887
fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)),
9988
fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)),
100-
fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift),
89+
fmt::arg("way_node_index_id_shift", 5),
10190
fmt::arg("attribute_columns_definition",
10291
options.extra_attributes ? " created timestamp with time zone,"
10392
" version int4,"
@@ -663,17 +652,13 @@ void middle_pgsql_t::get_node_parents(idlist_t const &changed_nodes,
663652

664653
queries.emplace_back("ANALYZE osm2pgsql_changed_nodes");
665654

666-
bool const has_bucket_index =
667-
check_bucket_index(&m_db_connection, m_options->prefix);
668-
669-
if (has_bucket_index) {
670-
// The query to get the parent ways of changed nodes is "hidden"
671-
// inside a PL/pgSQL function so that the query planner only sees
672-
// a single node id that is being queried for. If we ask for all
673-
// nodes at the same time the query planner sometimes thinks it is
674-
// better to do a full table scan which totally destroys performance.
675-
// This is due to the PostgreSQL statistics on ARRAYs being way off.
676-
queries.emplace_back(R"(
655+
// The query to get the parent ways of changed nodes is "hidden"
656+
// inside a PL/pgSQL function so that the query planner only sees
657+
// a single node id that is being queried for. If we ask for all
658+
// nodes at the same time the query planner sometimes thinks it is
659+
// better to do a full table scan which totally destroys performance.
660+
// This is due to the PostgreSQL statistics on ARRAYs being way off.
661+
queries.emplace_back(R"(
677662
CREATE OR REPLACE FUNCTION osm2pgsql_find_changed_ways() RETURNS void AS $$
678663
DECLARE
679664
changed_buckets RECORD;
@@ -692,16 +677,8 @@ BEGIN
692677
END;
693678
$$ LANGUAGE plpgsql
694679
)");
695-
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
696-
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
697-
} else {
698-
queries.emplace_back(R"(
699-
INSERT INTO osm2pgsql_changed_ways
700-
SELECT w.id
701-
FROM {schema}"{prefix}_ways" w, osm2pgsql_changed_nodes n
702-
WHERE w.nodes && ARRAY[n.id]
703-
)");
704-
}
680+
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
681+
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
705682

706683
queries.emplace_back(R"(
707684
INSERT INTO osm2pgsql_changed_relations
@@ -1176,7 +1153,7 @@ table_sql sql_for_nodes(middle_pgsql_options const &options)
11761153
return sql;
11771154
}
11781155

1179-
table_sql sql_for_ways(middle_pgsql_options const &options)
1156+
table_sql sql_for_ways()
11801157
{
11811158
table_sql sql{};
11821159

@@ -1200,23 +1177,17 @@ table_sql sql_for_ways(middle_pgsql_options const &options)
12001177
" {users_table_access}"
12011178
" WHERE o.id = ANY($1::int8[])"};
12021179

1203-
if (options.way_node_index_id_shift == 0) {
1204-
sql.create_fw_dep_indexes = {
1205-
"CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
1206-
" WITH (fastupdate = off) {index_tablespace}"};
1207-
} else {
1208-
sql.create_fw_dep_indexes = {
1209-
"CREATE OR REPLACE FUNCTION"
1210-
" {schema}\"{prefix}_index_bucket\"(int8[])"
1211-
" RETURNS int8[] AS $$"
1212-
" SELECT ARRAY(SELECT DISTINCT"
1213-
" unnest($1) >> {way_node_index_id_shift})"
1214-
"$$ LANGUAGE SQL IMMUTABLE",
1215-
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
1216-
" ON {schema}\"{prefix}_ways\""
1217-
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
1218-
" WITH (fastupdate = off) {index_tablespace}"};
1219-
}
1180+
sql.create_fw_dep_indexes = {
1181+
"CREATE OR REPLACE FUNCTION"
1182+
" {schema}\"{prefix}_index_bucket\"(int8[])"
1183+
" RETURNS int8[] AS $$"
1184+
" SELECT ARRAY(SELECT DISTINCT"
1185+
" unnest($1) >> {way_node_index_id_shift})"
1186+
"$$ LANGUAGE SQL IMMUTABLE",
1187+
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
1188+
" ON {schema}\"{prefix}_ways\""
1189+
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
1190+
" WITH (fastupdate = off) {index_tablespace}"};
12201191

12211192
return sql;
12221193
}
@@ -1272,7 +1243,6 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
12721243
m_db_copy(m_copy_thread), m_append(options->append)
12731244
{
12741245
m_store_options.with_attributes = options->extra_attributes;
1275-
m_store_options.way_node_index_id_shift = options->way_node_index_id_shift;
12761246

12771247
if (options->middle_with_nodes) {
12781248
m_store_options.nodes = true;
@@ -1289,15 +1259,8 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
12891259

12901260
log_debug("Mid: pgsql, cache={}", options->cache);
12911261

1292-
bool const has_bucket_index =
1293-
check_bucket_index(&m_db_connection, options->prefix);
1294-
1295-
if (!has_bucket_index && options->append) {
1296-
log_debug("You don't have a bucket index. See manual for details.");
1297-
}
1298-
12991262
m_tables.nodes() = table_desc{*options, sql_for_nodes(m_store_options)};
1300-
m_tables.ways() = table_desc{*options, sql_for_ways(m_store_options)};
1263+
m_tables.ways() = table_desc{*options, sql_for_ways()};
13011264
m_tables.relations() = table_desc{*options, sql_for_relations()};
13021265

13031266
m_users_table = table_desc{*options, sql_for_users(m_store_options)};
@@ -1310,8 +1273,6 @@ void middle_pgsql_t::set_requirements(
13101273
log_debug(" nodes: {}", m_store_options.nodes);
13111274
log_debug(" untagged_nodes: {}", m_store_options.untagged_nodes);
13121275
log_debug(" use_flat_node_file: {}", m_store_options.use_flat_node_file);
1313-
log_debug(" way_node_index_id_shift: {}",
1314-
m_store_options.way_node_index_id_shift);
13151276
log_debug(" with_attributes: {}", m_store_options.with_attributes);
13161277
}
13171278

src/middle-pgsql.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ struct middle_pgsql_options
3939
// Store untagged nodes also (set in addition to nodes=true).
4040
bool untagged_nodes = false;
4141

42-
// Bit shift used in way node index
43-
uint8_t way_node_index_id_shift = 5;
44-
4542
// Use a flat node file
4643
bool use_flat_node_file = false;
4744

src/options.hpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,6 @@ struct options_t
107107

108108
unsigned int num_procs = 1;
109109

110-
/**
111-
* How many bits should the node id be shifted for the way node index?
112-
* The result is a lossy index which is significantly smaller.
113-
* See https://osm2pgsql.org/doc/manual.html#bucket-index-for-slim-mode
114-
* Use 0 to use a classic loss-less GIN index.
115-
*/
116-
uint8_t way_node_index_id_shift = 5;
117-
118110
/**
119111
* Middle database format:
120112
* 0 = non-slim mode, no database middle (ram middle)

0 commit comments

Comments
 (0)