diff --git a/docs/bucket-index.md b/docs/bucket-index.md new file mode 100644 index 000000000..8f853216b --- /dev/null +++ b/docs/bucket-index.md @@ -0,0 +1,88 @@ + +NOTE: This is only available from osm2pgsql version 1.4.0! + +NOTE: The default is still to create the old index for now. + +# Bucket index for slim mode + +Osm2pgsql can use an index for way node lookups in slim mode that needs a lot +less disk space than earlier versions did. For a planet the savings can be +about 200 GB! Lookup times are slightly slower, but this shouldn't be an issue +for most people. + +*If you are not using slim mode and/or not doing updates of your database, this +does not apply to you.* + +For backwards compatibility osm2pgsql will never update an existing database +to the new index. It will keep using the old index. So you do not have to do +anything when upgrading osm2pgsql. + +If you want to use the new index, there are two ways of doing this: The "safe" +way for most users and the "do-it-yourself" way for expert users. Note that +once you have switched to the new index, older versions of osm2pgsql will not work +correctly any more. + +## Update for most users + +NOTE: This does not work yet. Currently the default is still to create the +old type of index. + +If your database was created with an older version of osm2pgsql you might want +to start again from an empty database. Just do a reimport and osm2pgsql will +use the new space-saving index. + +## Update for expert users + +This is only for users who are very familiar with osm2pgsql and PostgreSQL +operation. You can break your osm2pgsql database beyond repair if something +goes wrong here and you might not even notice. + +You can create the index yourself by following these steps: + +Drop the existing index. Replace `{prefix}` by the prefix you are using. +Usually this is `planet_osm`: + +``` +DROP INDEX {prefix}_ways_nodes_idx; +``` + +Create the `{prefix}_index_bucket` function needed for the index. 
Replace +`{way_node_index_id_shift}` by the number of bits you want the id to be +shifted. If you don't have a reason to use something else, use `5`: + +``` +CREATE FUNCTION {prefix}_index_bucket(int8[]) RETURNS int8[] AS $$ + SELECT ARRAY(SELECT DISTINCT unnest($1) >> {way_node_index_id_shift}) +$$ LANGUAGE SQL IMMUTABLE; +``` + +Now you can create the new index. Again, replace `{prefix}` by the prefix +you are using: + +``` +CREATE INDEX {prefix}_ways_nodes_bucket_idx ON {prefix}_ways + USING GIN ({prefix}_index_bucket(nodes)) + WITH (fastupdate = off); +``` + +If you want to create the index in a specific tablespace you can do this: + +``` +CREATE INDEX {prefix}_ways_nodes_bucket_idx ON {prefix}_ways + USING GIN ({prefix}_index_bucket(nodes)) + WITH (fastupdate = off) TABLESPACE {tablespace}; +``` + +## Id shift (for experts) + +When creating a new database (when used in create mode with slim option), +osm2pgsql can create a bucket index using a configurable id shift. + +You can set the shift with the command line option +`--middle-way-node-index-id-shift`. Values between about 3 and 6 might make +sense. + +To completely disable the bucket index and create an index compatible with +earlier versions of osm2pgsql, use `--middle-way-node-index-id-shift=0`. +(This is currently still the default.) + diff --git a/docs/osm2pgsql.md b/docs/osm2pgsql.md index 8ff2739f4..8e47e9778 100644 --- a/docs/osm2pgsql.md +++ b/docs/osm2pgsql.md @@ -220,6 +220,10 @@ starting with two dashes (`--`). A summary of options is included below. -v, \--verbose : Verbose output. +\--middle-way-node-index-id-shift shift +: Set ID shift for way node bucket index in middle. Experts only. See + documentation for details. 
+ # SUPPORTED PROJECTIONS diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index 5a74363c4..d1f92ec87 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -55,8 +55,8 @@ static std::string build_sql(options_t const &options, char const *templ) fmt::arg("unlogged", options.droptemp ? "UNLOGGED" : ""), fmt::arg("using_tablespace", using_tablespace), fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)), - fmt::arg("index_tablespace", - tablespace_clause(options.tblsslim_index))); + fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)), + fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift)); } middle_pgsql_t::table_desc::table_desc(options_t const &options, @@ -634,7 +634,8 @@ static table_sql sql_for_nodes() noexcept return sql; } -static table_sql sql_for_ways() noexcept +static table_sql sql_for_ways(bool has_bucket_index, + uint8_t way_node_index_id_shift) noexcept { table_sql sql{}; @@ -653,12 +654,33 @@ static table_sql sql_for_ways() noexcept " SELECT id, nodes, tags" " FROM {prefix}_ways WHERE id = ANY($1::int8[]);\n"; - sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS" - " SELECT id FROM {prefix}_ways" - " WHERE nodes && ARRAY[$1];\n"; + if (has_bucket_index) { + sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS" + " SELECT id FROM {prefix}_ways w" + " WHERE $1 = ANY(nodes)" + " AND {prefix}_index_bucket(w.nodes)" + " && {prefix}_index_bucket(ARRAY[$1]);\n"; + } else { + sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS" + " SELECT id FROM {prefix}_ways" + " WHERE nodes && ARRAY[$1];\n"; + } - sql.create_index = "CREATE INDEX ON {prefix}_ways USING GIN (nodes)" - " WITH (fastupdate = off) {index_tablespace};\n"; + if (way_node_index_id_shift == 0) { + sql.create_index = "CREATE INDEX ON {prefix}_ways USING GIN (nodes)" + " WITH (fastupdate = off) {index_tablespace};\n"; + } else { + sql.create_index = "CREATE OR REPLACE FUNCTION" + " {prefix}_index_bucket(int8[])" + " 
RETURNS int8[] AS $$\n" + " SELECT ARRAY(SELECT DISTINCT" + " unnest($1) >> {way_node_index_id_shift})\n" + "$$ LANGUAGE SQL IMMUTABLE;\n" + "CREATE INDEX {prefix}_ways_nodes_bucket_idx" + " ON {prefix}_ways" + " USING GIN ({prefix}_index_bucket(nodes))" + " WITH (fastupdate = off) {index_tablespace};\n"; + } return sql; } @@ -697,6 +719,16 @@ static table_sql sql_for_relations() noexcept return sql; } +static bool check_bucket_index(pg_conn_t *db_connection, + std::string const &prefix) +{ + auto const res = db_connection->query( + PGRES_TUPLES_OK, + "SELECT relname FROM pg_class WHERE relkind='i' AND" + " relname = '{}_ways_nodes_bucket_idx';"_format(prefix)); + return res.num_tuples() > 0; +} + middle_pgsql_t::middle_pgsql_t(options_t const *options) : m_append(options->append), m_out_options(options), m_cache(new node_ram_cache{options->alloc_chunkwise | ALLOC_LOSSY, @@ -712,8 +744,18 @@ middle_pgsql_t::middle_pgsql_t(options_t const *options) fmt::print(stderr, "Mid: pgsql, cache={}\n", options->cache); + bool const has_bucket_index = + check_bucket_index(&m_db_connection, options->prefix); + + if (!has_bucket_index && options->append) { + fmt::print(stderr, "You don't have a bucket index. 
See" + " docs/bucket-index.md for details.\n"); + } + m_tables[NODE_TABLE] = table_desc{*options, sql_for_nodes()}; - m_tables[WAY_TABLE] = table_desc{*options, sql_for_ways()}; + m_tables[WAY_TABLE] = + table_desc{*options, sql_for_ways(has_bucket_index, + options->way_node_index_id_shift)}; m_tables[REL_TABLE] = table_desc{*options, sql_for_relations()}; } diff --git a/src/options.cpp b/src/options.cpp index 65f76111c..e682de429 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -58,6 +58,7 @@ const struct option long_options[] = { {"keep-coastlines", no_argument, nullptr, 'K'}, {"latlong", no_argument, nullptr, 'l'}, {"merc", no_argument, nullptr, 'm'}, + {"middle-way-node-index-id-shift", required_argument, nullptr, 300}, {"multi-geometry", no_argument, nullptr, 'G'}, {"number-processes", required_argument, nullptr, 205}, {"output", required_argument, nullptr, 'O'}, @@ -182,6 +183,10 @@ void long_usage(char const *arg0, bool verbose) #endif printf("%s", "\ \n\ + Middle options (experts only):\n\ + --middle-way-node-index-id-shift shift Set ID shift for bucket\ + index. See documentation for details.\ + \n\ Expiry options:\n\ -e|--expire-tiles [min_zoom-]max_zoom Create a tile expiry list.\n\ Zoom levels must be larger than 0 and smaller\n\ @@ -567,6 +572,9 @@ options_t::options_t(int argc, char *argv[]) : options_t() fprintf(stderr, "\n"); exit(EXIT_SUCCESS); break; + case 300: + way_node_index_id_shift = atoi(optarg); + break; case '?': default: short_usage(argv[0]); diff --git a/src/options.hpp b/src/options.hpp index 13cb7fe6d..070cb5a95 100644 --- a/src/options.hpp +++ b/src/options.hpp @@ -130,6 +130,14 @@ class options_t std::vector input_files; + /** + * How many bits should the node id be shifted for the way node index? + * Use 0 to disable for backwards compatibility. + * Currently the default is 0, making osm2pgsql backwards compatible to + * earlier versions. 
+ */ + uint8_t way_node_index_id_shift = 0; + private: /** * Check input options for sanity