Skip to content

Commit e3bd89b

Browse files
authored
Merge pull request #1275 from joto/way-node-index
Way node index using shifted node ids
2 parents 85fcbe5 + 2d8f5fa commit e3bd89b

File tree

5 files changed

+159
-9
lines changed

5 files changed

+159
-9
lines changed

docs/bucket-index.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
NOTE: This is only available from osm2pgsql version 1.4.0!
3+
4+
NOTE: The default is still to create the old index for now.
5+
6+
# Bucket index for slim mode
7+
8+
Osm2pgsql can use an index for way node lookups in slim mode that needs a lot
9+
less disk space than earlier versions did. For a planet the savings can be
10+
about 200 GB! Lookup times are slightly slower, but this shouldn't be an issue
11+
for most people.
12+
13+
*If you are not using slim mode and/or not doing updates of your database, this
14+
does not apply to you.*
15+
16+
For backwards compatibility osm2pgsql will never update an existing database
17+
to the new index. It will keep using the old index. So you do not have to do
18+
anything when upgrading osm2pgsql.
19+
20+
If you want to use the new index, there are two ways of doing this: The "safe"
21+
way for most users and the "do-it-yourself" way for expert users. Note that
22+
once you have switched to the new index, older versions of osm2pgsql will not work
23+
correctly any more.
24+
25+
## Update for most users
26+
27+
NOTE: This does not work yet. Currently the default is still to create the
28+
old type of index.
29+
30+
If your database was created with an older version of osm2pgsql you might want
31+
to start again from an empty database. Just do a reimport and osm2pgsql will
32+
use the new space-saving index.
33+
34+
## Update for expert users
35+
36+
This is only for users who are very familiar with osm2pgsql and PostgreSQL
37+
operation. You can break your osm2pgsql database beyond repair if something
38+
goes wrong here and you might not even notice.
39+
40+
You can create the index yourself by following these steps:
41+
42+
Drop the existing index. Replace `{prefix}` by the prefix you are using.
43+
Usually this is `planet_osm`:
44+
45+
```
46+
DROP INDEX {prefix}_ways_nodes_idx;
47+
```
48+
49+
Create the `index_bucket` function needed for the index. Replace
50+
`{way_node_index_id_shift}` by the number of bits you want the id to be
51+
shifted. If you don't have a reason to use something else, use `5`:
52+
53+
```
54+
CREATE FUNCTION {prefix}_index_bucket(int8[]) RETURNS int8[] AS $$
55+
SELECT ARRAY(SELECT DISTINCT unnest($1) >> {way_node_index_id_shift})
56+
$$ LANGUAGE SQL IMMUTABLE;
57+
```
58+
59+
Now you can create the new index. Again, replace `{prefix}` by the prefix
60+
you are using:
61+
62+
```
63+
CREATE INDEX {prefix}_ways_nodes_bucket_idx ON {prefix}_ways
64+
USING GIN ({prefix}_index_bucket(nodes))
65+
WITH (fastupdate = off);
66+
```
67+
68+
If you want to create the index in a specific tablespace you can do this:
69+
70+
```
71+
CREATE INDEX {prefix}_ways_nodes_bucket_idx ON {prefix}_ways
72+
USING GIN ({prefix}_index_bucket(nodes))
73+
WITH (fastupdate = off) TABLESPACE {tablespace};
74+
```
75+
76+
## Id shift (for experts)
77+
78+
When creating a new database (when used in create mode with slim option),
79+
osm2pgsql can create a bucket index using a configurable id shift.
80+
81+
You can set the shift with the command line option
82+
`--middle-way-node-index-id-shift`. Values between about 3 and 6 might make
83+
sense.
84+
85+
To completely disable the bucket index and create an index compatible with
86+
earlier versions of osm2pgsql, use `--middle-way-node-index-id-shift=0`.
87+
(This is currently still the default.)
88+

docs/osm2pgsql.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ starting with two dashes (`--`). A summary of options is included below.
220220
-v, \--verbose
221221
: Verbose output.
222222

223+
\--middle-way-node-index-id-shift shift
224+
: Set ID shift for way node bucket index in middle. Experts only. See
225+
documentation for details.
226+
223227

224228
# SUPPORTED PROJECTIONS
225229

src/middle-pgsql.cpp

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ static std::string build_sql(options_t const &options, char const *templ)
5555
fmt::arg("unlogged", options.droptemp ? "UNLOGGED" : ""),
5656
fmt::arg("using_tablespace", using_tablespace),
5757
fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)),
58-
fmt::arg("index_tablespace",
59-
tablespace_clause(options.tblsslim_index)));
58+
fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)),
59+
fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift));
6060
}
6161

6262
middle_pgsql_t::table_desc::table_desc(options_t const &options,
@@ -634,7 +634,8 @@ static table_sql sql_for_nodes() noexcept
634634
return sql;
635635
}
636636

637-
static table_sql sql_for_ways() noexcept
637+
static table_sql sql_for_ways(bool has_bucket_index,
638+
uint8_t way_node_index_id_shift) noexcept
638639
{
639640
table_sql sql{};
640641

@@ -653,12 +654,33 @@ static table_sql sql_for_ways() noexcept
653654
" SELECT id, nodes, tags"
654655
" FROM {prefix}_ways WHERE id = ANY($1::int8[]);\n";
655656

656-
sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS"
657-
" SELECT id FROM {prefix}_ways"
658-
" WHERE nodes && ARRAY[$1];\n";
657+
if (has_bucket_index) {
658+
sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS"
659+
" SELECT id FROM {prefix}_ways w"
660+
" WHERE $1 = ANY(nodes)"
661+
" AND {prefix}_index_bucket(w.nodes)"
662+
" && {prefix}_index_bucket(ARRAY[$1]);\n";
663+
} else {
664+
sql.prepare_mark = "PREPARE mark_ways_by_node(int8) AS"
665+
" SELECT id FROM {prefix}_ways"
666+
" WHERE nodes && ARRAY[$1];\n";
667+
}
659668

660-
sql.create_index = "CREATE INDEX ON {prefix}_ways USING GIN (nodes)"
661-
" WITH (fastupdate = off) {index_tablespace};\n";
669+
if (way_node_index_id_shift == 0) {
670+
sql.create_index = "CREATE INDEX ON {prefix}_ways USING GIN (nodes)"
671+
" WITH (fastupdate = off) {index_tablespace};\n";
672+
} else {
673+
sql.create_index = "CREATE OR REPLACE FUNCTION"
674+
" {prefix}_index_bucket(int8[])"
675+
" RETURNS int8[] AS $$\n"
676+
" SELECT ARRAY(SELECT DISTINCT"
677+
" unnest($1) >> {way_node_index_id_shift})\n"
678+
"$$ LANGUAGE SQL IMMUTABLE;\n"
679+
"CREATE INDEX {prefix}_ways_nodes_bucket_idx"
680+
" ON {prefix}_ways"
681+
" USING GIN ({prefix}_index_bucket(nodes))"
682+
" WITH (fastupdate = off) {index_tablespace};\n";
683+
}
662684

663685
return sql;
664686
}
@@ -697,6 +719,16 @@ static table_sql sql_for_relations() noexcept
697719
return sql;
698720
}
699721

722+
static bool check_bucket_index(pg_conn_t *db_connection,
723+
std::string const &prefix)
724+
{
725+
auto const res = db_connection->query(
726+
PGRES_TUPLES_OK,
727+
"SELECT relname FROM pg_class WHERE relkind='i' AND"
728+
" relname = '{}_ways_nodes_bucket_idx';"_format(prefix));
729+
return res.num_tuples() > 0;
730+
}
731+
700732
middle_pgsql_t::middle_pgsql_t(options_t const *options)
701733
: m_append(options->append), m_out_options(options),
702734
m_cache(new node_ram_cache{options->alloc_chunkwise | ALLOC_LOSSY,
@@ -712,8 +744,18 @@ middle_pgsql_t::middle_pgsql_t(options_t const *options)
712744

713745
fmt::print(stderr, "Mid: pgsql, cache={}\n", options->cache);
714746

747+
bool const has_bucket_index =
748+
check_bucket_index(&m_db_connection, options->prefix);
749+
750+
if (!has_bucket_index && options->append) {
751+
fmt::print(stderr, "You don't have a bucket index. See"
752+
" docs/bucket-index.md for details.\n");
753+
}
754+
715755
m_tables[NODE_TABLE] = table_desc{*options, sql_for_nodes()};
716-
m_tables[WAY_TABLE] = table_desc{*options, sql_for_ways()};
756+
m_tables[WAY_TABLE] =
757+
table_desc{*options, sql_for_ways(has_bucket_index,
758+
options->way_node_index_id_shift)};
717759
m_tables[REL_TABLE] = table_desc{*options, sql_for_relations()};
718760
}
719761

src/options.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ const struct option long_options[] = {
5858
{"keep-coastlines", no_argument, nullptr, 'K'},
5959
{"latlong", no_argument, nullptr, 'l'},
6060
{"merc", no_argument, nullptr, 'm'},
61+
{"middle-way-node-index-id-shift", required_argument, nullptr, 300},
6162
{"multi-geometry", no_argument, nullptr, 'G'},
6263
{"number-processes", required_argument, nullptr, 205},
6364
{"output", required_argument, nullptr, 'O'},
@@ -182,6 +183,10 @@ void long_usage(char const *arg0, bool verbose)
182183
#endif
183184
printf("%s", "\
184185
\n\
186+
Middle options (experts only):\n\
187+
--middle-way-node-index-id-shift shift Set ID shift for bucket\
188+
index. See documentation for details.\
189+
\n\
185190
Expiry options:\n\
186191
-e|--expire-tiles [min_zoom-]max_zoom Create a tile expiry list.\n\
187192
Zoom levels must be larger than 0 and smaller\n\
@@ -567,6 +572,9 @@ options_t::options_t(int argc, char *argv[]) : options_t()
567572
fprintf(stderr, "\n");
568573
exit(EXIT_SUCCESS);
569574
break;
575+
case 300:
576+
way_node_index_id_shift = atoi(optarg);
577+
break;
570578
case '?':
571579
default:
572580
short_usage(argv[0]);

src/options.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ class options_t
130130

131131
std::vector<std::string> input_files;
132132

133+
/**
134+
* How many bits should the node id be shifted for the way node index?
135+
* Use 0 to disable for backwards compatibility.
136+
* Currently the default is 0, making osm2pgsql backwards compatible with
137+
* earlier versions.
138+
*/
139+
uint8_t way_node_index_id_shift = 0;
140+
133141
private:
134142
/**
135143
* Check input options for sanity

0 commit comments

Comments
 (0)