From a8ab3fc630861d131fe333286e16f08ea96dcfca Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Sun, 15 Sep 2024 16:30:06 +0200 Subject: [PATCH] Add support for node processing in second stage This is completely analogous to how way processing happens in the second stage. So from the select_relation_members() function in the Lua config file you can now also return a list of nodes that you request to process again. Note that this will only re-process the member nodes themselves; ways which have these nodes as members are not re-processed. Also there is no way to mark member nodes of ways, only member nodes of relations. So this will allow processing, say, stop positions in public transport route relations but not, say, barriers on roads. This change is now possible because we removed support for the old middle format which didn't allow storing the complete nodes (with tags). --- flex-config/README.md | 2 + flex-config/public-transport.lua | 226 +++++++++++++++++++++++++++++++ src/init.lua | 10 ++ src/middle-pgsql.cpp | 52 ++++++- src/middle-pgsql.hpp | 2 + src/middle-ram.cpp | 10 ++ src/middle-ram.hpp | 2 + src/middle.hpp | 17 ++- src/osmdata.cpp | 6 +- src/output-flex.cpp | 134 ++++++++++++------ src/output-flex.hpp | 5 +- src/output.hpp | 6 + 12 files changed, 422 insertions(+), 50 deletions(-) create mode 100644 flex-config/public-transport.lua diff --git a/flex-config/README.md b/flex-config/README.md index 5bd85e61b..0a2b9a256 100644 --- a/flex-config/README.md +++ b/flex-config/README.md @@ -17,6 +17,8 @@ following order (from easiest to understand to the more complex ones): After that you can dive into more advanced topics: +* [public-transport.lua](public-transport.lua) -- Use multi-stage processing + to bring tags from public transport relations to member nodes and ways * [route-relations.lua](route-relations.lua) -- Use multi-stage processing to bring tags from relations to member ways * [unitable.lua](unitable.lua) -- Put all OSM data into a single table diff
--git a/flex-config/public-transport.lua b/flex-config/public-transport.lua new file mode 100644 index 000000000..779514f34 --- /dev/null +++ b/flex-config/public-transport.lua @@ -0,0 +1,226 @@ +-- This config example file is released into the Public Domain. + +-- This file shows how to use multi-stage processing to bring tags from +-- public transport relations into member nodes and ways. This allows +-- advanced processing of public transport networks including stops. + +-- Nodes tagged as public transport stops are imported into the 'stops' table, +-- ways tagged as highway or railway are imported into the 'lines' table. The +-- public transport routes themselves will be in the 'routes' table, but +-- without any geometry. As a "bonus" public transport stop area relations +-- will be imported into the 'stop_areas' table. +-- +-- For the 'stops' and 'lines' tables two-stage processing is used. The +-- 'rel_refs' text column will contain a list of all ref tags found in parent +-- relations with type=route and route=bus, tram, etc. The 'rel_ids' column +-- will be an integer array containing the relation ids. These could be used, +-- for instance, to look up other relation tags from the 'routes' table. 
+ +local tables = {} + +tables.stops = osm2pgsql.define_node_table('stops', { + { column = 'tags', type = 'jsonb' }, + { column = 'rel_refs', type = 'text' }, -- for the refs from the relations + { column = 'rel_ids', sql_type = 'int8[]' }, -- array with integers (for relation IDs) + { column = 'geom', type = 'point', not_null = true }, +}) + +tables.lines = osm2pgsql.define_way_table('lines', { + { column = 'tags', type = 'jsonb' }, + { column = 'rel_refs', type = 'text' }, -- for the refs from the relations + { column = 'rel_ids', sql_type = 'int8[]' }, -- array with integers (for relation IDs) + { column = 'geom', type = 'linestring', not_null = true }, +}) + +-- Tables don't have to have a geometry column +tables.routes = osm2pgsql.define_relation_table('routes', { + { column = 'ref', type = 'text' }, + { column = 'type', type = 'text' }, + { column = 'from', type = 'text' }, + { column = 'to', type = 'text' }, + { column = 'tags', type = 'jsonb' }, +}) + +-- Stop areas contain everything belonging to a specific public transport +-- stop. We model them here by adding a center point as geometry plus the +-- radius of a circle that contains everything in that stop. +tables.stop_areas = osm2pgsql.define_relation_table('stop_areas', { + { column = 'tags', type = 'jsonb' }, + { column = 'radius', type = 'real', not_null = true }, + { column = 'geom', type = 'point', not_null = true }, +}) + +-- This will be used to store information about relations queryable by member +-- node/way id. These are table of tables. The outer table is indexed by the +-- node/way id, the inner table indexed by the relation id. This way even if +-- the information about a relation is added twice, it will be in there only +-- once. It is always good to write your osm2pgsql Lua code in an idempotent +-- way, i.e. it can be called any number of times and will lead to the same +-- result. 
+local n2r = {} +local w2r = {} + +local function clean_tags(tags) + tags.odbl = nil + tags.created_by = nil + tags.source = nil + tags['source:ref'] = nil + + return next(tags) == nil +end + +local function unique_array(array) + local result = {} + + local last = nil + for _, v in ipairs(array) do + if v ~= last then + result[#result + 1] = v + last = v + end + end + + return result +end + +local separator = '·' -- use middle dot as separator character + +local function add_rel_data(row, d) + if not d then + return + end + + local refs = {} + local ids = {} + for rel_id, rel_ref in pairs(d) do + refs[#refs + 1] = rel_ref + ids[#ids + 1] = rel_id + end + table.sort(refs) + table.sort(ids) + + row.rel_refs = table.concat(unique_array(refs), separator) + row.rel_ids = '{' .. table.concat(unique_array(ids), ',') .. '}' +end + +function osm2pgsql.process_node(object) + -- We are only interested in public transport stops here, and they are + -- only available in the second stage. + if osm2pgsql.stage ~= 2 then + return + end + + local row = { + tags = object.tags, + geom = object:as_point() + } + + -- If there is any data from parent relations, add it in + add_rel_data(row, n2r[object.id]) + + tables.stops:insert(row) +end + +function osm2pgsql.process_way(object) + -- We are only interested in highways and railways + if not object.tags.highway and not object.tags.railway then + return + end + + clean_tags(object.tags) + + -- Data we will store in the 'lines' table always has the tags from + -- the way + local row = { + tags = object.tags, + geom = object:as_linestring() + } + + -- If there is any data from parent relations, add it in + add_rel_data(row, w2r[object.id]) + + tables.lines:insert(row) +end + +local pt = { + bus = true, + light_rail = true, + subway = true, + tram = true, + trolleybus = true, +} + +-- We are only interested in certain route relations with a ref tag +local function wanted_relation(tags) + return tags.type == 'route' and pt[tags.route] and 
tags.ref +end + +-- This function is called for every added, modified, or deleted relation. +-- Its only job is to return the ids of all member nodes/ways of the specified +-- relation we want to see in stage 2 again. It MUST NOT store any information +-- about the relation! +function osm2pgsql.select_relation_members(relation) + -- Only interested in public transport relations with refs + if wanted_relation(relation.tags) then + local node_ids = {} + for _, member in ipairs(relation.members) do + if member.type == 'n' and member.role == 'stop' then + node_ids[#node_ids + 1] = member.ref + end + end + + local way_ids = {} + for _, member in ipairs(relation.members) do + if member.type == 'w' and member.role == '' then + way_ids[#way_ids + 1] = member.ref + end + end + + return { + nodes = node_ids, + ways = way_ids, + } + end +end + +-- The process_relation() function should store all information about relation +-- members that might be needed in stage 2. +function osm2pgsql.process_relation(object) + if object.tags.type == 'public_transport' and object.tags.public_transport == 'stop_area' then + local x1, y1, x2, y2 = object:as_geometrycollection():transform(3857):get_bbox() + local radius = math.sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1)) + tables.stop_areas:insert({ + tags = object.tags, + geom = object:as_geometrycollection():centroid(), + radius = radius, + }) + return + end + + if wanted_relation(object.tags) then + tables.routes:insert({ + type = object.tags.route, + ref = object.tags.ref, + from = object.tags.from, + to = object.tags.to, + tags = object.tags, + }) + + -- Go through all the members and store relation ids and refs so they + -- can be found by the member node/way id. 
+ for _, member in ipairs(object.members) do + if member.type == 'n' then + if not n2r[member.ref] then + n2r[member.ref] = {} + end + n2r[member.ref][object.id] = object.tags.ref + elseif member.type == 'w' then + if not w2r[member.ref] then + w2r[member.ref] = {} + end + w2r[member.ref][object.id] = object.tags.ref + end + end + end +end + diff --git a/src/init.lua b/src/init.lua index 26571cb5f..9d0c76413 100644 --- a/src/init.lua +++ b/src/init.lua @@ -46,6 +46,16 @@ function osm2pgsql.define_area_table(_name, _columns, _options) return _define_table_impl('area', _name, _columns, _options) end +function osm2pgsql.node_member_ids(relation) + local ids = {} + for _, member in ipairs(relation.members) do + if member.type == 'n' then + ids[#ids + 1] = member.ref + end + end + return ids +end + function osm2pgsql.way_member_ids(relation) local ids = {} for _, member in ipairs(relation.members) do diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index 34a2ae9d3..c44b69d0d 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -691,7 +691,10 @@ INSERT INTO osm2pgsql_changed_relations m_db_connection.exec(build_sql(*m_options, query)); } - load_id_list(m_db_connection, "osm2pgsql_changed_ways", parent_ways); + if (parent_ways) { + load_id_list(m_db_connection, "osm2pgsql_changed_ways", parent_ways); + } + load_id_list(m_db_connection, "osm2pgsql_changed_relations", parent_relations); @@ -700,9 +703,17 @@ INSERT INTO osm2pgsql_changed_relations timer.stop(); log_debug("Found {} new/changed nodes in input.", changed_nodes.size()); - log_debug(" Found in {} their {} parent ways and {} parent relations.", - std::chrono::duration_cast(timer.elapsed()), - parent_ways->size(), parent_relations->size()); + + auto const elapsed_sec = + std::chrono::duration_cast(timer.elapsed()); + + if (parent_ways) { + log_debug(" Found in {} their {} parent ways and {} parent relations.", + elapsed_sec, parent_ways->size(), parent_relations->size()); + } else { + log_debug(" 
Found in {} their {} parent relations.", elapsed_sec, + parent_relations->size()); + } } void middle_pgsql_t::get_way_parents(idlist_t const &changed_ways, @@ -771,6 +782,21 @@ void middle_pgsql_t::way_set(osmium::Way const &way) namespace { +/** + * Build node in buffer from database results. + */ +void build_node(osmid_t id, pg_result_t const &res, int res_num, int offset, + osmium::memory::Buffer *buffer, bool with_attributes) +{ + osmium::builder::NodeBuilder builder{*buffer}; + builder.set_id(id); + + if (with_attributes) { + set_attributes_on_builder(&builder, res, res_num, offset); + } + pgsql_parse_json_tags(res.get_value(res_num, offset + 1), buffer, &builder); +} + /** * Build way in buffer from database results. */ @@ -789,6 +815,24 @@ void build_way(osmid_t id, pg_result_t const &res, int res_num, int offset, } // anonymous namespace +bool middle_query_pgsql_t::node_get(osmid_t id, + osmium::memory::Buffer *buffer) const +{ + assert(buffer); + + auto const res = m_db_connection.exec_prepared("get_node", id); + + if (res.num_tuples() != 1) { + return false; + } + + build_node(id, res, 0, 0, buffer, m_store_options.with_attributes); + + buffer->commit(); + + return true; +} + bool middle_query_pgsql_t::way_get(osmid_t id, osmium::memory::Buffer *buffer) const { diff --git a/src/middle-pgsql.hpp b/src/middle-pgsql.hpp index fc01f2ebc..4c7dd9218 100644 --- a/src/middle-pgsql.hpp +++ b/src/middle-pgsql.hpp @@ -59,6 +59,8 @@ class middle_query_pgsql_t : public middle_query_t size_t nodes_get_list(osmium::WayNodeList *nodes) const override; + bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const override; + bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override; size_t rel_members_get(osmium::Relation const &rel, diff --git a/src/middle-ram.cpp b/src/middle-ram.cpp index 13c3d96eb..74e1e44fb 100644 --- a/src/middle-ram.cpp +++ b/src/middle-ram.cpp @@ -278,6 +278,16 @@ std::size_t middle_ram_t::nodes_get_list(osmium::WayNodeList 
*nodes) const return count; } +bool middle_ram_t::node_get(osmid_t id, osmium::memory::Buffer *buffer) const +{ + assert(buffer); + + if (m_store_options.nodes) { + return get_object(osmium::item_type::node, id, buffer); + } + return false; +} + bool middle_ram_t::way_get(osmid_t id, osmium::memory::Buffer *buffer) const { assert(buffer); diff --git a/src/middle-ram.hpp b/src/middle-ram.hpp index 935491012..6fe5924a2 100644 --- a/src/middle-ram.hpp +++ b/src/middle-ram.hpp @@ -68,6 +68,8 @@ class middle_ram_t : public middle_t, public middle_query_t std::size_t nodes_get_list(osmium::WayNodeList *nodes) const override; + bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const override; + bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override; size_t rel_members_get(osmium::Relation const &rel, diff --git a/src/middle.hpp b/src/middle.hpp index 4b3499735..82a71fe92 100644 --- a/src/middle.hpp +++ b/src/middle.hpp @@ -50,11 +50,22 @@ struct middle_query_t : std::enable_shared_from_this */ virtual size_t nodes_get_list(osmium::WayNodeList *nodes) const = 0; + /** + * Retrieves a single node from the nodes storage + * and stores it in the given osmium buffer. + * + * \param id id of the node to retrieve + * \param buffer osmium buffer where to put the node + * + * \return true if the node was retrieved + */ + virtual bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const = 0; + /** * Retrieves a single way from the ways storage * and stores it in the given osmium buffer. * - * \param id id of the way to retrive + * \param id id of the way to retrieve * \param buffer osmium buffer where to put the way * * The function does not retrieve the node locations. @@ -78,10 +89,10 @@ struct middle_query_t : std::enable_shared_from_this osmium::osm_entity_bits::type types) const = 0; /** - * Retrives a single relation from the relation storage + * Retrieves a single relation from the relation storage * and stores it in the given osmium buffer. 
* - * \param id id of the relation to retrive + * \param id id of the relation to retrieve * \param buffer osmium buffer where to put the relation * * \return true if the relation was retrieved diff --git a/src/osmdata.cpp b/src/osmdata.cpp index 06aaf4bdf..3ced9ed31 100644 --- a/src/osmdata.cpp +++ b/src/osmdata.cpp @@ -413,13 +413,15 @@ void osmdata_t::process_dependents() } // stage 1c processing: mark parent relations of marked objects as changed + auto const &marked_nodes = m_output->get_marked_node_ids(); auto const &marked_ways = m_output->get_marked_way_ids(); - if (marked_ways.empty()) { + if (marked_nodes.empty() && marked_ways.empty()) { return; } - // process parent relations of marked ways + // process parent relations of marked nodes and ways idlist_t rels_pending_tracker{}; + m_mid->get_node_parents(marked_nodes, nullptr, &rels_pending_tracker); m_mid->get_way_parents(marked_ways, &rels_pending_tracker); if (rels_pending_tracker.empty()) { diff --git a/src/output-flex.cpp b/src/output-flex.cpp index e059d73a3..003a59718 100644 --- a/src/output-flex.cpp +++ b/src/output-flex.cpp @@ -899,6 +899,54 @@ void output_flex_t::pending_way(osmid_t id) get_mutex_and_call_lua_function(m_process_way, m_way_cache.get()); } +/** + * Expects a Lua (hash) table on the stack, reads the field with name of the + * 'type' parameter which must be either nil or a Lua (array) table, in which + * case all (integer) ids in that table are read into the 'ids' out 
+ */ +void get_object_ids(lua_State *lua_state, char const *const type, idlist_t *ids) +{ + lua_getfield(lua_state, -1, type); + int const ltype = lua_type(lua_state, -1); + + if (ltype == LUA_TNIL) { + lua_pop(lua_state, 1); + return; + } + + if (ltype != LUA_TTABLE) { + lua_pop(lua_state, 1); + throw fmt_error( + "Table returned from select_relation_members() contains '{}' " + "field, but it isn't an array table.", + type); + } + + if (!luaX_is_array(lua_state)) { + lua_pop(lua_state, 1); + throw fmt_error( + "Table returned from select_relation_members() contains '{}' " + "field, but it isn't an array table.", + type); + } + + luaX_for_each(lua_state, [&]() { + osmid_t const id = lua_tointeger(lua_state, -1); + if (id == 0) { + throw fmt_error( + "Table returned from select_relation_members() contains " + "'{}' field, which must contain an array of non-zero " + "integer node ids.", + type); + } + + ids->push_back(id); + }); + + lua_pop(lua_state, 1); +} + void output_flex_t::select_relation_members() { if (!m_select_relation_members) { @@ -921,43 +969,13 @@ void output_flex_t::select_relation_members() "other than nil or a table."}; } - // We have established that we have a table. Get the 'ways' field... - lua_getfield(lua_state(), -1, "ways"); - int const ltype = lua_type(lua_state(), -1); + // We have established that we have a table... - // No 'ways' field, that is okay, nothing to be marked. - if (ltype == LUA_TNIL) { - lua_pop(lua_state(), 2); // return value (a table), ways field (nil) - return; - } + // Get the 'nodes' and 'ways' fields... + get_object_ids(lua_state(), "nodes", m_stage2_node_ids.get()); + get_object_ids(lua_state(), "ways", m_stage2_way_ids.get()); - if (ltype != LUA_TTABLE) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains 'ways' " - "field, but it isn't an array table."}; - } - - // Iterate over the 'ways' table to get all ids... 
- if (!luaX_is_array(lua_state())) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains 'ways' " - "field, but it isn't an array table."}; - } - - luaX_for_each( - lua_state(), [&]() { - osmid_t const id = lua_tointeger(lua_state(), -1); - if (id == 0) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains " - "'ways' field, which must contain an array of non-zero " - "integer way ids."}; - } - - m_stage2_way_ids->push_back(id); - }); - - lua_pop(lua_state(), 2); // return value (a table), ways field (a table) + lua_pop(lua_state(), 1); // return value (a table) } void output_flex_t::select_relation_members(osmid_t id) @@ -1261,8 +1279,9 @@ output_flex_t::output_flex_t(std::shared_ptr const &mid, // If the osm2pgsql.select_relation_members() Lua function is defined // it means we need two-stage processing which in turn means we need - // the full ways stored in the middle. + // the full nodes and ways stored in the middle. 
if (m_select_relation_members) { + access_requirements().full_nodes = true; access_requirements().full_ways = true; } @@ -1482,10 +1501,23 @@ void output_flex_t::init_lua(std::string const &filename, lua_remove(lua_state(), 1); // global "osm2pgsql" } +idlist_t const &output_flex_t::get_marked_node_ids() +{ + if (m_stage2_node_ids->empty()) { + log_info("Skipping stage 1c for nodes (no marked nodes)."); + } else { + log_info("Entering stage 1c processing of {} nodes...", + m_stage2_node_ids->size()); + m_stage2_node_ids->sort_unique(); + } + + return *m_stage2_node_ids; +} + idlist_t const &output_flex_t::get_marked_way_ids() { if (m_stage2_way_ids->empty()) { - log_info("Skipping stage 1c (no marked ways)."); + log_info("Skipping stage 1c for ways (no marked ways)."); } else { log_info("Entering stage 1c processing of {} ways...", m_stage2_way_ids->size()); @@ -1497,12 +1529,12 @@ idlist_t const &output_flex_t::get_marked_way_ids() void output_flex_t::reprocess_marked() { - if (m_stage2_way_ids->empty()) { - log_info("No marked ways (Skipping stage 2)."); + if (m_stage2_node_ids->empty() && m_stage2_way_ids->empty()) { + log_info("No marked nodes or ways (Skipping stage 2)."); return; } - log_info("Reprocess marked ways (stage 2)..."); + log_info("Reprocess marked nodes/ways (stage 2)..."); if (!get_options()->append) { util::timer_t timer; @@ -1528,8 +1560,30 @@ void output_flex_t::reprocess_marked() lua_setfield(lua_state(), -2, "stage"); lua_pop(lua_state(), 1); // osm2pgsql + m_stage2_node_ids->sort_unique(); m_stage2_way_ids->sort_unique(); + log_info("There are {} nodes to reprocess...", m_stage2_node_ids->size()); + { + osmium::memory::Buffer node_buffer{ + 1024, osmium::memory::Buffer::auto_grow::yes}; + + for (osmid_t const id : *m_stage2_node_ids) { + if (middle().node_get(id, &node_buffer)) { + node_delete(id); + if (m_process_node) { + auto const &node = node_buffer.get(0); + m_context_node = &node; + get_mutex_and_call_lua_function(m_process_node, 
node); + } + } + node_buffer.clear(); + } + } + + // We don't need these any more so can free the memory. + m_stage2_node_ids->clear(); + log_info("There are {} ways to reprocess...", m_stage2_way_ids->size()); for (osmid_t const id : *m_stage2_way_ids) { diff --git a/src/output-flex.hpp b/src/output-flex.hpp index 9a4ad92ff..f5191d0ce 100644 --- a/src/output-flex.hpp +++ b/src/output-flex.hpp @@ -127,7 +127,9 @@ class output_flex_t : public output_t void wait() override; + idlist_t const &get_marked_node_ids() override; idlist_t const &get_marked_way_ids() override; + void reprocess_marked() override; void pending_way(osmid_t id) override; @@ -280,8 +282,9 @@ class output_flex_t : public output_t /// The connection to the database server. pg_conn_t m_db_connection; - // This is shared between all clones of the output and must only be + // These are shared between all clones of the output and must only be // accessed while protected using the lua_mutex. + std::shared_ptr m_stage2_node_ids = std::make_shared(); std::shared_ptr m_stage2_way_ids = std::make_shared(); std::shared_ptr m_copy_thread; diff --git a/src/output.hpp b/src/output.hpp index ac807e425..f32107d95 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -70,6 +70,12 @@ class output_t virtual void wait() {} + virtual idlist_t const &get_marked_node_ids() + { + static idlist_t const ids{}; + return ids; + } + virtual idlist_t const &get_marked_way_ids() { static idlist_t const ids{};