diff --git a/options.cpp b/options.cpp index 5383f4e52..5950cd25f 100644 --- a/options.cpp +++ b/options.cpp @@ -31,40 +31,39 @@ extern "C" { namespace { const char * short_options = "ab:cd:KhlmMp:suvU:WH:P:i:IE:C:S:e:o:O:xkjGz:r:VF:"; - const struct option long_options[] = - { - {"append", 0, 0, 'a'}, - {"bbox", 1, 0, 'b'}, - {"create", 0, 0, 'c'}, + const struct option long_options[] = { + {"append", 0, 0, 'a'}, + {"bbox", 1, 0, 'b'}, + {"create", 0, 0, 'c'}, {"database", 1, 0, 'd'}, - {"latlong", 0, 0, 'l'}, - {"verbose", 0, 0, 'v'}, - {"slim", 0, 0, 's'}, - {"prefix", 1, 0, 'p'}, - {"proj", 1, 0, 'E'}, - {"merc", 0, 0, 'm'}, - {"cache", 1, 0, 'C'}, + {"latlong", 0, 0, 'l'}, + {"verbose", 0, 0, 'v'}, + {"slim", 0, 0, 's'}, + {"prefix", 1, 0, 'p'}, + {"proj", 1, 0, 'E'}, + {"merc", 0, 0, 'm'}, + {"cache", 1, 0, 'C'}, {"username", 1, 0, 'U'}, {"password", 0, 0, 'W'}, - {"host", 1, 0, 'H'}, - {"port", 1, 0, 'P'}, + {"host", 1, 0, 'H'}, + {"port", 1, 0, 'P'}, {"tablespace-index", 1, 0, 'i'}, {"tablespace-slim-data", 1, 0, 200}, {"tablespace-slim-index", 1, 0, 201}, {"tablespace-main-data", 1, 0, 202}, {"tablespace-main-index", 1, 0, 203}, - {"help", 0, 0, 'h'}, - {"style", 1, 0, 'S'}, + {"help", 0, 0, 'h'}, + {"style", 1, 0, 'S'}, {"expire-tiles", 1, 0, 'e'}, {"expire-output", 1, 0, 'o'}, {"expire-bbox-size", 1, 0, 214}, - {"output", 1, 0, 'O'}, + {"output", 1, 0, 'O'}, {"extra-attributes", 0, 0, 'x'}, {"hstore", 0, 0, 'k'}, {"hstore-all", 0, 0, 'j'}, {"hstore-column", 1, 0, 'z'}, {"hstore-match-only", 0, 0, 208}, - {"hstore-add-index",0,0,211}, + {"hstore-add-index", 0, 0, 211}, {"multi-geometry", 0, 0, 'G'}, {"keep-coastlines", 0, 0, 'K'}, {"input-reader", 1, 0, 'r'}, @@ -74,11 +73,12 @@ namespace {"number-processes", 1, 0, 205}, {"drop", 0, 0, 206}, {"unlogged", 0, 0, 207}, - {"flat-nodes",1,0, 'F'}, - {"tag-transform-script",1,0,212}, - {"reproject-area",0,0,213}, - {0, 0, 0, 0} - }; + {"flat-nodes", 1, 0, 'F'}, + {"tag-transform-script", 1, 0, 212}, + 
{"reproject-area", 0, 0, 213}, + {"skip-optimizing", 0, 0, 215}, + {"skip-indexing", 0, 0, 216}, + {0, 0, 0, 0}}; void short_usage(char *arg0) { @@ -219,6 +219,8 @@ namespace -K|--keep-coastlines Keep coastline data rather than filtering it out.\n\ By default natural=coastline tagged data will be discarded\n\ because renderers usually have shape files for them.\n\ + --skip-optimizing Do not optimize DB after fresh import. \n\ + --skip-indexing Do not build any indexes after import.\n\ --reproject-area compute area column using spherical mercator coordinates.\n\ -h|--help Help information.\n\ -v|--verbose Verbose output.\n"); @@ -283,24 +285,25 @@ options_t::options_t() projection(reprojection::create_projection(PROJ_SPHERE_MERC)), append(false), slim(false), cache(800), tblsmain_index(boost::none), tblsslim_index(boost::none), tblsmain_data(boost::none), - tblsslim_data(boost::none), style(DEFAULT_STYLE), - expire_tiles_zoom(0), expire_tiles_zoom_min(0), - expire_tiles_max_bbox(20000.0), expire_tiles_filename("dirty_tiles"), - hstore_mode(HSTORE_NONE), enable_hstore_index(false), enable_multi(false), - hstore_columns(), keep_coastlines(false), parallel_indexing(true), + tblsslim_data(boost::none), style(DEFAULT_STYLE), expire_tiles_zoom(0), + expire_tiles_zoom_min(0), expire_tiles_max_bbox(20000.0), + expire_tiles_filename("dirty_tiles"), hstore_mode(HSTORE_NONE), + enable_hstore_index(false), enable_multi(false), hstore_columns(), + keep_coastlines(false), parallel_indexing(true), #ifdef __amd64__ alloc_chunkwise(ALLOC_SPARSE | ALLOC_DENSE), #else alloc_chunkwise(ALLOC_SPARSE), #endif droptemp(false), unlogged(false), hstore_match_only(false), - flat_node_cache_enabled(false), reproject_area(false), - flat_node_file(boost::none), tag_transform_script(boost::none), - tag_transform_node_func(boost::none), tag_transform_way_func(boost::none), - tag_transform_rel_func(boost::none), tag_transform_rel_mem_func(boost::none), - create(false), long_usage_bool(false), 
pass_prompt(false), - output_backend("pgsql"), input_reader("auto"), bbox(boost::none), - extra_attributes(false), verbose(false) + flat_node_cache_enabled(false), reproject_area(false), skip_optimizing(false), + skip_indexing(false), flat_node_file(boost::none), + tag_transform_script(boost::none), tag_transform_node_func(boost::none), + tag_transform_way_func(boost::none), tag_transform_rel_func(boost::none), + tag_transform_rel_mem_func(boost::none), create(false), + long_usage_bool(false), pass_prompt(false), output_backend("pgsql"), + input_reader("auto"), bbox(boost::none), extra_attributes(false), + verbose(false) { num_procs = std::thread::hardware_concurrency(); if (num_procs < 1) { @@ -507,6 +510,12 @@ options_t::options_t(int argc, char *argv[]): options_t() case 213: reproject_area = true; break; + case 215: + skip_optimizing = true; + break; + case 216: + skip_indexing = true; + break; case 'V': fprintf(stderr, "Compiled using the following library versions:\n"); fprintf(stderr, "Libosmium %s\n", LIBOSMIUM_VERSION_STRING); @@ -576,6 +585,11 @@ void options_t::check_options() unlogged = false; } + if (enable_hstore_index && skip_indexing) { + throw std::runtime_error("Error: --hstore-add-index and " + "--skip-indexing are mutually exclusive.\n"); + } + if (hstore_mode == HSTORE_NONE && hstore_columns.size() == 0 && hstore_match_only) { fprintf(stderr, "Warning: --hstore-match-only only makes sense with --hstore, --hstore-all, or --hstore-column; ignored.\n"); hstore_match_only = false; diff --git a/options.hpp b/options.hpp index 0df2cf148..2230574e3 100644 --- a/options.hpp +++ b/options.hpp @@ -73,6 +73,8 @@ struct options_t { bool hstore_match_only; ///< only copy rows that match an explicitly listed key bool flat_node_cache_enabled; bool reproject_area; + bool skip_optimizing; + bool skip_indexing; boost::optional flat_node_file; /** * these options allow you to control the name of the diff --git a/output-multi.cpp b/output-multi.cpp index 
9bac699c7..b00ce0681 100644 --- a/output-multi.cpp +++ b/output-multi.cpp @@ -28,7 +28,8 @@ output_multi_t::output_multi_t(const std::string &name, m_table(new table_t( m_options.database_options.conninfo(), name, m_processor->column_type(), m_export_list->normal_columns(m_osm_type), m_options.hstore_columns, - m_processor->srid(), m_options.append, m_options.slim, m_options.droptemp, + m_processor->srid(), m_options.append, m_options.skip_optimizing, + m_options.skip_indexing, m_options.slim, m_options.droptemp, m_options.hstore_mode, m_options.enable_hstore_index, m_options.tblsmain_data, m_options.tblsmain_index)), ways_done_tracker(new id_tracker()), diff --git a/output-pgsql.cpp b/output-pgsql.cpp index 4e5ee3344..8fd6a38d1 100644 --- a/output-pgsql.cpp +++ b/output-pgsql.cpp @@ -570,7 +570,8 @@ output_pgsql_t::output_pgsql_t(const middle_query_t *mid, const options_t &o) m_tables.push_back(std::shared_ptr(new table_t( m_options.database_options.conninfo(), name, type, columns, m_options.hstore_columns, m_options.projection->target_srs(), - m_options.append, m_options.slim, m_options.droptemp, + m_options.append, m_options.skip_optimizing, + m_options.skip_indexing, m_options.slim, m_options.droptemp, m_options.hstore_mode, m_options.enable_hstore_index, m_options.tblsmain_data, m_options.tblsmain_index))); } diff --git a/table.cpp b/table.cpp index 974bee6f8..339cb02da 100644 --- a/table.cpp +++ b/table.cpp @@ -16,13 +16,21 @@ typedef boost::format fmt; #define BUFFER_SEND_SIZE 1024 - -table_t::table_t(const string& conninfo, const string& name, const string& type, const columns_t& columns, const hstores_t& hstore_columns, - const int srid, const bool append, const bool slim, const bool drop_temp, const int hstore_mode, - const bool enable_hstore_index, const boost::optional& table_space, const boost::optional& table_space_index) : - conninfo(conninfo), name(name), type(type), sql_conn(nullptr), copyMode(false), srid((fmt("%1%") % srid).str()), - 
append(append), slim(slim), drop_temp(drop_temp), hstore_mode(hstore_mode), enable_hstore_index(enable_hstore_index), - columns(columns), hstore_columns(hstore_columns), table_space(table_space), table_space_index(table_space_index) +table_t::table_t(const string &conninfo, const string &name, const string &type, + const columns_t &columns, const hstores_t &hstore_columns, + const int srid, const bool append, const bool skip_optimizing, + const bool skip_indexing, const bool slim, + const bool drop_temp, const int hstore_mode, + const bool enable_hstore_index, + const boost::optional &table_space, + const boost::optional &table_space_index) +: conninfo(conninfo), name(name), type(type), sql_conn(nullptr), + copyMode(false), srid((fmt("%1%") % srid).str()), append(append), + skip_optimizing(skip_optimizing), skip_indexing(skip_indexing), slim(slim), + drop_temp(drop_temp), hstore_mode(hstore_mode), + enable_hstore_index(enable_hstore_index), columns(columns), + hstore_columns(hstore_columns), table_space(table_space), + table_space_index(table_space_index) { //if we dont have any columns if(columns.size() == 0 && hstore_mode != HSTORE_ALL) @@ -36,11 +44,16 @@ table_t::table_t(const string& conninfo, const string& name, const string& type, del_fmt = fmt("DELETE FROM %1% WHERE osm_id = %2%"); } -table_t::table_t(const table_t& other): - conninfo(other.conninfo), name(other.name), type(other.type), sql_conn(nullptr), copyMode(false), buffer(), srid(other.srid), - append(other.append), slim(other.slim), drop_temp(other.drop_temp), hstore_mode(other.hstore_mode), enable_hstore_index(other.enable_hstore_index), - columns(other.columns), hstore_columns(other.hstore_columns), copystr(other.copystr), table_space(other.table_space), - table_space_index(other.table_space_index), single_fmt(other.single_fmt), del_fmt(other.del_fmt) +table_t::table_t(const table_t &other) +: conninfo(other.conninfo), name(other.name), type(other.type), + sql_conn(nullptr), copyMode(false), 
buffer(), srid(other.srid), + append(other.append), skip_optimizing(other.skip_optimizing), + skip_indexing(other.skip_indexing), slim(other.slim), + drop_temp(other.drop_temp), hstore_mode(other.hstore_mode), + enable_hstore_index(other.enable_hstore_index), columns(other.columns), + hstore_columns(other.hstore_columns), copystr(other.copystr), + table_space(other.table_space), table_space_index(other.table_space_index), + single_fmt(other.single_fmt), del_fmt(other.del_fmt) { // if the other table has already started, then we want to execute // the same stuff to get into the same state. but if it hasn't, then @@ -211,96 +224,143 @@ void table_t::start() void table_t::stop() { stop_copy(); - if (!append) - { + // Post-processing for initial import only. + if (!append) { time_t start, end; time(&start); - fprintf(stderr, "Sorting data and creating indexes for %s\n", name.c_str()); - - if (srid == "4326") { - /* libosmium assures validity of geometries in 4326, so the WHERE can be skipped. - Because we know the geom is already in 4326, no reprojection is needed for GeoHashing */ + fprintf(stderr, "Sorting data in %s\n", name.c_str()); + if (!skip_optimizing) { + if (srid == "4326") { + /* libosmium assures validity of geometries in 4326, so the WHERE can be skipped. + Because we know the geom is already in 4326, no reprojection is needed for GeoHashing */ + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE TABLE %1%_tmp %2% AS\n" + " SELECT * FROM %1%\n" + " ORDER BY ST_GeoHash(way,10)\n" + " COLLATE \"C\"") % + name % + (table_space ? "TABLESPACE " + table_space.get() : "")) + .str()); + } else { + /* osm2pgsql's transformation from 4326 to another projection could make a geometry invalid, + and these need to be filtered. Also, a transformation is needed for geohashing. 
*/ + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE TABLE %1%_tmp %2% AS\n" + " SELECT * FROM %1%\n" + " WHERE ST_IsValid(way)\n" + // clang-format off + " ORDER BY ST_GeoHash(ST_Transform(ST_Envelope(way),4326),10)\n" + // clang-format on + " COLLATE \"C\"") % + name % + (table_space ? "TABLESPACE " + table_space.get() : "")) + .str()); + } + pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, + (fmt("DROP TABLE %1%") % name).str()); pgsql_exec_simple( sql_conn, PGRES_COMMAND_OK, - (fmt("CREATE TABLE %1%_tmp %2% AS\n" - " SELECT * FROM %1%\n" - " ORDER BY ST_GeoHash(way,10)\n" - " COLLATE \"C\"") % - name % (table_space ? "TABLESPACE " + table_space.get() : "")) - .str()); + (fmt("ALTER TABLE %1%_tmp RENAME TO %1%") % name).str()); + fprintf(stderr, "Copying %s to cluster by geometry finished\n", + name.c_str()); } else { - /* osm2pgsql's transformation from 4326 to another projection could make a geometry invalid, - and these need to be filtered. Also, a transformation is needed for geohashing. */ - pgsql_exec_simple( - sql_conn, PGRES_COMMAND_OK, - (fmt("CREATE TABLE %1%_tmp %2% AS\n" - " SELECT * FROM %1%\n" - " WHERE ST_IsValid(way)\n" - // clang-format off - " ORDER BY ST_GeoHash(ST_Transform(ST_Envelope(way),4326),10)\n" - // clang-format on - " COLLATE \"C\"") % - name % (table_space ? "TABLESPACE " + table_space.get() : "")) - .str()); - } - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("DROP TABLE %1%") % name).str()); - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("ALTER TABLE %1%_tmp RENAME TO %1%") % name).str()); - fprintf(stderr, "Copying %s to cluster by geometry finished\n", name.c_str()); - fprintf(stderr, "Creating geometry index on %s\n", name.c_str()); - - // Use fillfactor 100 for un-updatable imports - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("CREATE INDEX ON %1% USING GIST (way) %2% %3%") % name % - (slim && !drop_temp ? "" : "WITH (FILLFACTOR=100)") % - (table_space_index ? 
"TABLESPACE " + table_space_index.get() : "")).str()); - - /* slim mode needs this to be able to apply diffs */ - if (slim && !drop_temp) - { - fprintf(stderr, "Creating osm_id index on %s\n", name.c_str()); - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("CREATE INDEX ON %1% USING BTREE (osm_id) %2%") % name % - (table_space_index ? "TABLESPACE " + table_space_index.get() : "")).str()); + // We haven't removed invalid geometries as we did not perform a COPY. if (srid != "4326") { pgsql_exec_simple( sql_conn, PGRES_COMMAND_OK, - (fmt("CREATE OR REPLACE FUNCTION %1%_osm2pgsql_valid()\n" - "RETURNS TRIGGER AS $$\n" - "BEGIN\n" - " IF ST_IsValid(NEW.way) THEN \n" - " RETURN NEW;\n" - " END IF;\n" - " RETURN NULL;\n" - "END;" - "$$ LANGUAGE plpgsql;") % - name) + (fmt("DELETE FROM %1% WHERE NOT ST_IsValid(way)") % name) .str()); + } + } + + // Create a trigger that will skip importing invalid geometries while inserting diffs later. + // SRID=4326 invalidity is covered by Osmium library. + if (slim && !drop_temp && srid != "4326") { + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE OR REPLACE FUNCTION %1%_osm2pgsql_valid()\n" + "RETURNS TRIGGER AS $$\n" + "BEGIN\n" + " IF ST_IsValid(NEW.way) THEN \n" + " RETURN NEW;\n" + " END IF;\n" + " RETURN NULL;\n" + "END;" + "$$ LANGUAGE plpgsql;") % + name) + .str()); + pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE TRIGGER %1%_osm2pgsql_valid BEFORE " + "INSERT OR UPDATE\n" + " ON %1%\n" + " FOR EACH ROW EXECUTE PROCEDURE " + "%1%_osm2pgsql_valid();") % + name) + .str()); + } + + if (!skip_indexing) { + fprintf(stderr, "Creating geometry index on %s\n", name.c_str()); + + // Use fillfactor 100 for un-updatable imports + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE INDEX ON %1% USING GIST (way) %2% %3%") % name % + (slim && !drop_temp ? "" : "WITH (FILLFACTOR=100)") % + (table_space_index ? 
"TABLESPACE " + table_space_index.get() + : "")) + .str()); + + // slim mode needs osm_id index to be able to apply diffs + if (slim && !drop_temp) { + fprintf(stderr, "Creating osm_id index on %s\n", name.c_str()); pgsql_exec_simple( sql_conn, PGRES_COMMAND_OK, - (fmt("CREATE TRIGGER %1%_osm2pgsql_valid BEFORE INSERT OR UPDATE\n" - " ON %1%\n" - " FOR EACH ROW EXECUTE PROCEDURE %1%_osm2pgsql_valid();") % - name) + (fmt("CREATE INDEX ON %1% USING BTREE (osm_id) %2%") % + name % + (table_space_index + ? "TABLESPACE " + table_space_index.get() + : "")) .str()); } - //pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("ALTER TABLE %1% ADD CHECK (ST_IsValid(way));") % name).str()); - } - /* Create hstore index if selected */ - if (enable_hstore_index) { - fprintf(stderr, "Creating hstore indexes on %s\n", name.c_str()); - if (hstore_mode != HSTORE_NONE) { - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("CREATE INDEX ON %1% USING GIN (tags) %2%") % name % - (table_space_index ? "TABLESPACE " + table_space_index.get() : "")).str()); - } - for(size_t i = 0; i < hstore_columns.size(); ++i) { - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("CREATE INDEX ON %1% USING GIN (\"%3%\") %4%") % name % i % hstore_columns[i] % - (table_space_index ? "TABLESPACE " + table_space_index.get() : "")).str()); + /* Create hstore index if selected */ + if (enable_hstore_index) { + fprintf(stderr, "Creating hstore indexes on %s\n", + name.c_str()); + if (hstore_mode != HSTORE_NONE) { + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE INDEX ON %1% USING GIN (tags) %2%") % + name % + (table_space_index + ? "TABLESPACE " + table_space_index.get() + : "")) + .str()); + } + for (size_t i = 0; i < hstore_columns.size(); ++i) { + pgsql_exec_simple( + sql_conn, PGRES_COMMAND_OK, + (fmt("CREATE INDEX ON %1% USING GIN (\"%3%\") %4%") % + name % i % hstore_columns[i] % + (table_space_index + ? 
"TABLESPACE " + table_space_index.get() + : "")) + .str()); + } } + fprintf(stderr, "Creating indexes on %s finished\n", name.c_str()); } - fprintf(stderr, "Creating indexes on %s finished\n", name.c_str()); - pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, (fmt("ANALYZE %1%") % name).str()); + fprintf(stderr, "Analyzing statistics on %s\n", name.c_str()); + pgsql_exec_simple(sql_conn, PGRES_COMMAND_OK, + (fmt("ANALYZE %1%") % name).str()); time(&end); - fprintf(stderr, "All indexes on %s created in %ds\n", name.c_str(), (int)(end - start)); + + fprintf(stderr, "Post-import processing on %s finished in %ds\n", + name.c_str(), (int)(end - start)); } teardown(); diff --git a/table.hpp b/table.hpp index 1aa404b2d..284641918 100644 --- a/table.hpp +++ b/table.hpp @@ -19,9 +19,14 @@ typedef std::vector hstores_t; class table_t { public: - table_t(const std::string& conninfo, const std::string& name, const std::string& type, const columns_t& columns, const hstores_t& hstore_columns, const int srid, - const bool append, const bool slim, const bool droptemp, const int hstore_mode, const bool enable_hstore_index, - const boost::optional& table_space, const boost::optional& table_space_index); + table_t(const std::string &conninfo, const std::string &name, + const std::string &type, const columns_t &columns, + const hstores_t &hstore_columns, const int srid, + const bool append, const bool skip_optimizing, + const bool skip_indexing, const bool slim, const bool droptemp, + const int hstore_mode, const bool enable_hstore_index, + const boost::optional &table_space, + const boost::optional &table_space_index); table_t(const table_t& other); ~table_t(); @@ -89,6 +94,8 @@ class table_t std::string buffer; std::string srid; bool append; + bool skip_optimizing; + bool skip_indexing; bool slim; bool drop_temp; int hstore_mode;