osrm-backend/include/osmium/io/detail/pbf_output_format.hpp

945 lines
41 KiB
C++
Raw Normal View History

#ifndef OSMIUM_IO_DETAIL_PBF_OUTPUT_FORMAT_HPP
#define OSMIUM_IO_DETAIL_PBF_OUTPUT_FORMAT_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Squashed 'third_party/libosmium/' changes from 910f8f1..6522da5 6522da5 Merge pull request #79 from DennisOSRM/master 7c8d8dc add override keyword to close(), overridden function in include/osmium/io/detail/input_format.hpp d24841e Changes copyright dates in all files to a consistent "2013-2015". 3adc7d7 Another try to make it compile on Windows. 20dad8e Use auto instead of hard-coded types, hopefully fixes Windows build. b73ab3f Set build config in appveyor ctest call. 3e33857 Merge pull request #76 from BergWerkGIS/master c78ca6e Merge remote-tracking branch 'upstream/master' cf42013 enable test again 69e4a91 include compatibility.h 6f79b5c CMake: Remove Dart include, it seems to be superfluous. 4d40a18 Explicitly set copy constructors etc. for OutputIterator and test it. 69ee34c Run tests in build directory instead of source directory. 11c44c8 Fix typo. 675cc11 Fix formatting CMake config. 7aa3cb0 Add a benchmark counting amenity=post_box tags. 89fd942 Make it more evident that we handling constant tag lists. c72bbdf Add count() and empty() functions to Filter class. 88b9543 Disable annoying warnings from YouCompleteMe. 6cec403 Benchmarks: Only find files, not directories etc. in data dir. 09c4630 Follow redirects in benchmark download script. a8a552e Use Approx() function from Catch framework for floating point comparisons. 7db5086 Reorder data tests so fast tests run first. 24ce403 Fix #includes. ad9515c Add some comments in different config files to explain what they are. 987aa9c Some README updates. 3c18de7 Add configuration for YouCompleteMe Vim plugin. 433148e Explicit comparison against 0 to avoid warning with MSVC. e64a459 Small updates in benchmarks README. e4aff7f Add benchmark to compare static vs. dynamic index maps. f702634 Add missing include. 95a8c3b Merge branch 'master' of github.com:osmcode/libosmium 560a2ae Merge pull request #73 from osmcode/travis-fix f41e4ca remove incorrect cd f826107 Add some helper functions for working with entity_bits. 
8fec1c1 Updated README: Lots of documentation is now on wiki. 6110cc8 Make cmake config work if no components are given. 8136557 Use 'Libosmium' instead of 'Osmium' as project name in doc. 10e631a Improved documenation: Dependencies to link with. 41d0ca4 Merge remote-tracking branch 'upstream/master' 6768026 SLN configuration is always 'Release' 5d985cf av: show env vars. 272ac88 don't use '%CONFIGURATION%' expands to e.g. 'Dev|x86' 59b15eb use cmake 3.1.0 again. old cmake on AppVeyor doesn't know yet about VS2014 SLN 0d0d71a try building with VS, Dev works only with VS at least locally 2fe6d02 name of geos.lib changed fc4662e test latest binary package d4a265f Add more labels to tests. 8722a3d Only check for valgrind if testing is enabled. cf7dc56 README improvements. 13a426e Fix cpack configuration. ae649c8 travis build: fix double install of make package c102040 Move cmake config for documentation into doc directory. 6064d55 Also list advanced cmake variables in travis build. c299bfa constexprt workaround for MSVC. afcf7a1 Cleanup/formatting of cmake config. a536720 Fix travis build by cloning osm-testdata in the right place. 2bdbf7f cmake: Don't clone osm-testdata repository any more. 5bdbb28 Rename test/osm-testdata to test/data-tests. 0ad0020 cmake: build data tests only by default on Dev builds 2cefa50 Add some benchmarks. 3fdb6e1 Remove outdated make_osmium_project.sh script. aca58ac Fix cmake config: overwriting of variables. 152e318 Fix compilation of examples that need wingetopt on Windows. ae17cd0 Remove superfluous warning on Windows for header test builds. f88d4c2 Switch to /W3 on Windows. 8edeba5 Remove duplicate copy constructor. cdb474e Cleanup examples cmake config ca9045a Cmake cleanup. 501eb61 Update to new version of catch unit test framework. 0646d6c Parenthesize expression to make test clearer and avoid warnings. dfdaeae Disable a warning that gdal throws on Windows. eacfe4f Explicitly test int !=0 to convert to bool. 
b3ba693 Cleanup of cmake config (mostly warning options Linux vs. Win) 3f5cb81 Appveyor: Also show advanced cmake variables. b544bd9 Never return valid but empty buffer from Reader.read() call. a34bb5d Declare var in a more local scope. b83e5f1 Merge pull request #66 from BergWerkGIS/master 04ef1b2 Last try for today, should solve geos test failures a749d6c hey AppVeyor: what's going on? 73131fb av: boost not found?? show directory tree 5e02886 av: should solve most "***Exception: Other" except for geos.dll related problem. should solve projection test fails. a7ae560 FlexReader now takes location handler as parameter. 6f1bd8d Disable non-existing Debian build in Makefile. 3d75178 Change warning level on MSVC. 3f0abc0 Appveyor: List cached variables from cmake. d1e1e9f Travis/Appveyor build in Dev and Release mode. Call ctest on Appveyor. 116bcc9 Fix default node location store. dedfe0e Make classes we submit to queue properly copyable. 8a432c9 Try the same ugly hack on OPLOutputBlock. 9c5b314 Try a hack to see whether it works on MSVC. 9592132 Give XMLOutputBlock a copy constructor. 991f91a Disable warning C4715: "not all control paths return a value" 340a4d7 Fix copy constructor. 7ceae00 Give OPLOutputBlock a copy constructor which basically does a move. e82951a Use rvalue as paremeter for queue submit function. 54a9cec Use explicit cast to double to silence warning. 7c5d04c Simplified use of ogr includes. 43fd388 Use appveyor supplied cmake. ce4311e cmake cleanup. 1108517 Disable warnings from MSVC in OGR headers. 2fe820c Harmonize signature of virtual member function dump_as_list(). 93c31b4 Parenthesize string in pragma message. 54ae1a0 Explicitly int to bool conversion to silence warning on MSVC. 4bde9b7 Formatting: Use spaces instead of tabs. 7224d84 cmake: Don't try to set C++ version with -std= with MSVC. 53fbcfd Remove trailing semikolon (which gcc doesn't like). 3b0c9ed Removed command that doesn't work on older cmakes. 
de6cc78 Merge pull request #62 from BergWerkGIS/master 9126530 Use less warning options on Windows. 6ec79fb Silence unknown pragma warning on Windows. b611589 Add static_cast to silence warning on windows. 18e54e9 remove call to build2.bat b0d9d28 try again with -DCMAKE_BUILD_TYPE=Release b495018 usage installed cmake, -DCMAKE_BUILD_TYPE=Dev 5bac95d Merge remote-tracking branch 'upstream/master' d41ea31 cmake: Do not use -Werror when using MSVC compiler 04cfe00 Provide convenience include file to include all index maps. 72fbaa7 Rename multimap indexes according to new schema. af49a27 Make registering index map types easier. 0a35701 av: remove debug echo calls 886c15f av: ditch -DCMAKE_BUILD_TYPE=Dev ad0a629 The great renaming of map index classes. 97b4d34 av: try again bf243b2 Move some hpp files into detail directory and fix include guards. 64e4841 av: try again calling cmake directly 1700789 Add way to dynamically choose node location index. da49b7f av: try again with build2.bat 4bf55f5 is appveyor working again? 5e63433 delete ws_32.lib patch. generate makefile instead of VS 1054016 Merge remote-tracking branch 'upstream/master' 8c198fc av: try even newer cmake 7cccb06 av: maybe newer cmake helps d04967a av: no existing env var was still there 80763c6 av: no build.bat, call cmake directly 3b58fc6 av: no cmds over spanning serveral lines? 33c09a0 CALL bat 34639c6 maybe double quotes? 72572de explicitly call VS2014 cmd prompt 6955e87 1st try on AppVeyor git-subtree-dir: third_party/libosmium git-subtree-split: 6522da53748e12379bbfaf70ad29ee7bd98ef02d
2015-02-05 09:13:48 -05:00
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
/*
About the .osm.pbf file format
This is an excerpt of <http://wiki.openstreetmap.org/wiki/PBF_Format>
The .osm.pbf format and its derived formats (.osh.pbf and .osc.pbf) are encoded
using Google's protobuf library for the low-level storage. They are constructed
by nesting data on two levels:
On the lower level the file is constructed using BlobHeaders and Blobs. A .osm.pbf
file contains multiple sequences of
1. a 4-byte header size, stored in network-byte-order
2. a BlobHeader of exactly this size
3. a Blob
The BlobHeader tells the reader about the type and size of the following Blob. The
Blob can contain data in raw or zlib-compressed form. After uncompressing the blob
it is treated differently depending on the type specified in the BlobHeader.
The contents of the Blob belong to the higher level. It contains either a HeaderBlock
(type="OSMHeader") or a PrimitiveBlock (type="OSMData"). The file needs to have
at least one HeaderBlock before the first PrimitiveBlock.
The HeaderBlock contains meta-information like the writing program or a bbox. It may
also contain multiple "required features" that describe what kinds of input a
reading program needs to handle in order to fully understand the file's contents.
The PrimitiveBlock can store multiple types of objects (i.e. 5 nodes, 2 ways and
1 relation). It contains one or more PrimitiveGroup which in turn contain multiple
nodes, ways or relations. A PrimitiveGroup should only contain one kind of object.
There's a special kind of "object type" called dense-nodes. It is used to store nodes
in a very dense format, avoiding message overheads and using delta-encoding for nearly
all ids.
All Strings are stored as indexes to rows in a StringTable. The StringTable contains
one row for each used string, so strings that are used multiple times need to be
stored only once. The StringTable is sorted by usage-count, so the most often used
string is stored at index 1.
A simple outline of a .osm.pbf file could look like this:
4-bytes header size
BlobHeader
Blob
HeaderBlock
4-bytes header size
BlobHeader
Blob
PrimitiveBlock
StringTable
PrimitiveGroup
5 nodes
PrimitiveGroup
2 ways
PrimitiveGroup
1 relation
More complete outlines of real .osm.pbf files can be created using the osmpbf-outline tool:
<https://github.com/MaZderMind/OSM-binary/tree/osmpbf-outline>
*/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <future>
#include <iostream>
#include <memory>
#include <ratio>
#include <string>
#include <thread>
#include <time.h>
#include <utility>
#include <osmium/handler.hpp>
#include <osmium/io/detail/output_format.hpp>
#include <osmium/io/detail/pbf.hpp> // IWYU pragma: export
#include <osmium/io/detail/pbf_stringtable.hpp>
#include <osmium/io/detail/zlib.hpp>
#include <osmium/io/file.hpp>
#include <osmium/io/file_format.hpp>
#include <osmium/io/header.hpp>
#include <osmium/memory/buffer.hpp>
#include <osmium/memory/collection.hpp>
#include <osmium/osm/box.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/object.hpp>
#include <osmium/osm/relation.hpp>
#include <osmium/osm/tag.hpp>
#include <osmium/osm/timestamp.hpp>
#include <osmium/osm/way.hpp>
#include <osmium/util/cast.hpp>
#include <osmium/visitor.hpp>
namespace osmium {
namespace io {
namespace detail {
/**
 * Serialize a protobuf message into a Blob, optionally apply compression
 * and return it together with a BlobHeader ready to be written to a file.
 *
 * This function is declared inline instead of living in an unnamed
 * namespace: inside a header an unnamed namespace gives every translation
 * unit its own distinct copy of the function, so the inline member
 * functions of PBFOutputFormat that call it would refer to a different
 * entity in each translation unit.
 *
 * @param type Type-string used in the BlobHeader.
 * @param msg Protobuf-message.
 * @param use_compression Should the output be compressed using zlib?
 * @returns The 4-byte BlobHeader size (converted with htonl) followed by
 *          the serialized BlobHeader followed by the serialized Blob.
 */
inline std::string serialize_blob(const std::string& type, const google::protobuf::MessageLite& msg, bool use_compression) {
    OSMPBF::Blob pbf_blob;

    {
        // NOTE(review): the bool results of the SerializeToString() calls
        // in this function are not checked.
        std::string content;
        msg.SerializeToString(&content);

        // raw_size is always the size of the uncompressed content
        pbf_blob.set_raw_size(static_cast_with_assert<::google::protobuf::int32>(content.size()));

        if (use_compression) {
            pbf_blob.set_zlib_data(osmium::io::detail::zlib_compress(content));
        } else {
            pbf_blob.set_raw(content);
        }
    }

    std::string blob_data;
    pbf_blob.SerializeToString(&blob_data);

    OSMPBF::BlobHeader pbf_blob_header;
    pbf_blob_header.set_type(type);
    pbf_blob_header.set_datasize(static_cast_with_assert<::google::protobuf::int32>(blob_data.size()));

    std::string blob_header_data;
    pbf_blob_header.SerializeToString(&blob_header_data);

    uint32_t sz = htonl(static_cast_with_assert<uint32_t>(blob_header_data.size()));

    // write to output: the 4-byte BlobHeader-Size followed by the BlobHeader followed by the Blob
    std::string output;
    output.reserve(sizeof(sz) + blob_header_data.size() + blob_data.size());
    output.append(reinterpret_cast<const char*>(&sz), sizeof(sz));
    output.append(blob_header_data);
    output.append(blob_data);

    return output;
}
class PBFOutputFormat : public osmium::io::detail::OutputFormat, public osmium::handler::Handler {
/**
 * Small helper for delta encoding: it remembers the value most recently
 * handed to update() and reports how far each new value is away from it.
 * The remembered value starts out as 0 and goes back to 0 on clear().
 */
template <typename T>
class Delta {

    T m_value;

public:

    Delta() :
        m_value(0) {
    }

    /// Forget the remembered value, i.e. set it back to 0.
    void clear() {
        m_value = 0;
    }

    /**
     * Remember new_value and return the difference between new_value
     * and the value that was remembered before this call.
     */
    T update(T new_value) {
        const T previous = m_value;
        m_value = new_value;
        return m_value - previous;
    }

}; // class Delta
/**
* Maximum number of items in a primitive block.
*
* The uncompressed length of a Blob *should* be less
* than 16 megabytes and *must* be less than 32 megabytes.
*
* A block may contain any number of entities, as long as
* the size limits for the surrounding blob are obeyed.
* However, for simplicity, the current Osmosis (0.38)
* as well as Osmium implementation always
* uses at most 8k entities in a block.
*/
static constexpr uint32_t max_block_contents = 8000;
/**
* The output buffer (block) will be filled to about
* 95% and then written to disk. This leaves more than
* enough space for the string table (which typically
* needs about 0.1 to 0.3% of the block size).
*/
static constexpr int64_t buffer_fill_percent = 95;
/**
* protobuf-struct of a HeaderBlock
*/
OSMPBF::HeaderBlock pbf_header_block;
/**
* protobuf-struct of a PrimitiveBlock
*/
OSMPBF::PrimitiveBlock pbf_primitive_block;
/**
* pointer to PrimitiveGroups inside the current PrimitiveBlock,
* used for writing nodes, ways or relations
*/
OSMPBF::PrimitiveGroup* pbf_nodes;
OSMPBF::PrimitiveGroup* pbf_ways;
OSMPBF::PrimitiveGroup* pbf_relations;
/**
* To flexibly handle multiple resolutions, the granularity, or
* resolution used for representing locations is adjustable in
* multiples of 1 nanodegree. The default scaling factor is 100
* nanodegrees, corresponding to about 1cm at the equator.
* This is the current resolution of the OSM database.
*/
int m_location_granularity;
/**
* The granularity used for representing timestamps is also adjustable in
* multiples of 1 millisecond. The default scaling factor is 1000
* milliseconds, which is the current resolution of the OSM database.
*/
int m_date_granularity;
/**
* should nodes be serialized into the dense format?
*
* nodes can be encoded in one of two ways: as a Node
* (m_use_dense_nodes = false) or in a special dense format.
* In the dense format, all information is stored 'column wise',
* as an array of ID's, array of latitudes, and array of
* longitudes. Each column is delta-encoded. This reduces
* header overheads and allows delta-coding to work very effectively.
*/
bool m_use_dense_nodes {true};
/**
* should the PBF blobs contain zlib compressed data?
*
* the zlib compression is optional, it's possible to store the
* blobs in raw format. Disabling the compression can improve the
* writing speed a little but the output will be 2x to 3x bigger.
*/
bool m_use_compression {true};
/**
* While the .osm.pbf-format is able to carry all meta information, it is
* also able to omit this information to reduce size.
*/
bool m_should_add_metadata {true};
/**
* Should the visible flag be added on objects?
*/
bool m_add_visible;
/**
* counter used to quickly check the number of objects stored inside
* the current PrimitiveBlock. When the counter reaches max_block_contents
* the PrimitiveBlock is serialized into a Blob and flushed to the file.
*
* this check is performed in check_block_contents_counter() which is
* called once for each object.
*/
uint16_t primitive_block_contents;
int primitive_block_size;
// StringTable management
StringTable string_table;
/**
* These variables are used to calculate the
* delta-encoding while storing dense-nodes. It holds the last seen values
* from which the difference is stored into the protobuf.
*/
Delta<int64_t> m_delta_id;
Delta<int64_t> m_delta_lat;
Delta<int64_t> m_delta_lon;
Delta<int64_t> m_delta_timestamp;
Delta<int64_t> m_delta_changeset;
Delta<int64_t> m_delta_uid;
Delta<::google::protobuf::int32> m_delta_user_sid;
bool debug;
/**
 * Report whether debug output for the given level is enabled.
 * The level argument is ignored; the answer is always false.
 */
bool has_debug_level(int /*level*/) {
    constexpr bool enabled = false;
    return enabled;
}
///// Blob writing /////
/**
* Before a PrimitiveBlock gets serialized, all interim StringTable-ids needs to be
* mapped to the associated real StringTable ids. This is done in this function.
*
* This function needs to know about the concrete structure of all item types to find
* all occurrences of string-ids.
*/
void map_string_ids() {
// test, if the node-block has been allocated
if (pbf_nodes) {
// iterate over all nodes, passing them to the map_common_string_ids function
for (int i=0, l=pbf_nodes->nodes_size(); i<l; ++i) {
map_common_string_ids(pbf_nodes->mutable_nodes(i));
}
// test, if the node-block has a densenodes structure
if (pbf_nodes->has_dense()) {
// get a pointer to the densenodes structure
OSMPBF::DenseNodes* dense = pbf_nodes->mutable_dense();
// in the densenodes structure keys and vals are encoded in an intermixed
// array, individual nodes are seperated by a value of 0 (0 in the StringTable
// is always unused). String-ids of 0 are thus kept alone.
for (int i=0, l=dense->keys_vals_size(); i<l; ++i) {
// map interim string-ids > 0 to real string ids
auto sid = dense->keys_vals(i);
if (sid > 0) {
dense->set_keys_vals(i, string_table.map_string_id(sid));
}
}
// test if the densenodes block has meta infos
if (dense->has_denseinfo()) {
// get a pointer to the denseinfo structure
OSMPBF::DenseInfo* denseinfo = dense->mutable_denseinfo();
// iterate over all username string-ids
for (int i=0, l=denseinfo->user_sid_size(); i<l; ++i) {
// map interim string-ids > 0 to real string ids
auto user_sid = string_table.map_string_id(denseinfo->user_sid(i));
// delta encode the string-id
denseinfo->set_user_sid(i, m_delta_user_sid.update(user_sid));
}
}
}
}
// test, if the ways-block has been allocated
if (pbf_ways) {
// iterate over all ways, passing them to the map_common_string_ids function
for (int i=0, l=pbf_ways->ways_size(); i<l; ++i) {
map_common_string_ids(pbf_ways->mutable_ways(i));
}
}
// test, if the relations-block has been allocated
if (pbf_relations) {
// iterate over all relations
for (int i=0, l=pbf_relations->relations_size(); i<l; ++i) {
// get a pointer to the relation
OSMPBF::Relation* relation = pbf_relations->mutable_relations(i);
// pass them to the map_common_string_ids function
map_common_string_ids(relation);
// iterate over all relation members, mapping the interim string-ids
// of the role to real string ids
for (int mi=0; mi < relation->roles_sid_size(); ++mi) {
relation->set_roles_sid(mi, string_table.map_string_id(relation->roles_sid(mi)));
}
}
}
}
/**
* a helper function used in map_string_ids to map common interim string-ids of the
* user name and all tags to real string ids.
*
* TPBFObject is either OSMPBF::Node, OSMPBF::Way or OSMPBF::Relation.
*/
template <class TPBFObject>
void map_common_string_ids(TPBFObject* in) {
// if the object has meta-info attached
if (in->has_info()) {
// map the interim-id of the user name to a real id
OSMPBF::Info* info = in->mutable_info();
info->set_user_sid(string_table.map_string_id(info->user_sid()));
}
// iterate over all tags and map the interim-ids of the key and the value to real ids
for (int i=0, l=in->keys_size(); i<l; ++i) {
in->set_keys(i, string_table.map_string_id(in->keys(i)));
in->set_vals(i, string_table.map_string_id(in->vals(i)));
}
}
///// MetaData helper /////
/**
 * Turn a longitude or latitude given as double into the integer
 * representation used in the block: the value is scaled by
 * OSMPBF::lonlat_resolution, divided by the location granularity
 * and rounded with std::round.
 */
int64_t lonlat2int(double lonlat) {
    const auto scaled = lonlat * OSMPBF::lonlat_resolution / location_granularity();
    return static_cast<int64_t>(std::round(scaled));
}
/**
 * Turn a timestamp into the integer representation used in the block:
 * the value is multiplied by 1000.0 divided by the date granularity
 * and rounded with std::round.
 */
int64_t timestamp2int(time_t timestamp) {
    const auto scaled = timestamp * (1000.0 / date_granularity());
    return static_cast<int64_t>(std::round(scaled));
}
/**
 * Helper for the write_*() functions: copies everything the object types
 * have in common (id, tags and, if enabled, the meta info) from an
 * osmium object to a pbf object. All strings are recorded in the interim
 * StringTable and only their interim ids are stored in the pbf object.
 *
 * TPBFObject is either OSMPBF::Node, OSMPBF::Way or OSMPBF::Relation.
 */
template <class TPBFObject>
void apply_common_info(const osmium::OSMObject& in, TPBFObject* out) {
    out->set_id(in.id());

    // one entry in keys and one in vals per tag, key recorded before value
    for (const auto& tag : in.tags()) {
        const auto key_sid = string_table.record_string(tag.key());
        out->add_keys(key_sid);
        const auto value_sid = string_table.record_string(tag.value());
        out->add_vals(value_sid);
    }

    // without metadata no info section is created at all
    if (!m_should_add_metadata) {
        return;
    }

    OSMPBF::Info* meta = out->mutable_info();

    // the visible flag is only written if m_add_visible is set
    if (m_add_visible) {
        meta->set_visible(in.visible());
    }

    meta->set_version(static_cast<::google::protobuf::int32>(in.version()));
    meta->set_timestamp(timestamp2int(in.timestamp()));
    meta->set_changeset(in.changeset());
    meta->set_uid(static_cast<::google::protobuf::int32>(in.uid()));
    meta->set_user_sid(string_table.record_string(in.user()));
}
///// High-Level Block writing /////
/**
 * Serialize the current pbf_header_block into an "OSMHeader" blob, hand
 * it to the output queue and clear pbf_header_block afterwards.
 */
void store_header_block() {
    if (debug && has_debug_level(1)) {
        std::cerr << "storing header block" << std::endl;
    }

    // the future is queued first, the value is supplied right after
    std::promise<std::string> blob_promise;
    m_output_queue.push(blob_promise.get_future());
    blob_promise.set_value(serialize_blob("OSMHeader", pbf_header_block, m_use_compression));

    pbf_header_block.Clear();
}
/**
 * Finish the current pbf_primitive_block and hand it to the output queue:
 * the granularities and the interim StringTable are stored in the block,
 * all interim string ids are mapped to real ones, and the block is
 * serialized into an "OSMData" blob. Afterwards the block, the
 * StringTable, the delta variables, the counters and the group pointers
 * are all reset so the next block starts from scratch.
 */
void store_primitive_block() {
    if (debug && has_debug_level(1)) {
        std::cerr << "storing primitive block with " << primitive_block_contents << " items" << std::endl;
    }

    // granularities used for the locations and timestamps in this block
    pbf_primitive_block.set_granularity(location_granularity());
    pbf_primitive_block.set_date_granularity(date_granularity());

    // the StringTable has to be stored before the ids can be mapped
    string_table.store_stringtable(pbf_primitive_block.mutable_stringtable());
    map_string_ids();

    // the future is queued first, the value is supplied right after
    std::promise<std::string> blob_promise;
    m_output_queue.push(blob_promise.get_future());
    blob_promise.set_value(serialize_blob("OSMData", pbf_primitive_block, m_use_compression));

    // start over with an empty block and an empty StringTable
    pbf_primitive_block.Clear();
    string_table.clear();

    // delta encoding starts from 0 again in the next block
    m_delta_id.clear();
    m_delta_lat.clear();
    m_delta_lon.clear();
    m_delta_timestamp.clear();
    m_delta_changeset.clear();
    m_delta_uid.clear();
    m_delta_user_sid.clear();

    // nothing is in the new block yet
    primitive_block_contents = 0;
    primitive_block_size = 0;

    // the groups belonged to the block that was just cleared
    pbf_nodes = nullptr;
    pbf_ways = nullptr;
    pbf_relations = nullptr;
}
/**
 * Decide whether the current block has to be flushed before another
 * object is added, and count that object.
 *
 * The block is stored if primitive_block_contents has reached
 * max_block_contents or, failing that, if primitive_block_size is larger
 * than buffer_fill_percent percent of OSMPBF::max_uncompressed_blob_size.
 * In every case primitive_block_contents is incremented at the end.
 */
void check_block_contents_counter() {
    if (primitive_block_contents >= max_block_contents) {
        store_primitive_block();
    } else {
        const auto size_limit = OSMPBF::max_uncompressed_blob_size * buffer_fill_percent / 100;
        if (primitive_block_size > size_limit) {
            if (debug && has_debug_level(1)) {
                const auto percent_used = static_cast<float>(primitive_block_size) / static_cast<float>(OSMPBF::max_uncompressed_blob_size) * 100.0;
                std::cerr << "storing primitive_block with only " << primitive_block_contents << " items, because its ByteSize (" << primitive_block_size << ") reached " <<
                    percent_used << "% of the maximum blob-size" << std::endl;
            }
            store_primitive_block();
        }
    }

    // count the object this call was made for
    ++primitive_block_contents;
}
///// Block content writing /////
/**
 * Add a node to the block as a regular (non-dense) Node.
 *
 * @param node The node to add.
 */
void write_node(const osmium::Node& node) {
    // add a node to the group
    OSMPBF::Node* new_node = pbf_nodes->add_nodes();

    // id, tags and (if enabled) meta info
    apply_common_info(node, new_node);

    // the coordinates are stored as integers in the block's granularity
    const int64_t lon = lonlat2int(node.location().lon_without_check());
    const int64_t lat = lonlat2int(node.location().lat_without_check());
    new_node->set_lon(lon);
    new_node->set_lat(lat);
}
/**
 * Add a node to the block using the DenseNodes structure.
 *
 * @param node The node to add.
 */
void write_dense_node(const osmium::Node& node) {
    // the DenseNodes section of the PrimitiveGroup
    OSMPBF::DenseNodes* dense_nodes = pbf_nodes->mutable_dense();

    // id and coordinates are all stored delta encoded
    dense_nodes->add_id(m_delta_id.update(node.id()));

    const int64_t lon = lonlat2int(node.location().lon_without_check());
    dense_nodes->add_lon(m_delta_lon.update(lon));

    const int64_t lat = lonlat2int(node.location().lat_without_check());
    dense_nodes->add_lat(m_delta_lat.update(lat));

    // Keys and values of all nodes go into one intermixed array in which
    // a 0 ends the tags of each node (0 in the StringTable is always
    // unused). For three nodes the array may look like this:
    //   3 5 2 1 0 0 8 5
    // The first node has two tags (3=>5 and 2=>1), the second node has
    // no tags and the third node has a single tag (8=>5).
    for (const auto& tag : node.tags()) {
        const auto key_sid = string_table.record_string(tag.key());
        dense_nodes->add_keys_vals(key_sid);
        const auto value_sid = string_table.record_string(tag.value());
        dense_nodes->add_keys_vals(value_sid);
    }
    dense_nodes->add_keys_vals(0);

    if (!m_should_add_metadata) {
        return;
    }

    // the DenseInfo section holds the meta info of all dense nodes
    OSMPBF::DenseInfo* dense_info = dense_nodes->mutable_denseinfo();

    dense_info->add_version(static_cast<::google::protobuf::int32>(node.version()));

    if (m_add_visible) {
        dense_info->add_visible(node.visible());
    }

    // timestamp, changeset and user id are stored delta encoded
    dense_info->add_timestamp(m_delta_timestamp.update(timestamp2int(node.timestamp())));
    dense_info->add_changeset(m_delta_changeset.update(node.changeset()));
    dense_info->add_uid(static_cast<::google::protobuf::int32>(m_delta_uid.update(node.uid())));

    // the user name is recorded in the interim StringTable; only its
    // interim id is stored here
    dense_info->add_user_sid(string_table.record_string(node.user()));
}
/**
 * Add a way to the block.
 *
 * @param way The way to add.
 */
void write_way(const osmium::Way& way) {
    OSMPBF::Way* new_way = pbf_ways->add_ways();

    // id, tags and (if enabled) meta info
    apply_common_info(way, new_way);

    // the node references are delta encoded, starting from 0 for each way
    Delta<int64_t> ref_delta;
    for (const auto& node_ref : way.nodes()) {
        const int64_t encoded_ref = ref_delta.update(node_ref.ref());
        new_way->add_refs(encoded_ref);
    }

    // add the size of this way to the size tracked for the block
    primitive_block_size += new_way->ByteSize();
}
/**
 * Add a relation to the block.
 *
 * @param relation The relation to add.
 */
void write_relation(const osmium::Relation& relation) {
    OSMPBF::Relation* new_relation = pbf_relations->add_relations();

    // id, tags and (if enabled) meta info
    apply_common_info(relation, new_relation);

    // the member ids are delta encoded, starting from 0 for each relation
    Delta<int64_t> member_delta;
    for (const auto& member : relation.members()) {
        // the role is recorded in the interim StringTable; only its
        // interim id is stored here
        const auto role_sid = string_table.record_string(member.role());
        new_relation->add_roles_sid(role_sid);

        const int64_t encoded_ref = member_delta.update(member.ref());
        new_relation->add_memids(encoded_ref);

        // the member type is translated to the OSMPBF enum
        new_relation->add_types(item_type_to_osmpbf_membertype(member.type()));
    }

    // add the size of this relation to the size tracked for the block
    primitive_block_size += new_relation->ByteSize();
}
// Objects of this class can't be copied: both the copy constructor and
// the copy assignment operator are deleted.
PBFOutputFormat(const PBFOutputFormat&) = delete;
PBFOutputFormat& operator=(const PBFOutputFormat&) = delete;
public:
/**
 * Create PBFOutputFormat object from File.
 *
 * The granularities start out with the values a freshly constructed
 * PrimitiveBlock reports. Three settings are read from the file:
 * "pbf_dense_nodes" ("false" turns dense nodes off), "pbf_compression"
 * ("none" or "false" turns compression off) and "pbf_add_metadata"
 * ("false" turns metadata off). The visible flag is written if the file
 * has multiple object versions.
 *
 * @param file File description the settings are read from.
 * @param output_queue Queue passed on to the OutputFormat base class.
 */
explicit PBFOutputFormat(const osmium::io::File& file, data_queue_type& output_queue) :
    OutputFormat(file, output_queue),
    pbf_header_block(),
    pbf_primitive_block(),
    pbf_nodes(nullptr),
    pbf_ways(nullptr),
    pbf_relations(nullptr),
    m_location_granularity(pbf_primitive_block.granularity()),
    m_date_granularity(pbf_primitive_block.date_granularity()),
    m_add_visible(file.has_multiple_object_versions()),
    primitive_block_contents(0),
    primitive_block_size(0),
    string_table(),
    m_delta_id(),
    m_delta_lat(),
    m_delta_lon(),
    m_delta_timestamp(),
    m_delta_changeset(),
    m_delta_uid(),
    m_delta_user_sid(),
    debug(true) {
    GOOGLE_PROTOBUF_VERIFY_VERSION;

    const auto dense_nodes_setting = file.get("pbf_dense_nodes");
    if (dense_nodes_setting == "false") {
        m_use_dense_nodes = false;
    }

    const auto compression_setting = file.get("pbf_compression");
    if (compression_setting == "none" || compression_setting == "false") {
        m_use_compression = false;
    }

    const auto metadata_setting = file.get("pbf_add_metadata");
    if (metadata_setting == "false") {
        m_should_add_metadata = false;
    }
}
/**
 * Apply this object as handler to every item in the buffer.
 */
void write_buffer(osmium::memory::Buffer&& buffer) override final {
    auto first = buffer.cbegin();
    auto last = buffer.cend();
    osmium::apply(first, last, *this);
}
/**
* getter to access the granularity
*/
int location_granularity() const {
return m_location_granularity;
}
/**
 * Set the granularity used for locations.
 *
 * @param g The new granularity.
 * @returns Reference to this object, so calls can be chained.
 */
PBFOutputFormat& location_granularity(int g) {
    this->m_location_granularity = g;
    return *this;
}
/**
* getter to access the date_granularity
*/
int date_granularity() const {
return m_date_granularity;
}
/**
 * Set the granularity used for timestamps.
 *
 * @param g The new granularity.
 * @returns Reference to this object, so calls can be chained.
 */
PBFOutputFormat& date_granularity(int g) {
    this->m_date_granularity = g;
    return *this;
}
/**
 * Initialize the writing process.
 *
 * This fills the header block with the required features, the writing
 * program, the bounding box (if the header has any boxes) and the
 * osmosis replication settings (if they are set in the header) and then
 * stores the header block.
 *
 * @param header Header the generator, the boxes and the osmosis
 *               replication settings are taken from.
 */
void write_header(const osmium::io::Header& header) override final {
    // add the schema version as required feature to the HeaderBlock
    pbf_header_block.add_required_features("OsmSchema-V0.6");

    // when the densenodes-feature is used, add DenseNodes as required feature
    if (m_use_dense_nodes) {
        pbf_header_block.add_required_features("DenseNodes");
    }

    // when the resulting file will carry history information, add
    // HistoricalInformation as required feature
    if (m_file.has_multiple_object_versions()) {
        pbf_header_block.add_required_features("HistoricalInformation");
    }

    // set the writing program
    pbf_header_block.set_writingprogram(header.get("generator"));

    if (!header.boxes().empty()) {
        OSMPBF::HeaderBBox* pbf_bbox = pbf_header_block.mutable_bbox();
        osmium::Box box = header.joined_boxes();

        // Round to the nearest integer instead of truncating: the product
        // of a double coordinate and the resolution can end up a tiny bit
        // below the intended integer, and truncation would then lose a
        // whole unit. This matches the rounding done in lonlat2int().
        const auto to_bbox_value = [](double degrees) {
            return static_cast<::google::protobuf::int64>(std::round(degrees * OSMPBF::lonlat_resolution));
        };

        pbf_bbox->set_left(to_bbox_value(box.bottom_left().lon()));
        pbf_bbox->set_bottom(to_bbox_value(box.bottom_left().lat()));
        pbf_bbox->set_right(to_bbox_value(box.top_right().lon()));
        pbf_bbox->set_top(to_bbox_value(box.top_right().lat()));
    }

    // the osmosis replication settings are only written if they are set
    std::string osmosis_replication_timestamp = header.get("osmosis_replication_timestamp");
    if (!osmosis_replication_timestamp.empty()) {
        osmium::Timestamp ts(osmosis_replication_timestamp.c_str());
        pbf_header_block.set_osmosis_replication_timestamp(ts);
    }

    std::string osmosis_replication_sequence_number = header.get("osmosis_replication_sequence_number");
    if (!osmosis_replication_sequence_number.empty()) {
        pbf_header_block.set_osmosis_replication_sequence_number(std::atoll(osmosis_replication_sequence_number.c_str()));
    }

    std::string osmosis_replication_base_url = header.get("osmosis_replication_base_url");
    if (!osmosis_replication_base_url.empty()) {
        pbf_header_block.set_osmosis_replication_base_url(osmosis_replication_base_url);
    }

    store_header_block();
}
/**
* Add a node to the pbf.
*
* A call to this method won't write the node to the file directly but
* caches it for later bulk-writing. Calling close() ensures that everything
* gets written and every file pointer is closed.
*/
void node(const osmium::Node& node) {
    // The contents counter is checked first: this may flush the cached
    // objects if the limit is reached, and it also increases the counter.
    check_block_contents_counter();

    if (debug && has_debug_level(2)) {
        std::cerr << "node " << node.id() << " v" << node.version() << std::endl;
    }

    // Create the PrimitiveGroup for nodes on first use and remember it.
    if (pbf_nodes == nullptr) {
        pbf_nodes = pbf_primitive_block.add_primitivegroup();
    }

    // Plain or dense encoding, depending on configuration.
    if (!m_use_dense_nodes) {
        write_node(node);
        return;
    }
    write_dense_node(node);
}
/**
* Add a way to the pbf.
*
* A call to this method won't write the way to the file directly but
* caches it for later bulk-writing. Calling close() ensures that everything
* gets written and every file pointer is closed.
*/
void way(const osmium::Way& way) {
    // The contents counter is checked first: this may flush the cached
    // objects if the limit is reached, and it also increases the counter.
    check_block_contents_counter();

    // Create the PrimitiveGroup for ways on first use and remember it.
    if (pbf_ways == nullptr) {
        pbf_ways = pbf_primitive_block.add_primitivegroup();
    }

    write_way(way);
}
/**
* Add a relation to the pbf.
*
* A call to this method won't write the relation to the file directly but
* caches it for later bulk-writing. Calling close() ensures that everything
* gets written and every file pointer is closed.
*/
void relation(const osmium::Relation& relation) {
    // The contents counter is checked first: this may flush the cached
    // objects if the limit is reached, and it also increases the counter.
    check_block_contents_counter();

    // Create the PrimitiveGroup for relations on first use and remember it.
    if (pbf_relations == nullptr) {
        pbf_relations = pbf_primitive_block.add_primitivegroup();
    }

    write_relation(relation);
}
/**
* Finalize the writing process, flush any open primitive blocks to the file and
* close the file.
*/
void close() override final {
    if (debug && has_debug_level(1)) {
        std::cerr << "finishing" << std::endl;
    }

    // Store the current primitive block if it holds any elements.
    const bool has_pending_elements = primitive_block_contents > 0;
    if (has_pending_elements) {
        store_primitive_block();
    }

    // Queue one final future and fulfil it with an empty string.
    // NOTE(review): presumably the queue consumer treats the empty string
    // as the end-of-data marker -- confirm against the reader of the queue.
    std::promise<std::string> end_marker;
    m_output_queue.push(end_marker.get_future());
    end_marker.set_value(std::string());
}
}; // class PBFOutputFormat
namespace {
// Register a creator function for the PBF file format with the
// OutputFormatFactory singleton. The call is the initializer of a
// namespace-scope constant, so it runs during static initialization;
// the bool only stores the result of register_output_format().
// The creator allocates a PBFOutputFormat with new and returns the raw
// pointer.
// NOTE(review): ownership of that pointer presumably passes to the
// caller of the factory -- confirm in OutputFormatFactory.
const bool registered_pbf_output = osmium::io::detail::OutputFormatFactory::instance().register_output_format(osmium::io::file_format::pbf,
[](const osmium::io::File& file, data_queue_type& output_queue) {
return new osmium::io::detail::PBFOutputFormat(file, output_queue);
});
} // anonymous namespace
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_OUTPUT_FORMAT_HPP