osrm-backend/third_party/osmium/io/detail/pbf_stringtable.hpp

205 lines
8.1 KiB
C++
Raw Normal View History

2014-08-26 10:03:44 -04:00
#ifndef OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP
#define OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013,2014 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
2014-10-29 16:27:48 -04:00
#include <cstdint>
2014-08-26 10:03:44 -04:00
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include <osmpbf/osmpbf.h>
2014-10-29 16:27:48 -04:00
#include <osmium/util/cast.hpp>
2014-08-26 10:03:44 -04:00
namespace osmium {
namespace io {
namespace detail {
/**
* StringTable management for PBF writer
*
* All strings are stored as indexes to rows in a StringTable. The StringTable contains
* one row for each used string, so strings that are used multiple times need to be
* stored only once. The StringTable is sorted by usage-count, so the most often used
* string is stored at index 1.
*/
class StringTable {
public:
/// type for string IDs (interim and final)
typedef uint16_t string_id_type;
private:
/**
* this is the struct used to build the StringTable. It is stored as
* the value-part in the strings-map.
*
* when a new string is added to the map, its count is set to 0 and
* the interim_id is set to the current size of the map. This interim_id
* is then stored into the pbf-objects.
*
* before the PrimitiveBlock is serialized, the map is sorted by count
* and stored into the pbf-StringTable. Afterwards the interim-ids are
* mapped to the "real" id in the StringTable.
*
* this way often used strings get lower ids in the StringTable. As the
* protobuf-serializer stores numbers in variable bit-lengths, lower
* IDs means less used space in the resulting file.
*/
struct string_info {
2014-10-29 16:27:48 -04:00
2014-08-26 10:03:44 -04:00
/// number of occurrences of this string
uint16_t count;
/// an intermediate-id
string_id_type interim_id;
2014-10-29 16:27:48 -04:00
}; // struct string_info
2014-08-26 10:03:44 -04:00
/**
* Interim StringTable, storing all strings that should be written to
* the StringTable once the block is written to disk.
*/
typedef std::map<std::string, string_info> string2string_info_type;
string2string_info_type m_strings;
/**
* This vector is used to map the interim IDs to real StringTable IDs after
* writing all strings to the StringTable.
*/
typedef std::vector<string_id_type> interim_id2id_type;
interim_id2id_type m_id2id_map;
size_t m_size = 0;
public:
StringTable() {
}
friend bool operator<(const string_info& lhs, const string_info& rhs) {
return lhs.count > rhs.count;
}
/**
* record a string in the interim StringTable if it's missing, otherwise just increase its counter,
* return the interim-id assigned to the string.
*/
string_id_type record_string(const std::string& string) {
string_info& info = m_strings[string];
if (info.interim_id == 0) {
++m_size;
2014-10-29 16:27:48 -04:00
info.interim_id = static_cast_with_assert<string_id_type>(m_size);
2014-08-26 10:03:44 -04:00
} else {
info.count++;
}
return info.interim_id;
}
/**
* Sort the interim StringTable and store it to the real protobuf StringTable.
* while storing to the real table, this function fills the id2id_map with
* pairs, mapping the interim-ids to final and real StringTable ids.
*
* Note that the m_strings table is a std::map and as such is sorted lexicographically.
* When the transformation into the sortedby multimap is done, it gets sorted by
* the count. The end result (at least with the glibc standard container/algorithm
* implementation) is that the string table is sorted first by reverse count (ie descending)
* and then by reverse lexicographic order.
*/
void store_stringtable(OSMPBF::StringTable* st) {
// add empty StringTable entry at index 0
// StringTable index 0 is reserved as delimiter in the densenodes key/value list
// this line also ensures that there's always a valid StringTable
st->add_s("");
std::multimap<string_info, std::string> sortedbycount;
m_id2id_map.resize(m_size+1);
std::transform(m_strings.begin(), m_strings.end(),
std::inserter(sortedbycount, sortedbycount.begin()),
[](const std::pair<std::string, string_info>& p) {
return std::pair<string_info, std::string>(p.second, p.first);
});
string_id_type n=0;
for (const auto& mapping : sortedbycount) {
// add the string of the current item to the pbf StringTable
st->add_s(mapping.second);
// store the mapping from the interim-id to the real id
m_id2id_map[mapping.first.interim_id] = ++n;
}
}
/**
* Map from an interim ID to a real string ID.
*/
string_id_type map_string_id(const string_id_type interim_id) const {
return m_id2id_map[interim_id];
}
2014-10-29 16:27:48 -04:00
template <typename T>
string_id_type map_string_id(const T interim_id) const {
return map_string_id(static_cast_with_assert<string_id_type>(interim_id));
}
2014-08-26 10:03:44 -04:00
/**
* Clear the stringtable, preparing for the next block.
*/
void clear() {
m_strings.clear();
m_id2id_map.clear();
m_size = 0;
}
}; // class StringTable
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP