449 lines
14 KiB
C++
449 lines
14 KiB
C++
|
#ifndef OSMIUM_INDEX_ID_SET_HPP
|
||
|
#define OSMIUM_INDEX_ID_SET_HPP
|
||
|
|
||
|
/*
|
||
|
|
||
|
This file is part of Osmium (http://osmcode.org/libosmium).
|
||
|
|
||
|
Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).
|
||
|
|
||
|
Boost Software License - Version 1.0 - August 17th, 2003
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person or organization
|
||
|
obtaining a copy of the software and accompanying documentation covered by
|
||
|
this license (the "Software") to use, reproduce, display, distribute,
|
||
|
execute, and transmit the Software, and to prepare derivative works of the
|
||
|
Software, and to permit third-parties to whom the Software is furnished to
|
||
|
do so, all subject to the following:
|
||
|
|
||
|
The copyright notices in the Software and this entire statement, including
|
||
|
the above license grant, this restriction and the following disclaimer,
|
||
|
must be included in all copies of the Software, in whole or in part, and
|
||
|
all derivative works of the Software, unless such copies or derivative
|
||
|
works are solely in the form of machine-executable object code generated by
|
||
|
a source language processor.
|
||
|
|
||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
|
DEALINGS IN THE SOFTWARE.
|
||
|
|
||
|
*/
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <cassert>
|
||
|
#include <cstddef>
|
||
|
#include <cstring>
|
||
|
#include <iterator>
|
||
|
#include <memory>
|
||
|
#include <type_traits>
|
||
|
#include <vector>
|
||
|
|
||
|
#include <osmium/osm/item_type.hpp>
|
||
|
#include <osmium/osm/types.hpp>
|
||
|
|
||
|
namespace osmium {
|
||
|
|
||
|
namespace index {
|
||
|
|
||
|
/**
 * Abstract interface shared by all Id set implementations. It cannot be
 * instantiated itself; use one of the concrete implementations provided.
 *
 * @tparam T Type of the Ids kept in the set.
 */
template <typename T>
class IdSet {

public:

    /// Virtual, so implementations can be destroyed through this interface.
    virtual ~IdSet() = default;

    /**
     * Put the given Id into the set.
     *
     * @param id The Id to add.
     */
    virtual void set(T id) = 0;

    /**
     * Check whether the given Id is contained in the set.
     *
     * @param id The Id to look for.
     * @returns true if the Id is in the set, false otherwise.
     */
    virtual bool get(T id) const noexcept = 0;

    /**
     * Check whether the set contains no Ids at all.
     *
     * @returns true if there is nothing in the set.
     */
    virtual bool empty() const = 0;

    /**
     * Remove everything from the set.
     */
    virtual void clear() = 0;

    /**
     * Report an estimate of the amount of memory the set occupies.
     */
    virtual std::size_t used_memory() const noexcept = 0;

}; // class IdSet
|
||
|
|
||
|
template <typename T>
class IdSetDense;

/**
 * Const_iterator for iterating over an IdSetDense. It walks the Id range
 * [value, last) of the set it was created for and stops only on Ids that
 * are in the set. Dereferencing yields the Id by value.
 *
 * @tparam T Id type, must be unsigned and at least 32 bits wide.
 */
template <typename T>
class IdSetDenseIterator {

    static_assert(std::is_unsigned<T>::value, "Needs unsigned type");
    static_assert(sizeof(T) >= 4, "Needs at least 32bit type");

    const IdSetDense<T>* m_set; ///< Set being iterated over (not owned).
    T m_value;                  ///< Current position.
    T m_last;                   ///< One-past-the-end position.

    /**
     * Move m_value forward (if necessary) until it is either an Id that
     * is in the set or equal to m_last. Does nothing if m_value already
     * points to an Id in the set.
     */
    void next() noexcept {
        while (m_value != m_last && !m_set->get(m_value)) {
            const T cid = IdSetDense<T>::chunk_id(m_value);
            assert(cid < m_set->m_data.size());

            if (!m_set->m_data[cid]) {
                // Chunk was never allocated: jump to the first Id of
                // the following chunk.
                m_value = (cid + 1) << (IdSetDense<T>::chunk_bits + 3);
                continue;
            }

            const auto slot = m_set->m_data[cid][IdSetDense<T>::offset(m_value)];
            if (slot == 0) {
                // None of the 8 Ids sharing this byte are set: jump to
                // the first Id of the following byte. The mask is built
                // in type T so that it covers the full width of T.
                m_value += 8;
                m_value &= ~static_cast<T>(0x7);
            } else {
                ++m_value;
            }
        }
    }

public:

    using iterator_category = std::forward_iterator_tag;
    using value_type        = T;
    // Needed so that std::iterator_traits (and with it the standard
    // algorithms and container range constructors) accept this iterator.
    using difference_type   = std::ptrdiff_t;
    using pointer           = value_type*;
    using reference         = value_type&;

    /**
     * Create an iterator and position it on the first Id at or after
     * @p value that is in the set (or on @p last if there is none).
     *
     * @param set The set to iterate over.
     * @param value The Id at which to start looking.
     * @param last The one-past-the-end position.
     */
    IdSetDenseIterator(const IdSetDense<T>* set, T value, T last) noexcept :
        m_set(set),
        m_value(value),
        m_last(last) {
        next();
    }

    /**
     * Advance to the next Id in the set. Does nothing if the iterator
     * is already at the end position.
     */
    IdSetDenseIterator<T>& operator++() noexcept {
        if (m_value != m_last) {
            ++m_value;
            next();
        }
        return *this;
    }

    /// Post-increment: advance, but return a copy of the old position.
    IdSetDenseIterator<T> operator++(int) noexcept {
        IdSetDenseIterator<T> tmp{*this};
        operator++();
        return tmp;
    }

    /// Iterators are equal if they refer to the same set and position.
    bool operator==(const IdSetDenseIterator<T>& rhs) const noexcept {
        return m_set == rhs.m_set && m_value == rhs.m_value;
    }

    bool operator!=(const IdSetDenseIterator<T>& rhs) const noexcept {
        return ! (*this == rhs);
    }

    /**
     * Get the Id the iterator currently points to.
     *
     * @pre The iterator must not be at the end position.
     */
    T operator*() const noexcept {
        assert(m_value < m_last);
        return m_value;
    }

}; // class IdSetDenseIterator
|
||
|
|
||
|
/**
|
||
|
* A set of Ids of the given type. Internal storage is in chunks of
|
||
|
* arrays used as bit fields. Internally those chunks will be allocated
|
||
|
* as needed, so it works relatively efficiently with both smaller
|
||
|
* and larger Id sets. If it is not used, no memory is allocated at
|
||
|
* all.
|
||
|
*/
|
||
|
template <typename T>
|
||
|
class IdSetDense : public IdSet<T> {
|
||
|
|
||
|
static_assert(std::is_unsigned<T>::value, "Needs unsigned type");
|
||
|
static_assert(sizeof(T) >= 4, "Needs at least 32bit type");
|
||
|
|
||
|
friend class IdSetDenseIterator<T>;
|
||
|
|
||
|
// This value is a compromise. For node Ids it could be bigger
|
||
|
// which would mean less (but larger) memory allocations. For
|
||
|
// relations Ids it could be smaller, because they would all fit
|
||
|
// into a smaller allocation.
|
||
|
constexpr static const std::size_t chunk_bits = 22;
|
||
|
constexpr static const std::size_t chunk_size = 1 << chunk_bits;
|
||
|
|
||
|
std::vector<std::unique_ptr<unsigned char[]>> m_data;
|
||
|
T m_size = 0;
|
||
|
|
||
|
static std::size_t chunk_id(T id) noexcept {
|
||
|
return id >> (chunk_bits + 3);
|
||
|
}
|
||
|
|
||
|
static std::size_t offset(T id) noexcept {
|
||
|
return (id >> 3) & ((1 << chunk_bits) - 1);
|
||
|
}
|
||
|
|
||
|
static unsigned char bitmask(T id) noexcept {
|
||
|
return 1 << (id & 0x7);
|
||
|
}
|
||
|
|
||
|
T last() const noexcept {
|
||
|
return static_cast<T>(m_data.size()) * chunk_size * 8;
|
||
|
}
|
||
|
|
||
|
unsigned char& get_element(T id) {
|
||
|
const auto cid = chunk_id(id);
|
||
|
if (cid >= m_data.size()) {
|
||
|
m_data.resize(cid + 1);
|
||
|
}
|
||
|
|
||
|
auto& chunk = m_data[cid];
|
||
|
if (!chunk) {
|
||
|
chunk.reset(new unsigned char[chunk_size]);
|
||
|
::memset(chunk.get(), 0, chunk_size);
|
||
|
}
|
||
|
|
||
|
return chunk[offset(id)];
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
|
||
|
using const_iterator = IdSetDenseIterator<T>;
|
||
|
|
||
|
IdSetDense() = default;
|
||
|
|
||
|
/**
|
||
|
* Add the Id to the set if it is not already in there.
|
||
|
*
|
||
|
* @param id The Id to set.
|
||
|
* @returns true if the Id was added, false if it was already set.
|
||
|
*/
|
||
|
bool check_and_set(T id) {
|
||
|
auto& element = get_element(id);
|
||
|
|
||
|
if ((element & bitmask(id)) == 0) {
|
||
|
element |= bitmask(id);
|
||
|
++m_size;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Add the given Id to the set.
|
||
|
*
|
||
|
* @param id The Id to set.
|
||
|
*/
|
||
|
void set(T id) final {
|
||
|
(void)check_and_set(id);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Remove the given Id from the set.
|
||
|
*
|
||
|
* @param id The Id to set.
|
||
|
*/
|
||
|
void unset(T id) {
|
||
|
auto& element = get_element(id);
|
||
|
|
||
|
if ((element & bitmask(id)) != 0) {
|
||
|
element &= ~bitmask(id);
|
||
|
--m_size;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Is the Id in the set?
|
||
|
*
|
||
|
* @param id The Id to check.
|
||
|
*/
|
||
|
bool get(T id) const noexcept final {
|
||
|
if (chunk_id(id) >= m_data.size()) {
|
||
|
return false;
|
||
|
}
|
||
|
auto* r = m_data[chunk_id(id)].get();
|
||
|
if (!r) {
|
||
|
return false;
|
||
|
}
|
||
|
return (r[offset(id)] & bitmask(id)) != 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Is the set empty?
|
||
|
*/
|
||
|
bool empty() const noexcept final {
|
||
|
return m_size == 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* The number of Ids stored in the set.
|
||
|
*/
|
||
|
T size() const noexcept {
|
||
|
return m_size;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Clear the set.
|
||
|
*/
|
||
|
void clear() final {
|
||
|
m_data.clear();
|
||
|
m_size = 0;
|
||
|
}
|
||
|
|
||
|
std::size_t used_memory() const noexcept final {
|
||
|
return m_data.size() * chunk_size;
|
||
|
}
|
||
|
|
||
|
IdSetDenseIterator<T> begin() const {
|
||
|
return {this, 0, last()};
|
||
|
}
|
||
|
|
||
|
IdSetDenseIterator<T> end() const {
|
||
|
return {this, last(), last()};
|
||
|
}
|
||
|
|
||
|
}; // class IdSetDense
|
||
|
|
||
|
/**
|
||
|
* IdSet implementation for small Id sets. It writes the Ids
|
||
|
* into a vector and uses linear search.
|
||
|
*/
|
||
|
template <typename T>
|
||
|
class IdSetSmall : public IdSet<T> {
|
||
|
|
||
|
std::vector<T> m_data;
|
||
|
|
||
|
public:
|
||
|
|
||
|
/**
|
||
|
* Add the given Id to the set.
|
||
|
*/
|
||
|
void set(T id) final {
|
||
|
m_data.push_back(id);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Is the Id in the set? Uses linear search.
|
||
|
*
|
||
|
* @param id The Id to check.
|
||
|
*/
|
||
|
bool get(T id) const noexcept final {
|
||
|
const auto it = std::find(m_data.cbegin(), m_data.cend(), id);
|
||
|
return it != m_data.cend();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Is the Id in the set? Uses a binary search. For larger sets
|
||
|
* this might be more efficient than calling get(), the set
|
||
|
* must be sorted.
|
||
|
*
|
||
|
* @param id The Id to check.
|
||
|
* @pre You must have called sort_unique() before calling this
|
||
|
* or be sure there are no duplicates and the Ids have been
|
||
|
* set in order.
|
||
|
*/
|
||
|
bool get_binary_search(T id) const noexcept {
|
||
|
return std::binary_search(m_data.cbegin(), m_data.cend(), id);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Is the set empty?
|
||
|
*/
|
||
|
bool empty() const noexcept final {
|
||
|
return m_data.empty();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Clear the set.
|
||
|
*/
|
||
|
void clear() final {
|
||
|
m_data.clear();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sort the internal vector and remove any duplicates. Call this
|
||
|
* before using size(), get_binary_search() or using an iterator.
|
||
|
*/
|
||
|
void sort_unique() {
|
||
|
std::sort(m_data.begin(), m_data.end());
|
||
|
const auto last = std::unique(m_data.begin(), m_data.end());
|
||
|
m_data.erase(last, m_data.end());
|
||
|
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* The number of Ids stored in the set.
|
||
|
*
|
||
|
* @pre You must have called sort_unique() before calling this
|
||
|
* or be sure there are no duplicates.
|
||
|
*/
|
||
|
std::size_t size() const noexcept {
|
||
|
return m_data.size();
|
||
|
}
|
||
|
|
||
|
std::size_t used_memory() const noexcept final {
|
||
|
return m_data.capacity() * sizeof(T);
|
||
|
}
|
||
|
|
||
|
/// Iterator type. There is no non-const iterator.
|
||
|
using const_iterator = typename std::vector<T>::const_iterator;
|
||
|
|
||
|
const_iterator begin() const noexcept {
|
||
|
return m_data.cbegin();
|
||
|
}
|
||
|
|
||
|
const_iterator end() const noexcept {
|
||
|
return m_data.cend();
|
||
|
}
|
||
|
|
||
|
const_iterator cbegin() const noexcept {
|
||
|
return m_data.cbegin();
|
||
|
}
|
||
|
|
||
|
const_iterator cend() const noexcept {
|
||
|
return m_data.cend();
|
||
|
}
|
||
|
|
||
|
}; // class IdSetSmall
|
||
|
|
||
|
/// @deprecated Use nwr_array helper class instead.
|
||
|
template <template<typename> class IdSetType>
|
||
|
class NWRIdSet {
|
||
|
|
||
|
using id_set_type = IdSetType<osmium::unsigned_object_id_type>;
|
||
|
|
||
|
id_set_type m_sets[3];
|
||
|
|
||
|
public:
|
||
|
|
||
|
id_set_type& operator()(osmium::item_type type) noexcept {
|
||
|
return m_sets[osmium::item_type_to_nwr_index(type)];
|
||
|
}
|
||
|
|
||
|
const id_set_type& operator()(osmium::item_type type) const noexcept {
|
||
|
return m_sets[osmium::item_type_to_nwr_index(type)];
|
||
|
}
|
||
|
|
||
|
}; // class NWRIdSet
|
||
|
|
||
|
} // namespace index
|
||
|
|
||
|
} // namespace osmium
|
||
|
|
||
|
#endif // OSMIUM_INDEX_ID_SET_HPP
|