435 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			435 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#ifndef OSMIUM_INDEX_ID_SET_HPP
 | 
						|
#define OSMIUM_INDEX_ID_SET_HPP
 | 
						|
 | 
						|
/*

This file is part of Osmium (http://osmcode.org/libosmium).

Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*/
 | 
						|
 | 
						|
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <memory>
#include <type_traits>
#include <unordered_set>
#include <vector>

#include <osmium/osm/item_type.hpp>
#include <osmium/osm/types.hpp>
 | 
						|
 | 
						|
namespace osmium {
 | 
						|
 | 
						|
    namespace index {
 | 
						|
 | 
						|
        /**
         * Abstract interface common to all Id set implementations. It can
         * not be instantiated itself; use one of the implementations
         * provided.
         *
         * @tparam T Type of the Ids stored in the set.
         */
        template <typename T>
        class IdSet {

        public:

            /// Virtual, so implementations can be destroyed through a
            /// pointer or reference to this interface.
            virtual ~IdSet() = default;

            /**
             * Add the given Id to the set.
             *
             * @param id The Id to add.
             */
            virtual void set(T id) = 0;

            /**
             * Is the Id in the set?
             *
             * @param id The Id to look for.
             * @returns true if the Id is in the set, false otherwise.
             */
            virtual bool get(T id) const noexcept = 0;

            /**
             * Is the set empty?
             *
             * @returns true if the set contains no Ids.
             */
            virtual bool empty() const = 0;

            /**
             * Clear the set, i.e. remove all Ids from it.
             */
            virtual void clear() = 0;

        }; // class IdSet
 | 
						|
 | 
						|
        // Forward declaration. The iterator below needs to name the set
        // type it walks over before that type is defined.
        template <typename T>
        class IdSetDense;

        /**
         * Const_iterator for iterating over a IdSetDense. The Ids in the
         * set are visited in ascending order.
         *
         * @tparam T Unsigned integer type of at least 32 bits for the Ids.
         */
        template <typename T>
        class IdSetDenseIterator {

            static_assert(std::is_unsigned<T>::value, "Needs unsigned type");
            static_assert(sizeof(T) >= 4, "Needs at least 32bit type");

            /// The set being iterated over.
            const IdSetDense<T>* m_set;

            /// Current position; an Id in the set or equal to m_last.
            T m_value;

            /// End marker. The iterator never moves past this value.
            T m_last;

            /**
             * Move m_value forward until it is either an Id contained in
             * the set or equal to m_last. Does nothing if that is already
             * the case.
             */
            void next() noexcept {
                while (m_value != m_last && !m_set->get(m_value)) {
                    const T cid = IdSetDense<T>::chunk_id(m_value);
                    assert(cid < m_set->m_data.size());
                    if (!m_set->m_data[cid]) {
                        // This chunk was never allocated, so it can not
                        // contain any Ids. Continue at the first Id of
                        // the following chunk.
                        m_value = (cid + 1) << (IdSetDense<T>::chunk_bits + 3);
                    } else {
                        const auto slot = m_set->m_data[cid][IdSetDense<T>::offset(m_value)];
                        if (slot == 0) {
                            // None of the 8 Ids stored in this byte are
                            // set. Continue at the first Id of the next
                            // byte. The mask is built in type T so that
                            // no sign extension is involved.
                            m_value = (m_value + 8) & ~static_cast<T>(0x7);
                        } else {
                            ++m_value;
                        }
                    }
                }
            }

        public:

            using iterator_category = std::forward_iterator_tag;
            using value_type        = T;
            // Without difference_type std::iterator_traits has no members
            // for this class and standard algorithms can not use it.
            using difference_type   = std::ptrdiff_t;
            using pointer           = value_type*;
            using reference         = value_type&;

            /**
             * Create an iterator and position it on the first Id in the
             * set that is greater than or equal to value (or on last if
             * there is none).
             *
             * @param set The set to iterate over.
             * @param value The Id at which to start looking.
             * @param last The end marker. The iterator compares equal to
             *             the end iterator once it reaches this value.
             */
            IdSetDenseIterator(const IdSetDense<T>* set, T value, T last) noexcept :
                m_set(set),
                m_value(value),
                m_last(last) {
                next();
            }

            /// Advance to the next Id in the set. Does nothing if the
            /// iterator is already at the end.
            IdSetDenseIterator<T>& operator++() noexcept {
                if (m_value != m_last) {
                    ++m_value;
                    next();
                }
                return *this;
            }

            /// Advance to the next Id in the set and return a copy of the
            /// iterator as it was before.
            IdSetDenseIterator<T> operator++(int) noexcept {
                IdSetDenseIterator<T> tmp(*this);
                operator++();
                return tmp;
            }

            /// Iterators are equal if they refer to the same set and are
            /// at the same position.
            bool operator==(const IdSetDenseIterator<T>& rhs) const noexcept {
                return m_set == rhs.m_set && m_value == rhs.m_value;
            }

            bool operator!=(const IdSetDenseIterator<T>& rhs) const noexcept {
                return ! (*this == rhs);
            }

            /// The Id the iterator currently points to. Must not be called
            /// on an iterator that is at the end.
            T operator*() const noexcept {
                assert(m_value < m_last);
                return m_value;
            }

        }; // class IdSetDenseIterator
 | 
						|
 | 
						|
        /**
 | 
						|
         * A set of Ids of the given type. Internal storage is in chunks of
 | 
						|
         * arrays used as bit fields. Internally those chunks will be allocated
 | 
						|
         * as needed, so it works relatively efficiently with both smaller
 | 
						|
         * and larger Id sets. If it is not used, no memory is allocated at
 | 
						|
         * all.
 | 
						|
         */
 | 
						|
        template <typename T>
 | 
						|
        class IdSetDense : public IdSet<T> {
 | 
						|
 | 
						|
            static_assert(std::is_unsigned<T>::value, "Needs unsigned type");
 | 
						|
            static_assert(sizeof(T) >= 4, "Needs at least 32bit type");
 | 
						|
 | 
						|
            friend class IdSetDenseIterator<T>;
 | 
						|
 | 
						|
            // This value is a compromise. For node Ids it could be bigger
 | 
						|
            // which would mean less (but larger) memory allocations. For
 | 
						|
            // relations Ids it could be smaller, because they would all fit
 | 
						|
            // into a smaller allocation.
 | 
						|
            constexpr static const size_t chunk_bits = 22;
 | 
						|
            constexpr static const size_t chunk_size = 1 << chunk_bits;
 | 
						|
 | 
						|
            std::vector<std::unique_ptr<unsigned char[]>> m_data;
 | 
						|
            T m_size = 0;
 | 
						|
 | 
						|
            static size_t chunk_id(T id) noexcept {
 | 
						|
                return id >> (chunk_bits + 3);
 | 
						|
            }
 | 
						|
 | 
						|
            static size_t offset(T id) noexcept {
 | 
						|
                return (id >> 3) & ((1 << chunk_bits) - 1);
 | 
						|
            }
 | 
						|
 | 
						|
            static unsigned char bitmask(T id) noexcept {
 | 
						|
                return 1 << (id & 0x7);
 | 
						|
            }
 | 
						|
 | 
						|
            T last() const noexcept {
 | 
						|
                return static_cast<T>(m_data.size()) * chunk_size * 8;
 | 
						|
            }
 | 
						|
 | 
						|
            unsigned char& get_element(T id) {
 | 
						|
                const auto cid = chunk_id(id);
 | 
						|
                if (cid >= m_data.size()) {
 | 
						|
                    m_data.resize(cid + 1);
 | 
						|
                }
 | 
						|
 | 
						|
                auto& chunk = m_data[cid];
 | 
						|
                if (!chunk) {
 | 
						|
                    chunk.reset(new unsigned char[chunk_size]);
 | 
						|
                    ::memset(chunk.get(), 0, chunk_size);
 | 
						|
                }
 | 
						|
 | 
						|
                return chunk[offset(id)];
 | 
						|
            }
 | 
						|
 | 
						|
        public:
 | 
						|
 | 
						|
            using const_iterator = IdSetDenseIterator<T>;
 | 
						|
 | 
						|
            IdSetDense() = default;
 | 
						|
 | 
						|
            /**
 | 
						|
             * Add the Id to the set if it is not already in there.
 | 
						|
             *
 | 
						|
             * @param id The Id to set.
 | 
						|
             * @returns true if the Id was added, false if it was already set.
 | 
						|
             */
 | 
						|
            bool check_and_set(T id) {
 | 
						|
                auto& element = get_element(id);
 | 
						|
 | 
						|
                if ((element & bitmask(id)) == 0) {
 | 
						|
                    element |= bitmask(id);
 | 
						|
                    ++m_size;
 | 
						|
                    return true;
 | 
						|
                }
 | 
						|
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Add the given Id to the set.
 | 
						|
             *
 | 
						|
             * @param id The Id to set.
 | 
						|
             */
 | 
						|
            void set(T id) override final {
 | 
						|
                (void)check_and_set(id);
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Remove the given Id from the set.
 | 
						|
             *
 | 
						|
             * @param id The Id to set.
 | 
						|
             */
 | 
						|
            void unset(T id) {
 | 
						|
                auto& element = get_element(id);
 | 
						|
 | 
						|
                if ((element & bitmask(id)) != 0) {
 | 
						|
                    element &= ~bitmask(id);
 | 
						|
                    --m_size;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Is the Id in the set?
 | 
						|
             *
 | 
						|
             * @param id The Id to check.
 | 
						|
             */
 | 
						|
            bool get(T id) const noexcept override final {
 | 
						|
                if (chunk_id(id) >= m_data.size()) {
 | 
						|
                    return false;
 | 
						|
                }
 | 
						|
                auto* r = m_data[chunk_id(id)].get();
 | 
						|
                if (!r) {
 | 
						|
                    return false;
 | 
						|
                }
 | 
						|
                return (r[offset(id)] & bitmask(id)) != 0;
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Is the set empty?
 | 
						|
             */
 | 
						|
            bool empty() const noexcept override final {
 | 
						|
                return m_size == 0;
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * The number of Ids stored in the set.
 | 
						|
             */
 | 
						|
            T size() const noexcept {
 | 
						|
                return m_size;
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Clear the set.
 | 
						|
             */
 | 
						|
            void clear() override final {
 | 
						|
                m_data.clear();
 | 
						|
                m_size = 0;
 | 
						|
            }
 | 
						|
 | 
						|
            IdSetDenseIterator<T> begin() const {
 | 
						|
                return IdSetDenseIterator<T>{this, 0, last()};
 | 
						|
            }
 | 
						|
 | 
						|
            IdSetDenseIterator<T> end() const {
 | 
						|
                return IdSetDenseIterator<T>{this, last(), last()};
 | 
						|
            }
 | 
						|
 | 
						|
        }; // class IdSetDense
 | 
						|
 | 
						|
        /**
 | 
						|
         * IdSet implementation for small Id sets. It writes the Ids
 | 
						|
         * into a vector and uses linear search.
 | 
						|
         */
 | 
						|
        template <typename T>
 | 
						|
        class IdSetSmall : public IdSet<T> {
 | 
						|
 | 
						|
            std::vector<T> m_data;
 | 
						|
 | 
						|
        public:
 | 
						|
 | 
						|
            /**
 | 
						|
             * Add the given Id to the set.
 | 
						|
             */
 | 
						|
            void set(T id) override final {
 | 
						|
                m_data.push_back(id);
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Is the Id in the set? Uses linear search.
 | 
						|
             *
 | 
						|
             * @param id The Id to check.
 | 
						|
             */
 | 
						|
            bool get(T id) const noexcept override final {
 | 
						|
                const auto it = std::find(m_data.cbegin(), m_data.cend(), id);
 | 
						|
                return it != m_data.cend();
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Is the Id in the set? Uses a binary search. For larger sets
 | 
						|
             * this might be more efficient than calling get(), the set
 | 
						|
             * must be sorted.
 | 
						|
             *
 | 
						|
             * @param id The Id to check.
 | 
						|
             * @pre You must have called sort_unique() before calling this
 | 
						|
             *      or be sure there are no duplicates and the Ids have been
 | 
						|
             *      set in order.
 | 
						|
             */
 | 
						|
            bool get_binary_search(T id) const noexcept {
 | 
						|
                return std::binary_search(m_data.cbegin(), m_data.cend(), id);
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Is the set empty?
 | 
						|
             */
 | 
						|
            bool empty() const noexcept override final {
 | 
						|
                return m_data.empty();
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Clear the set.
 | 
						|
             */
 | 
						|
            void clear() override final {
 | 
						|
                m_data.clear();
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * Sort the internal vector and remove any duplicates. Call this
 | 
						|
             * before using size(), get_binary_search() or using an iterator.
 | 
						|
             */
 | 
						|
            void sort_unique() {
 | 
						|
                std::sort(m_data.begin(), m_data.end());
 | 
						|
                const auto last = std::unique(m_data.begin(), m_data.end());
 | 
						|
                m_data.erase(last, m_data.end());
 | 
						|
 | 
						|
            }
 | 
						|
 | 
						|
            /**
 | 
						|
             * The number of Ids stored in the set.
 | 
						|
             *
 | 
						|
             * @pre You must have called sort_unique() before calling this
 | 
						|
             *      or be sure there are no duplicates.
 | 
						|
             */
 | 
						|
            size_t size() const noexcept {
 | 
						|
                return m_data.size();
 | 
						|
            }
 | 
						|
 | 
						|
            /// Iterator type. There is no non-const iterator.
 | 
						|
            using const_iterator = typename std::vector<T>::const_iterator;
 | 
						|
 | 
						|
            const_iterator begin() const noexcept {
 | 
						|
                return m_data.cbegin();
 | 
						|
            }
 | 
						|
 | 
						|
            const_iterator end() const noexcept {
 | 
						|
                return m_data.cend();
 | 
						|
            }
 | 
						|
 | 
						|
            const_iterator cbegin() const noexcept {
 | 
						|
                return m_data.cbegin();
 | 
						|
            }
 | 
						|
 | 
						|
            const_iterator cend() const noexcept {
 | 
						|
                return m_data.cend();
 | 
						|
            }
 | 
						|
 | 
						|
        }; // class IdSetSmall
 | 
						|
 | 
						|
        template <template<typename> class IdSetType>
 | 
						|
        class NWRIdSet {
 | 
						|
 | 
						|
            using id_set_type = IdSetType<osmium::unsigned_object_id_type>;
 | 
						|
 | 
						|
            id_set_type m_sets[3];
 | 
						|
 | 
						|
        public:
 | 
						|
 | 
						|
            id_set_type& operator()(osmium::item_type type) noexcept {
 | 
						|
                return m_sets[osmium::item_type_to_nwr_index(type)];
 | 
						|
            }
 | 
						|
 | 
						|
            const id_set_type& operator()(osmium::item_type type) const noexcept {
 | 
						|
                return m_sets[osmium::item_type_to_nwr_index(type)];
 | 
						|
            }
 | 
						|
 | 
						|
        }; // class NWRIdSet
 | 
						|
 | 
						|
    } // namespace index
 | 
						|
 | 
						|
} // namespace osmium
 | 
						|
 | 
						|
#endif // OSMIUM_INDEX_ID_SET_HPP
 |