From 6bd724fe247bc207f5a116ea6969b3621c673903 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Fri, 21 Apr 2017 09:23:49 +0000 Subject: [PATCH] Rewrite packed vector to also allow random access This fixes issue #3952. The new approach pre-computes masks for fast access. Since elements can potentially span multiple words we need masks and offsets for each upper and lower word. Due to a bug in the C++14 standard the mask computation is not recognized as constexpr, but would work on C++17. --- .../contiguous_internalmem_datafacade.hpp | 30 +- include/util/packed_vector.hpp | 575 +++++++++++++----- src/benchmarks/CMakeLists.txt | 14 + src/benchmarks/packed_vector.cpp | 81 +++ src/storage/storage.cpp | 19 +- unit_tests/util/packed_vector.cpp | 161 ++++- 6 files changed, 717 insertions(+), 163 deletions(-) create mode 100644 src/benchmarks/packed_vector.cpp diff --git a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp index f678c3965..9c5f2b93a 100644 --- a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp +++ b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp @@ -300,25 +300,21 @@ class ContiguousInternalMemoryDataFacadeBase : public BaseDataFacade new SharedGeospatialQuery(*m_static_rtree, m_coordinate_list, *this)); } - void InitializeNodeInformationPointers(storage::DataLayout &data_layout, char *memory_block) + void InitializeNodeInformationPointers(storage::DataLayout &layout, char *memory_ptr) { - const auto coordinate_list_ptr = data_layout.GetBlockPtr( - memory_block, storage::DataLayout::COORDINATE_LIST); + const auto coordinate_list_ptr = + layout.GetBlockPtr(memory_ptr, storage::DataLayout::COORDINATE_LIST); m_coordinate_list.reset(coordinate_list_ptr, - data_layout.num_entries[storage::DataLayout::COORDINATE_LIST]); + layout.num_entries[storage::DataLayout::COORDINATE_LIST]); - for (unsigned i = 0; i < m_coordinate_list.size(); ++i) - { - 
BOOST_ASSERT(GetCoordinateOfNode(i).IsValid()); - } - - const auto osmnodeid_list_ptr = data_layout.GetBlockPtr( - memory_block, storage::DataLayout::OSM_NODE_ID_LIST); - m_osmnodeid_list.reset(osmnodeid_list_ptr, - data_layout.num_entries[storage::DataLayout::OSM_NODE_ID_LIST]); - // We (ab)use the number of coordinates here because we know we have the same amount of ids - m_osmnodeid_list.set_number_of_entries( - data_layout.num_entries[storage::DataLayout::COORDINATE_LIST]); + const auto osmnodeid_ptr = layout.GetBlockPtr( + memory_ptr, storage::DataLayout::OSM_NODE_ID_LIST); + m_osmnodeid_list = extractor::PackedOSMIDsView( + util::vector_view( + osmnodeid_ptr, layout.num_entries[storage::DataLayout::OSM_NODE_ID_LIST]), + // We (ab)use the number of coordinates here because we know we have the same amount of + // ids + layout.num_entries[storage::DataLayout::COORDINATE_LIST]); } void InitializeEdgeBasedNodeDataInformationPointers(storage::DataLayout &layout, @@ -544,7 +540,7 @@ class ContiguousInternalMemoryDataFacadeBase : public BaseDataFacade OSMNodeID GetOSMNodeIDOfNode(const NodeID id) const override final { - return m_osmnodeid_list.at(id); + return m_osmnodeid_list[id]; } std::vector GetUncompressedForwardGeometry(const EdgeID id) const override final diff --git a/include/util/packed_vector.hpp b/include/util/packed_vector.hpp index 2542de7ee..c9cd836ba 100644 --- a/include/util/packed_vector.hpp +++ b/include/util/packed_vector.hpp @@ -1,12 +1,17 @@ #ifndef PACKED_VECTOR_HPP #define PACKED_VECTOR_HPP +#include "util/integer_range.hpp" #include "util/typedefs.hpp" #include "util/vector_view.hpp" #include "storage/io_fwd.hpp" #include "storage/shared_memory_ownership.hpp" +#include +#include + +#include #include #include @@ -31,137 +36,410 @@ inline void write(storage::io::FileWriter &writer, namespace detail { + +template +inline T get_lower_half_value(WordT word, + WordT mask, + std::uint8_t offset, + typename std::enable_if_t::value> * = 0) +{ + return 
static_cast((word & mask) >> offset); +} + +template +inline T +get_lower_half_value(WordT word, WordT mask, std::uint8_t offset, typename T::value_type * = 0) +{ + return T{static_cast((word & mask) >> offset)}; +} + +template +inline T get_upper_half_value(WordT word, + WordT mask, + std::uint8_t offset, + typename std::enable_if_t::value> * = 0) +{ + return static_cast((word & mask) << offset); +} + +template +inline T +get_upper_half_value(WordT word, WordT mask, std::uint8_t offset, typename T::value_type * = 0) +{ + static_assert(std::is_unsigned::value, "Only unsigned word types supported for now."); + return T{static_cast((word & mask) << offset)}; +} + +template +inline WordT set_lower_value(WordT word, WordT mask, std::uint8_t offset, T value) +{ + static_assert(std::is_unsigned::value, "Only unsigned word types supported for now."); + return (word & ~mask) | ((static_cast(value) << offset) & mask); +} + +template +inline WordT set_upper_value(WordT word, WordT mask, std::uint8_t offset, T value) +{ + static_assert(std::is_unsigned::value, "Only unsigned word types supported for now."); + return (word & ~mask) | ((static_cast(value) >> offset) & mask); +} + template class PackedVector { + using WordT = std::uint64_t; + // This fails for all strong typedef types // static_assert(std::is_integral::value, "T must be an integral type."); - static_assert(sizeof(T) <= sizeof(std::uint64_t), "Maximum size of type T is 8 bytes"); + static_assert(sizeof(T) <= sizeof(WordT), "Maximum size of type T is 8 bytes"); static_assert(Bits > 0, "Minimum number of bits is 0."); - static_assert(Bits <= sizeof(std::uint64_t) * CHAR_BIT, "Maximum number of bits is 64."); + static_assert(Bits <= sizeof(WordT) * CHAR_BIT, "Maximum number of bits is 64."); - static const constexpr std::size_t ELEMSIZE = sizeof(std::uint64_t) * CHAR_BIT; - static const constexpr std::size_t PACKSIZE = Bits * ELEMSIZE; + static constexpr std::size_t WORD_BITS = sizeof(WordT) * CHAR_BIT; + // number 
of elements per block, use the number of bits so we make sure + // we can devide the total number of bits by the element bis + public: + static constexpr std::size_t BLOCK_ELEMENTS = WORD_BITS; + + private: + // number of words per block + static constexpr std::size_t BLOCK_WORDS = (Bits * BLOCK_ELEMENTS) / WORD_BITS; + + // C++14 does not allow operator[] to be constexpr, this is fixed in C++17. + static /* constexpr */ std::array initialize_lower_mask() + { + std::array lower_mask{}; + + const WordT mask = (1ULL << Bits) - 1; + auto offset = 0; + for (auto element_index = 0u; element_index < BLOCK_ELEMENTS; element_index++) + { + auto local_offset = offset % WORD_BITS; + lower_mask[element_index] = mask << local_offset; + offset += Bits; + } + + return lower_mask; + } + + static /* constexpr */ std::array initialize_upper_mask() + { + std::array upper_mask{}; + + const WordT mask = (1ULL << Bits) - 1; + auto offset = 0; + for (auto element_index = 0u; element_index < BLOCK_ELEMENTS; element_index++) + { + auto local_offset = offset % WORD_BITS; + // check we sliced off bits + if (local_offset + Bits > WORD_BITS) + { + upper_mask[element_index] = mask >> (WORD_BITS - local_offset); + } + else + { + upper_mask[element_index] = 0; + } + offset += Bits; + } + + return upper_mask; + } + + static /* constexpr */ std::array initialize_lower_offset() + { + std::array lower_offset{}; + + auto offset = 0; + for (auto element_index = 0u; element_index < BLOCK_ELEMENTS; element_index++) + { + auto local_offset = offset % WORD_BITS; + lower_offset[element_index] = local_offset; + offset += Bits; + } + + return lower_offset; + } + + static /* constexpr */ std::array initialize_upper_offset() + { + std::array upper_offset{}; + + auto offset = 0; + for (auto element_index = 0u; element_index < BLOCK_ELEMENTS; element_index++) + { + auto local_offset = offset % WORD_BITS; + // check we sliced off bits + if (local_offset + Bits > WORD_BITS) + { + upper_offset[element_index] = 
WORD_BITS - local_offset; + } + else + { + upper_offset[element_index] = Bits; + } + offset += Bits; + } + + return upper_offset; + } + + static /* constexpr */ std::array initialize_word_offset() + { + std::array word_offset{}; + + auto offset = 0; + for (auto element_index = 0u; element_index < BLOCK_ELEMENTS; element_index++) + { + word_offset[element_index] = offset / WORD_BITS; + offset += Bits; + } + + return word_offset; + } + + // For now we need to call these on object creation + void initialize() + { + lower_mask = initialize_lower_mask(); + upper_mask = initialize_upper_mask(); + lower_offset = initialize_lower_offset(); + upper_offset = initialize_upper_offset(); + word_offset = initialize_word_offset(); + } + + // mask for the lower/upper word of a record + // TODO: With C++17 these could be constexpr + /* static constexpr */ std::array + lower_mask /* = initialize_lower_mask()*/; + /* static constexpr */ std::array + upper_mask /* = initialize_upper_mask()*/; + /* static constexpr */ std::array + lower_offset /* = initialize_lower_offset()*/; + /* static constexpr */ std::array + upper_offset /* = initialize_upper_offset()*/; + // in which word of the block is the element + /* static constexpr */ std::array word_offset = + initialize_word_offset(); + + struct InternalIndex + { + // index to the word that contains the lower + // part of the value + // note: upper_word == lower_word + 1 + std::size_t lower_word; + // index to the element of the block + std::uint8_t element; + + bool operator==(const InternalIndex &other) const + { + return std::tie(lower_word, element) == std::tie(other.lower_word, other.element); + } + }; public: using value_type = T; + using block_type = WordT; - /** - * Returns the size of the packed vector datastructure with `elements` packed elements (the size - * of - * its underlying uint64 vector) - */ - inline static std::size_t elements_to_blocks(std::size_t elements) + class internal_reference { - return 
std::ceil(static_cast(elements) * Bits / ELEMSIZE); + public: + internal_reference(PackedVector &container, const InternalIndex internal_index) + : container(container), internal_index(internal_index) + { + } + + internal_reference &operator=(const value_type value) + { + container.set_value(internal_index, value); + return *this; + } + + operator T() const { return container.get_value(internal_index); } + + bool operator==(const internal_reference &other) const + { + return &container == &other.container && internal_index == other.internal_index; + } + + friend std::ostream &operator<<(std::ostream &os, const internal_reference &rhs) + { + return os << static_cast(rhs); + } + + private: + PackedVector &container; + const InternalIndex internal_index; + }; + + template + class iterator_impl + : public boost::iterator_facade, + DataT, + boost::random_access_traversal_tag, + ReferenceT> + { + typedef boost::iterator_facade, + DataT, + boost::random_access_traversal_tag, + ReferenceT> + base_t; + + public: + typedef typename base_t::value_type value_type; + typedef typename base_t::difference_type difference_type; + typedef typename base_t::reference reference; + typedef std::random_access_iterator_tag iterator_category; + + explicit iterator_impl() + : container(nullptr), index(std::numeric_limits::max()) + { + } + explicit iterator_impl(ContainerT *container, const std::size_t index) + : container(container), index(index) + { + } + + private: + void increment() { ++index; } + void decrement() { --index; } + void advance(difference_type offset) { index += offset; } + bool equal(const iterator_impl &other) const { return index == other.index; } + auto dereference() const { return (*container)[index]; } + difference_type distance_to(const iterator_impl &other) const + { + return other.index - index; + } + + private: + ContainerT *container; + std::size_t index; + + friend class ::boost::iterator_core_access; + }; + + using iterator = iterator_impl; + using 
const_iterator = iterator_impl; + using reverse_iterator = boost::reverse_iterator; + + PackedVector(std::initializer_list list) + { + initialize(); + reserve(list.size()); + for (const auto value : list) + push_back(value); } - void push_back(T data) + PackedVector() { initialize(); }; + PackedVector(const PackedVector &) = default; + PackedVector(PackedVector &&) = default; + PackedVector &operator=(const PackedVector &) = default; + PackedVector &operator=(PackedVector &&) = default; + + PackedVector(std::size_t size) { - std::uint64_t node_id = static_cast(data); + initialize(); + resize(size); + } - // mask incoming values, just in case they are > bitsize - const std::uint64_t incoming_mask = static_cast(pow(2, Bits)) - 1; - node_id = node_id & incoming_mask; + PackedVector(std::size_t size, T initial_value) + { + initialize(); + resize(size); + fill(initial_value); + } - const std::size_t available = (PACKSIZE - Bits * num_elements) % ELEMSIZE; + PackedVector(util::ViewOrVector vec_, std::size_t num_elements) + : vec(std::move(vec_)), num_elements(num_elements) + { + initialize(); + } - if (available == 0) - { - // insert ID at the left side of this element - std::uint64_t at_left = node_id << (ELEMSIZE - Bits); + // forces the efficient read-only lookup + auto peek(const std::size_t index) const { return operator[](index); } - add_last_elem(at_left); - } - else if (available >= Bits) - { - // insert ID somewhere in the middle of this element; ID can be contained - // entirely within one element - const std::uint64_t shifted = node_id << (available - Bits); + auto operator[](const std::size_t index) const { return get_value(get_internal_index(index)); } - replace_last_elem(vec_back() | shifted); - } + auto operator[](const std::size_t index) + { + return internal_reference{*this, get_internal_index(index)}; + } + + auto at(std::size_t index) const + { + if (index < num_elements) + return operator[](index); else + throw std::out_of_range(std::to_string(index) 
+ " is bigger then container size " + + std::to_string(num_elements)); + } + + auto at(std::size_t index) + { + if (index < num_elements) + return operator[](index); + else + throw std::out_of_range(std::to_string(index) + " is bigger then container size " + + std::to_string(num_elements)); + } + + auto begin() { return iterator(this, 0); } + + auto end() { return iterator(this, num_elements); } + + auto begin() const { return const_iterator(this, 0); } + + auto end() const { return const_iterator(this, num_elements); } + + auto cbegin() const { return const_iterator(this, 0); } + + auto cend() const { return const_iterator(this, num_elements); } + + auto rbegin() { return reverse_iterator(end()); } + + auto rend() { return reverse_iterator(begin()); } + + auto front() const { return operator[](0); } + auto back() const { return operator[](num_elements - 1); } + auto front() { return operator[](0); } + auto back() { return operator[](num_elements - 1); } + + // Since we only allow passing by value anyway this is just an alias + template void emplace_back(Args... 
args) + { + push_back(T{std::forward(args)...}); + } + + void push_back(const T value) + { + auto internal_index = get_internal_index(num_elements); + + while (internal_index.lower_word + 1 >= vec.size()) { - // ID will be split between the end of this element and the beginning - // of the next element - const std::uint64_t left = node_id >> (Bits - available); - - std::uint64_t right = node_id << (ELEMSIZE - (Bits - available)); - - replace_last_elem(vec_back() | left); - add_last_elem(right); + allocate_blocks(1); } + set_value(internal_index, value); num_elements++; - } - T operator[](const std::size_t index) const { return at(index); } - - T at(const std::size_t a_index) const - { - BOOST_ASSERT(a_index < num_elements); - - const std::size_t pack_group = trunc(a_index / ELEMSIZE); - const std::size_t pack_index = (a_index + ELEMSIZE) % ELEMSIZE; - const std::size_t left_index = (PACKSIZE - Bits * pack_index) % ELEMSIZE; - - const bool back_half = pack_index >= Bits; - const std::size_t index = - pack_group * Bits + trunc(pack_index / Bits) + trunc((pack_index - back_half) / 2); - - BOOST_ASSERT(index < vec.size()); - const std::uint64_t elem = static_cast(vec.at(index)); - - if (left_index == 0) - { - // ID is at the far left side of this element - return T{elem >> (ELEMSIZE - Bits)}; - } - else if (left_index >= Bits) - { - // ID is entirely contained within this element - const std::uint64_t at_right = elem >> (left_index - Bits); - const std::uint64_t left_mask = static_cast(pow(2, Bits)) - 1; - return T{at_right & left_mask}; - } - else - { - // ID is split between this and the next element - const std::uint64_t left_mask = static_cast(pow(2, left_index)) - 1; - const std::uint64_t left_side = (elem & left_mask) << (Bits - left_index); - - BOOST_ASSERT(index < vec.size() - 1); - const std::uint64_t next_elem = static_cast(vec.at(index + 1)); - - const std::uint64_t right_side = next_elem >> (ELEMSIZE - (Bits - left_index)); - return T{left_side | 
right_side}; - } + BOOST_ASSERT(static_cast(back()) == value); } std::size_t size() const { return num_elements; } + void resize(std::size_t elements) + { + num_elements = elements; + auto num_blocks = std::ceil(static_cast(elements) / BLOCK_ELEMENTS); + vec.resize(num_blocks * BLOCK_WORDS + 1); + } + + std::size_t capacity() const { return (vec.capacity() / BLOCK_WORDS) * BLOCK_ELEMENTS; } + template void reserve(typename std::enable_if::type capacity) { - vec.reserve(elements_to_blocks(capacity)); - } - - template - void reset(typename std::enable_if::type *ptr, - typename std::enable_if::type size) - { - vec.reset(ptr, size); - } - - template - void set_number_of_entries(typename std::enable_if::type count) - { - num_elements = count; - } - - std::size_t capacity() const - { - return std::floor(static_cast(vec.capacity()) * ELEMSIZE / Bits); + auto num_blocks = std::ceil(static_cast(capacity) / BLOCK_ELEMENTS); + vec.reserve(num_blocks * BLOCK_WORDS + 1); } friend void serialization::read(storage::io::FileReader &reader, @@ -171,48 +449,67 @@ template class Pack const PackedVector &vec); private: + void allocate_blocks(std::size_t num_blocks) + { + vec.resize(vec.size() + num_blocks * BLOCK_WORDS); + } + + inline InternalIndex get_internal_index(const std::size_t index) const + { + const auto block_offset = BLOCK_WORDS * (index / BLOCK_ELEMENTS); + const std::uint8_t element_index = index % BLOCK_ELEMENTS; + const auto lower_word_index = block_offset + word_offset[element_index]; + + return InternalIndex{lower_word_index, element_index}; + } + + inline void fill(const T value) + { + for (auto block_index : util::irange(0, vec.size() / BLOCK_WORDS)) + { + const auto block_offset = block_index * BLOCK_WORDS; + + for (auto element_index : util::irange(0, BLOCK_ELEMENTS)) + { + const auto lower_word_index = block_offset + word_offset[element_index]; + set_value({lower_word_index, element_index}, value); + } + } + } + + inline T get_value(const InternalIndex 
internal_index) const + { + const auto lower_word = vec[internal_index.lower_word]; + // note this can actually already be a word of the next block however in + // that case the upper mask will be 0. + // we make sure to have a sentinel element to avoid out-of-bounds errors. + const auto upper_word = vec[internal_index.lower_word + 1]; + const auto value = get_lower_half_value(lower_word, + lower_mask[internal_index.element], + lower_offset[internal_index.element]) | + get_upper_half_value(upper_word, + upper_mask[internal_index.element], + upper_offset[internal_index.element]); + return value; + } + + inline void set_value(const InternalIndex internal_index, const T value) + { + auto &lower_word = vec[internal_index.lower_word]; + auto &upper_word = vec[internal_index.lower_word + 1]; + + lower_word = set_lower_value(lower_word, + lower_mask[internal_index.element], + lower_offset[internal_index.element], + value); + upper_word = set_upper_value(upper_word, + upper_mask[internal_index.element], + upper_offset[internal_index.element], + value); + } + util::ViewOrVector vec; - std::uint64_t num_elements = 0; - - signed cursor = -1; - - template - void replace_last_elem(typename std::enable_if::type last_elem) - { - vec[cursor] = last_elem; - } - - template - void replace_last_elem(typename std::enable_if::type last_elem) - { - vec.back() = last_elem; - } - - template - void add_last_elem(typename std::enable_if::type last_elem) - { - vec[cursor + 1] = last_elem; - cursor++; - } - - template - void add_last_elem(typename std::enable_if::type last_elem) - { - vec.push_back(last_elem); - } - - template - std::uint64_t vec_back(typename std::enable_if::type * = nullptr) - { - return vec[cursor]; - } - - template - std::uint64_t vec_back(typename std::enable_if::type * = nullptr) - { - return vec.back(); - } }; } diff --git a/src/benchmarks/CMakeLists.txt b/src/benchmarks/CMakeLists.txt index 140efab38..460c6a9b2 100644 --- a/src/benchmarks/CMakeLists.txt +++ 
b/src/benchmarks/CMakeLists.txt @@ -1,6 +1,7 @@ file(GLOB RTreeBenchmarkSources static_rtree.cpp) file(GLOB MatchBenchmarkSources match.cpp) file(GLOB AliasBenchmarkSources alias.cpp) +file(GLOB PackedVectorBenchmarkSources packed_vector.cpp) add_executable(rtree-bench EXCLUDE_FROM_ALL @@ -40,8 +41,21 @@ target_link_libraries(alias-bench ${TBB_LIBRARIES} ${MAYBE_SHAPEFILE}) +add_executable(packedvector-bench + EXCLUDE_FROM_ALL + ${PackedVectorBenchmarkSources} + $) + +target_link_libraries(packedvector-bench + ${BOOST_BASE_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} + ${TBB_LIBRARIES} + ${MAYBE_SHAPEFILE}) + + add_custom_target(benchmarks DEPENDS rtree-bench + packedvector-bench match-bench alias-bench) diff --git a/src/benchmarks/packed_vector.cpp b/src/benchmarks/packed_vector.cpp new file mode 100644 index 000000000..ac51d1a68 --- /dev/null +++ b/src/benchmarks/packed_vector.cpp @@ -0,0 +1,81 @@ +#include "util/packed_vector.hpp" +#include "util/integer_range.hpp" +#include "util/log.hpp" +#include "util/timing_util.hpp" + +#include +#include +#include +#include +#include +#include + +using namespace osrm; + +struct Measurement +{ + double random_write_ms; + double random_read_ms; +}; + +#ifdef _WIN32 +#pragma optimize("", off) +template void dont_optimize_away(T &&datum) { T local = datum; } +#pragma optimize("", on) +#else +template void dont_optimize_away(T &&datum) { asm volatile("" : "+r"(datum)); } +#endif + +template +auto measure_random_access() +{ + std::vector indices(num_entries); + std::iota(indices.begin(), indices.end(), 0); + std::mt19937 g(1337); + std::shuffle(indices.begin(), indices.end(), g); + + VectorT vector(num_entries); + + TIMER_START(write); + for (auto round : util::irange(0, num_rounds)) + { + for (auto idx : util::irange(0, num_entries)) + { + vector[indices[idx]] = idx + round; + } + } + TIMER_STOP(write); + + TIMER_START(read); + auto sum = 0; + for (auto round : util::irange(0, num_rounds)) + { + sum = round; + for (auto idx : 
util::irange(0, num_entries)) + { + sum += vector[indices[idx]]; + } + dont_optimize_away(sum); + } + TIMER_STOP(read); + + return Measurement{TIMER_MSEC(write), TIMER_MSEC(read)}; +} + +int main(int, char **) +{ + util::LogPolicy::GetInstance().Unmute(); + + auto result_plain = measure_random_access<10000, 1000000, std::vector>(); + auto result_packed = + measure_random_access<10000, 1000000, util::PackedVector>(); + + auto write_slowdown = result_packed.random_write_ms / result_plain.random_write_ms; + auto read_slowdown = result_packed.random_read_ms / result_plain.random_read_ms; + util::Log() << "random write: std::vector " << result_plain.random_write_ms + << " ms, util::packed_vector " << result_packed.random_write_ms << " ms. " + << write_slowdown; + util::Log() << "random read: std::vector " << result_plain.random_read_ms + << " ms, util::packed_vector " << result_packed.random_read_ms << " ms. " + << read_slowdown; +} diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index e2217deb7..0612e5c91 100644 --- a/src/storage/storage.cpp +++ b/src/storage/storage.cpp @@ -329,11 +329,14 @@ void Storage::PopulateLayout(DataLayout &layout) io::FileReader node_file(config.nodes_data_path, io::FileReader::VerifyFingerprint); const auto coordinate_list_size = node_file.ReadElementCount64(); layout.SetBlockSize(DataLayout::COORDINATE_LIST, coordinate_list_size); + node_file.Skip(coordinate_list_size); + // skip number of elements + node_file.Skip(1); + const auto num_id_blocks = node_file.ReadElementCount64(); // we'll read a list of OSM node IDs from the same data, so set the block size for the same // number of items: - layout.SetBlockSize( - DataLayout::OSM_NODE_ID_LIST, - extractor::PackedOSMIDsView::elements_to_blocks(coordinate_list_size)); + layout.SetBlockSize(DataLayout::OSM_NODE_ID_LIST, + num_id_blocks); } // load geometries sizes @@ -703,11 +706,15 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) const auto 
coordinates_ptr = layout.GetBlockPtr(memory_ptr, DataLayout::COORDINATE_LIST); const auto osmnodeid_ptr = - layout.GetBlockPtr(memory_ptr, DataLayout::OSM_NODE_ID_LIST); + layout.GetBlockPtr( + memory_ptr, DataLayout::OSM_NODE_ID_LIST); util::vector_view coordinates( coordinates_ptr, layout.num_entries[DataLayout::COORDINATE_LIST]); - extractor::PackedOSMIDsView osm_node_ids; - osm_node_ids.reset(osmnodeid_ptr, layout.num_entries[DataLayout::OSM_NODE_ID_LIST]); + extractor::PackedOSMIDsView osm_node_ids( + util::vector_view( + osmnodeid_ptr, layout.num_entries[DataLayout::OSM_NODE_ID_LIST]), + layout.num_entries[DataLayout::OSM_NODE_ID_LIST] * + extractor::PackedOSMIDsView::BLOCK_ELEMENTS); extractor::files::readNodes(config.nodes_data_path, coordinates, osm_node_ids); } diff --git a/unit_tests/util/packed_vector.cpp b/unit_tests/util/packed_vector.cpp index f7d8ddaaa..c3833c4d7 100644 --- a/unit_tests/util/packed_vector.cpp +++ b/unit_tests/util/packed_vector.cpp @@ -1,9 +1,15 @@ #include "util/packed_vector.hpp" #include "util/typedefs.hpp" +#include +#include #include #include +#include +#include +#include + BOOST_AUTO_TEST_SUITE(packed_vector_test) using namespace osrm; @@ -16,10 +22,15 @@ BOOST_AUTO_TEST_CASE(insert_and_retrieve_packed_test) std::vector original_ids; const constexpr std::size_t num_test_cases = 399; + const constexpr std::uint64_t max_id = (1ULL << 33) - 1; + + std::mt19937 rng; + rng.seed(1337); + std::uniform_int_distribution dist(0, max_id); for (std::size_t i = 0; i < num_test_cases; i++) { - OSMNodeID r{static_cast(rand() % 2147483647)}; // max 33-bit uint + OSMNodeID r{static_cast(dist(rng))}; // max 33-bit uint packed_ids.push_back(r); original_ids.push_back(r); @@ -44,4 +55,152 @@ BOOST_AUTO_TEST_CASE(packed_vector_capacity_test) BOOST_CHECK(packed_vec.capacity() >= 100); } +BOOST_AUTO_TEST_CASE(packed_vector_resize_test) +{ + PackedVector packed_vec(100); + + BOOST_CHECK_EQUAL(packed_vec.size(), 100); + packed_vec[99] = 1337; + 
packed_vec[0] = 42; + BOOST_CHECK_EQUAL(packed_vec[99], 1337u); + BOOST_CHECK_EQUAL(packed_vec[0], 42u); +} + +BOOST_AUTO_TEST_CASE(packed_vector_iterator_test) +{ + PackedVector packed_vec(100); + + std::iota(packed_vec.begin(), packed_vec.end(), 0); + + BOOST_CHECK(std::is_sorted(packed_vec.begin(), packed_vec.end())); + + auto idx = 0; + for (auto value : packed_vec) + { + BOOST_CHECK_EQUAL(packed_vec[idx], value); + idx++; + } + BOOST_CHECK_EQUAL(idx, packed_vec.size()); + + auto range = boost::make_iterator_range(packed_vec.cbegin(), packed_vec.cend()); + BOOST_CHECK_EQUAL(range.size(), packed_vec.size()); + for (auto idx : util::irange(0, packed_vec.size())) + { + BOOST_CHECK_EQUAL(packed_vec[idx], range[idx]); + } + + auto reverse_range = boost::adaptors::reverse( + boost::make_iterator_range(packed_vec.cbegin(), packed_vec.cend())); + BOOST_CHECK_EQUAL(reverse_range.size(), packed_vec.size()); + for (auto idx : util::irange(0, packed_vec.size())) + { + BOOST_CHECK_EQUAL(packed_vec[packed_vec.size() - 1 - idx], reverse_range[idx]); + } + + auto mut_range = boost::make_iterator_range(packed_vec.begin(), packed_vec.end()); + BOOST_CHECK_EQUAL(range.size(), packed_vec.size()); + for (auto idx : util::irange(0, packed_vec.size())) + { + BOOST_CHECK_EQUAL(packed_vec[idx], mut_range[idx]); + } + + auto mut_reverse_range = + boost::adaptors::reverse(boost::make_iterator_range(packed_vec.begin(), packed_vec.end())); + BOOST_CHECK_EQUAL(reverse_range.size(), packed_vec.size()); + for (auto idx : util::irange(0, packed_vec.size())) + { + BOOST_CHECK_EQUAL(packed_vec[packed_vec.size() - 1 - idx], mut_reverse_range[idx]); + } +} + +BOOST_AUTO_TEST_CASE(packed_vector_10bit_small_test) +{ + PackedVector vector = {10, 5, 8, 12, 254, 4, (1 << 10) - 1, 6}; + std::vector reference = {10, 5, 8, 12, 254, 4, (1 << 10) - 1, 6}; + + BOOST_CHECK_EQUAL(vector[0], reference[0]); + BOOST_CHECK_EQUAL(vector[1], reference[1]); + BOOST_CHECK_EQUAL(vector[2], reference[2]); + 
BOOST_CHECK_EQUAL(vector[3], reference[3]); + BOOST_CHECK_EQUAL(vector[4], reference[4]); + BOOST_CHECK_EQUAL(vector[5], reference[5]); + BOOST_CHECK_EQUAL(vector[6], reference[6]); + BOOST_CHECK_EQUAL(vector[7], reference[7]); +} + +BOOST_AUTO_TEST_CASE(packed_vector_33bit_small_test) +{ + std::vector reference = {1597322404, + 1939964443, + 2112255763, + 1432114613, + 1067854538, + 352118606, + 1782436840, + 1909002904, + 165344818}; + + PackedVector vector = {1597322404, + 1939964443, + 2112255763, + 1432114613, + 1067854538, + 352118606, + 1782436840, + 1909002904, + 165344818}; + + BOOST_CHECK_EQUAL(vector[0], reference[0]); + BOOST_CHECK_EQUAL(vector[1], reference[1]); + BOOST_CHECK_EQUAL(vector[2], reference[2]); + BOOST_CHECK_EQUAL(vector[3], reference[3]); + BOOST_CHECK_EQUAL(vector[4], reference[4]); + BOOST_CHECK_EQUAL(vector[5], reference[5]); + BOOST_CHECK_EQUAL(vector[6], reference[6]); + BOOST_CHECK_EQUAL(vector[7], reference[7]); +} + +BOOST_AUTO_TEST_CASE(values_overflow) +{ + const std::uint64_t mask = (1ull << 42) - 1; + PackedVector vector(52, 0); + + for (auto it = vector.begin(); it != vector.end(); ++it) + { + BOOST_CHECK_EQUAL(*it, 0); + } + + std::uint64_t value = 1; + for (auto it = vector.begin(); it != vector.end(); ++it) + { + BOOST_CHECK_EQUAL(*it, 0); + *it = value; + BOOST_CHECK_EQUAL(*it, value & mask); + value <<= 1; + } + + for (auto it = vector.rbegin(); it != vector.rend(); ++it) + { + value >>= 1; + BOOST_CHECK_EQUAL(*it, value & mask); + } + + for (auto it = vector.cbegin(); it != vector.cend(); ++it) + { + BOOST_CHECK_EQUAL(*it, value & mask); + value <<= 1; + } +} + +BOOST_AUTO_TEST_CASE(packed_vector_33bit_continious) +{ + PackedVector vector; + + for (std::uint64_t i : osrm::util::irange(0, 400)) + { + vector.push_back(i); + BOOST_CHECK_EQUAL(vector.back(), i); + } +} + BOOST_AUTO_TEST_SUITE_END()