// Packs `count` consecutive booleans of `data`, starting at `base_index`, into a
// single block of type BlockT.
//
// Element `base_index + bit` is stored in bit `bit` of the result, i.e. the first
// element ends up in the least-significant bit. Bits at positions >= `count` are zero.
//
// Note: if this packing is changed, be sure to update the bool specialization of
// vector_view as well, so that on-disk and in-memory layouts match.
//
// T          container indexable with operator[] whose value_type is bool
// BlockT     unsigned integral type that receives the bits
// data       container to read from
// base_index index of the first element to pack
// count      number of elements to pack; must not exceed the bit width of BlockT
//            (checked with BOOST_ASSERT)
//
// NOTE(review): the default for BlockT is not visible in this view; `unsigned char`
// is presumed because callers invoke packBits(v, 0, 8) without template arguments
// and compare against byte-sized values - confirm against the header.
template <typename T, typename BlockT = unsigned char>
inline BlockT packBits(const T &data, std::size_t base_index, const std::size_t count)
{
    static_assert(std::is_same<typename T::value_type, bool>::value, "value_type is not bool");
    static_assert(std::is_unsigned<BlockT>::value, "BlockT must be unsigned type");
    static_assert(std::is_integral<BlockT>::value, "BlockT must be an integral type");
    static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!");
    BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count);

    BlockT value = 0;
    for (std::size_t bit = 0; bit < count; ++bit)
    {
        if (data[base_index + bit])
        {
            // For BlockT narrower than int the shift is evaluated in a promoted type,
            // so convert back explicitly instead of relying on implicit narrowing.
            value |= static_cast<BlockT>(BlockT{1} << bit);
        }
    }
    return value;
}

// Inverse of packBits: writes the `count` low-order bits of `value` into `data`,
// starting at `base_index`.
//
// Bit `bit` of `value` is assigned to element `base_index + bit`; elements outside
// [base_index, base_index + count) are not touched.
//
// T          container indexable with operator[] whose value_type is bool
// BlockT     unsigned integral type holding the bits (deduced from `value` when
//            not given explicitly)
// data       container to write to; must already hold at least base_index + count
//            elements, since this function does not resize it
// base_index index of the first element to write
// count      number of elements to write; must not exceed the bit width of BlockT
//            (checked with BOOST_ASSERT)
// value      block to unpack
//
// NOTE(review): the BlockT default is presumed to mirror packBits - confirm.
template <typename T, typename BlockT = unsigned char>
inline void
unpackBits(T &data, const std::size_t base_index, const std::size_t count, const BlockT value)
{
    static_assert(std::is_same<typename T::value_type, bool>::value, "value_type is not bool");
    static_assert(std::is_unsigned<BlockT>::value, "BlockT must be unsigned type");
    static_assert(std::is_integral<BlockT>::value, "BlockT must be an integral type");
    static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!");
    BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count);

    for (std::size_t bit = 0; bit < count; ++bit)
    {
        // Reduce to bool before assigning so the result never depends on how the
        // container's element proxy converts a wide integer.
        const bool is_set = (value & (BlockT{1} << bit)) != 0;
        data[base_index + bit] = is_set;
    }
}
std::uint64_t BLOCK_BITS = CHAR_BIT * sizeof(BlockType); // FIXME on old boost version the function_input_iterator does not work with lambdas // so we need to wrap it in a function here. - const std::function encode_function = [&]() -> std::uint64_t { - auto write_size = std::min(count - index, WORD_BITS); - auto packed = packBits(data, index, write_size); - index += WORD_BITS; + const std::function encode_function = [&]() -> BlockType { + auto write_size = std::min(count - index, BLOCK_BITS); + auto packed = packBits(data, index, write_size); + index += BLOCK_BITS; return packed; }; - std::uint64_t number_of_blocks = (count + WORD_BITS - 1) / WORD_BITS; - writer.WriteStreaming( + std::uint64_t number_of_blocks = (count + BLOCK_BITS - 1) / BLOCK_BITS; + writer.WriteStreaming( name, boost::make_function_input_iterator(encode_function, boost::infinite()), number_of_blocks); diff --git a/include/util/vector_view.hpp b/include/util/vector_view.hpp index ab6e91fd6..260cc511e 100644 --- a/include/util/vector_view.hpp +++ b/include/util/vector_view.hpp @@ -195,7 +195,10 @@ template <> class vector_view { BOOST_ASSERT_MSG(index < m_size, "invalid size"); const std::size_t bucket = index / WORD_BITS; + // Note: ordering of bits here should match packBits in storage/serialization.hpp + // so that directly mmap-ing data is possible const auto offset = index % WORD_BITS; + BOOST_ASSERT(WORD_BITS > offset); return m_ptr[bucket] & (static_cast(1) << offset); } @@ -224,11 +227,23 @@ template <> class vector_view { BOOST_ASSERT(index < m_size); const auto bucket = index / WORD_BITS; + // Note: ordering of bits here should match packBits in storage/serialization.hpp + // so that directly mmap-ing data is possible const auto offset = index % WORD_BITS; + BOOST_ASSERT(WORD_BITS > offset); return reference{m_ptr + bucket, static_cast(1) << offset}; } template friend void swap(vector_view &, vector_view &) noexcept; + + friend std::ostream &operator<<(std::ostream &os, const 
vector_view &rhs) + { + for (std::size_t i = 0; i < rhs.size(); ++i) + { + os << (i > 0 ? " " : "") << rhs.at(i); + } + return os; + } }; // Both vector_view and the vector_view specializations share this impl. diff --git a/unit_tests/storage/serialization.cpp b/unit_tests/storage/serialization.cpp index e42f8ea8e..760f1faea 100644 --- a/unit_tests/storage/serialization.cpp +++ b/unit_tests/storage/serialization.cpp @@ -1,11 +1,15 @@ #include "storage/serialization.hpp" +#include "util/vector_view.hpp" + #include "../common/range_tools.hpp" #include "../common/temporary_file.hpp" #include #include +#include + BOOST_AUTO_TEST_SUITE(serialization) using namespace osrm; @@ -15,20 +19,48 @@ BOOST_AUTO_TEST_CASE(pack_test) { std::vector v = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x2e); - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x65); - BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0x95); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x74); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x53); + BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0xa9); BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 11, 1), 0x01); } +BOOST_AUTO_TEST_CASE(vector_view_pack_test) +{ + // Verifies that the packing generated by packBits matches + // what vector_view expects + + // 1. Generate a random bool vector that covers several uint64_t bytes + constexpr unsigned RANDOM_SEED = 42; + std::mt19937 g(RANDOM_SEED); + std::uniform_int_distribution<> binary_distribution(0, 1); + std::vector v(150); + for (std::size_t i = 0; i < v.size(); ++i) + v[i] = binary_distribution(g) == 1; + + // 2. 
Pack the vector into a contiguous set of bytes + std::uint64_t data[3]; + data[0] = storage::serialization::detail::packBits(v, 0, 64); + data[1] = storage::serialization::detail::packBits(v, 64, 64); + data[2] = storage::serialization::detail::packBits(v, 128, 22); + + // 3. Make a vector_view of that memory, and see if the bit sequence is + // interpreted correctly by vector_view + util::vector_view view(data, v.size()); + for (std::size_t index = 0; index < v.size(); ++index) + { + BOOST_CHECK_EQUAL(v[index], view[index]); + } +} + BOOST_AUTO_TEST_CASE(unpack_test) { std::vector v(14), expected = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; - storage::serialization::detail::unpackBits(v, 0, 8, 0x2e); - storage::serialization::detail::unpackBits(v, 5, 7, 0x65); - storage::serialization::detail::unpackBits(v, 6, 8, 0x95); - storage::serialization::detail::unpackBits(v, 11, 1, 0x01); + storage::serialization::detail::unpackBits(v, 0, 8, 0x74u); + storage::serialization::detail::unpackBits(v, 5, 7, 0x53u); + storage::serialization::detail::unpackBits(v, 6, 8, 0xa9u); + storage::serialization::detail::unpackBits(v, 11, 1, 0x01u); BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), expected.begin(), expected.end()); }