Make the serialized bit-packing for vector<bool> match the in-memory layout of vector_view<bool>, so that data can be directly mmapped.
This commit is contained in:
		
							parent
							
								
									b1791d1ab3
								
							
						
					
					
						commit
						d80318f8ea
					
				| @ -9,6 +9,7 @@ | ||||
| #include "storage/shared_datatype.hpp" | ||||
| #include "storage/tar.hpp" | ||||
| 
 | ||||
| #include <boost/assert.hpp> | ||||
| #include <boost/function_output_iterator.hpp> | ||||
| #include <boost/iterator/function_input_iterator.hpp> | ||||
| 
 | ||||
| @ -30,22 +31,37 @@ namespace serialization | ||||
| namespace detail | ||||
| { | ||||
| template <typename T, typename BlockT = unsigned char> | ||||
| inline BlockT packBits(const T &data, std::size_t index, std::size_t count) | ||||
| inline BlockT packBits(const T &data, std::size_t base_index, const std::size_t count) | ||||
| { | ||||
|     static_assert(std::is_same<typename T::value_type, bool>::value, "value_type is not bool"); | ||||
|     static_assert(std::is_unsigned<BlockT>::value, "BlockT must be unsigned type"); | ||||
|     static_assert(std::is_integral<BlockT>::value, "BlockT must be an integral type"); | ||||
|     static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!"); | ||||
|     BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count); | ||||
| 
 | ||||
|     // Note: if this packing is changed, be sure to update vector_view<bool>
 | ||||
|     //       as well, so that on-disk and in-memory layouts match.
 | ||||
|     BlockT value = 0; | ||||
|     for (std::size_t bit = 0; bit < count; ++bit, ++index) | ||||
|         value = (value << 1) | data[index]; | ||||
|     for (std::size_t bit = 0; bit < count; ++bit) | ||||
|     { | ||||
|         value |= (data[base_index + bit] ? BlockT{1} : BlockT{0}) << bit; | ||||
|     } | ||||
|     return value; | ||||
| } | ||||
| 
 | ||||
| template <typename T, typename BlockT = unsigned char> | ||||
| inline void unpackBits(T &data, std::size_t index, std::size_t count, BlockT value) | ||||
| inline void | ||||
| unpackBits(T &data, const std::size_t base_index, const std::size_t count, const BlockT value) | ||||
| { | ||||
|     static_assert(std::is_same<typename T::value_type, bool>::value, "value_type is not bool"); | ||||
|     const BlockT mask = BlockT{1} << (count - 1); | ||||
|     for (std::size_t bit = 0; bit < count; value <<= 1, ++bit, ++index) | ||||
|         data[index] = value & mask; | ||||
|     static_assert(std::is_unsigned<BlockT>::value, "BlockT must be unsigned type"); | ||||
|     static_assert(std::is_integral<BlockT>::value, "BlockT must be an integral type"); | ||||
|     static_assert(CHAR_BIT == 8, "Non-8-bit bytes not supported, sorry!"); | ||||
|     BOOST_ASSERT(sizeof(BlockT) * CHAR_BIT >= count); | ||||
|     for (std::size_t bit = 0; bit < count; ++bit) | ||||
|     { | ||||
|         data[base_index + bit] = value & (BlockT{1} << bit); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename VectorT> | ||||
| @ -55,15 +71,16 @@ void readBoolVector(tar::FileReader &reader, const std::string &name, VectorT &d | ||||
|     data.resize(count); | ||||
|     std::uint64_t index = 0; | ||||
| 
 | ||||
|     constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); | ||||
|     using BlockType = std::uint64_t; | ||||
|     constexpr std::uint64_t BLOCK_BITS = CHAR_BIT * sizeof(BlockType); | ||||
| 
 | ||||
|     const auto decode = [&](const std::uint64_t block) { | ||||
|         auto read_size = std::min<std::size_t>(count - index, WORD_BITS); | ||||
|         unpackBits<VectorT, std::uint64_t>(data, index, read_size, block); | ||||
|         index += WORD_BITS; | ||||
|     const auto decode = [&](const BlockType block) { | ||||
|         auto read_size = std::min<std::size_t>(count - index, BLOCK_BITS); | ||||
|         unpackBits<VectorT, BlockType>(data, index, read_size, block); | ||||
|         index += BLOCK_BITS; | ||||
|     }; | ||||
| 
 | ||||
|     reader.ReadStreaming<std::uint64_t>(name, boost::make_function_output_iterator(decode)); | ||||
|     reader.ReadStreaming<BlockType>(name, boost::make_function_output_iterator(decode)); | ||||
| } | ||||
| 
 | ||||
| template <typename VectorT> | ||||
| @ -73,19 +90,20 @@ void writeBoolVector(tar::FileWriter &writer, const std::string &name, const Vec | ||||
|     writer.WriteElementCount64(name, count); | ||||
|     std::uint64_t index = 0; | ||||
| 
 | ||||
|     constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); | ||||
|     using BlockType = std::uint64_t; | ||||
|     constexpr std::uint64_t BLOCK_BITS = CHAR_BIT * sizeof(BlockType); | ||||
| 
 | ||||
|     // FIXME: on old Boost versions, function_input_iterator does not work with lambdas,
 | ||||
|     // so we need to wrap it in a function here.
 | ||||
|     const std::function<std::uint64_t()> encode_function = [&]() -> std::uint64_t { | ||||
|         auto write_size = std::min<std::size_t>(count - index, WORD_BITS); | ||||
|         auto packed = packBits<VectorT, std::uint64_t>(data, index, write_size); | ||||
|         index += WORD_BITS; | ||||
|     const std::function<BlockType()> encode_function = [&]() -> BlockType { | ||||
|         auto write_size = std::min<std::size_t>(count - index, BLOCK_BITS); | ||||
|         auto packed = packBits<VectorT, BlockType>(data, index, write_size); | ||||
|         index += BLOCK_BITS; | ||||
|         return packed; | ||||
|     }; | ||||
| 
 | ||||
|     std::uint64_t number_of_blocks = (count + WORD_BITS - 1) / WORD_BITS; | ||||
|     writer.WriteStreaming<std::uint64_t>( | ||||
|     std::uint64_t number_of_blocks = (count + BLOCK_BITS - 1) / BLOCK_BITS; | ||||
|     writer.WriteStreaming<BlockType>( | ||||
|         name, | ||||
|         boost::make_function_input_iterator(encode_function, boost::infinite()), | ||||
|         number_of_blocks); | ||||
|  | ||||
| @ -195,7 +195,10 @@ template <> class vector_view<bool> | ||||
|     { | ||||
|         BOOST_ASSERT_MSG(index < m_size, "invalid size"); | ||||
|         const std::size_t bucket = index / WORD_BITS; | ||||
|         // Note: ordering of bits here should match packBits in storage/serialization.hpp
 | ||||
|         //       so that directly mmap-ing data is possible
 | ||||
|         const auto offset = index % WORD_BITS; | ||||
|         BOOST_ASSERT(WORD_BITS > offset); | ||||
|         return m_ptr[bucket] & (static_cast<Word>(1) << offset); | ||||
|     } | ||||
| 
 | ||||
| @ -224,11 +227,23 @@ template <> class vector_view<bool> | ||||
|     { | ||||
|         BOOST_ASSERT(index < m_size); | ||||
|         const auto bucket = index / WORD_BITS; | ||||
|         // Note: ordering of bits here should match packBits in storage/serialization.hpp
 | ||||
|         //       so that directly mmap-ing data is possible
 | ||||
|         const auto offset = index % WORD_BITS; | ||||
|         BOOST_ASSERT(WORD_BITS > offset); | ||||
|         return reference{m_ptr + bucket, static_cast<Word>(1) << offset}; | ||||
|     } | ||||
| 
 | ||||
|     template <typename T> friend void swap(vector_view<T> &, vector_view<T> &) noexcept; | ||||
| 
 | ||||
|     friend std::ostream &operator<<(std::ostream &os, const vector_view<bool> &rhs) | ||||
|     { | ||||
|         for (std::size_t i = 0; i < rhs.size(); ++i) | ||||
|         { | ||||
|             os << (i > 0 ? " " : "") << rhs.at(i); | ||||
|         } | ||||
|         return os; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| // Both vector_view<T> and the vector_view<bool> specializations share this impl.
 | ||||
|  | ||||
| @ -1,11 +1,15 @@ | ||||
| #include "storage/serialization.hpp" | ||||
| 
 | ||||
| #include "util/vector_view.hpp" | ||||
| 
 | ||||
| #include "../common/range_tools.hpp" | ||||
| #include "../common/temporary_file.hpp" | ||||
| 
 | ||||
| #include <boost/filesystem.hpp> | ||||
| #include <boost/test/unit_test.hpp> | ||||
| 
 | ||||
| #include <random> | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE(serialization) | ||||
| 
 | ||||
| using namespace osrm; | ||||
| @ -15,20 +19,48 @@ BOOST_AUTO_TEST_CASE(pack_test) | ||||
| { | ||||
|     std::vector<bool> v = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; | ||||
| 
 | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x2e); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x65); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0x95); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 0, 8), 0x74); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 5, 7), 0x53); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 6, 8), 0xa9); | ||||
|     BOOST_CHECK_EQUAL(storage::serialization::detail::packBits(v, 11, 1), 0x01); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(vector_view_pack_test) | ||||
| { | ||||
|     // Verifies that the packing generated by packBits matches
 | ||||
|     // what vector_view<bool> expects
 | ||||
| 
 | ||||
|     // 1. Generate a random bool vector that spans several uint64_t words
 | ||||
|     constexpr unsigned RANDOM_SEED = 42; | ||||
|     std::mt19937 g(RANDOM_SEED); | ||||
|     std::uniform_int_distribution<> binary_distribution(0, 1); | ||||
|     std::vector<bool> v(150); | ||||
|     for (std::size_t i = 0; i < v.size(); ++i) | ||||
|         v[i] = binary_distribution(g) == 1; | ||||
| 
 | ||||
|     // 2. Pack the vector into a contiguous set of bytes
 | ||||
|     std::uint64_t data[3]; | ||||
|     data[0] = storage::serialization::detail::packBits<decltype(v), std::uint64_t>(v, 0, 64); | ||||
|     data[1] = storage::serialization::detail::packBits<decltype(v), std::uint64_t>(v, 64, 64); | ||||
|     data[2] = storage::serialization::detail::packBits<decltype(v), std::uint64_t>(v, 128, 22); | ||||
| 
 | ||||
|     // 3. Make a vector_view of that memory, and see if the bit sequence is
 | ||||
|     //    interpreted correctly by vector_view
 | ||||
|     util::vector_view<bool> view(data, v.size()); | ||||
|     for (std::size_t index = 0; index < v.size(); ++index) | ||||
|     { | ||||
|         BOOST_CHECK_EQUAL(v[index], view[index]); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(unpack_test) | ||||
| { | ||||
|     std::vector<bool> v(14), expected = {0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1}; | ||||
| 
 | ||||
|     storage::serialization::detail::unpackBits(v, 0, 8, 0x2e); | ||||
|     storage::serialization::detail::unpackBits(v, 5, 7, 0x65); | ||||
|     storage::serialization::detail::unpackBits(v, 6, 8, 0x95); | ||||
|     storage::serialization::detail::unpackBits(v, 11, 1, 0x01); | ||||
|     storage::serialization::detail::unpackBits(v, 0, 8, 0x74u); | ||||
|     storage::serialization::detail::unpackBits(v, 5, 7, 0x53u); | ||||
|     storage::serialization::detail::unpackBits(v, 6, 8, 0xa9u); | ||||
|     storage::serialization::detail::unpackBits(v, 11, 1, 0x01u); | ||||
|     BOOST_CHECK_EQUAL_COLLECTIONS(v.begin(), v.end(), expected.begin(), expected.end()); | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user