From c322d93435889c455b687142fd8db7b7e6e47d78 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Fri, 23 Mar 2018 12:02:20 +0000 Subject: [PATCH] Make the block size of vector consistent --- .../contiguous_internalmem_datafacade.hpp | 2 +- include/storage/serialization.hpp | 38 ++++++++++--------- include/util/packed_vector.hpp | 4 +- include/util/vector_view.hpp | 35 +++++++++-------- src/storage/storage.cpp | 2 +- unit_tests/util/vector_view.cpp | 7 +++- 6 files changed, 49 insertions(+), 39 deletions(-) diff --git a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp index 377f70fd7..091b98a2c 100644 --- a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp +++ b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp @@ -94,7 +94,7 @@ class ContiguousInternalMemoryAlgorithmDataFacade : public datafacade::Algor auto filter_block_id = static_cast( storage::DataLayout::CH_EDGE_FILTER_0 + exclude_index); - auto edge_filter_ptr = data_layout.GetBlockPtr(memory_block, filter_block_id); + auto edge_filter_ptr = data_layout.GetBlockPtr::Word>(memory_block, filter_block_id); util::vector_view node_list( graph_nodes_ptr, data_layout.GetBlockEntries(storage::DataLayout::CH_GRAPH_NODE_LIST)); diff --git a/include/storage/serialization.hpp b/include/storage/serialization.hpp index 84614ce36..051e2ce0b 100644 --- a/include/storage/serialization.hpp +++ b/include/storage/serialization.hpp @@ -129,21 +129,21 @@ void write(tar::FileWriter &writer, const std::string &name, const util::vector_ namespace detail { -template -inline unsigned char packBits(const T &data, std::size_t index, std::size_t count) +template +inline BlockT packBits(const T &data, std::size_t index, std::size_t count) { static_assert(std::is_same::value, "value_type is not bool"); - unsigned char value = 0; + BlockT value = 0; for (std::size_t bit = 0; bit < count; ++bit, ++index) value = (value << 1) | data[index]; return value; } -template -inline void unpackBits(T &data, std::size_t index, std::size_t count, unsigned char value) +template +inline void unpackBits(T &data, std::size_t index, std::size_t count, BlockT value) { static_assert(std::is_same::value, "value_type is not bool"); - const unsigned char mask = 1 << (count - 1); + const BlockT mask = BlockT {1} << (count - 1); for (std::size_t bit = 0; bit < count; value <<= 1, ++bit, ++index) data[index] = value & mask; } @@ -155,13 +155,15 @@ void readBoolVector(tar::FileReader &reader, const std::string &name, VectorT &d data.resize(count); std::uint64_t index = 0; - const auto decode = [&](const unsigned char block) { - auto read_size = std::min(count - index, CHAR_BIT); - unpackBits(data, index, read_size, block); - index += CHAR_BIT; + constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); + + const auto decode = [&](const std::uint64_t block) { + auto read_size = std::min(count - index, WORD_BITS); + unpackBits(data, index, read_size, block); + index += WORD_BITS; }; - reader.ReadStreaming(name, boost::make_function_output_iterator(decode)); + reader.ReadStreaming(name, boost::make_function_output_iterator(decode)); } template @@ -171,17 +173,19 @@ void writeBoolVector(tar::FileWriter &writer, const std::string &name, const Vec writer.WriteElementCount64(name, count); std::uint64_t index = 0; + constexpr std::uint64_t WORD_BITS = CHAR_BIT * sizeof(std::uint64_t); + // FIXME on old boost version the function_input_iterator does not work with lambdas // so we need to wrap it in a function here. - const std::function encode_function = [&]() -> char { - auto write_size = std::min(count - index, CHAR_BIT); - auto packed = packBits(data, index, write_size); - index += CHAR_BIT; + const std::function encode_function = [&]() -> std::uint64_t { + auto write_size = std::min(count - index, WORD_BITS); + auto packed = packBits(data, index, write_size); + index += WORD_BITS; return packed; }; - std::uint64_t number_of_blocks = std::ceil((double)count / CHAR_BIT); - writer.WriteStreaming( + std::uint64_t number_of_blocks = (count + WORD_BITS - 1) / WORD_BITS; + writer.WriteStreaming( name, boost::make_function_input_iterator(encode_function, boost::infinite()), number_of_blocks); diff --git a/include/util/packed_vector.hpp b/include/util/packed_vector.hpp index cec8a1235..a3ca47f7d 100644 --- a/include/util/packed_vector.hpp +++ b/include/util/packed_vector.hpp @@ -436,7 +436,7 @@ template class Pack void resize(std::size_t elements) { num_elements = elements; - auto num_blocks = std::ceil(static_cast(elements) / BLOCK_ELEMENTS); + auto num_blocks = (elements + BLOCK_ELEMENTS - 1) / BLOCK_ELEMENTS; vec.resize(num_blocks * BLOCK_WORDS + 1); } @@ -445,7 +445,7 @@ template class Pack template void reserve(typename std::enable_if::type capacity) { - auto num_blocks = std::ceil(static_cast(capacity) / BLOCK_ELEMENTS); + auto num_blocks = (capacity + BLOCK_ELEMENTS - 1) / BLOCK_ELEMENTS; vec.reserve(num_blocks * BLOCK_WORDS + 1); } diff --git a/include/util/vector_view.hpp b/include/util/vector_view.hpp index 49f27404b..ae77e19b3 100644 --- a/include/util/vector_view.hpp +++ b/include/util/vector_view.hpp @@ -150,11 +150,14 @@ template class vector_view template <> class vector_view { - private: - unsigned *m_ptr; - std::size_t m_size; + public: + using Word = std::uint64_t; - static constexpr std::size_t UNSIGNED_BITS = CHAR_BIT * sizeof(unsigned); + private: + static constexpr std::size_t WORD_BITS = CHAR_BIT * sizeof(Word); + + Word *m_ptr; + std::size_t m_size; public: using value_type = bool; @@ -178,23 +181,23 @@ template <> class vector_view return os << static_cast(rhs); } - unsigned *m_ptr; - const unsigned mask; + Word *m_ptr; + const Word mask; }; vector_view() : m_ptr(nullptr), m_size(0) {} - vector_view(unsigned *ptr, std::size_t size) : m_ptr(ptr), m_size(size) {} + vector_view(Word *ptr, std::size_t size) : m_ptr(ptr), m_size(size) {} bool at(const std::size_t index) const { BOOST_ASSERT_MSG(index < m_size, "invalid size"); - const std::size_t bucket = index / UNSIGNED_BITS; - const unsigned offset = index % UNSIGNED_BITS; - return m_ptr[bucket] & (1u << offset); + const std::size_t bucket = index / WORD_BITS; + const auto offset = index % WORD_BITS; + return m_ptr[bucket] & (static_cast(1) << offset); } - void reset(unsigned *ptr, std::size_t size) + void reset(std::uint64_t *ptr, std::size_t size) { m_ptr = ptr; m_size = size; @@ -213,14 +216,14 @@ template <> class vector_view bool empty() const { return 0 == size(); } - bool operator[](const unsigned index) const { return at(index); } + bool operator[](const std::size_t index) const { return at(index); } - reference operator[](const unsigned index) + reference operator[](const std::size_t index) { BOOST_ASSERT(index < m_size); - const std::size_t bucket = index / UNSIGNED_BITS; - const unsigned offset = index % UNSIGNED_BITS; - return reference{m_ptr + bucket, 1u << offset}; + const auto bucket = index / WORD_BITS; + const auto offset = index % WORD_BITS; + return reference{m_ptr + bucket, static_cast(1) << offset}; } template friend void swap(vector_view &, vector_view &) noexcept; diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index 06edb4c6b..63428c4e7 100644 --- a/src/storage/storage.cpp +++ b/src/storage/storage.cpp @@ -712,7 +712,7 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) { auto block_id = static_cast(storage::DataLayout::CH_EDGE_FILTER_0 + index); - auto data_ptr = layout.GetBlockPtr(memory_ptr, block_id); + auto data_ptr = layout.GetBlockPtr::Word, true>(memory_ptr, block_id); auto num_entries = layout.GetBlockEntries(block_id); edge_filter.emplace_back(data_ptr, num_entries); } diff --git a/unit_tests/util/vector_view.cpp b/unit_tests/util/vector_view.cpp index 1cde76e6c..50c72ceb4 100644 --- a/unit_tests/util/vector_view.cpp +++ b/unit_tests/util/vector_view.cpp @@ -43,8 +43,11 @@ BOOST_AUTO_TEST_CASE(rw_short) BOOST_AUTO_TEST_CASE(rw_bool) { std::size_t num_elements = 1000; - std::unique_ptr data = std::make_unique(num_elements / sizeof(std::uint32_t)); - util::vector_view view(reinterpret_cast(data.get()), num_elements); + auto data = std::make_unique::Word[]>( + (num_elements + sizeof(typename vector_view::Word) - 1) / + sizeof(typename vector_view::Word)); + util::vector_view view(reinterpret_cast::Word *>(data.get()), + num_elements); std::vector reference; std::mt19937 rng;