From 97592e5bc3beb890c5583e166485e0014b9c6868 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Thu, 15 Jun 2017 14:52:14 +0000 Subject: [PATCH] Refactor file writing in OSRM contract --- include/contractor/contractor.hpp | 7 --- include/contractor/files.hpp | 21 ++++++++ include/contractor/graph_contractor.hpp | 19 +++++-- include/storage/serialization.hpp | 42 +++++++++++++++- include/util/vector_view.hpp | 46 ++++++++++++++++- src/contractor/contractor.cpp | 54 ++++++-------------- src/contractor/graph_contractor.cpp | 16 +++--- src/storage/storage.cpp | 34 ++----------- unit_tests/util/vector_view.cpp | 67 +++++++++++++++++++++++++ 9 files changed, 212 insertions(+), 94 deletions(-) create mode 100644 unit_tests/util/vector_view.cpp diff --git a/include/contractor/contractor.hpp b/include/contractor/contractor.hpp index 680cc1c9a..7fabd34cf 100644 --- a/include/contractor/contractor.hpp +++ b/include/contractor/contractor.hpp @@ -65,16 +65,9 @@ class Contractor std::vector &&node_weights, std::vector &is_core_node, std::vector &inout_node_levels) const; - void WriteCoreNodeMarker(std::vector &&is_core_node) const; - void WriteContractedGraph(unsigned number_of_edge_based_nodes, - util::DeallocatingVector contracted_edge_list); private: ContractorConfig config; - - EdgeID LoadEdgeExpandedGraph(const ContractorConfig &config, - std::vector &edge_based_edge_list, - std::vector &node_weights); }; } } diff --git a/include/contractor/files.hpp b/include/contractor/files.hpp index 8dd570e87..88e8dda76 100644 --- a/include/contractor/files.hpp +++ b/include/contractor/files.hpp @@ -14,6 +14,27 @@ namespace contractor { namespace files { +// reads .osrm.core +template +void readCoreMarker(const boost::filesystem::path &path, CoreVectorT &is_core_node) +{ + static_assert(util::is_view_or_vector::value, + "is_core_node must be a vector"); + storage::io::FileReader reader(path, storage::io::FileReader::VerifyFingerprint); + + storage::serialization::read(reader, 
is_core_node); +} + +// writes .osrm.core +template +void writeCoreMarker(const boost::filesystem::path &path, const CoreVectorT &is_core_node) +{ + static_assert(util::is_view_or_vector::value, + "is_core_node must be a vector"); + storage::io::FileWriter writer(path, storage::io::FileWriter::GenerateFingerprint); + + storage::serialization::write(writer, is_core_node); +} // reads .osrm.hsgr file template diff --git a/include/contractor/graph_contractor.hpp b/include/contractor/graph_contractor.hpp index 06904af20..32850db0d 100644 --- a/include/contractor/graph_contractor.hpp +++ b/include/contractor/graph_contractor.hpp @@ -91,8 +91,8 @@ class GraphContractor GraphContractor(int nodes, std::vector edges, - std::vector &&node_levels_, - std::vector &&node_weights_); + std::vector node_levels_, + std::vector node_weights_); /* Flush all data from the contraction to disc and reorder stuff for better locality */ void FlushDataAndRebuildContractorGraph(ThreadDataContainer &thread_data_list, @@ -101,12 +101,14 @@ class GraphContractor void Run(double core_factor = 1.0); - void GetCoreMarker(std::vector &out_is_core_node); + std::vector GetCoreMarker(); - void GetNodeLevels(std::vector &out_node_levels); + std::vector GetNodeLevels(); - template inline void GetEdges(util::DeallocatingVector &edges) + template inline util::DeallocatingVector GetEdges() { + util::DeallocatingVector edges; + util::UnbufferedLog log; log << "Getting edges of minimized graph "; util::Percent p(log, contractor_graph->GetNumberOfNodes()); @@ -161,6 +163,13 @@ class GraphContractor edges.append(external_edge_list.begin(), external_edge_list.end()); external_edge_list.clear(); + + // sort and remove duplicates + tbb::parallel_sort(edges.begin(), edges.end()); + auto new_end = std::unique(edges.begin(), edges.end()); + edges.resize(new_end - edges.begin()); + + return edges; } private: diff --git a/include/storage/serialization.hpp b/include/storage/serialization.hpp index 7ea9bf0f9..c75757319 
100644 --- a/include/storage/serialization.hpp +++ b/include/storage/serialization.hpp @@ -104,7 +104,47 @@ template void write(io::FileWriter &writer, const util::vector_view { const auto count = data.size(); writer.WriteElementCount64(count); - return writer.WriteFrom(data.data(), count); + writer.WriteFrom(data.data(), count); +} + +template <> inline void read(io::FileReader &reader, util::vector_view &data) +{ + const auto count = reader.ReadElementCount64(); + BOOST_ASSERT(data.size() == count); + for (const auto index : util::irange(0, count)) + { + data[index] = reader.ReadOne(); + } +} + +template <> inline void write(io::FileWriter &writer, const util::vector_view &data) +{ + const auto count = data.size(); + writer.WriteElementCount64(count); + for (const auto index : util::irange(0, count)) + { + writer.WriteOne(data[index]); + } +} + +template <> inline void read(io::FileReader &reader, std::vector &data) +{ + const auto count = reader.ReadElementCount64(); + BOOST_ASSERT(data.size() == count); + for (const auto index : util::irange(0, count)) + { + data[index] = reader.ReadOne(); + } +} + +template <> inline void write(io::FileWriter &writer, const std::vector &data) +{ + const auto count = data.size(); + writer.WriteElementCount64(count); + for (const auto index : util::irange(0, count)) + { + writer.WriteOne(data[index]); + } } } } diff --git a/include/util/vector_view.hpp b/include/util/vector_view.hpp index 2c8482477..23dd4734f 100644 --- a/include/util/vector_view.hpp +++ b/include/util/vector_view.hpp @@ -143,8 +143,33 @@ template <> class vector_view unsigned *m_ptr; std::size_t m_size; + static constexpr std::size_t UNSIGNED_BITS = CHAR_BIT * sizeof(unsigned); + public: using value_type = bool; + struct reference + { + reference &operator=(bool value) + { + *m_ptr = (*m_ptr & ~mask) | (static_cast(value) * mask); + return *this; + } + + operator bool() const { return (*m_ptr) & mask; } + + bool operator==(const reference &other) const + { + 
return other.m_ptr == m_ptr && other.mask == mask; + } + + friend std::ostream &operator<<(std::ostream &os, const reference &rhs) + { + return os << static_cast(rhs); + } + + unsigned *m_ptr; + const unsigned mask; + }; vector_view() : m_ptr(nullptr), m_size(0) {} @@ -153,8 +178,8 @@ template <> class vector_view bool at(const std::size_t index) const { BOOST_ASSERT_MSG(index < m_size, "invalid size"); - const std::size_t bucket = index / (CHAR_BIT * sizeof(unsigned)); - const unsigned offset = index % (CHAR_BIT * sizeof(unsigned)); + const std::size_t bucket = index / UNSIGNED_BITS; + const unsigned offset = index % UNSIGNED_BITS; return m_ptr[bucket] & (1u << offset); } @@ -166,6 +191,14 @@ template <> class vector_view bool operator[](const unsigned index) const { return at(index); } + reference operator[](const unsigned index) + { + BOOST_ASSERT(index < m_size); + const std::size_t bucket = index / UNSIGNED_BITS; + const unsigned offset = index % UNSIGNED_BITS; + return reference{m_ptr + bucket, 1u << offset}; + } + template friend void swap(vector_view &, vector_view &) noexcept; }; @@ -186,6 +219,15 @@ template using ViewOrVector = typename std::conditional, InternalOrExternalVector>::type; + +// We can use this for compile time assertions +template +struct is_view_or_vector + : std::integral_constant, VectorT>::value || + std::is_same, VectorT>::value> +{ +}; } } diff --git a/src/contractor/contractor.cpp b/src/contractor/contractor.cpp index 9d4fb2215..4eb01ddf7 100644 --- a/src/contractor/contractor.cpp +++ b/src/contractor/contractor.cpp @@ -77,20 +77,27 @@ int Contractor::Run() std::move(node_levels), std::move(node_weights)); graph_contractor.Run(config.core_factor); - graph_contractor.GetEdges(contracted_edge_list); - graph_contractor.GetCoreMarker(is_core_node); - graph_contractor.GetNodeLevels(node_levels); + + contracted_edge_list = graph_contractor.GetEdges(); + is_core_node = graph_contractor.GetCoreMarker(); + node_levels = 
graph_contractor.GetNodeLevels(); } TIMER_STOP(contraction); util::Log() << "Contraction took " << TIMER_SEC(contraction) << " sec"; - WriteContractedGraph(max_edge_id, std::move(contracted_edge_list)); - WriteCoreNodeMarker(std::move(is_core_node)); + { + RangebasedCRC32 crc32_calculator; + const unsigned checksum = crc32_calculator(contracted_edge_list); + + files::writeGraph(config.graph_output_path, + checksum, + QueryGraph{max_edge_id + 1, std::move(contracted_edge_list)}); + } + + files::writeCoreMarker(config.core_output_path, is_core_node); if (!config.use_cached_priority) { - std::vector out_node_levels(std::move(node_levels)); - files::writeLevels(config.level_output_path, node_levels); } @@ -103,38 +110,5 @@ int Contractor::Run() return 0; } -void Contractor::WriteCoreNodeMarker(std::vector &&in_is_core_node) const -{ - std::vector is_core_node(std::move(in_is_core_node)); - std::vector unpacked_bool_flags(std::move(is_core_node.size())); - for (auto i = 0u; i < is_core_node.size(); ++i) - { - unpacked_bool_flags[i] = is_core_node[i] ? 1 : 0; - } - - storage::io::FileWriter core_marker_output_file(config.core_output_path, - storage::io::FileWriter::GenerateFingerprint); - - const std::size_t count = unpacked_bool_flags.size(); - core_marker_output_file.WriteElementCount64(count); - core_marker_output_file.WriteFrom(unpacked_bool_flags.data(), count); -} - -void Contractor::WriteContractedGraph(unsigned max_node_id, - util::DeallocatingVector contracted_edge_list) -{ - // Sorting contracted edges in a way that the static query graph can read some in in-place. 
- tbb::parallel_sort(contracted_edge_list.begin(), contracted_edge_list.end()); - auto new_end = std::unique(contracted_edge_list.begin(), contracted_edge_list.end()); - contracted_edge_list.resize(new_end - contracted_edge_list.begin()); - - RangebasedCRC32 crc32_calculator; - const unsigned checksum = crc32_calculator(contracted_edge_list); - - QueryGraph query_graph{max_node_id + 1, contracted_edge_list}; - - files::writeGraph(config.graph_output_path, checksum, query_graph); -} - } // namespace contractor } // namespace osrm diff --git a/src/contractor/graph_contractor.cpp b/src/contractor/graph_contractor.cpp index 0a3833ccf..51706f932 100644 --- a/src/contractor/graph_contractor.cpp +++ b/src/contractor/graph_contractor.cpp @@ -12,8 +12,8 @@ GraphContractor::GraphContractor(int nodes, std::vector input_ed GraphContractor::GraphContractor(int nodes, std::vector edges, - std::vector &&node_levels_, - std::vector &&node_weights_) + std::vector node_levels_, + std::vector node_weights_) : node_levels(std::move(node_levels_)), node_weights(std::move(node_weights_)) { tbb::parallel_sort(edges.begin(), edges.end()); @@ -427,15 +427,11 @@ void GraphContractor::Run(double core_factor) thread_data_list.data.clear(); } -void GraphContractor::GetCoreMarker(std::vector &out_is_core_node) -{ - out_is_core_node.swap(is_core_node); -} +// Can only be called once because it invalidates the marker +std::vector GraphContractor::GetCoreMarker() { return std::move(is_core_node); } -void GraphContractor::GetNodeLevels(std::vector &out_node_levels) -{ - out_node_levels.swap(node_levels); -} +// Can only be called once because it invalidates the node levels +std::vector GraphContractor::GetNodeLevels() { return std::move(node_levels); } float GraphContractor::EvaluateNodePriority(ContractorThreadData *const data, const NodeDepth node_depth, diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index 3655a32f0..cf364af1a 100644 --- a/src/storage/storage.cpp +++ 
b/src/storage/storage.cpp @@ -809,36 +809,12 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) if (boost::filesystem::exists(config.core_data_path)) { - io::FileReader core_marker_file(config.core_data_path, io::FileReader::VerifyFingerprint); - const auto number_of_core_markers = core_marker_file.ReadElementCount64(); + auto core_marker_ptr = + layout.GetBlockPtr(memory_ptr, storage::DataLayout::CH_CORE_MARKER); + util::vector_view is_core_node( + core_marker_ptr, layout.num_entries[storage::DataLayout::CH_CORE_MARKER]); - // load core markers - std::vector unpacked_core_markers(number_of_core_markers); - core_marker_file.ReadInto(unpacked_core_markers.data(), number_of_core_markers); - - const auto core_marker_ptr = - layout.GetBlockPtr(memory_ptr, DataLayout::CH_CORE_MARKER); - - for (auto i = 0u; i < number_of_core_markers; ++i) - { - BOOST_ASSERT(unpacked_core_markers[i] == 0 || unpacked_core_markers[i] == 1); - - if (unpacked_core_markers[i] == 1) - { - const unsigned bucket = i / 32; - const unsigned offset = i % 32; - const unsigned value = [&] { - unsigned return_value = 0; - if (0 != offset) - { - return_value = core_marker_ptr[bucket]; - } - return return_value; - }(); - - core_marker_ptr[bucket] = (value | (1u << offset)); - } - } + contractor::files::readCoreMarker(config.core_data_path, is_core_node); } // load profile properties diff --git a/unit_tests/util/vector_view.cpp b/unit_tests/util/vector_view.cpp new file mode 100644 index 000000000..1cde76e6c --- /dev/null +++ b/unit_tests/util/vector_view.cpp @@ -0,0 +1,67 @@ +#include "util/vector_view.hpp" +#include "util/typedefs.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +BOOST_AUTO_TEST_SUITE(vector_view_test) + +using namespace osrm; +using namespace osrm::util; + +BOOST_AUTO_TEST_CASE(rw_short) +{ + std::size_t num_elements = 1000; + std::unique_ptr data = std::make_unique(sizeof(std::uint16_t) * num_elements); + util::vector_view 
view(reinterpret_cast(data.get()), + num_elements); + std::vector reference; + + std::mt19937 rng; + rng.seed(1337); + std::uniform_int_distribution dist(0, (1UL << 16)); + + for (std::size_t i = 0; i < num_elements; i++) + { + auto r = dist(rng); + view[i] = r; + reference.push_back(r); + } + + for (std::size_t i = 0; i < num_elements; i++) + { + BOOST_CHECK_EQUAL(view[i], reference[i]); + } +} + +BOOST_AUTO_TEST_CASE(rw_bool) +{ + std::size_t num_elements = 1000; + std::unique_ptr data = std::make_unique(num_elements / sizeof(std::uint32_t)); + util::vector_view view(reinterpret_cast(data.get()), num_elements); + std::vector reference; + + std::mt19937 rng; + rng.seed(1337); + std::uniform_int_distribution dist(0, 2); + + for (std::size_t i = 0; i < num_elements; i++) + { + auto r = dist(rng); + view[i] = r; + reference.push_back(r); + } + + for (std::size_t i = 0; i < num_elements; i++) + { + BOOST_CHECK_EQUAL(view[i], reference[i]); + } +} + +BOOST_AUTO_TEST_SUITE_END()