From 9231335eefda6ff52301d49c9d42fcc4b5938070 Mon Sep 17 00:00:00 2001 From: "Daniel J. Hofmann" Date: Wed, 9 Sep 2015 17:22:51 +0200 Subject: [PATCH] Use Intel TBB's parallel_sort even for nested parallelism. TBB has a global task scheduler (that's one of the reason TBB is not linked statically but dyanmically instead). This allows control over all running threads, enabling us to use nested parallelism and the scheduler doing all the task allocation itself. That is, nested parallel execution such as in parallel_for(seq, [](const auto& rng){ parallel_sort(rng); }); is no problem at all, as the scheduler still claims control over the global environment. Therefore, use `parallel_sort` Range overload where possible. References: - https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/algorithms.htm - https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/algorithms/parallel_sort_func.htm - https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/task_scheduler.htm - https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/task_scheduler/task_scheduler_init_cls.htm - https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#tbb_userguide/Initializing_and_Terminating_the_Library.htm --- algorithms/route_name_extraction.hpp | 16 +++---- contractor/contractor.hpp | 36 +++++++++------- contractor/processing_chain.cpp | 2 +- data_structures/node_based_graph.hpp | 2 +- data_structures/static_rtree.hpp | 2 +- plugins/match.hpp | 46 +++++++++++++-------- routing_algorithms/alternative_path.hpp | 7 +++- routing_algorithms/direct_shortest_path.hpp | 10 ++++- tools/components.cpp | 2 +- util/container.hpp | 5 ++- util/graph_loader.hpp | 36 +++++++--------- 11 files changed, 93 insertions(+), 71 deletions(-) diff --git a/algorithms/route_name_extraction.hpp b/algorithms/route_name_extraction.hpp index 00dae89c4..24c046168 100644 --- a/algorithms/route_name_extraction.hpp +++ b/algorithms/route_name_extraction.hpp @@ -88,12 +88,11 @@ template struct ExtractRouteNames } // pick the longest segment for the shortest path. - std::sort(shortest_path_segments.begin(), shortest_path_segments.end(), length_comperator); + tbb::parallel_sort(shortest_path_segments, length_comperator); shortest_segment_1 = shortest_path_segments[0]; if (!alternative_path_segments.empty()) { - std::sort(alternative_path_segments.begin(), alternative_path_segments.end(), - length_comperator); + tbb::parallel_sort(alternative_path_segments, length_comperator); // also pick the longest segment for the alternative path alternative_segment_1 = alternative_path_segments[0]; @@ -102,15 +101,13 @@ template struct ExtractRouteNames // compute the set difference (for shortest path) depending on names between shortest and // alternative std::vector shortest_path_set_difference(shortest_path_segments.size()); - std::sort(shortest_path_segments.begin(), shortest_path_segments.end(), name_id_comperator); - std::sort(alternative_path_segments.begin(), alternative_path_segments.end(), - name_id_comperator); + tbb::parallel_sort(shortest_path_segments, name_id_comperator); + tbb::parallel_sort(alternative_path_segments, name_id_comperator); std::set_difference(shortest_path_segments.begin(), shortest_path_segments.end(), alternative_path_segments.begin(), alternative_path_segments.end(), shortest_path_set_difference.begin(), name_id_comperator); - std::sort(shortest_path_set_difference.begin(), shortest_path_set_difference.end(), - length_comperator); + tbb::parallel_sort(shortest_path_set_difference, length_comperator); shortest_segment_2 = PickNextLongestSegment(shortest_path_set_difference, shortest_segment_1.name_id); @@ -127,8 +124,7 @@ template struct ExtractRouteNames shortest_path_segments.begin(), shortest_path_segments.end(), alternative_path_set_difference.begin(), name_id_comperator); - std::sort(alternative_path_set_difference.begin(), alternative_path_set_difference.end(), - length_comperator); + tbb::parallel_sort(alternative_path_set_difference, length_comperator); if (!alternative_path_segments.empty()) { diff --git a/contractor/contractor.hpp b/contractor/contractor.hpp index d6f564259..44d3e3e7c 100644 --- a/contractor/contractor.hpp +++ b/contractor/contractor.hpp @@ -50,6 +50,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include class Contractor @@ -172,7 +173,8 @@ class Contractor SimpleLogger().Write(logWARNING) << "Edge weight large -> " << static_cast(std::max(diter->weight, 1)) << " : " - << static_cast(diter->source) << " -> " << static_cast(diter->target); + << static_cast(diter->source) << " -> " + << static_cast(diter->target); } #endif edges.emplace_back(diter->source, diter->target, @@ -190,7 +192,7 @@ class Contractor // FIXME not sure if we need this edges.shrink_to_fit(); - tbb::parallel_sort(edges.begin(), edges.end()); + tbb::parallel_sort(edges); NodeID edge = 0; for (NodeID i = 0; i < edges.size();) { @@ -284,7 +286,7 @@ class Contractor ~Contractor() {} - void Run( double core_factor = 1.0 ) + void Run(double core_factor = 1.0) { // for the preperation we can use a big grain size, which is much faster (probably cache) constexpr size_t InitGrainSize = 100000; @@ -330,13 +332,15 @@ class Contractor this->EvaluateNodePriority(data, &node_data[x], x); } }); - std::cout << "ok" << std::endl << "preprocessing " << number_of_nodes << " nodes ..." - << std::flush; + std::cout << "ok" << std::endl + << "preprocessing " << number_of_nodes << " nodes ..." << std::flush; bool flushed_contractor = false; - while (number_of_nodes > 2 && number_of_contracted_nodes < static_cast(number_of_nodes * core_factor) ) + while (number_of_nodes > 2 && + number_of_contracted_nodes < static_cast(number_of_nodes * core_factor)) { - if (!flushed_contractor && (number_of_contracted_nodes > static_cast(number_of_nodes * 0.65 * core_factor))) + if (!flushed_contractor && (number_of_contracted_nodes > + static_cast(number_of_nodes * 0.65 * core_factor))) { DeallocatingVector new_edge_set; // this one is not explicitely // cleared since it goes out of @@ -360,7 +364,8 @@ class Contractor for (const auto new_node_id : osrm::irange(0, remaining_nodes.size())) { // create renumbering maps in both directions - orig_node_id_from_new_node_id_map[new_node_id] = remaining_nodes[new_node_id].id; + orig_node_id_from_new_node_id_map[new_node_id] = + remaining_nodes[new_node_id].id; new_node_id_from_orig_id_map[remaining_nodes[new_node_id].id] = new_node_id; new_node_priority[new_node_id] = node_priorities[remaining_nodes[new_node_id].id]; @@ -411,7 +416,7 @@ class Contractor contractor_graph.reset(); // create new graph - std::sort(new_edge_set.begin(), new_edge_set.end()); + tbb::parallel_sort(new_edge_set); contractor_graph = std::make_shared(remaining_nodes.size(), new_edge_set); @@ -463,7 +468,7 @@ class Contractor [&](const ThreadDataContainer::EnumerableThreadData::range_type &range) { for (auto &data : range) - std::sort(data->inserted_edges.begin(), data->inserted_edges.end()); + tbb::parallel_sort(data->inserted_edges); }); tbb::parallel_for( tbb::blocked_range(first_independent_node, last, DeleteGrainSize), @@ -550,7 +555,7 @@ class Contractor if (remaining_nodes.size() > 2) { // TODO: for small cores a sorted array of core ids might also work good - for (const auto& node : remaining_nodes) + for (const auto &node : remaining_nodes) { auto orig_id = orig_node_id_from_new_node_id_map[node.id]; is_core_node[orig_id] = true; @@ -563,7 +568,8 @@ class Contractor is_core_node.clear(); } - SimpleLogger().Write() << "[core] " << remaining_nodes.size() << " nodes " << contractor_graph->GetNumberOfEdges() << " edges." << std::endl; + SimpleLogger().Write() << "[core] " << remaining_nodes.size() << " nodes " + << contractor_graph->GetNumberOfEdges() << " edges." << std::endl; thread_data_list.data.clear(); } @@ -861,7 +867,7 @@ class Contractor } } // eliminate duplicate entries ( forward + backward edges ) - std::sort(neighbours.begin(), neighbours.end()); + tbb::parallel_sort(neighbours); neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin()); for (const auto i : osrm::irange(0, neighbours.size())) @@ -890,7 +896,7 @@ class Contractor node_data[u].depth = (std::max)(node_data[node].depth + 1, node_data[u].depth); } // eliminate duplicate entries ( forward + backward edges ) - std::sort(neighbours.begin(), neighbours.end()); + tbb::parallel_sort(neighbours); neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin()); // re-evaluate priorities of neighboring nodes @@ -933,7 +939,7 @@ class Contractor neighbours.push_back(target); } - std::sort(neighbours.begin(), neighbours.end()); + tbb::parallel_sort(neighbours); neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin()); // examine all neighbours that are at most 2 hops away diff --git a/contractor/processing_chain.cpp b/contractor/processing_chain.cpp index 574aea733..87efe3b2f 100644 --- a/contractor/processing_chain.cpp +++ b/contractor/processing_chain.cpp @@ -183,7 +183,7 @@ void Prepare::FindComponents(unsigned max_edge_id, } } - tbb::parallel_sort(edges.begin(), edges.end()); + tbb::parallel_sort(edges); auto new_end = std::unique(edges.begin(), edges.end()); edges.resize(new_end - edges.begin()); diff --git a/data_structures/node_based_graph.hpp b/data_structures/node_based_graph.hpp index efdacb998..88fe4df0c 100644 --- a/data_structures/node_based_graph.hpp +++ b/data_structures/node_based_graph.hpp @@ -90,7 +90,7 @@ NodeBasedDynamicGraphFromEdges(int number_of_nodes, const std::vector( static_cast(number_of_nodes), edges_list); diff --git a/data_structures/static_rtree.hpp b/data_structures/static_rtree.hpp index d1a63fb26..436460280 100644 --- a/data_structures/static_rtree.hpp +++ b/data_structures/static_rtree.hpp @@ -392,7 +392,7 @@ class StaticRTree leaf_node_file.write((char *)&m_element_count, sizeof(uint64_t)); // sort the hilbert-value representatives - tbb::parallel_sort(input_wrapper_vector.begin(), input_wrapper_vector.end()); + tbb::parallel_sort(input_wrapper_vector); std::vector tree_nodes_in_level; // pack M elements into leaf node and write to leaf file diff --git a/plugins/match.hpp b/plugins/match.hpp index 640334215..ca57d5da1 100644 --- a/plugins/match.hpp +++ b/plugins/match.hpp @@ -42,6 +42,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "../util/json_util.hpp" #include "../util/string_util.hpp" +#include + #include #include @@ -97,7 +99,8 @@ template class MapMatchingPlugin : public BasePlugin osrm::matching::CandidateLists &candidates_lists) { double query_radius = 10 * gps_precision; - double last_distance = coordinate_calculation::great_circle_distance(input_coords[0], input_coords[1]); + double last_distance = + coordinate_calculation::great_circle_distance(input_coords[0], input_coords[1]); sub_trace_lengths.resize(input_coords.size()); sub_trace_lengths[0] = 0; @@ -106,7 +109,8 @@ template class MapMatchingPlugin : public BasePlugin bool allow_uturn = false; if (0 < current_coordinate) { - last_distance = coordinate_calculation::great_circle_distance(input_coords[current_coordinate - 1], input_coords[current_coordinate]); + last_distance = coordinate_calculation::great_circle_distance( + input_coords[current_coordinate - 1], input_coords[current_coordinate]); sub_trace_lengths[current_coordinate] += sub_trace_lengths[current_coordinate - 1] + last_distance; @@ -127,20 +131,24 @@ template class MapMatchingPlugin : public BasePlugin std::vector> candidates; facade->IncrementalFindPhantomNodeForCoordinateWithMaxDistance( - input_coords[current_coordinate], candidates, query_radius); + input_coords[current_coordinate], candidates, query_radius); // sort by foward id, then by reverse id and then by distance - std::sort(candidates.begin(), candidates.end(), - [](const std::pair& lhs, const std::pair& rhs) { + tbb::parallel_sort( + candidates, [](const std::pair &lhs, + const std::pair &rhs) + { return lhs.first.forward_node_id < rhs.first.forward_node_id || - (lhs.first.forward_node_id == rhs.first.forward_node_id && - (lhs.first.reverse_node_id < rhs.first.reverse_node_id || - (lhs.first.reverse_node_id == rhs.first.reverse_node_id && - lhs.second < rhs.second))); + (lhs.first.forward_node_id == rhs.first.forward_node_id && + (lhs.first.reverse_node_id < rhs.first.reverse_node_id || + (lhs.first.reverse_node_id == rhs.first.reverse_node_id && + lhs.second < rhs.second))); }); - auto new_end = std::unique(candidates.begin(), candidates.end(), - [](const std::pair& lhs, const std::pair& rhs) { + auto new_end = std::unique( + candidates.begin(), candidates.end(), [](const std::pair &lhs, + const std::pair &rhs) + { return lhs.first.forward_node_id == rhs.first.forward_node_id && lhs.first.reverse_node_id == rhs.first.reverse_node_id; }); @@ -165,10 +173,11 @@ template class MapMatchingPlugin : public BasePlugin } // sort by distance to make pruning effective - std::sort(candidates.begin(), candidates.end(), - [](const std::pair& lhs, const std::pair& rhs) { - return lhs.second < rhs.second; - }); + tbb::parallel_sort(candidates, [](const std::pair &lhs, + const std::pair &rhs) + { + return lhs.second < rhs.second; + }); candidates_lists.push_back(std::move(candidates)); } @@ -270,7 +279,8 @@ template class MapMatchingPlugin : public BasePlugin const auto &input_timestamps = route_parameters.timestamps; if (input_timestamps.size() > 0 && input_coords.size() != input_timestamps.size()) { - json_result.values["status"] = "Number of timestamps does not match number of coordinates ."; + json_result.values["status"] = + "Number of timestamps does not match number of coordinates ."; return 400; } @@ -289,8 +299,8 @@ template class MapMatchingPlugin : public BasePlugin return 400; } - const bool found_candidates = - getCandiates(input_coords, route_parameters.gps_precision, sub_trace_lengths, candidates_lists); + const bool found_candidates = getCandiates(input_coords, route_parameters.gps_precision, + sub_trace_lengths, candidates_lists); if (!found_candidates) { json_result.values["status"] = "No suitable matching candidates found."; diff --git a/routing_algorithms/alternative_path.hpp b/routing_algorithms/alternative_path.hpp index 327bb329f..c1712e53e 100644 --- a/routing_algorithms/alternative_path.hpp +++ b/routing_algorithms/alternative_path.hpp @@ -35,10 +35,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + #include #include #include +#include +#include const double VIAPATH_ALPHA = 0.10; const double VIAPATH_EPSILON = 0.15; // alternative at most 15% longer @@ -60,6 +64,7 @@ class AlternativeRouting final { } + NodeID node; int length; int sharing; @@ -287,7 +292,7 @@ class AlternativeRouting final ranked_candidates_list.emplace_back(node, length_of_via_path, sharing_of_via_path); } } - std::sort(ranked_candidates_list.begin(), ranked_candidates_list.end()); + tbb::parallel_sort(ranked_candidates_list); NodeID selected_via_node = SPECIAL_NODEID; int length_of_via_path = INVALID_EDGE_WEIGHT; diff --git a/routing_algorithms/direct_shortest_path.hpp b/routing_algorithms/direct_shortest_path.hpp index 2abee9874..e6351d527 100644 --- a/routing_algorithms/direct_shortest_path.hpp +++ b/routing_algorithms/direct_shortest_path.hpp @@ -30,12 +30,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + #include "routing_base.hpp" #include "../data_structures/search_engine_data.hpp" #include "../util/integer_range.hpp" #include "../util/timing_util.hpp" #include "../typedefs.h" +#include +#include +#include + /// This is a striped down version of the general shortest path algorithm. /// The general algorithm always computes two queries for each leg. This is only /// necessary in case of vias, where the directions of the start node is constrainted @@ -161,8 +167,8 @@ class DirectShortestPathRouting final { return lhs.first < rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second); }; - std::sort(forward_entry_points.begin(), forward_entry_points.end(), entry_point_comparator); - std::sort(reverse_entry_points.begin(), reverse_entry_points.end(), entry_point_comparator); + tbb::parallel_sort(forward_entry_points, entry_point_comparator); + tbb::parallel_sort(reverse_entry_points, entry_point_comparator); NodeID last_id = SPECIAL_NODEID; for (const auto p : forward_entry_points) diff --git a/tools/components.cpp b/tools/components.cpp index d00713abc..96ced3ec5 100644 --- a/tools/components.cpp +++ b/tools/components.cpp @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) std::vector graph_edge_list; auto number_of_nodes = LoadGraph(argv[1], coordinate_list, graph_edge_list); - tbb::parallel_sort(graph_edge_list.begin(), graph_edge_list.end()); + tbb::parallel_sort(graph_edge_list); const auto graph = std::make_shared(number_of_nodes, graph_edge_list); graph_edge_list.clear(); graph_edge_list.shrink_to_fit(); diff --git a/util/container.hpp b/util/container.hpp index d455e94e2..166561d8e 100644 --- a/util/container.hpp +++ b/util/container.hpp @@ -28,9 +28,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef CONTAINER_HPP #define CONTAINER_HPP +#include + #include #include #include +#include namespace osrm { @@ -49,7 +52,7 @@ constexpr bool has_resize_method(...) noexcept { return false; } template void sort_unique_resize(Container &vector) noexcept { - std::sort(std::begin(vector), std::end(vector)); + tbb::parallel_sort(vector); const auto number_of_unique_elements = std::unique(std::begin(vector), std::end(vector)) - std::begin(vector); if (detail::has_resize_method(vector)) diff --git a/util/graph_loader.hpp b/util/graph_loader.hpp index 71bc9e9f3..dc68a61df 100644 --- a/util/graph_loader.hpp +++ b/util/graph_loader.hpp @@ -45,11 +45,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include -#include -#include -#include +#include #include /** @@ -57,8 +54,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * The since the restrictions reference nodes using their external node id, * we need to renumber it to the new internal id. */ -unsigned loadRestrictionsFromFile(std::istream& input_stream, - std::vector& restriction_list) +unsigned loadRestrictionsFromFile(std::istream &input_stream, + std::vector &restriction_list) { const FingerPrint fingerprint_valid = FingerPrint::GetValid(); FingerPrint fingerprint_loaded; @@ -74,14 +71,13 @@ unsigned loadRestrictionsFromFile(std::istream& input_stream, restriction_list.resize(number_of_usable_restrictions); if (number_of_usable_restrictions > 0) { - input_stream.read((char *) restriction_list.data(), - number_of_usable_restrictions * sizeof(TurnRestriction)); + input_stream.read((char *)restriction_list.data(), + number_of_usable_restrictions * sizeof(TurnRestriction)); } return number_of_usable_restrictions; } - /** * Reads the beginning of an .osrm file and produces: * - list of barrier nodes @@ -132,36 +128,36 @@ NodeID loadNodesFromFile(std::istream &input_stream, /** * Reads a .osrm file and produces the edges. */ -NodeID loadEdgesFromFile(std::istream &input_stream, - std::vector &edge_list) +NodeID loadEdgesFromFile(std::istream &input_stream, std::vector &edge_list) { EdgeID m; input_stream.read(reinterpret_cast(&m), sizeof(unsigned)); edge_list.resize(m); SimpleLogger().Write() << " and " << m << " edges "; - input_stream.read((char *) edge_list.data(), m * sizeof(NodeBasedEdge)); + input_stream.read((char *)edge_list.data(), m * sizeof(NodeBasedEdge)); BOOST_ASSERT(edge_list.size() > 0); #ifndef NDEBUG SimpleLogger().Write() << "Validating loaded edges..."; - std::sort(edge_list.begin(), edge_list.end(), - [](const NodeBasedEdge& lhs, const NodeBasedEdge& rhs) - { - return (lhs.source < rhs.source) || (lhs.source == rhs.source && lhs.target < rhs.target); - }); + tbb::parallel_sort(edge_list, [](const NodeBasedEdge &lhs, const NodeBasedEdge &rhs) + { + return (lhs.source < rhs.source) || + (lhs.source == rhs.source && lhs.target < rhs.target); + }); for (auto i = 1u; i < edge_list.size(); ++i) { - const auto& edge = edge_list[i]; - const auto& prev_edge = edge_list[i-1]; + const auto &edge = edge_list[i]; + const auto &prev_edge = edge_list[i - 1]; BOOST_ASSERT_MSG(edge.weight > 0, "loaded null weight"); BOOST_ASSERT_MSG(edge.forward, "edge must be oriented in forward direction"); BOOST_ASSERT_MSG(edge.travel_mode != TRAVEL_MODE_INACCESSIBLE, "loaded non-accessible"); BOOST_ASSERT_MSG(edge.source != edge.target, "loaded edges contain a loop"); - BOOST_ASSERT_MSG(edge.source != prev_edge.source || edge.target != prev_edge.target, "loaded edges contain a multi edge"); + BOOST_ASSERT_MSG(edge.source != prev_edge.source || edge.target != prev_edge.target, + "loaded edges contain a multi edge"); } #endif