Use Intel TBB's parallel_sort even for nested parallelism.

TBB has a global task scheduler (that's one of the reason TBB is not
linked statically but dyanmically instead). This allows control over all
running threads, enabling us to use nested parallelism and the scheduler
doing all the task allocation itself.

That is, nested parallel execution such as in

    parallel_for(seq, [](const auto& rng){
      parallel_sort(rng);
    });

is no problem at all, as the scheduler still claims control over the
global environment.

Therefore, use `parallel_sort` Range overload where possible.

References:

- https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/algorithms.htm
- https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/algorithms/parallel_sort_func.htm
- https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/task_scheduler.htm
- https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#reference/task_scheduler/task_scheduler_init_cls.htm
- https://www.threadingbuildingblocks.org/docs/help/hh_goto.htm#tbb_userguide/Initializing_and_Terminating_the_Library.htm
This commit is contained in:
Daniel J. Hofmann 2015-09-09 17:22:51 +02:00
parent dfac34beac
commit 9231335eef
11 changed files with 93 additions and 71 deletions

View File

@ -88,12 +88,11 @@ template <class DataFacadeT, class SegmentT> struct ExtractRouteNames
}
// pick the longest segment for the shortest path.
std::sort(shortest_path_segments.begin(), shortest_path_segments.end(), length_comperator);
tbb::parallel_sort(shortest_path_segments, length_comperator);
shortest_segment_1 = shortest_path_segments[0];
if (!alternative_path_segments.empty())
{
std::sort(alternative_path_segments.begin(), alternative_path_segments.end(),
length_comperator);
tbb::parallel_sort(alternative_path_segments, length_comperator);
// also pick the longest segment for the alternative path
alternative_segment_1 = alternative_path_segments[0];
@ -102,15 +101,13 @@ template <class DataFacadeT, class SegmentT> struct ExtractRouteNames
// compute the set difference (for shortest path) depending on names between shortest and
// alternative
std::vector<SegmentT> shortest_path_set_difference(shortest_path_segments.size());
std::sort(shortest_path_segments.begin(), shortest_path_segments.end(), name_id_comperator);
std::sort(alternative_path_segments.begin(), alternative_path_segments.end(),
name_id_comperator);
tbb::parallel_sort(shortest_path_segments, name_id_comperator);
tbb::parallel_sort(alternative_path_segments, name_id_comperator);
std::set_difference(shortest_path_segments.begin(), shortest_path_segments.end(),
alternative_path_segments.begin(), alternative_path_segments.end(),
shortest_path_set_difference.begin(), name_id_comperator);
std::sort(shortest_path_set_difference.begin(), shortest_path_set_difference.end(),
length_comperator);
tbb::parallel_sort(shortest_path_set_difference, length_comperator);
shortest_segment_2 =
PickNextLongestSegment(shortest_path_set_difference, shortest_segment_1.name_id);
@ -127,8 +124,7 @@ template <class DataFacadeT, class SegmentT> struct ExtractRouteNames
shortest_path_segments.begin(), shortest_path_segments.end(),
alternative_path_set_difference.begin(), name_id_comperator);
std::sort(alternative_path_set_difference.begin(), alternative_path_set_difference.end(),
length_comperator);
tbb::parallel_sort(alternative_path_set_difference, length_comperator);
if (!alternative_path_segments.empty())
{

View File

@ -50,6 +50,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <limits>
#include <memory>
#include <vector>
class Contractor
@ -172,7 +173,8 @@ class Contractor
SimpleLogger().Write(logWARNING)
<< "Edge weight large -> "
<< static_cast<unsigned int>(std::max(diter->weight, 1)) << " : "
<< static_cast<unsigned int>(diter->source) << " -> " << static_cast<unsigned int>(diter->target);
<< static_cast<unsigned int>(diter->source) << " -> "
<< static_cast<unsigned int>(diter->target);
}
#endif
edges.emplace_back(diter->source, diter->target,
@ -190,7 +192,7 @@ class Contractor
// FIXME not sure if we need this
edges.shrink_to_fit();
tbb::parallel_sort(edges.begin(), edges.end());
tbb::parallel_sort(edges);
NodeID edge = 0;
for (NodeID i = 0; i < edges.size();)
{
@ -284,7 +286,7 @@ class Contractor
~Contractor() {}
void Run( double core_factor = 1.0 )
void Run(double core_factor = 1.0)
{
// for the preperation we can use a big grain size, which is much faster (probably cache)
constexpr size_t InitGrainSize = 100000;
@ -330,13 +332,15 @@ class Contractor
this->EvaluateNodePriority(data, &node_data[x], x);
}
});
std::cout << "ok" << std::endl << "preprocessing " << number_of_nodes << " nodes ..."
<< std::flush;
std::cout << "ok" << std::endl
<< "preprocessing " << number_of_nodes << " nodes ..." << std::flush;
bool flushed_contractor = false;
while (number_of_nodes > 2 && number_of_contracted_nodes < static_cast<NodeID>(number_of_nodes * core_factor) )
while (number_of_nodes > 2 &&
number_of_contracted_nodes < static_cast<NodeID>(number_of_nodes * core_factor))
{
if (!flushed_contractor && (number_of_contracted_nodes > static_cast<NodeID>(number_of_nodes * 0.65 * core_factor)))
if (!flushed_contractor && (number_of_contracted_nodes >
static_cast<NodeID>(number_of_nodes * 0.65 * core_factor)))
{
DeallocatingVector<ContractorEdge> new_edge_set; // this one is not explicitely
// cleared since it goes out of
@ -360,7 +364,8 @@ class Contractor
for (const auto new_node_id : osrm::irange<std::size_t>(0, remaining_nodes.size()))
{
// create renumbering maps in both directions
orig_node_id_from_new_node_id_map[new_node_id] = remaining_nodes[new_node_id].id;
orig_node_id_from_new_node_id_map[new_node_id] =
remaining_nodes[new_node_id].id;
new_node_id_from_orig_id_map[remaining_nodes[new_node_id].id] = new_node_id;
new_node_priority[new_node_id] =
node_priorities[remaining_nodes[new_node_id].id];
@ -411,7 +416,7 @@ class Contractor
contractor_graph.reset();
// create new graph
std::sort(new_edge_set.begin(), new_edge_set.end());
tbb::parallel_sort(new_edge_set);
contractor_graph =
std::make_shared<ContractorGraph>(remaining_nodes.size(), new_edge_set);
@ -463,7 +468,7 @@ class Contractor
[&](const ThreadDataContainer::EnumerableThreadData::range_type &range)
{
for (auto &data : range)
std::sort(data->inserted_edges.begin(), data->inserted_edges.end());
tbb::parallel_sort(data->inserted_edges);
});
tbb::parallel_for(
tbb::blocked_range<int>(first_independent_node, last, DeleteGrainSize),
@ -550,7 +555,7 @@ class Contractor
if (remaining_nodes.size() > 2)
{
// TODO: for small cores a sorted array of core ids might also work good
for (const auto& node : remaining_nodes)
for (const auto &node : remaining_nodes)
{
auto orig_id = orig_node_id_from_new_node_id_map[node.id];
is_core_node[orig_id] = true;
@ -563,7 +568,8 @@ class Contractor
is_core_node.clear();
}
SimpleLogger().Write() << "[core] " << remaining_nodes.size() << " nodes " << contractor_graph->GetNumberOfEdges() << " edges." << std::endl;
SimpleLogger().Write() << "[core] " << remaining_nodes.size() << " nodes "
<< contractor_graph->GetNumberOfEdges() << " edges." << std::endl;
thread_data_list.data.clear();
}
@ -861,7 +867,7 @@ class Contractor
}
}
// eliminate duplicate entries ( forward + backward edges )
std::sort(neighbours.begin(), neighbours.end());
tbb::parallel_sort(neighbours);
neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin());
for (const auto i : osrm::irange<std::size_t>(0, neighbours.size()))
@ -890,7 +896,7 @@ class Contractor
node_data[u].depth = (std::max)(node_data[node].depth + 1, node_data[u].depth);
}
// eliminate duplicate entries ( forward + backward edges )
std::sort(neighbours.begin(), neighbours.end());
tbb::parallel_sort(neighbours);
neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin());
// re-evaluate priorities of neighboring nodes
@ -933,7 +939,7 @@ class Contractor
neighbours.push_back(target);
}
std::sort(neighbours.begin(), neighbours.end());
tbb::parallel_sort(neighbours);
neighbours.resize(std::unique(neighbours.begin(), neighbours.end()) - neighbours.begin());
// examine all neighbours that are at most 2 hops away

View File

@ -183,7 +183,7 @@ void Prepare::FindComponents(unsigned max_edge_id,
}
}
tbb::parallel_sort(edges.begin(), edges.end());
tbb::parallel_sort(edges);
auto new_end = std::unique(edges.begin(), edges.end());
edges.resize(new_end - edges.begin());

View File

@ -90,7 +90,7 @@ NodeBasedDynamicGraphFromEdges(int number_of_nodes, const std::vector<NodeBasedE
}
);
tbb::parallel_sort(edges_list.begin(), edges_list.end());
tbb::parallel_sort(edges_list);
auto graph = std::make_shared<NodeBasedDynamicGraph>(
static_cast<NodeBasedDynamicGraph::NodeIterator>(number_of_nodes), edges_list);

View File

@ -392,7 +392,7 @@ class StaticRTree
leaf_node_file.write((char *)&m_element_count, sizeof(uint64_t));
// sort the hilbert-value representatives
tbb::parallel_sort(input_wrapper_vector.begin(), input_wrapper_vector.end());
tbb::parallel_sort(input_wrapper_vector);
std::vector<TreeNode> tree_nodes_in_level;
// pack M elements into leaf node and write to leaf file

View File

@ -42,6 +42,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "../util/json_util.hpp"
#include "../util/string_util.hpp"
#include <tbb/parallel_sort.h>
#include <cstdlib>
#include <algorithm>
@ -97,7 +99,8 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
osrm::matching::CandidateLists &candidates_lists)
{
double query_radius = 10 * gps_precision;
double last_distance = coordinate_calculation::great_circle_distance(input_coords[0], input_coords[1]);
double last_distance =
coordinate_calculation::great_circle_distance(input_coords[0], input_coords[1]);
sub_trace_lengths.resize(input_coords.size());
sub_trace_lengths[0] = 0;
@ -106,7 +109,8 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
bool allow_uturn = false;
if (0 < current_coordinate)
{
last_distance = coordinate_calculation::great_circle_distance(input_coords[current_coordinate - 1], input_coords[current_coordinate]);
last_distance = coordinate_calculation::great_circle_distance(
input_coords[current_coordinate - 1], input_coords[current_coordinate]);
sub_trace_lengths[current_coordinate] +=
sub_trace_lengths[current_coordinate - 1] + last_distance;
@ -127,20 +131,24 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
std::vector<std::pair<PhantomNode, double>> candidates;
facade->IncrementalFindPhantomNodeForCoordinateWithMaxDistance(
input_coords[current_coordinate], candidates, query_radius);
input_coords[current_coordinate], candidates, query_radius);
// sort by foward id, then by reverse id and then by distance
std::sort(candidates.begin(), candidates.end(),
[](const std::pair<PhantomNode, double>& lhs, const std::pair<PhantomNode, double>& rhs) {
tbb::parallel_sort(
candidates, [](const std::pair<PhantomNode, double> &lhs,
const std::pair<PhantomNode, double> &rhs)
{
return lhs.first.forward_node_id < rhs.first.forward_node_id ||
(lhs.first.forward_node_id == rhs.first.forward_node_id &&
(lhs.first.reverse_node_id < rhs.first.reverse_node_id ||
(lhs.first.reverse_node_id == rhs.first.reverse_node_id &&
lhs.second < rhs.second)));
(lhs.first.forward_node_id == rhs.first.forward_node_id &&
(lhs.first.reverse_node_id < rhs.first.reverse_node_id ||
(lhs.first.reverse_node_id == rhs.first.reverse_node_id &&
lhs.second < rhs.second)));
});
auto new_end = std::unique(candidates.begin(), candidates.end(),
[](const std::pair<PhantomNode, double>& lhs, const std::pair<PhantomNode, double>& rhs) {
auto new_end = std::unique(
candidates.begin(), candidates.end(), [](const std::pair<PhantomNode, double> &lhs,
const std::pair<PhantomNode, double> &rhs)
{
return lhs.first.forward_node_id == rhs.first.forward_node_id &&
lhs.first.reverse_node_id == rhs.first.reverse_node_id;
});
@ -165,10 +173,11 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
}
// sort by distance to make pruning effective
std::sort(candidates.begin(), candidates.end(),
[](const std::pair<PhantomNode, double>& lhs, const std::pair<PhantomNode, double>& rhs) {
return lhs.second < rhs.second;
});
tbb::parallel_sort(candidates, [](const std::pair<PhantomNode, double> &lhs,
const std::pair<PhantomNode, double> &rhs)
{
return lhs.second < rhs.second;
});
candidates_lists.push_back(std::move(candidates));
}
@ -270,7 +279,8 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
const auto &input_timestamps = route_parameters.timestamps;
if (input_timestamps.size() > 0 && input_coords.size() != input_timestamps.size())
{
json_result.values["status"] = "Number of timestamps does not match number of coordinates .";
json_result.values["status"] =
"Number of timestamps does not match number of coordinates .";
return 400;
}
@ -289,8 +299,8 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
return 400;
}
const bool found_candidates =
getCandiates(input_coords, route_parameters.gps_precision, sub_trace_lengths, candidates_lists);
const bool found_candidates = getCandiates(input_coords, route_parameters.gps_precision,
sub_trace_lengths, candidates_lists);
if (!found_candidates)
{
json_result.values["status"] = "No suitable matching candidates found.";

View File

@ -35,10 +35,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <boost/assert.hpp>
#include <tbb/parallel_sort.h>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <algorithm>
#include <utility>
const double VIAPATH_ALPHA = 0.10;
const double VIAPATH_EPSILON = 0.15; // alternative at most 15% longer
@ -60,6 +64,7 @@ class AlternativeRouting final
{
}
NodeID node;
int length;
int sharing;
@ -287,7 +292,7 @@ class AlternativeRouting final
ranked_candidates_list.emplace_back(node, length_of_via_path, sharing_of_via_path);
}
}
std::sort(ranked_candidates_list.begin(), ranked_candidates_list.end());
tbb::parallel_sort(ranked_candidates_list);
NodeID selected_via_node = SPECIAL_NODEID;
int length_of_via_path = INVALID_EDGE_WEIGHT;

View File

@ -30,12 +30,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <boost/assert.hpp>
#include <tbb/parallel_sort.h>
#include "routing_base.hpp"
#include "../data_structures/search_engine_data.hpp"
#include "../util/integer_range.hpp"
#include "../util/timing_util.hpp"
#include "../typedefs.h"
#include <vector>
#include <algorithm>
#include <utility>
/// This is a striped down version of the general shortest path algorithm.
/// The general algorithm always computes two queries for each leg. This is only
/// necessary in case of vias, where the directions of the start node is constrainted
@ -161,8 +167,8 @@ class DirectShortestPathRouting final
{
return lhs.first < rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second);
};
std::sort(forward_entry_points.begin(), forward_entry_points.end(), entry_point_comparator);
std::sort(reverse_entry_points.begin(), reverse_entry_points.end(), entry_point_comparator);
tbb::parallel_sort(forward_entry_points, entry_point_comparator);
tbb::parallel_sort(reverse_entry_points, entry_point_comparator);
NodeID last_id = SPECIAL_NODEID;
for (const auto p : forward_entry_points)

View File

@ -143,7 +143,7 @@ int main(int argc, char *argv[])
std::vector<TarjanEdge> graph_edge_list;
auto number_of_nodes = LoadGraph(argv[1], coordinate_list, graph_edge_list);
tbb::parallel_sort(graph_edge_list.begin(), graph_edge_list.end());
tbb::parallel_sort(graph_edge_list);
const auto graph = std::make_shared<TarjanGraph>(number_of_nodes, graph_edge_list);
graph_edge_list.clear();
graph_edge_list.shrink_to_fit();

View File

@ -28,9 +28,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CONTAINER_HPP
#define CONTAINER_HPP
#include <tbb/parallel_sort.h>
#include <algorithm>
#include <iterator>
#include <vector>
#include <utility>
namespace osrm
{
@ -49,7 +52,7 @@ constexpr bool has_resize_method(...) noexcept { return false; }
template <typename Container> void sort_unique_resize(Container &vector) noexcept
{
std::sort(std::begin(vector), std::end(vector));
tbb::parallel_sort(vector);
const auto number_of_unique_elements =
std::unique(std::begin(vector), std::end(vector)) - std::begin(vector);
if (detail::has_resize_method(vector))

View File

@ -45,11 +45,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cmath>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <unordered_map>
#include <ios>
#include <vector>
/**
@ -57,8 +54,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* The since the restrictions reference nodes using their external node id,
* we need to renumber it to the new internal id.
*/
unsigned loadRestrictionsFromFile(std::istream& input_stream,
std::vector<TurnRestriction>& restriction_list)
unsigned loadRestrictionsFromFile(std::istream &input_stream,
std::vector<TurnRestriction> &restriction_list)
{
const FingerPrint fingerprint_valid = FingerPrint::GetValid();
FingerPrint fingerprint_loaded;
@ -74,14 +71,13 @@ unsigned loadRestrictionsFromFile(std::istream& input_stream,
restriction_list.resize(number_of_usable_restrictions);
if (number_of_usable_restrictions > 0)
{
input_stream.read((char *) restriction_list.data(),
number_of_usable_restrictions * sizeof(TurnRestriction));
input_stream.read((char *)restriction_list.data(),
number_of_usable_restrictions * sizeof(TurnRestriction));
}
return number_of_usable_restrictions;
}
/**
* Reads the beginning of an .osrm file and produces:
* - list of barrier nodes
@ -132,36 +128,36 @@ NodeID loadNodesFromFile(std::istream &input_stream,
/**
* Reads a .osrm file and produces the edges.
*/
NodeID loadEdgesFromFile(std::istream &input_stream,
std::vector<NodeBasedEdge> &edge_list)
NodeID loadEdgesFromFile(std::istream &input_stream, std::vector<NodeBasedEdge> &edge_list)
{
EdgeID m;
input_stream.read(reinterpret_cast<char *>(&m), sizeof(unsigned));
edge_list.resize(m);
SimpleLogger().Write() << " and " << m << " edges ";
input_stream.read((char *) edge_list.data(), m * sizeof(NodeBasedEdge));
input_stream.read((char *)edge_list.data(), m * sizeof(NodeBasedEdge));
BOOST_ASSERT(edge_list.size() > 0);
#ifndef NDEBUG
SimpleLogger().Write() << "Validating loaded edges...";
std::sort(edge_list.begin(), edge_list.end(),
[](const NodeBasedEdge& lhs, const NodeBasedEdge& rhs)
{
return (lhs.source < rhs.source) || (lhs.source == rhs.source && lhs.target < rhs.target);
});
tbb::parallel_sort(edge_list, [](const NodeBasedEdge &lhs, const NodeBasedEdge &rhs)
{
return (lhs.source < rhs.source) ||
(lhs.source == rhs.source && lhs.target < rhs.target);
});
for (auto i = 1u; i < edge_list.size(); ++i)
{
const auto& edge = edge_list[i];
const auto& prev_edge = edge_list[i-1];
const auto &edge = edge_list[i];
const auto &prev_edge = edge_list[i - 1];
BOOST_ASSERT_MSG(edge.weight > 0, "loaded null weight");
BOOST_ASSERT_MSG(edge.forward, "edge must be oriented in forward direction");
BOOST_ASSERT_MSG(edge.travel_mode != TRAVEL_MODE_INACCESSIBLE, "loaded non-accessible");
BOOST_ASSERT_MSG(edge.source != edge.target, "loaded edges contain a loop");
BOOST_ASSERT_MSG(edge.source != prev_edge.source || edge.target != prev_edge.target, "loaded edges contain a multi edge");
BOOST_ASSERT_MSG(edge.source != prev_edge.source || edge.target != prev_edge.target,
"loaded edges contain a multi edge");
}
#endif