Implements Compressed Node Based Graph (De-)Serialization Skeleton

Implements parallel recursion for the partitioner
Fixes osrm-extract's --dump-partition-graph: it is a switch and accepts no further tokens

References:
- http://www.boost.org/doc/libs/1_55_0/doc/html/boost/program_options/bool_switch.html

Pulls parameters through to make them configurable from the outside

Defaults are equivalent to:

    ./osrm-partition \
      berlin-latest.osrm \
      --max-cell-size 4096 \
      --balance 1.2 \
      --boundary 0.25 \
      --optimizing-cuts 10

Fixes parallel_do call for Intel TBB 4.2 (Trusty): no range-based overload
This commit is contained in:
Daniel J. Hofmann 2017-01-26 17:53:19 +01:00 committed by Patrick Niklaus
parent 786be6f570
commit b9ed20bb9b
10 changed files with 327 additions and 85 deletions

View File

@ -91,6 +91,11 @@ class Extractor
void WriteTurnLaneData(const std::string &turn_lane_file) const;
// Writes compressed node based graph and its embedding into a file for osrm-partition to use.
static void WriteCompressedNodeBasedGraph(const std::string &path,
const util::NodeBasedDynamicGraph &graph,
const std::vector<QueryNode> &externals);
// globals persisting during the extraction process and the graph generation process
// during turn lane analysis, we might have to combine lanes for roads that are modelled as two

View File

@ -77,6 +77,7 @@ struct ExtractorConfig
edge_based_node_weights_output_path = basepath + ".osrm.enw";
profile_properties_output_path = basepath + ".osrm.properties";
intersection_class_data_output_path = basepath + ".osrm.icd";
compressed_node_based_graph_output_path = basepath + ".osrm.cnbg";
}
boost::filesystem::path input_path;
@ -108,6 +109,9 @@ struct ExtractorConfig
std::string edge_segment_lookup_path;
bool use_metadata;
bool dump_compressed_node_based_graph;
std::string compressed_node_based_graph_output_path;
};
}
}

View File

@ -31,15 +31,22 @@ struct PartitionConfig
}
edge_based_graph_path = basepath + ".osrm.ebg";
compressed_node_based_graph_path = basepath + ".osrm.cnbg";
partition_path = basepath + ".osrm.partition";
}
// might be changed to the node based graph at some point
boost::filesystem::path base_path;
boost::filesystem::path edge_based_graph_path;
boost::filesystem::path compressed_node_based_graph_path;
boost::filesystem::path partition_path;
unsigned requested_num_threads;
std::size_t maximum_cell_size;
double balance;
double boundary_factor;
std::size_t num_optimizing_cuts;
};
}
}

View File

@ -125,7 +125,7 @@ template <typename NodeEntryT, typename EdgeEntryT> class RemappableGraph
const auto remaining_edges = std::distance(BeginEdges(node), center);
node.edges_end = node.edges_begin + remaining_edges;
return center;
};
}
protected:
std::vector<NodeT> nodes;

View File

@ -19,6 +19,7 @@ class RecursiveBisection
RecursiveBisection(std::size_t maximum_cell_size,
double balance,
double boundary_factor,
std::size_t num_optimizing_cuts,
BisectionGraph &bisection_graph);
private:

View File

@ -13,6 +13,7 @@
#include "util/exception.hpp"
#include "util/exception_utils.hpp"
#include "util/graph_loader.hpp"
#include "util/integer_range.hpp"
#include "util/io.hpp"
#include "util/log.hpp"
#include "util/name_table.hpp"
@ -32,6 +33,7 @@
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/optional/optional.hpp>
#include <boost/scope_exit.hpp>
#include <osmium/io/any_input.hpp>
@ -45,6 +47,7 @@
#include <bitset>
#include <chrono>
#include <fstream>
#include <future>
#include <iostream>
#include <iterator>
#include <memory>
@ -503,6 +506,32 @@ Extractor::BuildEdgeExpandedGraph(ScriptingEnvironment &scripting_environment,
config.turn_penalties_index_path,
config.generate_edge_lookup);
// The osrm-partition tool requires the compressed node based graph with an embedding.
//
// The `Run` function above re-numbers non-reverse compressed node based graph edges
// to a continuous range so that the nodes in the edge based graph are continuous.
//
// Luckily node based node ids still coincide with the coordinate array.
// That's the reason we can only here write out the final compressed node based graph.
// Dumps to file asynchronously and makes sure we wait for its completion.
std::future<void> compressed_node_based_graph_writing;
BOOST_SCOPE_EXIT_ALL(&)
{
if (compressed_node_based_graph_writing.valid())
compressed_node_based_graph_writing.wait();
};
if (config.dump_compressed_node_based_graph)
{
compressed_node_based_graph_writing = std::async(std::launch::async, [&] {
WriteCompressedNodeBasedGraph(config.compressed_node_based_graph_output_path,
*node_based_graph,
internal_to_external_node_map);
});
}
WriteTurnLaneData(config.turn_lane_descriptions_file_name);
compressed_edge_container.SerializeInternalVector(config.geometry_output_path);
@ -695,5 +724,57 @@ void Extractor::WriteTurnLaneData(const std::string &turn_lane_file) const
util::Log() << "done (" << TIMER_SEC(turn_lane_timer) << ")";
}
// Serializes the compressed node based graph together with its embedding to `path`.
//
// On-disk layout: | Fingerprint | #e | #n | edges | coordinates |
//  - uint64: number of edges, i.e. (from, to) pairs
//  - uint64: number of nodes and therefore also of coordinates
//  - (uint32_t, uint32_t): num_edges * edges
//  - (int32_t, int32_t): num_nodes * coordinates (lon, lat)
//
// Throws util::exception as soon as any single write reports failure.
void Extractor::WriteCompressedNodeBasedGraph(const std::string &path,
                                              const util::NodeBasedDynamicGraph &graph,
                                              const std::vector<QueryNode> &externals)
{
    storage::io::FileWriter writer{path, storage::io::FileWriter::GenerateFingerprint};

    const auto edge_count = graph.GetNumberOfEdges();
    const auto node_count = graph.GetNumberOfNodes();

    BOOST_ASSERT_MSG(node_count == externals.size(), "graph and embedding out of sync");

    // Funnel every write result through one place so that a failure always aborts the dump.
    const auto ensure = [](const auto &outcome) {
        if (!outcome)
            throw util::exception("Writing the compressed node based graph to disk failed");
    };

    // Header: both element counts.
    ensure(writer.WriteElementCount64(edge_count));
    ensure(writer.WriteElementCount64(node_count));

    // Edge section: walk the nodes in id order and emit one (from, to) pair per adjacent edge.
    for (NodeID source = 0; source < node_count; ++source)
    {
        for (const EdgeID edge_id : graph.GetAdjacentEdgeRange(source))
        {
            const auto target = graph.GetTarget(edge_id);

            ensure(writer.WriteOne(source));
            ensure(writer.WriteOne(target));
        }
    }

    // Coordinate section: one (lon, lat) pair per node, in the order of `externals`.
    for (const auto &external : externals)
    {
        ensure(writer.WriteOne(external.lon));
        ensure(writer.WriteOne(external.lat));
    }
}
} // namespace extractor
} // namespace osrm

View File

@ -2,9 +2,14 @@
#include "partition/bisection_graph.hpp"
#include "partition/recursive_bisection.hpp"
#include "storage/io.hpp"
#include "util/coordinate.hpp"
#include "util/log.hpp"
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/assert.hpp>
@ -16,14 +21,54 @@ namespace osrm
namespace partition
{
int Partitioner::Run(const PartitionConfig &config)
struct CompressedNodeBasedGraphEdge
{
const unsigned recommended_num_threads = tbb::task_scheduler_init::default_num_threads();
const auto number_of_threads = std::min(recommended_num_threads, config.requested_num_threads);
tbb::task_scheduler_init init(number_of_threads);
NodeID source;
NodeID target;
};
auto compressed_node_based_graph =
LoadCompressedNodeBasedGraph(config.compressed_node_based_graph_path.string());
// In-memory copy of a serialized compressed node based graph: its edge list plus one
// coordinate per node.
struct CompressedNodeBasedGraph
{
    // Deserializes the graph from `reader`.
    //
    // The constructor is `explicit` so that a FileReader can never be converted into a graph
    // implicitly; construction always has to be spelled out at the call site.
    explicit CompressedNodeBasedGraph(storage::io::FileReader &reader)
    {
        // Reads: | Fingerprint | #e | #n | edges | coordinates |
        // - uint64: number of edges (from, to) pairs
        // - uint64: number of nodes and therefore also coordinates
        // - (uint32_t, uint32_t): num_edges * edges
        // - (int32_t, int32_t): num_nodes * coordinates (lon, lat)
        const auto num_edges = reader.ReadElementCount64();
        const auto num_nodes = reader.ReadElementCount64();

        // Size both containers first: ReadInto is handed the pre-sized vectors to fill.
        edges.resize(num_edges);
        coordinates.resize(num_nodes);

        reader.ReadInto(edges);
        reader.ReadInto(coordinates);
    }

    // (source, target) node id pairs, in file order.
    std::vector<CompressedNodeBasedGraphEdge> edges;
    // One coordinate per node; sized from the node count stored in the file.
    std::vector<util::Coordinate> coordinates;
};
// Opens the file at `path`, asking the reader to verify its fingerprint, and returns the
// compressed node based graph deserialized from it.
CompressedNodeBasedGraph LoadCompressedNodeBasedGraph(const std::string &path)
{
    storage::io::FileReader reader(path, storage::io::FileReader::VerifyFingerprint);

    return CompressedNodeBasedGraph{reader};
}
void LogGeojson(const std::string &filename, const std::vector<std::uint32_t> &bisection_ids)
{
// reload graph, since we destroyed the old one
auto compressed_node_based_graph = LoadCompressedNodeBasedGraph(filename);
util::Log() << "Loaded compressed node based graph: "
<< compressed_node_based_graph.edges.size() << " edges, "
<< compressed_node_based_graph.coordinates.size() << " nodes";
groupEdgesBySource(begin(compressed_node_based_graph.edges),
end(compressed_node_based_graph.edges));
@ -32,7 +77,78 @@ int Partitioner::Run(const PartitionConfig &config)
makeBisectionGraph(compressed_node_based_graph.coordinates,
adaptToBisectionEdge(std::move(compressed_node_based_graph.edges)));
RecursiveBisection recursive_bisection(1024, 1.1, 0.25, graph);
// Returns the index of the highest bit in which `lhs` and `rhs` differ, i.e. floor(log2(lhs ^ rhs)).
//
// Computed with integer shifts only: going through log(x) / log(2.0) truncates a double, which
// can land just below the exact integer for powers of two depending on the math library, and
// is undefined for identical inputs. Identical inputs (no differing bit) yield 0 here.
const auto get_level = [](const std::uint32_t lhs, const std::uint32_t rhs) {
    std::uint32_t xored = lhs ^ rhs;
    std::uint32_t level = 0;
    // Every shift that still leaves a set bit behind moves the highest set bit one index up.
    while (xored >>= 1)
        ++level;
    return level;
};
// Mirrors the 32 bits of `x`: bit 0 trades places with bit 31, bit 1 with bit 30, and so on.
const auto reverse_bits = [](std::uint32_t x) {
    // Each pass swaps neighbouring groups of `shift` bits; `mask` selects the lower group.
    struct SwapPass
    {
        unsigned shift;
        std::uint32_t mask;
    };
    const SwapPass passes[] = {{1, 0x55555555u},
                               {2, 0x33333333u},
                               {4, 0x0f0f0f0fu},
                               {8, 0x00ff00ffu},
                               {16, 0xffffu}};

    for (const auto &pass : passes)
        x = ((x >> pass.shift) & pass.mask) | ((x & pass.mask) << pass.shift);

    return x;
};
std::vector<std::vector<util::Coordinate>> border_vertices(33);
for (NodeID nid = 0; nid < graph.NumberOfNodes(); ++nid)
{
const auto source_id = reverse_bits(bisection_ids[nid]);
for (const auto &edge : graph.Edges(nid))
{
const auto target_id = reverse_bits(bisection_ids[edge.target]);
if (source_id != target_id)
{
auto level = get_level(source_id, target_id);
border_vertices[level].push_back(graph.Node(nid).coordinate);
border_vertices[level].push_back(graph.Node(edge.target).coordinate);
}
}
}
util::ScopedGeojsonLoggerGuard<util::CoordinateVectorToMultiPoint> guard(
"border_vertices.geojson");
std::size_t level = 0;
for (auto &bv : border_vertices)
{
if (!bv.empty())
{
std::sort(bv.begin(), bv.end(), [](const auto lhs, const auto rhs) {
return std::tie(lhs.lon, lhs.lat) < std::tie(rhs.lon, rhs.lat);
});
bv.erase(std::unique(bv.begin(), bv.end()), bv.end());
util::json::Object jslevel;
jslevel.values["level"] = util::json::Number(level++);
guard.Write(bv, jslevel);
}
}
}
int Partitioner::Run(const PartitionConfig &config)
{
auto compressed_node_based_graph =
LoadCompressedNodeBasedGraph(config.compressed_node_based_graph_path.string());
util::Log() << "Loaded compressed node based graph: "
<< compressed_node_based_graph.edges.size() << " edges, "
<< compressed_node_based_graph.coordinates.size() << " nodes";
auto graph =
makeBisectionGraph(compressed_node_based_graph.coordinates,
adaptToBisectionEdge(std::move(compressed_node_based_graph.edges)));
RecursiveBisection recursive_bisection(config.maximum_cell_size,
config.balance,
config.boundary_factor,
config.num_optimizing_cuts,
graph);
LogGeojson(config.compressed_node_based_graph_path.string(),
recursive_bisection.BisectionIDs());
return 0;
}

View File

@ -4,10 +4,18 @@
#include "partition/graph_view.hpp"
#include "partition/recursive_bisection_state.hpp"
#include "util/log.hpp"
#include "util/timing_util.hpp"
#include "util/geojson_debug_logger.hpp"
#include "util/geojson_debug_policies.hpp"
#include <tbb/parallel_do.h>
#include <climits> // for CHAR_BIT
#include <cstddef>
#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>
#include "extractor/tarjan_scc.hpp"
#include "partition/tarjan_graph_wrapper.hpp"
@ -22,83 +30,78 @@ namespace partition
RecursiveBisection::RecursiveBisection(std::size_t maximum_cell_size,
double balance,
double boundary_factor,
std::size_t num_optimizing_cuts,
BisectionGraph &bisection_graph_)
: bisection_graph(bisection_graph_), internal_state(bisection_graph_)
{
auto views = FakeFirstPartitionWithSCC(1000);
auto components = FakeFirstPartitionWithSCC(1000 /*limit for small*/); // TODO
BOOST_ASSERT(!components.empty());
std::cout << "Components: " << views.size() << std::endl;
;
// Parallelize recursive bisection trees. Root cut happens serially (well, this is a lie:
// since we handle big components in parallel, too. But we don't know this and
// don't have to. TBB's scheduler handles nested parallelism just fine).
//
// [ | ]
// / \ root cut
// [ | ] [ | ]
// / \ / \ descend, do cuts in parallel
//
// https://www.threadingbuildingblocks.org/docs/help/index.htm#reference/algorithms/parallel_do_func.html
// One unit of work in a recursive bisection tree: a sub-graph that still has to be cut,
// together with its depth in the tree.
struct TreeNode
{
// View onto the sub-graph this node bisects.
GraphView graph;
// Depth in the tree: roots are created with 0, children with their parent's depth + 1.
// It is handed to ApplyBisection along with the computed partition flags.
std::uint64_t depth;
};
// Build a recursive bisection tree for all big components independently in parallel.
// Last GraphView is all small components: skip for bisection.
auto first = begin(components);
auto last = end(components) - 1;
// We construct the trees on the fly: the root node is the entry point.
// All tree branches depend on the actual cut and will be generated while descending.
std::vector<TreeNode> forest;
forest.reserve(last - first);
std::transform(first, last, std::back_inserter(forest), [](auto graph) {
return TreeNode{std::move(graph), 0};
});
using Feeder = tbb::parallel_do_feeder<TreeNode>;
TIMER_START(bisection);
GraphView view = views.front();
InertialFlow flow(view);
const auto partition = flow.ComputePartition(10, balance, boundary_factor);
const auto center = internal_state.ApplyBisection(view.Begin(), view.End(), 0, partition.flags);
{
auto state = internal_state;
}
// Bisect graph into two parts. Get partition point and recurse left and right in parallel.
// Work through the forest: cut each node's sub-graph in two and feed the halves that still
// need cutting back into the same parallel_do.
tbb::parallel_do(begin(forest), end(forest), [&](const TreeNode &node, Feeder &feeder) {
    // Compute the cut for this sub-graph and record it for the node's depth.
    InertialFlow flow{node.graph};
    const auto partition = flow.ComputePartition(num_optimizing_cuts, balance, boundary_factor);

    const auto center = internal_state.ApplyBisection(
        node.graph.Begin(), node.graph.End(), node.depth, partition.flags);

    // A BisectionID has this many bits; nodes at or beyond that depth are not descended into.
    const auto maximum_depth = sizeof(RecursiveBisectionState::BisectionID) * CHAR_BIT;

    // Wraps one half into a child node and schedules it unless it is terminal, i.e. unless
    // it has fewer than maximum_cell_size nodes or sits too deep in the tree.
    const auto descend = [&](GraphView half) {
        TreeNode child{std::move(half), node.depth + 1};

        const auto big_enough = !(child.graph.NumberOfNodes() < maximum_cell_size);
        const auto shallow_enough = !(child.depth >= maximum_depth);

        if (big_enough && shallow_enough)
            feeder.add(std::move(child));
    };

    // Left half first, then right half.
    descend(GraphView{bisection_graph, node.graph.Begin(), center});
    descend(GraphView{bisection_graph, center, node.graph.End()});
});
TIMER_STOP(bisection);
std::cout << "Bisection completed in " << TIMER_SEC(bisection)
<< " Cut Size: " << partition.num_edges << " Balance: " << partition.num_nodes_source
<< std::endl;
util::ScopedGeojsonLoggerGuard<util::CoordinateVectorToLineString, util::LoggingScenario(0)>
logger_zero("level_0.geojson");
for (NodeID nid = 0; nid < bisection_graph.NumberOfNodes(); ++nid)
{
for (const auto &edge : bisection_graph.Edges(nid))
{
const auto target = edge.target;
if (internal_state.GetBisectionID(nid) != internal_state.GetBisectionID(target))
{
std::vector<util::Coordinate> coordinates;
coordinates.push_back(bisection_graph.Node(nid).coordinate);
coordinates.push_back(bisection_graph.Node(target).coordinate);
logger_zero.Write(coordinates);
}
}
}
TIMER_START(bisection_2_1);
GraphView recursive_view_lhs(bisection_graph, view.Begin(), center);
InertialFlow flow_lhs(recursive_view_lhs);
const auto partition_lhs = flow_lhs.ComputePartition(10, balance, boundary_factor);
internal_state.ApplyBisection(
recursive_view_lhs.Begin(), recursive_view_lhs.End(), 1, partition_lhs.flags);
TIMER_STOP(bisection_2_1);
std::cout << "Bisection(2) completed in " << TIMER_SEC(bisection_2_1)
<< " Cut Size: " << partition_lhs.num_edges
<< " Balance: " << partition_lhs.num_nodes_source << std::endl;
TIMER_START(bisection_2_2);
GraphView recursive_view_rhs(bisection_graph, center, view.End());
InertialFlow flow_rhs(recursive_view_rhs);
const auto partition_rhs = flow_rhs.ComputePartition(10, balance, boundary_factor);
internal_state.ApplyBisection(
recursive_view_rhs.Begin(), recursive_view_rhs.End(), 1, partition_rhs.flags);
TIMER_STOP(bisection_2_2);
std::cout << "Bisection(3) completed in " << TIMER_SEC(bisection_2_2)
<< " Cut Size: " << partition_rhs.num_edges
<< " Balance: " << partition_rhs.num_nodes_source << std::endl;
util::ScopedGeojsonLoggerGuard<util::CoordinateVectorToLineString, util::LoggingScenario(1)>
logger_one("level_1.geojson");
for (NodeID nid = 0; nid < bisection_graph.NumberOfNodes(); ++nid)
{
for (const auto &edge : bisection_graph.Edges(nid))
{
const auto target = edge.target;
if (internal_state.GetBisectionID(nid) != internal_state.GetBisectionID(target))
{
std::vector<util::Coordinate> coordinates;
coordinates.push_back(bisection_graph.Node(nid).coordinate);
coordinates.push_back(bisection_graph.Node(target).coordinate);
logger_one.Write(coordinates);
}
}
}
util::Log() << "Full bisection done in " << TIMER_SEC(bisection) << "s";
}
std::vector<GraphView>

View File

@ -46,13 +46,18 @@ return_code parseArguments(int argc, char *argv[], extractor::ExtractorConfig &e
->implicit_value(true)
->default_value(false),
"Generate a lookup table for internal edge-expanded-edge IDs to OSM node pairs")(
"dump-partition-graph",
boost::program_options::bool_switch(&extractor_config.dump_compressed_node_based_graph)
->implicit_value(true)
->default_value(false),
"Generate a partitionable graph file (.cnbg) for use with osrm-partition")(
"small-component-size",
boost::program_options::value<unsigned int>(&extractor_config.small_component_size)
->default_value(1000),
"Number of nodes required before a strongly-connected-componennt is considered big "
"(affects nearest neighbor snapping)")(
"with-osm-metadata",
boost::program_options::value<bool>(&extractor_config.use_metadata)
boost::program_options::bool_switch(&extractor_config.use_metadata)
->implicit_value(true)
->default_value(false),
"Use metada during osm parsing (This can affect the extraction performance).");

View File

@ -28,11 +28,31 @@ return_code parseArguments(int argc, char *argv[], partition::PartitionConfig &p
// declare a group of options that will be allowed both on command line
boost::program_options::options_description config_options("Configuration");
config_options.add_options()(
"threads,t",
boost::program_options::value<unsigned int>(&partition_config.requested_num_threads)
->default_value(tbb::task_scheduler_init::default_num_threads()),
"Number of threads to use");
// Tuning knobs of the partitioner; each value is written straight into `partition_config`.
// Defaults: hardware thread count, max-cell-size 4096, balance 1.2, boundary 0.25,
// optimizing-cuts 10.
config_options.add_options()
    //
    ("threads,t",
     boost::program_options::value<unsigned int>(&partition_config.requested_num_threads)
         ->default_value(tbb::task_scheduler_init::default_num_threads()),
     "Number of threads to use")
    //
    ("max-cell-size",
     boost::program_options::value<std::size_t>(&partition_config.maximum_cell_size)
         ->default_value(4096),
     "Bisection termination criterion based on cell size")
    //
    ("balance",
     boost::program_options::value<double>(&partition_config.balance)->default_value(1.2),
     "Balance for left and right side in single bisection")
    //
    ("boundary",
     boost::program_options::value<double>(&partition_config.boundary_factor)
         ->default_value(0.25),
     "Percentage of embedded nodes to contract as sources and sinks")
    //
    ("optimizing-cuts",
     boost::program_options::value<std::size_t>(&partition_config.num_optimizing_cuts)
         ->default_value(10),
     "Number of cuts to use for optimizing a single bisection");
// hidden options, will be allowed on command line, but will not be
// shown to the user