add unit tests for the different components of the partition tool

This commit is contained in:
Moritz Kobitzsch
2017-02-02 15:53:42 +01:00
committed by Patrick Niklaus
parent e316dad1cb
commit b789da45bd
25 changed files with 917 additions and 152 deletions
+21
View File
@@ -35,6 +35,7 @@ DinicMaxFlow::MinCut DinicMaxFlow::operator()(const GraphView &view,
const SourceSinkNodes &source_nodes,
const SourceSinkNodes &sink_nodes) const
{
BOOST_ASSERT(Validate(view, source_nodes, sink_nodes));
// for the inertial flow algorithm, we use quite a large set of nodes as source/sink nodes. Only
// a few of them can be part of the process, since they are grouped together. A standard
// parameterisation would be 25% sink/source nodes. This already includes 50% of the graph. By
@@ -219,6 +220,7 @@ std::size_t DinicMaxFlow::BlockingFlow(FlowEdges &flow,
};
std::for_each(border_sink_nodes.begin(), border_sink_nodes.end(), augment_all_paths);
BOOST_ASSERT(flow_increase > 0);
return flow_increase;
}
@@ -288,5 +290,24 @@ std::vector<NodeID> DinicMaxFlow::GetAugmentingPath(LevelGraph &levels,
return path;
}
// Sanity check for the inputs of a max-flow computation on `view`.
//
// Returns true only if
//  - no node of `source_nodes` is also contained in `sink_nodes`, and
//  - every id in `source_nodes` and in `sink_nodes` is smaller than view.NumberOfNodes().
bool DinicMaxFlow::Validate(const GraphView &view,
                            const SourceSinkNodes &source_nodes,
                            const SourceSinkNodes &sink_nodes) const
{
    const auto is_sink = [&sink_nodes](const auto node) { return sink_nodes.count(node) != 0; };
    const auto is_valid_id = [&view](const NodeID nid) { return nid < view.NumberOfNodes(); };

    // sink and source cannot share a common node
    const bool disjoint = std::none_of(source_nodes.begin(), source_nodes.end(), is_sink);

    // all ids have to address a node of the view
    const bool sources_in_range =
        std::all_of(source_nodes.begin(), source_nodes.end(), is_valid_id);
    const bool sinks_in_range = std::all_of(sink_nodes.begin(), sink_nodes.end(), is_valid_id);

    return disjoint && sources_in_range && sinks_in_range;
}
} // namespace partition
} // namespace osrm
+1 -4
View File
@@ -42,10 +42,7 @@ BisectionGraph::ConstNodeIterator GraphView::Begin() const { return begin; }
// Accessor for the view's `end` iterator (the counterpart of Begin()).
BisectionGraph::ConstNodeIterator GraphView::End() const { return end; }
// Looks up a node by forwarding `nid` unchanged to the underlying bisection_graph.
const GraphView::NodeT &GraphView::Node(const NodeID nid) const
{
return bisection_graph.Node(nid);
}
// Returns the node located `nid` positions after the view's `begin` iterator, i.e. `nid` is
// interpreted relative to the start of this view.
const GraphView::NodeT &GraphView::Node(const NodeID nid) const
{
    return begin[nid];
}
const GraphView::EdgeT &GraphView::Edge(const EdgeID eid) const
{
+12 -5
View File
@@ -1,6 +1,7 @@
#include "partition/partitioner.hpp"
#include "partition/bisection_graph.hpp"
#include "partition/recursive_bisection.hpp"
#include "partition/recursive_bisection_stats.hpp"
#include "storage/io.hpp"
#include "util/coordinate.hpp"
@@ -64,7 +65,7 @@ CompressedNodeBasedGraph LoadCompressedNodeBasedGraph(const std::string &path)
return graph;
}
void LogGeojson(const std::string &filename, const std::vector<std::uint32_t> &bisection_ids)
void LogGeojson(const std::string &filename, std::vector<std::uint32_t> bisection_ids)
{
// reload graph, since we destroyed the old one
auto compressed_node_based_graph = LoadCompressedNodeBasedGraph(filename);
@@ -95,14 +96,19 @@ void LogGeojson(const std::string &filename, const std::vector<std::uint32_t> &b
return x;
};
std::transform(bisection_ids.begin(),bisection_ids.end(),bisection_ids.begin(),reverse_bits);
printBisectionStats(bisection_ids, graph);
std::vector<std::vector<util::Coordinate>> border_vertices(33);
for (NodeID nid = 0; nid < graph.NumberOfNodes(); ++nid)
{
const auto source_id = reverse_bits(bisection_ids[nid]);
const auto source_id = bisection_ids[nid];
for (const auto &edge : graph.Edges(nid))
{
const auto target_id = reverse_bits(bisection_ids[edge.target]);
const auto target_id = bisection_ids[edge.target];
if (source_id != target_id)
{
auto level = get_level(source_id, target_id);
@@ -147,11 +153,12 @@ int Partitioner::Run(const PartitionConfig &config)
makeBisectionGraph(compressed_node_based_graph.coordinates,
adaptToBisectionEdge(std::move(compressed_node_based_graph.edges)));
RecursiveBisection recursive_bisection(config.maximum_cell_size,
RecursiveBisection recursive_bisection(graph,
config.maximum_cell_size,
config.balance,
config.boundary_factor,
config.num_optimizing_cuts,
graph);
config.small_component_size);
LogGeojson(config.compressed_node_based_graph_path.string(),
recursive_bisection.BisectionIDs());
+9 -81
View File
@@ -9,32 +9,28 @@
#include <tbb/parallel_do.h>
#include <algorithm>
#include <climits> // for CHAR_BIT
#include <cstddef>
#include <algorithm>
#include <iterator>
#include <unordered_map>
#include <utility>
#include <vector>
#include "extractor/tarjan_scc.hpp"
#include "partition/tarjan_graph_wrapper.hpp"
#include <unordered_map>
namespace osrm
{
namespace partition
{
RecursiveBisection::RecursiveBisection(std::size_t maximum_cell_size,
double balance,
double boundary_factor,
std::size_t num_optimizing_cuts,
BisectionGraph &bisection_graph_)
RecursiveBisection::RecursiveBisection(BisectionGraph &bisection_graph_,
const std::size_t maximum_cell_size,
const double balance,
const double boundary_factor,
const std::size_t num_optimizing_cuts,
const std::size_t small_component_size)
: bisection_graph(bisection_graph_), internal_state(bisection_graph_)
{
auto components = FakeFirstPartitionWithSCC(1000 /*limit for small*/); // TODO
auto components = internal_state.PrePartitionWithSCC(small_component_size);
BOOST_ASSERT(!components.empty());
// Parallelize recursive bisection trees. Root cut happens serially (well, this is a lie:
@@ -104,74 +100,6 @@ RecursiveBisection::RecursiveBisection(std::size_t maximum_cell_size,
util::Log() << "Full bisection done in " << TIMER_SEC(bisection) << "s";
}
// Splits the bisection graph into one GraphView per component. All components with at most
// `small_component_size` nodes are merged into a single group that ends up as the last view; if
// no such component exists, an empty view is appended instead, so the last view always stands for
// the small-component group.
//
// Side effects on bisection_graph: the nodes are stably reordered so that every group is
// contiguous, and every edge target is replaced by an id that counts nodes within the target's
// group.
std::vector<GraphView>
RecursiveBisection::FakeFirstPartitionWithSCC(const std::size_t small_component_size)
{
// since our graphs are unidirectional, we don't really need the scc. But tarjan is so nice and
// assigns IDs and counts sizes
TarjanGraphWrapper wrapped_graph(bisection_graph);
extractor::TarjanSCC<TarjanGraphWrapper> scc_algo(wrapped_graph);
scc_algo.Run();
// Map Edges to Sccs
// true if the node's component has at most small_component_size nodes
const auto in_small = [&scc_algo, small_component_size](const NodeID node_id) {
return scc_algo.GetComponentSize(scc_algo.GetComponentID(node_id)) <= small_component_size;
};
// -1 converts to the largest std::size_t, so the shared small-component group sorts last
const constexpr std::size_t small_component_id = -1;
// per group: the next free group-local node id
std::unordered_map<std::size_t, std::size_t> component_map;
// group id of a node: small_component_id for small components, the component id otherwise
const auto transform_id = [&](const NodeID node_id) -> std::size_t {
if (in_small(node_id))
return small_component_id;
else
return scc_algo.GetComponentID(node_id);
};
// mapping[original id] = group-local id, handed out in the current node order
std::vector<NodeID> mapping(bisection_graph.NumberOfNodes(), SPECIAL_NODEID);
for (const auto &node : bisection_graph.Nodes())
mapping[node.original_id] = component_map[transform_id(node.original_id)]++;
// needs to remove edges, if we should ever switch to directed graphs here
// the sort is stable, so the order within a group matches the order used to fill `mapping`
std::stable_sort(
bisection_graph.Begin(), bisection_graph.End(), [&](const auto &lhs, const auto &rhs) {
return transform_id(lhs.original_id) < transform_id(rhs.original_id);
});
// remap all remaining edges
// NOTE(review): assumes edge.target still holds original node ids at this point -- confirm
std::for_each(bisection_graph.Begin(), bisection_graph.End(), [&](const auto &node) {
for (auto &edge : bisection_graph.Edges(node))
edge.target = mapping[edge.target];
});
// cut the sorted node range into one view per run of equal group ids
std::vector<GraphView> views;
auto last = bisection_graph.CBegin();
// NOTE(review): dereferences Begin(); presumably the graph is never empty here -- confirm
auto last_id = transform_id(bisection_graph.Begin()->original_id);
for (auto itr = bisection_graph.CBegin(); itr != bisection_graph.CEnd(); ++itr)
{
auto itr_id = transform_id(itr->original_id);
if (last_id != itr_id)
{
views.push_back(GraphView(bisection_graph, last, itr));
last_id = itr_id;
last = itr;
}
}
// the final run is closed by the end of the graph
views.push_back(GraphView(bisection_graph, last, bisection_graph.CEnd()));
// check whether any component was put into the small-component group at all
bool has_small_component = [&]() {
for (std::size_t i = 0; i < scc_algo.GetNumberOfComponents(); ++i)
if (scc_algo.GetComponentSize(i) <= small_component_size)
return true;
return false;
}();
// no small components: append an empty view in their place
if (!has_small_component)
views.push_back(GraphView(bisection_graph, bisection_graph.CEnd(), bisection_graph.CEnd()));
return views;
}
const std::vector<RecursiveBisectionState::BisectionID> &RecursiveBisection::BisectionIDs() const
{
return internal_state.BisectionIDs();
@@ -1,4 +1,6 @@
#include "partition/recursive_bisection_state.hpp"
#include "extractor/tarjan_scc.hpp"
#include "partition/tarjan_graph_wrapper.hpp"
#include <algorithm>
#include <numeric>
@@ -6,6 +8,7 @@
// TODO remove
#include <bitset>
#include <iostream>
#include <unordered_map>
namespace osrm
{
@@ -32,6 +35,10 @@ RecursiveBisectionState::ApplyBisection(const NodeIterator const_begin,
const std::size_t depth,
const std::vector<bool> &partition)
{
// ensure that the iterators belong to the graph
BOOST_ASSERT(bisection_graph.GetID(*const_begin) < bisection_graph.NumberOfNodes() &&
bisection_graph.GetID(*const_begin) + std::distance(const_begin, const_end) <=
bisection_graph.NumberOfNodes());
// augment the partition ids
const auto flag = BisectionID{1} << depth;
for (auto itr = const_begin; itr != const_end; ++itr)
@@ -80,6 +87,73 @@ RecursiveBisectionState::ApplyBisection(const NodeIterator const_begin,
return const_begin + std::distance(begin, center);
}
// Pre-partitions the bisection graph by component and returns one GraphView per group.
//
// Every component with more than `small_component_size` nodes forms its own group; all remaining
// components share one group, which is always represented by the last returned view (an empty
// view is appended if there is no such component).
//
// The graph is modified in place: nodes are stably reordered so that each group is contiguous,
// and every edge target is rewritten to an id that counts nodes within the target's group.
std::vector<GraphView>
RecursiveBisectionState::PrePartitionWithSCC(const std::size_t small_component_size)
{
    // According to the original author a full SCC is not strictly needed for these graphs;
    // tarjan is used because it assigns component ids and counts component sizes.
    TarjanGraphWrapper wrapped_graph(bisection_graph);
    extractor::TarjanSCC<TarjanGraphWrapper> scc_algo(wrapped_graph);
    scc_algo.Run();

    // shared group id of all small components; as largest std::size_t it sorts behind all others
    const constexpr std::size_t small_component_id = -1;

    // group of a node: its component id, or small_component_id if the component is small
    const auto group_of = [&](const NodeID node_id) -> std::size_t {
        const auto component = scc_algo.GetComponentID(node_id);
        if (scc_algo.GetComponentSize(component) > small_component_size)
            return component;
        return small_component_id;
    };

    // hand out consecutive ids within every group, following the current node order
    std::unordered_map<std::size_t, std::size_t> next_local_id;
    std::vector<NodeID> local_id_of(bisection_graph.NumberOfNodes(), SPECIAL_NODEID);
    for (const auto &node : bisection_graph.Nodes())
    {
        auto &counter = next_local_id[group_of(node.original_id)];
        local_id_of[node.original_id] = counter++;
    }

    // needs to remove edges, if we should ever switch to directed graphs here
    const auto by_group = [&group_of](const auto &lhs, const auto &rhs) {
        return group_of(lhs.original_id) < group_of(rhs.original_id);
    };
    std::stable_sort(bisection_graph.Begin(), bisection_graph.End(), by_group);

    // remap all remaining edges to the group-local ids
    for (auto node_itr = bisection_graph.Begin(); node_itr != bisection_graph.End(); ++node_itr)
    {
        const auto &node = *node_itr;
        for (auto &edge : bisection_graph.Edges(node))
            edge.target = local_id_of[edge.target];
    }

    // cut the sorted node range into one view per run of equal group ids
    std::vector<GraphView> views;
    auto group_begin = bisection_graph.CBegin();
    auto current_group = group_of(bisection_graph.Begin()->original_id);
    for (auto itr = bisection_graph.CBegin(); itr != bisection_graph.CEnd(); ++itr)
    {
        const auto group = group_of(itr->original_id);
        if (group == current_group)
            continue;

        views.push_back(GraphView(bisection_graph, group_begin, itr));
        current_group = group;
        group_begin = itr;
    }
    views.push_back(GraphView(bisection_graph, group_begin, bisection_graph.CEnd()));

    // without any small component, an empty view takes the place of the small-component group
    bool has_small_component = false;
    for (std::size_t i = 0; i < scc_algo.GetNumberOfComponents(); ++i)
    {
        if (scc_algo.GetComponentSize(i) <= small_component_size)
        {
            has_small_component = true;
            break;
        }
    }
    if (!has_small_component)
        views.push_back(GraphView(bisection_graph, bisection_graph.CEnd(), bisection_graph.CEnd()));

    return views;
}
const std::vector<RecursiveBisectionState::BisectionID> &
RecursiveBisectionState::BisectionIDs() const
{
@@ -0,0 +1,97 @@
#include "partition/recursive_bisection_stats.hpp"
#include <boost/assert.hpp>
#include <bitset>
#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
namespace osrm
{
namespace partition
{
// Prints a summary of a recursive bisection to std::cout.
//
// bisection_ids: bisection id per node, indexed by the node's original id (and by edge targets);
//                must contain exactly graph.NumberOfNodes() entries.
// graph:         the graph the ids belong to.
//
// Level L groups nodes into cells by the lowest L + 1 bits of their bisection id. For every level
// the number of cells, the min / max / average cell size and -- if there is more than one cell --
// the min / max / average number of border nodes per cell are printed. A node counts as border
// node of its cell on a level if at least one of its edges leads into a different cell on that
// level. Output stops after the first level that has at most one cell.
//
// "Total border vertices" is the number of nodes with at least one neighbour carrying a different
// full bisection id; every such node is counted exactly once.
void printBisectionStats(std::vector<RecursiveBisectionState::BisectionID> const &bisection_ids,
                         const BisectionGraph &graph)
{
    BOOST_ASSERT(graph.NumberOfNodes() == bisection_ids.size());

    constexpr std::uint32_t num_levels = 32;

    std::size_t total_border_nodes = 0;
    std::unordered_map<RecursiveBisectionState::BisectionID, std::size_t> cell_sizes[num_levels];
    std::unordered_map<RecursiveBisectionState::BisectionID, std::size_t> border_nodes[num_levels];

    for (const auto &node : graph.Nodes())
    {
        const auto bisection_id_node = bisection_ids[node.original_id];

        // Whether a node is a border node at all does not depend on the level, so it is decided
        // (and counted) once per node rather than once per level.
        bool is_border_node = false;
        for (const auto &edge : graph.Edges(node))
        {
            if (bisection_ids[edge.target] != bisection_id_node)
            {
                is_border_node = true;
                break;
            }
        }
        if (is_border_node)
            ++total_border_nodes;

        std::uint32_t flag = 0;
        for (std::uint32_t level = 0; level < num_levels; ++level)
        {
            // unsigned shift: `1 << 31` on a signed int would overflow into the sign bit
            flag |= (std::uint32_t{1} << level);

            const auto cell_id = bisection_id_node & flag;
            ++cell_sizes[level][cell_id];

            // nodes without any differing neighbour cannot be on a cell border on any level
            if (!is_border_node)
                continue;

            for (const auto &edge : graph.Edges(node))
            {
                if (cell_id != (bisection_ids[edge.target] & flag))
                {
                    ++border_nodes[level][cell_id];
                    break;
                }
            }
        }
    }

    std::cout << "Partition statistics\n";
    std::cout << "Total border vertices: " << total_border_nodes << '\n';

    struct Summary
    {
        std::size_t min = 0;
        std::size_t max = 0;
        std::size_t total = 0;
    };

    // min / max / sum over all counters of a map; an empty map yields all zeros instead of
    // leaking a sentinel value into the output
    const auto summarize =
        [](const std::unordered_map<RecursiveBisectionState::BisectionID, std::size_t> &map) {
            Summary result;
            if (map.empty())
                return result;

            result.min = map.begin()->second;
            for (const auto &entry : map)
            {
                result.min = std::min(result.min, entry.second);
                result.max = std::max(result.max, entry.second);
                result.total += entry.second;
            }
            return result;
        };

    // average per cell; guarded against a division by zero for an empty graph
    const auto average = [](const std::size_t total, const std::size_t count) {
        return count == 0 ? 0.0 : static_cast<double>(total) / static_cast<double>(count);
    };

    for (std::uint32_t level = 0; level < num_levels; ++level)
    {
        const auto sizes = summarize(cell_sizes[level]);
        const auto borders = summarize(border_nodes[level]);
        const auto num_cells = cell_sizes[level].size();

        std::cout << "Level: " << level << " Cells: " << num_cells;
        if (num_cells > 1)
            std::cout << " Border: " << borders.min << " " << borders.max << " "
                      << average(borders.total, num_cells);
        std::cout << " Cell Sizes: " << sizes.min << " " << sizes.max << " "
                  << average(sizes.total, num_cells);
        std::cout << '\n';

        // a level with a single cell (or none) is the last one that gets reported
        if (num_cells <= 1)
            break;
    }
    std::cout << std::flush;
}
} // namespace partition
} // namespace osrm
+5
View File
@@ -52,6 +52,11 @@ return_code parseArguments(int argc, char *argv[], partition::PartitionConfig &p
("optimizing-cuts",
boost::program_options::value<std::size_t>(&partition_config.num_optimizing_cuts)
->default_value(10),
"Number of cuts to use for optimizing a single bisection")
//
("small-component-size",
boost::program_options::value<std::size_t>(&partition_config.small_component_size)
->default_value(1000),
"Number of cuts to use for optimizing a single bisection");
// hidden options, will be allowed on command line, but will not be