Parallelize scripting on osmium::Buffer granularity

Fixes #3447 and reduces parsing time by about 15%.
This commit is contained in:
Patrick Niklaus 2017-06-08 21:04:48 +00:00 committed by Patrick Niklaus
parent cd8fb82215
commit 27ed69b08f
4 changed files with 117 additions and 106 deletions

View File

@ -58,13 +58,12 @@ class ScriptingEnvironment
virtual void ProcessTurn(ExtractionTurn &turn) = 0; virtual void ProcessTurn(ExtractionTurn &turn) = 0;
virtual void ProcessSegment(ExtractionSegment &segment) = 0; virtual void ProcessSegment(ExtractionSegment &segment) = 0;
virtual void virtual void ProcessElements(
ProcessElements(const std::vector<osmium::memory::Buffer::const_iterator> &osm_elements, const osmium::memory::Buffer &buffer,
const RestrictionParser &restriction_parser, const RestrictionParser &restriction_parser,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionNode>> &resulting_nodes, std::vector<std::pair<const osmium::Node &, ExtractionNode>> &resulting_nodes,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionWay>> &resulting_ways, std::vector<std::pair<const osmium::Way &, ExtractionWay>> &resulting_ways,
tbb::concurrent_vector<boost::optional<InputRestrictionContainer>> std::vector<boost::optional<InputRestrictionContainer>> &resulting_restrictions) = 0;
&resulting_restrictions) = 0;
}; };
} }
} }

View File

@ -65,13 +65,12 @@ class Sol2ScriptingEnvironment final : public ScriptingEnvironment
void ProcessTurn(ExtractionTurn &turn) override; void ProcessTurn(ExtractionTurn &turn) override;
void ProcessSegment(ExtractionSegment &segment) override; void ProcessSegment(ExtractionSegment &segment) override;
void void ProcessElements(
ProcessElements(const std::vector<osmium::memory::Buffer::const_iterator> &osm_elements, const osmium::memory::Buffer &buffer,
const RestrictionParser &restriction_parser, const RestrictionParser &restriction_parser,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionNode>> &resulting_nodes, std::vector<std::pair<const osmium::Node &, ExtractionNode>> &resulting_nodes,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionWay>> &resulting_ways, std::vector<std::pair<const osmium::Way &, ExtractionWay>> &resulting_ways,
tbb::concurrent_vector<boost::optional<InputRestrictionContainer>> std::vector<boost::optional<InputRestrictionContainer>> &resulting_restrictions) override;
&resulting_restrictions) override;
private: private:
void InitContext(LuaScriptingContext &context); void InitContext(LuaScriptingContext &context);

View File

@ -33,12 +33,13 @@
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp> #include <boost/filesystem/fstream.hpp>
#include <boost/iterator/function_input_iterator.hpp>
#include <boost/optional/optional.hpp> #include <boost/optional/optional.hpp>
#include <boost/scope_exit.hpp> #include <boost/scope_exit.hpp>
#include <osmium/io/any_input.hpp> #include <osmium/io/any_input.hpp>
#include <tbb/concurrent_vector.h> #include <tbb/pipeline.h>
#include <tbb/task_scheduler_init.h> #include <tbb/task_scheduler_init.h>
#include <cstdlib> #include <cstdlib>
@ -252,11 +253,6 @@ std::vector<TurnRestriction> Extractor::ParseOSMData(ScriptingEnvironment &scrip
timestamp_file.WriteFrom(timestamp.c_str(), timestamp.length()); timestamp_file.WriteFrom(timestamp.c_str(), timestamp.length());
// initialize vectors holding parsed objects
tbb::concurrent_vector<std::pair<std::size_t, ExtractionNode>> resulting_nodes;
tbb::concurrent_vector<std::pair<std::size_t, ExtractionWay>> resulting_ways;
tbb::concurrent_vector<boost::optional<InputRestrictionContainer>> resulting_restrictions;
std::vector<std::string> restrictions = scripting_environment.GetRestrictions(); std::vector<std::string> restrictions = scripting_environment.GetRestrictions();
// setup restriction parser // setup restriction parser
const RestrictionParser restriction_parser( const RestrictionParser restriction_parser(
@ -264,46 +260,70 @@ std::vector<TurnRestriction> Extractor::ParseOSMData(ScriptingEnvironment &scrip
config.parse_conditionals, config.parse_conditionals,
restrictions); restrictions);
// create a vector of iterators into the buffer std::mutex process_mutex;
for (std::vector<osmium::memory::Buffer::const_iterator> osm_elements;
const osmium::memory::Buffer buffer = reader.read(); using SharedBuffer = std::shared_ptr<const osmium::memory::Buffer>;
osm_elements.clear()) struct ParsedBuffer
{ {
for (auto iter = std::begin(buffer), end = std::end(buffer); iter != end; ++iter) SharedBuffer buffer;
std::vector<std::pair<const osmium::Node &, ExtractionNode>> resulting_nodes;
std::vector<std::pair<const osmium::Way &, ExtractionWay>> resulting_ways;
std::vector<boost::optional<InputRestrictionContainer>> resulting_restrictions;
};
tbb::filter_t<void, SharedBuffer> buffer_reader(
tbb::filter::serial_in_order, [&](tbb::flow_control &fc) {
if (auto buffer = reader.read())
{ {
osm_elements.push_back(iter); return std::make_shared<const osmium::memory::Buffer>(std::move(buffer));
} }
else
{
fc.stop();
return SharedBuffer{};
}
});
tbb::filter_t<SharedBuffer, std::shared_ptr<ParsedBuffer>> buffer_transform(
tbb::filter::parallel, [&](const SharedBuffer buffer) {
if (!buffer)
return std::shared_ptr<ParsedBuffer>{};
// clear resulting vectors auto parsed_buffer = std::make_shared<ParsedBuffer>();
resulting_nodes.clear(); parsed_buffer->buffer = buffer;
resulting_ways.clear(); scripting_environment.ProcessElements(*buffer,
resulting_restrictions.clear();
scripting_environment.ProcessElements(osm_elements,
restriction_parser, restriction_parser,
resulting_nodes, parsed_buffer->resulting_nodes,
resulting_ways, parsed_buffer->resulting_ways,
resulting_restrictions); parsed_buffer->resulting_restrictions);
return parsed_buffer;
});
tbb::filter_t<std::shared_ptr<ParsedBuffer>, void> buffer_storage(
tbb::filter::serial_in_order, [&](const std::shared_ptr<ParsedBuffer> parsed_buffer) {
if (!parsed_buffer)
return;
number_of_nodes += resulting_nodes.size(); number_of_nodes += parsed_buffer->resulting_nodes.size();
// put parsed objects thru extractor callbacks // put parsed objects thru extractor callbacks
for (const auto &result : resulting_nodes) for (const auto &result : parsed_buffer->resulting_nodes)
{ {
extractor_callbacks->ProcessNode( extractor_callbacks->ProcessNode(result.first, result.second);
static_cast<const osmium::Node &>(*(osm_elements[result.first])), result.second);
} }
number_of_ways += resulting_ways.size(); number_of_ways += parsed_buffer->resulting_ways.size();
for (const auto &result : resulting_ways) for (const auto &result : parsed_buffer->resulting_ways)
{ {
extractor_callbacks->ProcessWay( extractor_callbacks->ProcessWay(result.first, result.second);
static_cast<const osmium::Way &>(*(osm_elements[result.first])), result.second);
} }
number_of_relations += resulting_restrictions.size(); number_of_relations += parsed_buffer->resulting_restrictions.size();
for (const auto &result : resulting_restrictions) for (const auto &result : parsed_buffer->resulting_restrictions)
{ {
extractor_callbacks->ProcessRestriction(result); extractor_callbacks->ProcessRestriction(result);
} }
} });
// Number of pipeline tokens that yielded the best speedup was about 1.5 * num_cores
tbb::parallel_pipeline(tbb::task_scheduler_init::default_num_threads() * 1.5,
buffer_reader & buffer_transform & buffer_storage);
TIMER_STOP(parsing); TIMER_STOP(parsing);
util::Log() << "Parsing finished after " << TIMER_SEC(parsing) << " seconds"; util::Log() << "Parsing finished after " << TIMER_SEC(parsing) << " seconds";

View File

@ -489,25 +489,19 @@ LuaScriptingContext &Sol2ScriptingEnvironment::GetSol2Context()
} }
void Sol2ScriptingEnvironment::ProcessElements( void Sol2ScriptingEnvironment::ProcessElements(
const std::vector<osmium::memory::Buffer::const_iterator> &osm_elements, const osmium::memory::Buffer &buffer,
const RestrictionParser &restriction_parser, const RestrictionParser &restriction_parser,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionNode>> &resulting_nodes, std::vector<std::pair<const osmium::Node &, ExtractionNode>> &resulting_nodes,
tbb::concurrent_vector<std::pair<std::size_t, ExtractionWay>> &resulting_ways, std::vector<std::pair<const osmium::Way &, ExtractionWay>> &resulting_ways,
tbb::concurrent_vector<boost::optional<InputRestrictionContainer>> &resulting_restrictions) std::vector<boost::optional<InputRestrictionContainer>> &resulting_restrictions)
{ {
// parse OSM entities in parallel, store in resulting vectors
tbb::parallel_for(
tbb::blocked_range<std::size_t>(0, osm_elements.size()),
[&](const tbb::blocked_range<std::size_t> &range) {
ExtractionNode result_node; ExtractionNode result_node;
ExtractionWay result_way; ExtractionWay result_way;
std::vector<InputRestrictionContainer> result_res; std::vector<InputRestrictionContainer> result_res;
auto &local_context = this->GetSol2Context(); auto &local_context = this->GetSol2Context();
for (auto x = range.begin(), end = range.end(); x != end; ++x) for (auto entity = buffer.cbegin(), end = buffer.cend(); entity != end; ++entity)
{ {
const auto entity = osm_elements[x];
switch (entity->type()) switch (entity->type())
{ {
case osmium::item_type::node: case osmium::item_type::node:
@ -516,19 +510,19 @@ void Sol2ScriptingEnvironment::ProcessElements(
(!static_cast<const osmium::Node &>(*entity).tags().empty() || (!static_cast<const osmium::Node &>(*entity).tags().empty() ||
local_context.properties.call_tagless_node_function)) local_context.properties.call_tagless_node_function))
{ {
local_context.ProcessNode(static_cast<const osmium::Node &>(*entity), local_context.ProcessNode(static_cast<const osmium::Node &>(*entity), result_node);
result_node);
} }
resulting_nodes.push_back(std::make_pair(x, std::move(result_node))); resulting_nodes.push_back(std::pair<const osmium::Node &, ExtractionNode>(
static_cast<const osmium::Node &>(*entity), std::move(result_node)));
break; break;
case osmium::item_type::way: case osmium::item_type::way:
result_way.clear(); result_way.clear();
if (local_context.has_way_function) if (local_context.has_way_function)
{ {
local_context.ProcessWay(static_cast<const osmium::Way &>(*entity), local_context.ProcessWay(static_cast<const osmium::Way &>(*entity), result_way);
result_way);
} }
resulting_ways.push_back(std::make_pair(x, std::move(result_way))); resulting_ways.push_back(std::pair<const osmium::Way &, ExtractionWay>(
static_cast<const osmium::Way &>(*entity), std::move(result_way)));
break; break;
case osmium::item_type::relation: case osmium::item_type::relation:
result_res.clear(); result_res.clear();
@ -543,7 +537,6 @@ void Sol2ScriptingEnvironment::ProcessElements(
break; break;
} }
} }
});
} }
std::vector<std::string> Sol2ScriptingEnvironment::GetNameSuffixList() std::vector<std::string> Sol2ScriptingEnvironment::GetNameSuffixList()