Fix ambiguity in edges by using names lexicographical order

This commit is contained in:
Michael Krasnyk 2016-07-05 20:08:21 +02:00
parent e17b306265
commit 2b466b2fb2
No known key found for this signature in database
GPG Key ID: 3854C9454FE00649
5 changed files with 131 additions and 73 deletions

View File

@ -266,3 +266,24 @@ Feature: Basic Routing
| from | to | route | time | | from | to | route | time |
| a | b | , | 10s | | a | b | , | 10s |
| b | a | , | 10s | | b | a | , | 10s |
Scenario: Ambiguous edge names - Use lexicographically smallest name
Given the node map
| a | b | c |
And the ways
| nodes | highway | name |
| ab | primary | |
| ab | primary | Αβγ |
| ab | primary | |
| ab | primary | Abc |
| ab | primary | |
| ab | primary | Абв |
| bc | primary | Ηθι |
| bc | primary | Δεζ |
| bc | primary | Где |
When I route I should get
| from | to | route |
| a | c | Abc,Δεζ,Δεζ |
| c | a | Δεζ,Abc,Abc |

View File

@ -8,9 +8,9 @@
#include "extractor/restriction.hpp" #include "extractor/restriction.hpp"
#include "extractor/scripting_environment.hpp" #include "extractor/scripting_environment.hpp"
#include <cstdint>
#include <stxxl/vector> #include <stxxl/vector>
#include <unordered_map> #include <unordered_map>
#include <cstdint>
namespace osrm namespace osrm
{ {
@ -39,10 +39,9 @@ class ExtractionContainers
void WriteNodes(std::ofstream &file_out_stream) const; void WriteNodes(std::ofstream &file_out_stream) const;
void WriteRestrictions(const std::string &restrictions_file_name) const; void WriteRestrictions(const std::string &restrictions_file_name) const;
void WriteEdges(std::ofstream &file_out_stream) const; void WriteEdges(std::ofstream &file_out_stream) const;
void WriteCharData(const std::string &file_name, void WriteCharData(const std::string &file_name);
const stxxl::vector<unsigned> &offests, void
const stxxl::vector<char> &char_data) const; WriteTurnLaneMasks(const std::string &file_name,
void WriteTurnLaneMasks(const std::string &file_name,
const stxxl::vector<std::uint32_t> &turn_lane_offsets, const stxxl::vector<std::uint32_t> &turn_lane_offsets,
const stxxl::vector<guidance::TurnLaneType::Mask> &turn_lane_masks) const; const stxxl::vector<guidance::TurnLaneType::Mask> &turn_lane_masks) const;
@ -52,12 +51,14 @@ class ExtractionContainers
using STXXLEdgeVector = stxxl::vector<InternalExtractorEdge>; using STXXLEdgeVector = stxxl::vector<InternalExtractorEdge>;
using STXXLRestrictionsVector = stxxl::vector<InputRestrictionContainer>; using STXXLRestrictionsVector = stxxl::vector<InputRestrictionContainer>;
using STXXLWayIDStartEndVector = stxxl::vector<FirstAndLastSegmentOfWay>; using STXXLWayIDStartEndVector = stxxl::vector<FirstAndLastSegmentOfWay>;
using STXXLNameCharData = stxxl::vector<unsigned char>;
using STXXLNameOffsets = stxxl::vector<unsigned>;
STXXLNodeIDVector used_node_id_list; STXXLNodeIDVector used_node_id_list;
STXXLNodeVector all_nodes_list; STXXLNodeVector all_nodes_list;
STXXLEdgeVector all_edges_list; STXXLEdgeVector all_edges_list;
stxxl::vector<char> name_char_data; STXXLNameCharData name_char_data;
stxxl::vector<unsigned> name_lengths; STXXLNameOffsets name_offsets;
// an adjacency array containing all turn lane masks // an adjacency array containing all turn lane masks
stxxl::vector<std::uint32_t> turn_lane_offsets; stxxl::vector<std::uint32_t> turn_lane_offsets;
stxxl::vector<guidance::TurnLaneType::Mask> turn_lane_masks; stxxl::vector<guidance::TurnLaneType::Mask> turn_lane_masks;

View File

@ -142,43 +142,6 @@ struct InternalExtractorEdge
} }
}; };
struct CmpEdgeByInternalStartThenInternalTargetID
{
using value_type = InternalExtractorEdge;
bool operator()(const InternalExtractorEdge &lhs, const InternalExtractorEdge &rhs) const
{
return (lhs.result.source < rhs.result.source) ||
((lhs.result.source == rhs.result.source) &&
(lhs.result.target < rhs.result.target));
}
value_type max_value() { return InternalExtractorEdge::max_internal_value(); }
value_type min_value() { return InternalExtractorEdge::min_internal_value(); }
};
struct CmpEdgeByOSMStartID
{
using value_type = InternalExtractorEdge;
bool operator()(const InternalExtractorEdge &lhs, const InternalExtractorEdge &rhs) const
{
return lhs.result.osm_source_id < rhs.result.osm_source_id;
}
value_type max_value() { return InternalExtractorEdge::max_osm_value(); }
value_type min_value() { return InternalExtractorEdge::min_osm_value(); }
};
struct CmpEdgeByOSMTargetID
{
using value_type = InternalExtractorEdge;
bool operator()(const InternalExtractorEdge &lhs, const InternalExtractorEdge &rhs) const
{
return lhs.result.osm_target_id < rhs.result.osm_target_id;
}
value_type max_value() { return InternalExtractorEdge::max_osm_value(); }
value_type min_value() { return InternalExtractorEdge::min_osm_value(); }
};
} }
} }

View File

@ -26,6 +26,8 @@
namespace namespace
{ {
namespace oe = osrm::extractor;
// Needed for STXXL comparison - STXXL requires max_value(), min_value(), so we can not use // Needed for STXXL comparison - STXXL requires max_value(), min_value(), so we can not use
// std::less<OSMNodeId>{}. Anonymous namespace to keep translation unit local. // std::less<OSMNodeId>{}. Anonymous namespace to keep translation unit local.
struct OSMNodeIDSTXXLLess struct OSMNodeIDSTXXLLess
@ -35,6 +37,69 @@ struct OSMNodeIDSTXXLLess
value_type max_value() { return MAX_OSM_NODEID; } value_type max_value() { return MAX_OSM_NODEID; }
value_type min_value() { return MIN_OSM_NODEID; } value_type min_value() { return MIN_OSM_NODEID; }
}; };
struct CmpEdgeByOSMStartID
{
using value_type = oe::InternalExtractorEdge;
bool operator()(const value_type &lhs, const value_type &rhs) const
{
return lhs.result.osm_source_id < rhs.result.osm_source_id;
}
value_type max_value() { return value_type::max_osm_value(); }
value_type min_value() { return value_type::min_osm_value(); }
};
struct CmpEdgeByOSMTargetID
{
using value_type = oe::InternalExtractorEdge;
bool operator()(const value_type &lhs, const value_type &rhs) const
{
return lhs.result.osm_target_id < rhs.result.osm_target_id;
}
value_type max_value() { return value_type::max_osm_value(); }
value_type min_value() { return value_type::min_osm_value(); }
};
struct CmpEdgeByInternalSourceTargetAndName
{
using value_type = oe::InternalExtractorEdge;
bool operator()(const value_type &lhs, const value_type &rhs) const
{
if (lhs.result.source != rhs.result.source)
return lhs.result.source < rhs.result.source;
if (lhs.result.target != rhs.result.target)
return lhs.result.target < rhs.result.target;
if (lhs.result.source == SPECIAL_NODEID)
return false;
if (lhs.result.name_id == rhs.result.name_id)
return false;
if (lhs.result.name_id == EMPTY_NAMEID)
return false;
if (rhs.result.name_id == EMPTY_NAMEID)
return true;
BOOST_ASSERT(!name_offsets.empty() && name_offsets.back() == name_data.size());
const oe::ExtractionContainers::STXXLNameCharData::const_iterator data = name_data.begin();
return std::lexicographical_compare(data + name_offsets[lhs.result.name_id],
data + name_offsets[lhs.result.name_id + 1],
data + name_offsets[rhs.result.name_id],
data + name_offsets[rhs.result.name_id + 1]);
}
value_type max_value() { return value_type::max_internal_value(); }
value_type min_value() { return value_type::min_internal_value(); }
const oe::ExtractionContainers::STXXLNameCharData &name_data;
const oe::ExtractionContainers::STXXLNameOffsets &name_offsets;
};
} }
namespace osrm namespace osrm
@ -48,10 +113,13 @@ ExtractionContainers::ExtractionContainers()
{ {
// Check if stxxl can be instantiated // Check if stxxl can be instantiated
stxxl::vector<unsigned> dummy_vector; stxxl::vector<unsigned> dummy_vector;
// Insert three empty strings for name, destination and pronunciation
name_lengths.push_back(0); // Insert three empty strings offsets for name, destination and pronunciation
name_lengths.push_back(0); name_offsets.push_back(0);
name_lengths.push_back(0); name_offsets.push_back(0);
name_offsets.push_back(0);
// Insert the total length sentinel (corresponds to the next name string offset)
name_offsets.push_back(0);
// the offsets have to be initialized with two values, since we have the empty turn string for // the offsets have to be initialized with two values, since we have the empty turn string for
// the first id // the first id
@ -90,7 +158,7 @@ void ExtractionContainers::PrepareData(const std::string &output_file_name,
PrepareRestrictions(); PrepareRestrictions();
WriteRestrictions(restrictions_file_name); WriteRestrictions(restrictions_file_name);
WriteCharData(name_file_name, name_lengths, name_char_data); WriteCharData(name_file_name);
WriteTurnLaneMasks(turn_lane_file_name, turn_lane_offsets, turn_lane_masks); WriteTurnLaneMasks(turn_lane_file_name, turn_lane_offsets, turn_lane_masks);
} }
catch (const std::exception &e) catch (const std::exception &e)
@ -125,23 +193,26 @@ void ExtractionContainers::WriteTurnLaneMasks(
util::SimpleLogger().Write() << "done (" << TIMER_SEC(turn_lane_timer) << ")"; util::SimpleLogger().Write() << "done (" << TIMER_SEC(turn_lane_timer) << ")";
} }
void ExtractionContainers::WriteCharData(const std::string &file_name, void ExtractionContainers::WriteCharData(const std::string &file_name)
const stxxl::vector<unsigned> &offsets,
const stxxl::vector<char> &char_data) const
{ {
std::cout << "[extractor] writing street name index ... " << std::flush; std::cout << "[extractor] writing street name index ... " << std::flush;
TIMER_START(write_index); TIMER_START(write_index);
boost::filesystem::ofstream file_stream(file_name, std::ios::binary); boost::filesystem::ofstream file_stream(file_name, std::ios::binary);
unsigned total_length = 0; // transforms in-place name offsets to name lengths
BOOST_ASSERT(!name_offsets.empty());
for (const auto length : offsets) for (auto curr = name_offsets.begin(), next = name_offsets.begin() + 1;
next != name_offsets.end(); ++curr, ++next)
{ {
total_length += length; *curr = *next - *curr;
} }
// removes the total length sentinel
unsigned total_length = name_offsets.back();
name_offsets.pop_back();
// builds and writes the index // builds and writes the index
util::RangeTable<> index_range(offsets); util::RangeTable<> index_range(name_offsets);
file_stream << index_range; file_stream << index_range;
file_stream.write((char *)&total_length, sizeof(unsigned)); file_stream.write((char *)&total_length, sizeof(unsigned));
@ -150,7 +221,7 @@ void ExtractionContainers::WriteCharData(const std::string &file_name,
char write_buffer[WRITE_BLOCK_BUFFER_SIZE]; char write_buffer[WRITE_BLOCK_BUFFER_SIZE];
unsigned buffer_len = 0; unsigned buffer_len = 0;
for (const auto c : char_data) for (const auto c : name_char_data)
{ {
write_buffer[buffer_len++] = c; write_buffer[buffer_len++] = c;
@ -416,7 +487,7 @@ void ExtractionContainers::PrepareEdges(lua_State *segment_state)
TIMER_START(sort_edges_by_renumbered_start); TIMER_START(sort_edges_by_renumbered_start);
stxxl::sort(all_edges_list.begin(), stxxl::sort(all_edges_list.begin(),
all_edges_list.end(), all_edges_list.end(),
CmpEdgeByInternalStartThenInternalTargetID(), CmpEdgeByInternalSourceTargetAndName{name_char_data, name_offsets},
stxxl_memory); stxxl_memory);
TIMER_STOP(sort_edges_by_renumbered_start); TIMER_STOP(sort_edges_by_renumbered_start);
std::cout << "ok, after " << TIMER_SEC(sort_edges_by_renumbered_start) << "s" << std::endl; std::cout << "ok, after " << TIMER_SEC(sort_edges_by_renumbered_start) << "s" << std::endl;

View File

@ -1,8 +1,8 @@
#include "extractor/extractor_callbacks.hpp"
#include "extractor/external_memory_node.hpp" #include "extractor/external_memory_node.hpp"
#include "extractor/extraction_containers.hpp" #include "extractor/extraction_containers.hpp"
#include "extractor/extraction_node.hpp" #include "extractor/extraction_node.hpp"
#include "extractor/extraction_way.hpp" #include "extractor/extraction_way.hpp"
#include "extractor/extractor_callbacks.hpp"
#include "extractor/restriction.hpp" #include "extractor/restriction.hpp"
#include "util/for_each_pair.hpp" #include "util/for_each_pair.hpp"
@ -215,7 +215,7 @@ void ExtractorCallbacks::ProcessWay(const osmium::Way &input_way, const Extracti
// convert the lane description into an ID and, if necessary, remembr the description in the // convert the lane description into an ID and, if necessary, remembr the description in the
// description_map // description_map
const auto requestId = [&](std::string lane_string) { const auto requestId = [&](std::string lane_string) {
if( lane_string.empty() ) if (lane_string.empty())
return INVALID_LANE_DESCRIPTIONID; return INVALID_LANE_DESCRIPTIONID;
TurnLaneDescription lane_description = laneStringToDescription(std::move(lane_string)); TurnLaneDescription lane_description = laneStringToDescription(std::move(lane_string));
@ -253,33 +253,35 @@ void ExtractorCallbacks::ProcessWay(const osmium::Way &input_way, const Extracti
// Get the unique identifier for the street name // Get the unique identifier for the street name
// Get the unique identifier for the street name and destination // Get the unique identifier for the street name and destination
const auto name_iterator = string_map.find(MapKey(parsed_way.name, parsed_way.destinations)); const auto name_iterator = string_map.find(MapKey(parsed_way.name, parsed_way.destinations));
unsigned name_id = external_memory.name_lengths.size(); unsigned name_id = EMPTY_NAMEID;
if (string_map.end() == name_iterator) if (string_map.end() == name_iterator)
{ {
auto name_length = std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.name.size()); const auto name_length = std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.name.size());
auto destinations_length = const auto destinations_length =
std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.destinations.size()); std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.destinations.size());
auto pronunciation_length = const auto pronunciation_length =
std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.pronunciation.size()); std::min<unsigned>(MAX_STRING_LENGTH, parsed_way.pronunciation.size());
external_memory.name_char_data.reserve(name_id + name_length + destinations_length + // name_offsets already has an offset of a new name, take the offset index as the name id
pronunciation_length); name_id = external_memory.name_offsets.size() - 1;
external_memory.name_char_data.reserve(external_memory.name_char_data.size() + name_length
+ destinations_length + pronunciation_length);
std::copy(parsed_way.name.c_str(), std::copy(parsed_way.name.c_str(),
parsed_way.name.c_str() + name_length, parsed_way.name.c_str() + name_length,
std::back_inserter(external_memory.name_char_data)); std::back_inserter(external_memory.name_char_data));
external_memory.name_offsets.push_back(external_memory.name_char_data.size());
std::copy(parsed_way.destinations.c_str(), std::copy(parsed_way.destinations.c_str(),
parsed_way.destinations.c_str() + destinations_length, parsed_way.destinations.c_str() + destinations_length,
std::back_inserter(external_memory.name_char_data)); std::back_inserter(external_memory.name_char_data));
external_memory.name_offsets.push_back(external_memory.name_char_data.size());
std::copy(parsed_way.pronunciation.c_str(), std::copy(parsed_way.pronunciation.c_str(),
parsed_way.pronunciation.c_str() + pronunciation_length, parsed_way.pronunciation.c_str() + pronunciation_length,
std::back_inserter(external_memory.name_char_data)); std::back_inserter(external_memory.name_char_data));
external_memory.name_offsets.push_back(external_memory.name_char_data.size());
external_memory.name_lengths.push_back(name_length);
external_memory.name_lengths.push_back(destinations_length);
external_memory.name_lengths.push_back(pronunciation_length);
auto k = MapKey{parsed_way.name, parsed_way.destinations}; auto k = MapKey{parsed_way.name, parsed_way.destinations};
auto v = MapVal{name_id}; auto v = MapVal{name_id};