osrm-backend/include/engine/routing_algorithms/map_matching.hpp
2016-03-08 00:43:45 +01:00

387 lines
15 KiB
C++

#ifndef MAP_MATCHING_HPP
#define MAP_MATCHING_HPP
#include "engine/routing_algorithms/routing_base.hpp"
#include "util/coordinate_calculation.hpp"
#include "engine/map_matching/hidden_markov_model.hpp"
#include "util/json_logger.hpp"
#include "util/matching_debug_info.hpp"
#include <cstddef>
#include <algorithm>
#include <deque>
#include <iomanip>
#include <numeric>
#include <utility>
#include <vector>
namespace osrm
{
namespace engine
{
namespace routing_algorithms
{
struct SubMatching
{
std::vector<PhantomNode> nodes;
std::vector<unsigned> indices;
double length;
double confidence;
};
using CandidateList = std::vector<PhantomNodeWithDistance>;
using CandidateLists = std::vector<CandidateList>;
using HMM = map_matching::HiddenMarkovModel<CandidateLists>;
using SubMatchingList = std::vector<SubMatching>;
constexpr static const unsigned MAX_BROKEN_STATES = 10;
constexpr static const double MAX_SPEED = 180 / 3.6; // 180km -> m/s
constexpr static const unsigned SUSPICIOUS_DISTANCE_DELTA = 100;
constexpr static const double MAX_DISTANCE_DELTA = 2000.;
// implements a hidden markov model map matching algorithm
template <class DataFacadeT>
class MapMatching final : public BasicRoutingInterface<DataFacadeT, MapMatching<DataFacadeT>>
{
using super = BasicRoutingInterface<DataFacadeT, MapMatching<DataFacadeT>>;
using QueryHeap = SearchEngineData::QueryHeap;
SearchEngineData &engine_working_data;
unsigned GetMedianSampleTime(const std::vector<unsigned> &timestamps) const
{
BOOST_ASSERT(timestamps.size() > 1);
std::vector<unsigned> sample_times(timestamps.size());
std::adjacent_difference(timestamps.begin(), timestamps.end(), sample_times.begin());
// don't use first element of sample_times -> will not be a difference.
auto first_elem = std::next(sample_times.begin());
auto median = first_elem + std::distance(first_elem, sample_times.end()) / 2;
std::nth_element(first_elem, median, sample_times.end());
return *median;
}
public:
MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
: super(facade), engine_working_data(engine_working_data)
{
}
void operator()(const CandidateLists &candidates_list,
const std::vector<util::FixedPointCoordinate> &trace_coordinates,
const std::vector<unsigned> &trace_timestamps,
const double matching_beta,
const double gps_precision,
SubMatchingList &sub_matchings) const
{
BOOST_ASSERT(candidates_list.size() == trace_coordinates.size());
BOOST_ASSERT(candidates_list.size() > 1);
const bool use_timestamps = trace_timestamps.size() > 1;
const auto median_sample_time = [&]()
{
if (use_timestamps)
{
return std::max(1u, GetMedianSampleTime(trace_timestamps));
}
else
{
return 1u;
}
}();
const auto max_broken_time = median_sample_time * MAX_BROKEN_STATES;
const auto max_distance_delta = [&]()
{
if (use_timestamps)
{
return median_sample_time * MAX_SPEED;
}
else
{
return MAX_DISTANCE_DELTA;
}
}();
// TODO replace default values with table lookup based on sampling frequency
map_matching::EmissionLogProbability emission_log_probability(gps_precision);
map_matching::TransitionLogProbability transition_log_probability(matching_beta);
HMM model(candidates_list, emission_log_probability);
std::size_t initial_timestamp = model.initialize(0);
if (initial_timestamp == map_matching::INVALID_STATE)
{
return;
}
util::MatchingDebugInfo matching_debug(util::json::Logger::get());
matching_debug.initialize(candidates_list);
engine_working_data.InitializeOrClearFirstThreadLocalStorage(
super::facade->GetNumberOfNodes());
engine_working_data.InitializeOrClearSecondThreadLocalStorage(
super::facade->GetNumberOfNodes());
QueryHeap &forward_heap = *(engine_working_data.forward_heap_1);
QueryHeap &reverse_heap = *(engine_working_data.reverse_heap_1);
QueryHeap &forward_core_heap = *(engine_working_data.forward_heap_2);
QueryHeap &reverse_core_heap = *(engine_working_data.reverse_heap_2);
std::size_t breakage_begin = map_matching::INVALID_STATE;
std::vector<std::size_t> split_points;
std::vector<std::size_t> prev_unbroken_timestamps;
prev_unbroken_timestamps.reserve(candidates_list.size());
prev_unbroken_timestamps.push_back(initial_timestamp);
for (auto t = initial_timestamp + 1; t < candidates_list.size(); ++t)
{
// breakage recover has removed all previous good points
bool trace_split = prev_unbroken_timestamps.empty();
// use temporal information if available to determine a split
if (use_timestamps)
{
trace_split =
trace_split ||
(trace_timestamps[t] - trace_timestamps[prev_unbroken_timestamps.back()] >
max_broken_time);
}
else
{
trace_split =
trace_split || (t - prev_unbroken_timestamps.back() > MAX_BROKEN_STATES);
}
if (trace_split)
{
std::size_t split_index = t;
if (breakage_begin != map_matching::INVALID_STATE)
{
split_index = breakage_begin;
breakage_begin = map_matching::INVALID_STATE;
}
split_points.push_back(split_index);
// note: this preserves everything before split_index
model.clear(split_index);
std::size_t new_start = model.initialize(split_index);
// no new start was found -> stop viterbi calculation
if (new_start == map_matching::INVALID_STATE)
{
break;
}
prev_unbroken_timestamps.clear();
prev_unbroken_timestamps.push_back(new_start);
// Important: We potentially go back here!
// However since t > new_start >= breakge_begin
// we can only reset trace_coordindates.size() times.
t = new_start + 1;
}
BOOST_ASSERT(!prev_unbroken_timestamps.empty());
const std::size_t prev_unbroken_timestamp = prev_unbroken_timestamps.back();
const auto &prev_viterbi = model.viterbi[prev_unbroken_timestamp];
const auto &prev_pruned = model.pruned[prev_unbroken_timestamp];
const auto &prev_unbroken_timestamps_list = candidates_list[prev_unbroken_timestamp];
const auto &prev_coordinate = trace_coordinates[prev_unbroken_timestamp];
auto &current_viterbi = model.viterbi[t];
auto &current_pruned = model.pruned[t];
auto &current_suspicious = model.suspicious[t];
auto &current_parents = model.parents[t];
auto &current_lengths = model.path_lengths[t];
const auto &current_timestamps_list = candidates_list[t];
const auto &current_coordinate = trace_coordinates[t];
const auto haversine_distance = util::coordinate_calculation::haversineDistance(
prev_coordinate, current_coordinate);
// assumes minumum of 0.1 m/s
const int duration_uppder_bound = ((haversine_distance + max_distance_delta) * 0.25) * 10;
// compute d_t for this timestamp and the next one
for (const auto s : util::irange<std::size_t>(0u, prev_viterbi.size()))
{
if (prev_pruned[s])
{
continue;
}
for (const auto s_prime : util::irange<std::size_t>(0u, current_viterbi.size()))
{
// how likely is candidate s_prime at time t to be emitted?
// FIXME this can be pre-computed
const double emission_pr =
emission_log_probability(candidates_list[t][s_prime].distance);
double new_value = prev_viterbi[s] + emission_pr;
if (current_viterbi[s_prime] > new_value)
{
continue;
}
forward_heap.Clear();
reverse_heap.Clear();
double network_distance;
if (super::facade->GetCoreSize() > 0)
{
forward_core_heap.Clear();
reverse_core_heap.Clear();
network_distance = super::GetNetworkDistanceWithCore(
forward_heap, reverse_heap, forward_core_heap, reverse_core_heap,
prev_unbroken_timestamps_list[s].phantom_node,
current_timestamps_list[s_prime].phantom_node,
duration_uppder_bound);
}
else
{
network_distance = super::GetNetworkDistance(
forward_heap, reverse_heap,
prev_unbroken_timestamps_list[s].phantom_node,
current_timestamps_list[s_prime].phantom_node);
}
// get distance diff between loc1/2 and locs/s_prime
const auto d_t = std::abs(network_distance - haversine_distance);
// very low probability transition -> prune
if (d_t >= max_distance_delta)
{
continue;
}
const double transition_pr = transition_log_probability(d_t);
new_value += transition_pr;
matching_debug.add_transition_info(prev_unbroken_timestamp, t, s, s_prime,
prev_viterbi[s], emission_pr, transition_pr,
network_distance, haversine_distance);
if (new_value > current_viterbi[s_prime])
{
current_viterbi[s_prime] = new_value;
current_parents[s_prime] = std::make_pair(prev_unbroken_timestamp, s);
current_lengths[s_prime] = network_distance;
current_pruned[s_prime] = false;
current_suspicious[s_prime] = d_t > SUSPICIOUS_DISTANCE_DELTA;
model.breakage[t] = false;
}
}
}
if (model.breakage[t])
{
// save start of breakage -> we need this as split point
if (t < breakage_begin)
{
breakage_begin = t;
}
BOOST_ASSERT(prev_unbroken_timestamps.size() > 0);
// remove both ends of the breakage
prev_unbroken_timestamps.pop_back();
}
else
{
prev_unbroken_timestamps.push_back(t);
}
}
matching_debug.set_viterbi(model.viterbi, model.pruned, model.suspicious);
if (!prev_unbroken_timestamps.empty())
{
split_points.push_back(prev_unbroken_timestamps.back() + 1);
}
std::size_t sub_matching_begin = initial_timestamp;
for (const auto sub_matching_end : split_points)
{
SubMatching matching;
std::size_t parent_timestamp_index = sub_matching_end - 1;
while (parent_timestamp_index >= sub_matching_begin &&
model.breakage[parent_timestamp_index])
{
--parent_timestamp_index;
}
while (sub_matching_begin < sub_matching_end && model.breakage[sub_matching_begin])
{
++sub_matching_begin;
}
// matchings that only consist of one candidate are invalid
if (parent_timestamp_index - sub_matching_begin + 1 < 2)
{
sub_matching_begin = sub_matching_end;
continue;
}
// loop through the columns, and only compare the last entry
const auto max_element_iter =
std::max_element(model.viterbi[parent_timestamp_index].begin(),
model.viterbi[parent_timestamp_index].end());
std::size_t parent_candidate_index =
std::distance(model.viterbi[parent_timestamp_index].begin(), max_element_iter);
std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices;
while (parent_timestamp_index > sub_matching_begin)
{
if (model.breakage[parent_timestamp_index])
{
continue;
}
reconstructed_indices.emplace_front(parent_timestamp_index, parent_candidate_index);
const auto &next = model.parents[parent_timestamp_index][parent_candidate_index];
// make sure we can never get stuck in this loop
if (parent_timestamp_index == next.first)
{
break;
}
parent_timestamp_index = next.first;
parent_candidate_index = next.second;
}
reconstructed_indices.emplace_front(parent_timestamp_index, parent_candidate_index);
if (reconstructed_indices.size() < 2)
{
sub_matching_begin = sub_matching_end;
continue;
}
matching.length = 0.0;
matching.nodes.resize(reconstructed_indices.size());
matching.indices.resize(reconstructed_indices.size());
for (const auto i : util::irange<std::size_t>(0u, reconstructed_indices.size()))
{
const auto timestamp_index = reconstructed_indices[i].first;
const auto location_index = reconstructed_indices[i].second;
matching.indices[i] = timestamp_index;
matching.nodes[i] = candidates_list[timestamp_index][location_index].phantom_node;
matching.length += model.path_lengths[timestamp_index][location_index];
matching_debug.add_chosen(timestamp_index, location_index);
}
sub_matchings.push_back(matching);
sub_matching_begin = sub_matching_end;
}
matching_debug.add_breakage(model.breakage);
}
};
}
}
}
//[1] "Hidden Markov Map Matching Through Noise and Sparseness"; P. Newson and J. Krumm; 2009; ACM
// GIS
#endif /* MAP_MATCHING_HPP */