/* open source routing machine Copyright (C) Dennis Luxen, others 2010 This program is free software; you can redistribute it and/or modify it under the terms of the GNU AFFERO General Public License as published by the Free Software Foundation; either version 3 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or see http://www.gnu.org/licenses/agpl.txt. */ #ifndef MAP_MATCHING_H #define MAP_MATCHING_H #include "routing_base.hpp" #include "../data_structures/coordinate_calculation.hpp" #include "../util/simple_logger.hpp" #include #include #include #include template T makeJSONSave(T d) { if (std::isnan(d) || std::numeric_limits::infinity() == d) { return std::numeric_limits::max(); } if (-std::numeric_limits::infinity() == d) { return -std::numeric_limits::max(); } return d; } void appendToJSONArray(JSON::Array& a) { } template void appendToJSONArray(JSON::Array& a, T value, Args... args) { a.values.emplace_back(value); appendToJSONArray(a, args...); } template JSON::Array makeJSONArray(Args... args) { JSON::Array a; appendToJSONArray(a, args...); return a; } namespace Matching { typedef std::vector> CandidateList; typedef std::vector CandidateLists; typedef std::pair PhantomNodesWithProbability; } // implements a hidden markov model map matching algorithm template class MapMatching final : public BasicRoutingInterface> { using super = BasicRoutingInterface>; using QueryHeap = SearchEngineData::QueryHeap; SearchEngineData &engine_working_data; // FIXME this value should be a table based on samples/meter (or samples/min) constexpr static const double beta = 10.0; constexpr static const double sigma_z = 4.07; constexpr static const double log_sigma_z = std::log(sigma_z); constexpr static const double log_2_pi = std::log(2 * M_PI); // TODO: move to a probability util header and implement as normal distribution constexpr double emission_probability(const double distance) const { return (1. / (std::sqrt(2. * M_PI) * sigma_z)) * std::exp(-0.5 * std::pow((distance / sigma_z), 2.)); } constexpr double transition_probability(const float d_t, const float beta) const { return (1. / beta) * std::exp(-d_t / beta); } inline double log_emission_probability(const double distance) const { const double normed_distance = distance / sigma_z; return -0.5 * (log_2_pi + normed_distance * normed_distance) - log_sigma_z; } inline double log_transition_probability(const float d_t, const float beta) const { return -std::log(beta) - d_t / beta; } // TODO: needs to be estimated from the input locations // FIXME These values seem wrong. Higher beta for more samples/minute? Should be inverse proportional. //constexpr static const double beta = 1.; // samples/min and beta // 1 0.49037673 // 2 0.82918373 // 3 1.24364564 // 4 1.67079581 // 5 2.00719298 // 6 2.42513007 // 7 2.81248831 // 8 3.15745473 // 9 3.52645392 // 10 4.09511775 // 11 4.67319795 // 21 12.55107715 // 12 5.41088180 // 13 6.47666590 // 14 6.29010734 // 15 7.80752112 // 16 8.09074504 // 17 8.08550528 // 18 9.09405065 // 19 11.09090603 // 20 11.87752824 // 21 12.55107715 // 22 15.82820829 // 23 17.69496773 // 24 18.07655652 // 25 19.63438911 // 26 25.40832185 // 27 23.76001877 // 28 28.43289797 // 29 32.21683062 // 30 34.56991141 double get_distance_difference(const FixedPointCoordinate &location1, const FixedPointCoordinate &location2, const PhantomNode &source_phantom, const PhantomNode &target_phantom) const { // great circle distance of two locations - median/avg dist table(candidate list1/2) const auto network_distance = get_network_distance(source_phantom, target_phantom); const auto great_circle_distance = coordinate_calculation::great_circle_distance(location1, location2); if (great_circle_distance > network_distance) { return great_circle_distance - network_distance; } return network_distance - great_circle_distance; } double get_network_distance(const PhantomNode &source_phantom, const PhantomNode &target_phantom) const { EdgeWeight upper_bound = INVALID_EDGE_WEIGHT; NodeID middle_node = SPECIAL_NODEID; EdgeWeight edge_offset = std::min(0, -source_phantom.GetForwardWeightPlusOffset()); edge_offset = std::min(edge_offset, -source_phantom.GetReverseWeightPlusOffset()); engine_working_data.InitializeOrClearFirstThreadLocalStorage( super::facade->GetNumberOfNodes()); engine_working_data.InitializeOrClearSecondThreadLocalStorage( super::facade->GetNumberOfNodes()); QueryHeap &forward_heap = *(engine_working_data.forward_heap_1); QueryHeap &reverse_heap = *(engine_working_data.reverse_heap_1); if (source_phantom.forward_node_id != SPECIAL_NODEID) { forward_heap.Insert(source_phantom.forward_node_id, -source_phantom.GetForwardWeightPlusOffset(), source_phantom.forward_node_id); } if (source_phantom.reverse_node_id != SPECIAL_NODEID) { forward_heap.Insert(source_phantom.reverse_node_id, -source_phantom.GetReverseWeightPlusOffset(), source_phantom.reverse_node_id); } if (target_phantom.forward_node_id != SPECIAL_NODEID) { reverse_heap.Insert(target_phantom.forward_node_id, target_phantom.GetForwardWeightPlusOffset(), target_phantom.forward_node_id); } if (target_phantom.reverse_node_id != SPECIAL_NODEID) { reverse_heap.Insert(target_phantom.reverse_node_id, target_phantom.GetReverseWeightPlusOffset(), target_phantom.reverse_node_id); } // search from s and t till new_min/(1+epsilon) > length_of_shortest_path while (0 < (forward_heap.Size() + reverse_heap.Size())) { if (0 < forward_heap.Size()) { super::RoutingStep( forward_heap, reverse_heap, &middle_node, &upper_bound, edge_offset, true); } if (0 < reverse_heap.Size()) { super::RoutingStep( reverse_heap, forward_heap, &middle_node, &upper_bound, edge_offset, false); } } double distance = std::numeric_limits::max(); if (upper_bound != INVALID_EDGE_WEIGHT) { std::vector packed_leg; super::RetrievePackedPathFromHeap(forward_heap, reverse_heap, middle_node, packed_leg); std::vector unpacked_path; PhantomNodes nodes; nodes.source_phantom = source_phantom; nodes.target_phantom = target_phantom; super::UnpackPath(packed_leg, nodes, unpacked_path); FixedPointCoordinate previous_coordinate = source_phantom.location; FixedPointCoordinate current_coordinate; distance = 0; for (const auto& p : unpacked_path) { current_coordinate = super::facade->GetCoordinateOfNode(p.node); distance += coordinate_calculation::great_circle_distance(previous_coordinate, current_coordinate); previous_coordinate = current_coordinate; } distance += coordinate_calculation::great_circle_distance(previous_coordinate, target_phantom.location); } return distance; } public: MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data) : super(facade), engine_working_data(engine_working_data) { } // TODO optimize: a lot of copying that could probably be avoided void expandCandidates(const Matching::CandidateLists &candidates_lists, Matching::CandidateLists &expanded_lists, const std::vector& uturn_indicators) const { // expand list of PhantomNodes to be single-directional expanded_lists.resize(candidates_lists.size()); for (const auto i : osrm::irange(0lu, candidates_lists.size())) { for (const auto& candidate : candidates_lists[i]) { // bi-directional edge, split phantom node if we don't expect a uturn if (!uturn_indicators[i] && candidate.first.forward_node_id != SPECIAL_NODEID && candidate.first.reverse_node_id != SPECIAL_NODEID) { PhantomNode forward_node(candidate.first); PhantomNode reverse_node(candidate.first); forward_node.reverse_node_id = SPECIAL_NODEID; reverse_node.forward_node_id = SPECIAL_NODEID; expanded_lists[i].emplace_back(forward_node, candidate.second); expanded_lists[i].emplace_back(reverse_node, candidate.second); } else { expanded_lists[i].push_back(candidate); } } } } void operator()(const Matching::CandidateLists &candidates_lists, const std::vector coordinate_list, const std::vector& uturn_indicators, std::vector& matched_nodes, JSON::Object& _debug_info) const { BOOST_ASSERT(candidates_lists.size() == coordinate_list.size()); BOOST_ASSERT(candidates_lists.size() == uturn_indicators.size()); Matching::CandidateLists timestamp_list; expandCandidates(candidates_lists, timestamp_list, uturn_indicators); std::vector breakage(timestamp_list.size(), true); BOOST_ASSERT(timestamp_list.size() > 0); // TODO for the viterbi values we actually only need the current and last row std::vector> viterbi; std::vector> parents; std::vector> pruned; for (const auto& l : timestamp_list) { viterbi.emplace_back(l.size(), -std::numeric_limits::infinity()); parents.emplace_back(l.size(), 0); pruned.emplace_back(l.size(), true); } JSON::Array _debug_timestamps; JSON::Array _debug_viterbi; JSON::Array _debug_pruned; unsigned initial_timestamp = 0; do { JSON::Array _debug_initial_viterbi; JSON::Array _debug_initial_pruned; for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s) { // this might need to be squared as pi_s is also defined as the emission // probability in the paper. viterbi[initial_timestamp][s] = log_emission_probability(timestamp_list[initial_timestamp][s].second); parents[initial_timestamp][s] = s; pruned[initial_timestamp][s] = viterbi[initial_timestamp][s] < -std::numeric_limits::max(); breakage[initial_timestamp] = breakage[initial_timestamp] && pruned[initial_timestamp][s]; } for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s) { _debug_initial_viterbi.values.push_back(makeJSONSave(viterbi[initial_timestamp][s])); _debug_initial_pruned.values.push_back(static_cast(pruned[initial_timestamp][s])); } _debug_viterbi.values.push_back(_debug_initial_viterbi); _debug_pruned.values.push_back(_debug_initial_pruned); if (initial_timestamp > 0) { JSON::Array _debug_transition_rows; for (auto s = 0u; s < viterbi[initial_timestamp-1].size(); ++s) { _debug_transition_rows.values.push_back(JSON::Array()); } _debug_timestamps.values.push_back(_debug_transition_rows); } ++initial_timestamp; } while (breakage[initial_timestamp - 1]); BOOST_ASSERT(initial_timestamp > 0 && initial_timestamp < viterbi.size()); --initial_timestamp; BOOST_ASSERT(breakage[initial_timestamp] == false); unsigned prev_unbroken_timestamp = initial_timestamp; for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t) { const auto& prev_viterbi = viterbi[prev_unbroken_timestamp]; const auto& prev_pruned = pruned[prev_unbroken_timestamp]; const auto& prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp]; const auto& prev_coordinate = coordinate_list[prev_unbroken_timestamp]; auto& current_viterbi = viterbi[t]; auto& current_pruned = pruned[t]; auto& current_parents = parents[t]; const auto& current_timestamps_list = timestamp_list[t]; const auto& current_coordinate = coordinate_list[t]; JSON::Array _debug_transition_rows; // compute d_t for this timestamp and the next one for (auto s = 0u; s < prev_viterbi.size(); ++s) { JSON::Array _debug_row; if (prev_pruned[s]) { _debug_transition_rows.values.push_back(_debug_row); continue; } for (auto s_prime = 0u; s_prime < current_viterbi.size(); ++s_prime) { // how likely is candidate s_prime at time t to be emitted? const double emission_pr = log_emission_probability(timestamp_list[t][s_prime].second); // get distance diff between loc1/2 and locs/s_prime const auto d_t = get_distance_difference(prev_coordinate, current_coordinate, prev_unbroken_timestamps_list[s].first, current_timestamps_list[s_prime].first); // plug probabilities together const double transition_pr = log_transition_probability(d_t, beta); const double new_value = prev_viterbi[s] + emission_pr + transition_pr; JSON::Array _debug_element = makeJSONArray( makeJSONSave(prev_viterbi[s]), makeJSONSave(emission_pr), makeJSONSave(transition_pr), get_network_distance(prev_unbroken_timestamps_list[s].first, current_timestamps_list[s_prime].first), coordinate_calculation::great_circle_distance(prev_coordinate, current_coordinate) ); _debug_row.values.push_back(_debug_element); if (new_value > current_viterbi[s_prime]) { current_viterbi[s_prime] = new_value; current_parents[s_prime] = s; current_pruned[s_prime] = false; breakage[t] = false; } } _debug_transition_rows.values.push_back(_debug_row); } _debug_timestamps.values.push_back(_debug_transition_rows); JSON::Array _debug_viterbi_col; JSON::Array _debug_pruned_col; for (auto s_prime = 0u; s_prime < current_viterbi.size(); ++s_prime) { _debug_viterbi_col.values.push_back(makeJSONSave(current_viterbi[s_prime])); _debug_pruned_col.values.push_back(static_cast(current_pruned[s_prime])); } _debug_viterbi.values.push_back(_debug_viterbi_col); _debug_pruned.values.push_back(_debug_pruned_col); if (!breakage[t]) { prev_unbroken_timestamp = t; } } _debug_info.values["transitions"] = _debug_timestamps; _debug_info.values["viterbi"] = _debug_viterbi; _debug_info.values["pruned"] = _debug_pruned; _debug_info.values["beta"] = beta; // loop through the columns, and only compare the last entry auto max_element_iter = std::max_element(viterbi[prev_unbroken_timestamp].begin(), viterbi[prev_unbroken_timestamp].end()); auto parent_index = std::distance(viterbi[prev_unbroken_timestamp].begin(), max_element_iter); std::deque> reconstructed_indices; for (auto i = prev_unbroken_timestamp; i > initial_timestamp; --i) { if (breakage[i]) continue; reconstructed_indices.emplace_front(i, parent_index); parent_index = parents[i][parent_index]; } reconstructed_indices.emplace_front(initial_timestamp, parent_index); matched_nodes.resize(reconstructed_indices.size()); for (auto i = 0u; i < reconstructed_indices.size(); ++i) { auto timestamp_index = reconstructed_indices[i].first; auto location_index = reconstructed_indices[i].second; matched_nodes[i] = timestamp_list[timestamp_index][location_index].first; } unsigned removed = candidates_lists.size() - matched_nodes.size(); if (removed > 10) SimpleLogger().Write(logWARNING) << "Warning: removed " << removed << " candiates."; JSON::Array _debug_chosen_candidates; auto _debug_candidate_iter = reconstructed_indices.begin(); for (auto i = 0u; i < timestamp_list.size(); ++i) { if (_debug_candidate_iter != reconstructed_indices.end() && _debug_candidate_iter->first == i) { _debug_chosen_candidates.values.push_back(_debug_candidate_iter->second); _debug_candidate_iter = std::next(_debug_candidate_iter); } else { _debug_chosen_candidates.values.push_back(JSON::Null()); } } _debug_info.values["chosen_candidates"] = _debug_chosen_candidates; JSON::Array _debug_breakage; for (auto b : breakage) { _debug_breakage.values.push_back(static_cast(b)); } _debug_info.values["breakage"] = _debug_breakage; JSON::Array _debug_expanded_candidates; for (const auto& l : timestamp_list) { JSON::Array _debug_expanded_candidates_col; for (const auto& pair : l) { const auto& coord = pair.first.location; _debug_expanded_candidates_col.values.push_back(makeJSONArray(coord.lat / COORDINATE_PRECISION, coord.lon / COORDINATE_PRECISION)); } _debug_expanded_candidates.values.push_back(_debug_expanded_candidates_col); } _debug_info.values["expanded_candidates"] = _debug_expanded_candidates; } }; //[1] "Hidden Markov Map Matching Through Noise and Sparseness"; P. Newson and J. Krumm; 2009; ACM GIS #endif /* MAP_MATCHING_H */