Implement more general pruning

This commit is contained in:
Patrick Niklaus 2015-02-13 14:34:44 +01:00
parent 0c3102721c
commit d620da365e

View File

@ -26,6 +26,9 @@ or see http://www.gnu.org/licenses/agpl.txt.
#include "../data_structures/coordinate_calculation.hpp" #include "../data_structures/coordinate_calculation.hpp"
#include "../util/simple_logger.hpp" #include "../util/simple_logger.hpp"
#include <variant/variant.hpp>
#include <osrm/json_container.hpp>
#include <algorithm> #include <algorithm>
#include <iomanip> #include <iomanip>
#include <numeric> #include <numeric>
@ -87,24 +90,23 @@ template <class DataFacadeT> class MapMatching final
constexpr static const double log_sigma_z = std::log(sigma_z); constexpr static const double log_sigma_z = std::log(sigma_z);
constexpr static const double log_2_pi = std::log(2 * M_PI); constexpr static const double log_2_pi = std::log(2 * M_PI);
// Emission probability of the map-matching HMM: PDF of a zero-mean normal
// distribution with standard deviation sigma_z (the expected GPS noise),
// evaluated at `distance` — the distance between the measured coordinate
// and the candidate road position.
constexpr static double emission_probability(const double distance)
{
    // Square via self-multiplication instead of std::pow(x, 2.): cheaper,
    // and std::pow is not usable in constant expressions.
    const double normed_distance = distance / sigma_z;
    return (1. / (std::sqrt(2. * M_PI) * sigma_z)) *
           std::exp(-0.5 * normed_distance * normed_distance);
}
// Transition probability of the map-matching HMM: an exponential
// distribution with rate 1/beta evaluated at d_t (the difference between
// great-circle and routed distance of consecutive candidates).
constexpr static double transition_probability(const float d_t, const float beta)
{
    const double rate = 1. / beta;
    return rate * std::exp(-d_t / beta);
}
// Natural log of emission_probability, expanded analytically so small
// probabilities do not underflow:
//   -0.5 * (log(2*pi) + (distance/sigma_z)^2) - log(sigma_z)
constexpr static double log_emission_probability(const double distance)
{
    const double normed_distance = distance / sigma_z;
    return -0.5 * (log_2_pi + normed_distance * normed_distance) - log_sigma_z;
}
// Natural log of transition_probability, expanded analytically:
//   log((1/beta) * exp(-d_t/beta)) = -log(beta) - d_t/beta
constexpr static double log_transition_probability(const float d_t, const float beta)
{
    const float neg_log_beta = -std::log(beta);
    return neg_log_beta - d_t / beta;
}
@ -228,70 +230,65 @@ template <class DataFacadeT> class MapMatching final
return distance; return distance;
} }
public: struct HiddenMarkovModel
MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
: super(facade), engine_working_data(engine_working_data)
{ {
}
void operator()(const Matching::CandidateLists &timestamp_list,
const std::vector<FixedPointCoordinate> coordinate_list,
std::vector<PhantomNode>& matched_nodes,
float& matched_length,
JSON::Object& _debug_info) const
{
std::vector<bool> breakage(timestamp_list.size(), true);
BOOST_ASSERT(timestamp_list.size() > 0);
// TODO for the viterbi values we actually only need the current and last row
std::vector<std::vector<double>> viterbi; std::vector<std::vector<double>> viterbi;
std::vector<std::vector<std::size_t>> parents; std::vector<std::vector<std::size_t>> parents;
std::vector<std::vector<float>> segment_lengths; std::vector<std::vector<float>> path_lengths;
std::vector<std::vector<bool>> pruned; std::vector<std::vector<bool>> pruned;
std::vector<bool> breakage;
const Matching::CandidateLists& timestamp_list;
constexpr static double IMPOSSIBLE_LOG_PROB = -std::numeric_limits<double>::infinity();
constexpr static double MINIMAL_LOG_PROB = -std::numeric_limits<double>::max();
HiddenMarkovModel(const Matching::CandidateLists& timestamp_list)
: breakage(timestamp_list.size())
, timestamp_list(timestamp_list)
{
for (const auto& l : timestamp_list) for (const auto& l : timestamp_list)
{ {
viterbi.emplace_back(l.size(), -std::numeric_limits<double>::infinity()); viterbi.emplace_back(l.size());
parents.emplace_back(l.size(), 0); parents.emplace_back(l.size());
segment_lengths.emplace_back(l.size(), 0); path_lengths.emplace_back(l.size());
pruned.emplace_back(l.size(), true); pruned.emplace_back(l.size());
} }
JSON::Array _debug_states; clear(0);
for (const auto& l : timestamp_list) }
void clear(unsigned initial_timestamp)
{ {
JSON::Array _debug_timestamps; BOOST_ASSERT(viterbi.size() == parents.size()
for (unsigned i = 0; i < l.size(); i++) && parents.size() == path_lengths.size()
&& path_lengths.size() == pruned.size()
&& pruned.size() == breakage.size());
for (unsigned t = initial_timestamp; t < viterbi.size(); t++)
{ {
JSON::Object _debug_state; std::fill(viterbi[t].begin(), viterbi[t].end(), IMPOSSIBLE_LOG_PROB);
_debug_state.values["transitions"] = JSON::Array(); std::fill(parents[t].begin(), parents[t].end(), 0);
_debug_state.values["coordinate"] = makeJSONArray(l[i].first.location.lat / COORDINATE_PRECISION, std::fill(path_lengths[t].begin(), path_lengths[t].end(), 0);
l[i].first.location.lon / COORDINATE_PRECISION); std::fill(pruned[t].begin(), pruned[t].end(), true);
_debug_timestamps.values.push_back(_debug_state);
} }
_debug_states.values.push_back(_debug_timestamps); std::fill(breakage.begin()+initial_timestamp, breakage.end(), true);
} }
unsigned initial_timestamp = 0; unsigned initialize(unsigned initial_timestamp)
{
BOOST_ASSERT(initial_timestamp < timestamp_list.size());
do do
{ {
for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s) for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s)
{ {
// this might need to be squared as pi_s is also defined as the emission
// probability in the paper.
viterbi[initial_timestamp][s] = log_emission_probability(timestamp_list[initial_timestamp][s].second); viterbi[initial_timestamp][s] = log_emission_probability(timestamp_list[initial_timestamp][s].second);
parents[initial_timestamp][s] = s; parents[initial_timestamp][s] = s;
pruned[initial_timestamp][s] = viterbi[initial_timestamp][s] < -std::numeric_limits<double>::max(); pruned[initial_timestamp][s] = viterbi[initial_timestamp][s] < MINIMAL_LOG_PROB;
breakage[initial_timestamp] = breakage[initial_timestamp] && pruned[initial_timestamp][s]; breakage[initial_timestamp] = breakage[initial_timestamp] && pruned[initial_timestamp][s];
_debug_states.values[initial_timestamp]
.get<JSONVariantArray>().get().values[s]
.get<JSONVariantObject>().get().values["viterbi"] = makeJSONSafe(viterbi[initial_timestamp][s]);
_debug_states.values[initial_timestamp]
.get<JSONVariantArray>().get().values[s]
.get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(pruned[initial_timestamp][s]);
} }
++initial_timestamp; ++initial_timestamp;
@ -302,18 +299,70 @@ template <class DataFacadeT> class MapMatching final
BOOST_ASSERT(breakage[initial_timestamp] == false); BOOST_ASSERT(breakage[initial_timestamp] == false);
unsigned prev_unbroken_timestamp = initial_timestamp; return initial_timestamp;
}
};
public:
MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
: super(facade), engine_working_data(engine_working_data)
{
}
void operator()(const Matching::CandidateLists &timestamp_list,
const std::vector<FixedPointCoordinate> coordinate_list,
std::vector<PhantomNode>& matched_nodes,
float& matched_length,
JSON::Object& _debug_info) const
{
BOOST_ASSERT(timestamp_list.size() > 0);
HiddenMarkovModel model(timestamp_list);
unsigned initial_timestamp = model.initialize(0);
JSON::Array _debug_states;
for (unsigned t = 0; t < timestamp_list.size(); t++)
{
JSON::Array _debug_timestamps;
for (unsigned s = 0; s < timestamp_list[t].size(); s++)
{
JSON::Object _debug_state;
_debug_state.values["transitions"] = JSON::Array();
_debug_state.values["coordinate"] = makeJSONArray(timestamp_list[t][s].first.location.lat / COORDINATE_PRECISION,
timestamp_list[t][s].first.location.lon / COORDINATE_PRECISION);
if (t < initial_timestamp)
{
_debug_state.values["viterbi"] = makeJSONSafe(HiddenMarkovModel::IMPOSSIBLE_LOG_PROB);
_debug_state.values["pruned"] = 0u;
}
else if (t == initial_timestamp)
{
_debug_state.values["viterbi"] = makeJSONSafe(model.viterbi[t][s]);
_debug_state.values["pruned"] = static_cast<unsigned>(model.pruned[t][s]);
}
_debug_timestamps.values.push_back(_debug_state);
}
_debug_states.values.push_back(_debug_timestamps);
}
std::vector<unsigned> prev_unbroken_timestamps;
prev_unbroken_timestamps.reserve(timestamp_list.size());
prev_unbroken_timestamps.push_back(initial_timestamp);
for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t) for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t)
{ {
const auto& prev_viterbi = viterbi[prev_unbroken_timestamp]; unsigned prev_unbroken_timestamp = prev_unbroken_timestamps.back();
const auto& prev_pruned = pruned[prev_unbroken_timestamp]; const auto& prev_viterbi = model.viterbi[prev_unbroken_timestamp];
const auto& prev_pruned = model.pruned[prev_unbroken_timestamp];
const auto& prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp]; const auto& prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp];
const auto& prev_coordinate = coordinate_list[prev_unbroken_timestamp]; const auto& prev_coordinate = coordinate_list[prev_unbroken_timestamp];
auto& current_viterbi = viterbi[t]; auto& current_viterbi = model.viterbi[t];
auto& current_pruned = pruned[t]; auto& current_pruned = model.pruned[t];
auto& current_parents = parents[t]; auto& current_parents = model.parents[t];
auto& current_lengths = segment_lengths[t]; auto& current_lengths = model.path_lengths[t];
const auto& current_timestamps_list = timestamp_list[t]; const auto& current_timestamps_list = timestamp_list[t];
const auto& current_coordinate = coordinate_list[t]; const auto& current_coordinate = coordinate_list[t];
@ -361,14 +410,13 @@ template <class DataFacadeT> class MapMatching final
.get<JSONVariantObject>().get().values["transitions"] .get<JSONVariantObject>().get().values["transitions"]
.get<JSONVariantArray>().get().values.push_back(_debug_transistion); .get<JSONVariantArray>().get().values.push_back(_debug_transistion);
if (new_value > current_viterbi[s_prime]) if (new_value > current_viterbi[s_prime])
{ {
current_viterbi[s_prime] = new_value; current_viterbi[s_prime] = new_value;
current_parents[s_prime] = s; current_parents[s_prime] = s;
current_lengths[s_prime] = network_distance; current_lengths[s_prime] = network_distance;
current_pruned[s_prime] = false; current_pruned[s_prime] = false;
breakage[t] = false; model.breakage[t] = false;
} }
} }
} }
@ -383,23 +431,45 @@ template <class DataFacadeT> class MapMatching final
.get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(current_pruned[s_prime]); .get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(current_pruned[s_prime]);
} }
if (!breakage[t]) if (model.breakage[t])
{ {
prev_unbroken_timestamp = t; if (prev_unbroken_timestamps.size() > 1)
{
// remove both ends of the breakage
prev_unbroken_timestamps.pop_back();
}
// we reached the beginning of the trace, discard the whole beginning
else
{
model.clear(t);
model.initialize(t);
} }
} }
else
{
prev_unbroken_timestamps.push_back(t);
}
}
if (prev_unbroken_timestamps.size() < 1)
{
return;
}
unsigned last_unbroken_timestamp = prev_unbroken_timestamps.back();
// loop through the columns, and only compare the last entry // loop through the columns, and only compare the last entry
auto max_element_iter = std::max_element(viterbi[prev_unbroken_timestamp].begin(), viterbi[prev_unbroken_timestamp].end()); auto max_element_iter = std::max_element(model.viterbi[last_unbroken_timestamp].begin(),
auto parent_index = std::distance(viterbi[prev_unbroken_timestamp].begin(), max_element_iter); model.viterbi[last_unbroken_timestamp].end());
auto parent_index = std::distance(model.viterbi[last_unbroken_timestamp].begin(), max_element_iter);
std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices; std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices;
for (auto i = prev_unbroken_timestamp; i > initial_timestamp; --i) for (auto i = last_unbroken_timestamp; i > initial_timestamp; --i)
{ {
if (breakage[i]) if (model.breakage[i])
continue; continue;
reconstructed_indices.emplace_front(i, parent_index); reconstructed_indices.emplace_front(i, parent_index);
parent_index = parents[i][parent_index]; parent_index = model.parents[i][parent_index];
} }
reconstructed_indices.emplace_front(initial_timestamp, parent_index); reconstructed_indices.emplace_front(initial_timestamp, parent_index);
@ -411,7 +481,7 @@ template <class DataFacadeT> class MapMatching final
auto location_index = reconstructed_indices[i].second; auto location_index = reconstructed_indices[i].second;
matched_nodes[i] = timestamp_list[timestamp_index][location_index].first; matched_nodes[i] = timestamp_list[timestamp_index][location_index].first;
matched_length += segment_lengths[timestamp_index][location_index]; matched_length += model.path_lengths[timestamp_index][location_index];
_debug_states.values[timestamp_index] _debug_states.values[timestamp_index]
.get<JSONVariantArray>().get().values[location_index] .get<JSONVariantArray>().get().values[location_index]
@ -419,7 +489,7 @@ template <class DataFacadeT> class MapMatching final
} }
JSON::Array _debug_breakage; JSON::Array _debug_breakage;
for (auto b : breakage) { for (auto b : model.breakage) {
_debug_breakage.values.push_back(static_cast<unsigned>(b)); _debug_breakage.values.push_back(static_cast<unsigned>(b));
} }