Add bayes classifier

This commit is contained in:
Patrick Niklaus 2015-02-06 02:15:51 +01:00
parent f092fc3fc6
commit fe07f9208c
2 changed files with 123 additions and 2 deletions

View File

@ -0,0 +1,113 @@
/*
Copyright (c) 2015, Project OSRM, Dennis Luxen, others
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BAYES_CLASSIFIER_HPP
#define BAYES_CLASSIFIER_HPP
#include <vector>
#include <cmath>
/*
 * Univariate normal (Gaussian) distribution described by its mean and
 * standard deviation.
 */
struct NormalDistribution
{
    NormalDistribution(const double mean, const double standard_deviation)
        : mean(mean)
        , standard_deviation(standard_deviation)
    {
    }

    // FIXME implement log-probability version since it's faster
    /*
     * Evaluates the density at val:
     *   1 / (sigma * sqrt(2 * pi)) * exp(-(val - mean)^2 / (2 * sigma^2))
     *
     * standard_deviation is used as a divisor and is not validated here.
     */
    double probabilityDensityFunction(const double val) const
    {
        // Spelled out locally: M_PI is a POSIX extension and not guaranteed
        // by the C++ standard.
        const double pi = 3.14159265358979323846;
        const double x = val - mean;
        // The exponent carries a factor of 2 in the denominator; without it
        // the function is not a properly normalised Gaussian density.
        return 1.0 / (std::sqrt(2 * pi) * standard_deviation) *
               std::exp(-x * x / (2 * standard_deviation * standard_deviation));
    }

    double mean;
    double standard_deviation;
};
/*
 * Laplace (double exponential) distribution described by its location and
 * scale parameters.
 */
struct LaplaceDistribution
{
    LaplaceDistribution(const double location, const double scale)
        : location(location)
        , scale(scale)
    {
    }

    // FIXME implement log-probability version since it's faster
    /*
     * Evaluates the density at val:
     *   1 / (2 * scale) * exp(-|val - location| / scale)
     *
     * Declared const because it only reads the parameters, so it can be
     * called on const instances. scale is used as a divisor and is not
     * validated here.
     */
    double probabilityDensityFunction(const double val) const
    {
        const double x = std::abs(val - location);
        return 1.0 / (2 * scale) * std::exp(-x / scale);
    }

    double location;
    double scale;
};
/*
 * Two-class Bayes classifier.
 *
 * PositiveDistributionT / NegativeDistributionT model the class-conditional
 * densities; each must provide
 *   double probabilityDensityFunction(value)
 * ValueT is the type of the value being classified.
 */
template<typename PositiveDistributionT, typename NegativeDistributionT, typename ValueT>
class BayesClassifier
{
public:
    enum class ClassLabel : unsigned {NEGATIVE = 0, POSITIVE};

    /*
     * positive_apriori_probability is the prior of the positive class; the
     * negative prior is taken as its complement.
     */
    BayesClassifier(const PositiveDistributionT& positive_distribution,
                    const NegativeDistributionT& negative_distribution,
                    const double positive_apriori_probability)
        : positive_distribution(positive_distribution)
        , negative_distribution(negative_distribution)
        , positive_apriori_probability(positive_apriori_probability)
        , negative_apriori_probability(1 - positive_apriori_probability)
    {
    }

    /*
     * Returns label and the probability of the label.
     *
     * The label is POSITIVE only if its (unnormalised) posterior is strictly
     * greater than the negative one; ties go to NEGATIVE.
     */
    std::pair<ClassLabel, double> classify(const ValueT& v)
    {
        const double positive_postpriori = positive_apriori_probability * positive_distribution.probabilityDensityFunction(v);
        const double negative_postpriori = negative_apriori_probability * negative_distribution.probabilityDensityFunction(v);
        const double norm = positive_postpriori + negative_postpriori;

        // Both weighted likelihoods are zero (e.g. exp() underflowed far out
        // in the tails). Dividing would yield 0/0 = NaN, so fall back to the
        // priors, which is all the information that is left.
        if (norm <= 0.0)
        {
            if (positive_apriori_probability > negative_apriori_probability)
            {
                return std::make_pair(ClassLabel::POSITIVE, positive_apriori_probability);
            }
            return std::make_pair(ClassLabel::NEGATIVE, negative_apriori_probability);
        }

        if (positive_postpriori > negative_postpriori)
        {
            return std::make_pair(ClassLabel::POSITIVE, positive_postpriori / norm);
        }
        return std::make_pair(ClassLabel::NEGATIVE, negative_postpriori / norm);
    }

private:
    PositiveDistributionT positive_distribution;
    NegativeDistributionT negative_distribution;
    double positive_apriori_probability;
    double negative_apriori_probability;
};
#endif /* BAYES_CLASSIFIER_HPP */

View File

@ -23,6 +23,7 @@ or see http://www.gnu.org/licenses/agpl.txt.
#include "plugin_base.hpp"
#include "../algorithms/bayes_classifier.hpp"
#include "../algorithms/object_encoder.hpp"
#include "../util/integer_range.hpp"
#include "../data_structures/search_engine.hpp"
@ -48,7 +49,14 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
std::shared_ptr<SearchEngine<DataFacadeT>> search_engine_ptr;
public:
MapMatchingPlugin(DataFacadeT *facade) : descriptor_string("match"), facade(facade)
MapMatchingPlugin(DataFacadeT *facade)
: descriptor_string("match")
, facade(facade)
// the values were derived from fitting a Laplace distribution
// to the values of manually classified traces
, classifier(LaplaceDistribution(0.0057154021891018675, 0.020294704891166186),
LaplaceDistribution(0.11467696742821254, 0.49918444000368756),
0.7977883096366508) // valid apriori probability
{
descriptor_table.emplace("json", 0);
descriptor_table.emplace("gpx", 1);
@ -118,7 +126,6 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
JSON::Object debug_info;
search_engine_ptr->map_matching(candidate_lists, input_coords, uturn_indicators, matched_nodes, debug_info);
InternalRouteResult raw_route;
PhantomNodes current_phantom_node_pair;
for (unsigned i = 0; i < matched_nodes.size() - 1; ++i)
@ -176,6 +183,7 @@ template <class DataFacadeT> class MapMatchingPlugin : public BasePlugin
private:
std::string descriptor_string;
DataFacadeT *facade;
BayesClassifier<LaplaceDistribution, LaplaceDistribution, double> classifier;
};
#endif /* MAP_MATCHING_PLUGIN_H */