Merge commit '6eb4f090f98f6b17a23c57768c16b7716b6c9cbd' as 'third_party/libosmium'

This commit is contained in:
Patrick Niklaus
2017-08-30 09:30:27 +00:00
434 changed files with 81367 additions and 0 deletions
+53
View File
@@ -0,0 +1,53 @@
#-----------------------------------------------------------------------------
#
# CMake Config
#
# Libosmium benchmarks
#
#-----------------------------------------------------------------------------
# Announce the start of benchmark configuration in the CMake output.
message(STATUS "Configuring benchmarks")

# Names of the benchmark programs to build. Each name NAME corresponds to a
# source file osmium_benchmark_NAME.cpp and a script run_benchmark_NAME.sh.
# Stored as a cache variable so the selection can be overridden by the user.
set(BENCHMARKS
    count
    count_tag
    index_map
    mercator
    static_vs_dynamic_index
    write_pbf
    CACHE STRING "Benchmark programs"
)
#-----------------------------------------------------------------------------
#
# Configure benchmarks
#
#-----------------------------------------------------------------------------
message(STATUS "Configuring benchmarks - Building these benchmarks:")

# For every benchmark: build its executable, link it against the Osmium IO
# libraries plus any benchmark-specific libraries (BENCHMARK_LIBS_<name>),
# and generate its run script in the build directory.
foreach(benchmark ${BENCHMARKS})
    set(_benchmark_target "osmium_benchmark_${benchmark}")
    set(_benchmark_script "run_benchmark_${benchmark}.sh")

    message(STATUS " - ${_benchmark_target}")

    add_executable(${_benchmark_target} "${_benchmark_target}.cpp")

    target_link_libraries(${_benchmark_target}
                          ${OSMIUM_IO_LIBRARIES}
                          ${BENCHMARK_LIBS_${benchmark}})

    set_pthread_on_target(${_benchmark_target})

    # @ONLY: only @VAR@ references are substituted, so the shell's own
    # ${...} expansions in the template are left untouched.
    configure_file(${_benchmark_script}
                   ${CMAKE_CURRENT_BINARY_DIR}/${_benchmark_script}
                   @ONLY)
endforeach()
# Look up the compiler flags of the active build type; setup.sh embeds them
# via @_cxx_flags@ so they appear in the benchmark report.
string(TOUPPER "${CMAKE_BUILD_TYPE}" _cmake_build_type)
set(_cxx_flags "${CMAKE_CXX_FLAGS_${_cmake_build_type}}")

# Generate the scripts shared by all benchmarks.
configure_file(setup.sh ${CMAKE_CURRENT_BINARY_DIR}/setup.sh @ONLY)
configure_file(run_benchmarks.sh ${CMAKE_CURRENT_BINARY_DIR}/run_benchmarks.sh @ONLY)

#-----------------------------------------------------------------------------
message(STATUS "Configuring benchmarks - done")
#-----------------------------------------------------------------------------
+41
View File
@@ -0,0 +1,41 @@
# Benchmarks
Benchmarks check the performance of different parts of Libosmium.
## Preparations
To run the benchmarks, first create a directory for the data files somewhere
(outside the repository) and set the `DATA_DIR` environment variable to it:
export DATA_DIR=benchmark_data
mkdir $DATA_DIR
Then copy the OSM files you want to do the benchmarks with into this directory.
You can use the `download_data.sh` script to download a selection of OSM files
in different sizes, but you can use a different selection, too. The benchmarks
will use whatever files you have in the `DATA_DIR` directory.
The download script prefixes the names of the data files with a number that
reflects the file size, from smallest to largest. You can use the same
convention or a different one. Benchmarks will be run on the files in
alphabetical order.
The files don't have to be in that directory; you can instead add soft links
from that directory to the real file locations if that suits you.
## Compiling the benchmarks
To build the benchmarks set the `BUILD_BENCHMARKS` option when configuring with
CMake and run the compilation by calling `make` (or whatever build tool you
are using).
## Running the benchmarks
Go to the build directory and run `benchmarks/run_benchmarks.sh`. You can also
run each benchmark on its own by calling the respective script in the
`benchmarks` directory.
Results of the benchmarks will be printed to stdout; you might want to redirect
them into a file.
+12
View File
@@ -0,0 +1,12 @@
#!/bin/sh
#
# download_data.sh
#
# Refuse to run without a target directory: a bare `cd` with an empty
# argument would silently download everything into $HOME instead.
if [ -z "$DATA_DIR" ]; then
    echo "Please set DATA_DIR environment variable before downloading data" >&2
    exit 1
fi

# Stop if the directory cannot be entered, so nothing is downloaded into
# the current working directory by accident.
cd "$DATA_DIR" || exit 1

# --fail makes curl return an error on HTTP failures instead of saving the
# server's error page under a .pbf file name.
curl --fail --location --output 1_liechtenstein.osm.pbf http://download.geofabrik.de/europe/liechtenstein-latest.osm.pbf # about 2 MB
curl --fail --location --output 2_bremen.osm.pbf http://download.geofabrik.de/europe/germany/bremen-latest.osm.pbf # about 16 MB
curl --fail --location --output 3_sachsen.osm.pbf http://download.geofabrik.de/europe/germany/sachsen-latest.osm.pbf # about 160 MB
curl --fail --location --output 4_germany.osm.pbf http://download.geofabrik.de/europe/germany-latest.osm.pbf # about 3 GB
curl --fail --location --output 5_planet.osm.pbf http://planet.osm.org/pbf/planet-latest.osm.pbf # about 35 GB
@@ -0,0 +1,55 @@
/*
The code in this file is released into the Public Domain.
*/
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <osmium/io/any_input.hpp>
#include <osmium/handler.hpp>
#include <osmium/visitor.hpp>
// Handler that tallies how many nodes, ways, and relations are passed to it.
struct CountHandler : public osmium::handler::Handler {

    // Running totals, one per OSM object type.
    std::uint64_t nodes{0};
    std::uint64_t ways{0};
    std::uint64_t relations{0};

    // Called for every node; the node itself is not inspected.
    void node(const osmium::Node& /*node*/) {
        nodes += 1;
    }

    // Called for every way; the way itself is not inspected.
    void way(const osmium::Way& /*way*/) {
        ways += 1;
    }

    // Called for every relation; the relation itself is not inspected.
    void relation(const osmium::Relation& /*relation*/) {
        relations += 1;
    }

};
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " OSMFILE\n";
std::exit(1);
}
const std::string input_filename{argv[1]};
osmium::io::Reader reader{input_filename};
CountHandler handler;
osmium::apply(reader, handler);
reader.close();
std::cout << "Nodes: " << handler.nodes << "\n";
std::cout << "Ways: " << handler.ways << "\n";
std::cout << "Relations: " << handler.relations << "\n";
}
@@ -0,0 +1,56 @@
/*
The code in this file is released into the Public Domain.
*/
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <osmium/io/any_input.hpp>
#include <osmium/handler.hpp>
#include <osmium/visitor.hpp>
// Handler that counts all OSM objects and, separately, the nodes tagged
// amenity=post_box.
struct CountHandler : public osmium::handler::Handler {

    // Number of nodes carrying the tag amenity=post_box.
    uint64_t counter = 0;

    // Number of all objects (nodes, ways, and relations) seen.
    uint64_t all = 0;

    // Counts the node and checks its "amenity" tag.
    void node(const osmium::Node& node) {
        ++all;
        // get_value_by_key() result is null-checked before use.
        const char* amenity = node.tags().get_value_by_key("amenity");
        // std::strcmp is declared in <cstring>; compare against 0 explicitly
        // rather than relying on the implicit int-to-bool conversion.
        if (amenity && std::strcmp(amenity, "post_box") == 0) {
            ++counter;
        }
    }

    // Ways only contribute to the total.
    void way(const osmium::Way&) {
        ++all;
    }

    // Relations only contribute to the total.
    void relation(const osmium::Relation&) {
        ++all;
    }

};
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " OSMFILE\n";
std::exit(1);
}
const std::string input_filename{argv[1]};
osmium::io::Reader reader{input_filename};
CountHandler handler;
osmium::apply(reader, handler);
reader.close();
std::cout << "r_all=" << handler.all << " r_counter=" << handler.counter << "\n";
}
@@ -0,0 +1,41 @@
/*
The code in this file is released into the Public Domain.
*/
#include <cstdlib>
#include <iostream>
#include <string>
#include <osmium/index/map/all.hpp>
#include <osmium/handler/node_locations_for_ways.hpp>
#include <osmium/visitor.hpp>
#include <osmium/io/any_input.hpp>
#include <osmium/handler.hpp>
// Base map type from object ID to location; the concrete implementation is
// obtained from the map factory in main().
using index_type = osmium::index::map::Map<osmium::unsigned_object_id_type, osmium::Location>;
// Location handler instantiated for the index type above.
using location_handler_type = osmium::handler::NodeLocationsForWays<index_type>;
int main(int argc, char* argv[]) {
if (argc != 3) {
std::cerr << "Usage: " << argv[0] << " OSMFILE FORMAT\n";
std::exit(1);
}
const std::string input_filename{argv[1]};
const std::string location_store{argv[2]};
osmium::io::Reader reader{input_filename};
const auto& map_factory = osmium::index::MapFactory<osmium::unsigned_object_id_type, osmium::Location>::instance();
std::unique_ptr<index_type> index = map_factory.create_map(location_store);
location_handler_type location_handler{*index};
location_handler.ignore_errors();
osmium::apply(reader, location_handler);
reader.close();
}
@@ -0,0 +1,43 @@
/*
The code in this file is released into the Public Domain.
*/
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <osmium/io/any_input.hpp>
#include <osmium/handler.hpp>
#include <osmium/visitor.hpp>
#include <osmium/geom/wkb.hpp>
#include <osmium/geom/mercator_projection.hpp>
// Handler that builds a WKB point geometry, using the Mercator projection,
// for every node it receives.
struct GeomHandler : public osmium::handler::Handler {

    // Geometry factory configured with the Mercator projection.
    osmium::geom::WKBFactory<osmium::geom::MercatorProjection> factory;

    // Creates the point geometry for the node. The result is deliberately
    // unused: only the cost of creating it is of interest here.
    void node(const osmium::Node& node) {
        const std::string geom = factory.create_point(node);
    }

};
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " OSMFILE\n";
std::exit(1);
}
const std::string input_filename{argv[1]};
osmium::io::Reader reader{input_filename};
GeomHandler handler;
osmium::apply(reader, handler);
reader.close();
}
@@ -0,0 +1,137 @@
/*
This benchmarks compares the run time for statically vs. dynamically
configured index maps. You can configure index maps at compile-time using
typedefs or at run-time using polymorphism.
This will read the input file into a buffer and then run the
NodeLocationForWays handler multiple times over the complete data. The
number of runs depends on the size of the input, but is never smaller
than 10.
Do not run this with very large input files! It will need about 10 times
as much RAM as the file size of the input file.
The code in this file is released into the Public Domain.
*/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <string>
#include <osmium/index/map/all.hpp>
#include <osmium/handler/node_locations_for_ways.hpp>
#include <osmium/visitor.hpp>
#include <osmium/io/any_input.hpp>
#include <osmium/handler.hpp>
// Index type fixed at compile time (the "static" configuration).
using static_index_type = osmium::index::map::SparseMemArray<osmium::unsigned_object_id_type, osmium::Location>;
// Name passed to the map factory in main() to create the index at run time.
const std::string location_store{"sparse_mem_array"};
// Base map type used for the index created at run time (the "dynamic" configuration).
using dynamic_index_type = osmium::index::map::Map<osmium::unsigned_object_id_type, osmium::Location>;
// Location handlers instantiated for each of the two index types.
using static_location_handler_type = osmium::handler::NodeLocationsForWays<static_index_type>;
using dynamic_location_handler_type = osmium::handler::NodeLocationsForWays<dynamic_index_type>;
// Reads the OSM file named on the command line into a buffer, then runs the
// node-location handler over that data several times with the statically
// typed index and with the factory-created index, and prints the minimum,
// average, and maximum run time of each plus the difference between them.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        std::cerr << "Usage: " << argv[0] << " OSMFILE\n";
        std::exit(1);
    }

    const std::string input_filename{argv[1]};

    osmium::memory::Buffer buffer{osmium::io::read_file(input_filename)};

    const auto& map_factory = osmium::index::MapFactory<osmium::unsigned_object_id_type, osmium::Location>::instance();

    const auto buffer_size = buffer.committed() / (1024*1024); // buffer size in MBytes

    // Inputs that decode to less than 1 MByte yield buffer_size == 0; dividing
    // by it would be an integer division by zero. Treat such inputs like a
    // 1 MByte buffer, which gives the maximum of 5000 runs.
    const int runs = (buffer_size == 0)
                         ? 5000
                         : std::max(10, static_cast<int>(5000ull / buffer_size));

    std::cout << "input: filename=" << input_filename << " buffer_size=" << buffer_size << "MBytes\n";
    std::cout << "runs: " << runs << "\n";

    // Timing statistics in milliseconds.
    double static_min = std::numeric_limits<double>::max();
    double static_sum = 0;
    double static_max = 0;

    double dynamic_min = std::numeric_limits<double>::max();
    double dynamic_sum = 0;
    double dynamic_max = 0;

    for (int i = 0; i < runs; ++i) {

        {
            // static index
            // Each run works on a fresh copy of the input data.
            osmium::memory::Buffer tmp_buffer{buffer.committed()};
            for (const auto& item : buffer) {
                tmp_buffer.add_item(item);
                tmp_buffer.commit();
            }

            static_index_type static_index;
            static_location_handler_type static_location_handler{static_index};

            // Only the apply() call is timed, not the copy or the setup.
            const auto start = std::chrono::steady_clock::now();
            osmium::apply(tmp_buffer, static_location_handler);
            const auto end = std::chrono::steady_clock::now();

            const double duration = std::chrono::duration<double, std::milli>(end-start).count();

            static_min = std::min(static_min, duration);
            static_max = std::max(static_max, duration);
            static_sum += duration;
        }

        {
            // dynamic index
            // Each run works on a fresh copy of the input data.
            osmium::memory::Buffer tmp_buffer{buffer.committed()};
            for (const auto& item : buffer) {
                tmp_buffer.add_item(item);
                tmp_buffer.commit();
            }

            std::unique_ptr<dynamic_index_type> index = map_factory.create_map(location_store);
            dynamic_location_handler_type dynamic_location_handler{*index};
            dynamic_location_handler.ignore_errors();

            // Only the apply() call is timed, not the copy or the setup.
            const auto start = std::chrono::steady_clock::now();
            osmium::apply(tmp_buffer, dynamic_location_handler);
            const auto end = std::chrono::steady_clock::now();

            const double duration = std::chrono::duration<double, std::milli>(end-start).count();

            dynamic_min = std::min(dynamic_min, duration);
            dynamic_max = std::max(dynamic_max, duration);
            dynamic_sum += duration;
        }
    }

    const double static_avg = static_sum/runs;
    const double dynamic_avg = dynamic_sum/runs;

    std::cout << "static min=" << static_min << "ms avg=" << static_avg << "ms max=" << static_max << "ms\n";
    std::cout << "dynamic min=" << dynamic_min << "ms avg=" << dynamic_avg << "ms max=" << dynamic_max << "ms\n";

    // Differences rounded to two decimal places.
    const double rfactor = 100.0;
    const double diff_min = std::round((dynamic_min - static_min) * rfactor) / rfactor;
    const double diff_avg = std::round((dynamic_avg - static_avg) * rfactor) / rfactor;
    const double diff_max = std::round((dynamic_max - static_max) * rfactor) / rfactor;

    // Relative differences in percent, rounded to one decimal place.
    const double prfactor = 10.0;
    const double percent_min = std::round((100.0 * diff_min / static_min) * prfactor) / prfactor;
    const double percent_avg = std::round((100.0 * diff_avg / static_avg) * prfactor) / prfactor;
    const double percent_max = std::round((100.0 * diff_max / static_max) * prfactor) / prfactor;

    std::cout << "difference:";
    std::cout << " min=" << diff_min << "ms (" << percent_min << "%)";
    std::cout << " avg=" << diff_avg << "ms (" << percent_avg << "%)";
    std::cout << " max=" << diff_max << "ms (" << percent_max << "%)\n";
}
@@ -0,0 +1,35 @@
/*
The code in this file is released into the Public Domain.
*/
#include <cstdlib>
#include <iostream>
#include <string>
#include <osmium/io/any_input.hpp>
#include <osmium/io/any_output.hpp>
int main(int argc, char* argv[]) {
if (argc != 3) {
std::cerr << "Usage: " << argv[0] << " INPUT-FILE OUTPUT-FILE\n";
std::exit(1);
}
std::string input_filename{argv[1]};
std::string output_filename{argv[2]};
osmium::io::Reader reader{input_filename};
osmium::io::File output_file{output_filename, "pbf"};
osmium::io::Header header;
osmium::io::Writer writer{output_file, header, osmium::io::overwrite::allow};
while (osmium::memory::Buffer buffer = reader.read()) {
writer(std::move(buffer));
}
writer.close();
reader.close();
}
+22
View File
@@ -0,0 +1,22 @@
#!/bin/sh
#
# run_benchmark_count.sh
#
set -e

BENCHMARK_NAME=count

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

echo "# file size num mem time cpu_kernel cpu_user cpu_percent cmd options"
for data in $OB_DATA_FILES; do
    filename=$(basename "$data")
    filesize=$(stat --format="%s" --dereference "$data")
    for n in $OB_SEQ; do
        # The timing report arrives on stderr; the benchmark's own stdout is
        # discarded and directory prefixes are stripped from the report.
        "$OB_TIME_CMD" -f "$filename $filesize $n $OB_TIME_FORMAT" "$CMD" "$data" 2>&1 >/dev/null | sed -e "s%$DATA_DIR/%%" | sed -e "s%$OB_DIR/%%"
    done
done
+22
View File
@@ -0,0 +1,22 @@
#!/bin/sh
#
# run_benchmark_count_tag.sh
#
set -e

BENCHMARK_NAME=count_tag

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

echo "# file size num mem time cpu_kernel cpu_user cpu_percent cmd options"
for data in $OB_DATA_FILES; do
    filename=$(basename "$data")
    filesize=$(stat --format="%s" --dereference "$data")
    for n in $OB_SEQ; do
        # The timing report arrives on stderr; the benchmark's own stdout is
        # discarded and directory prefixes are stripped from the report.
        "$OB_TIME_CMD" -f "$filename $filesize $n $OB_TIME_FORMAT" "$CMD" "$data" 2>&1 >/dev/null | sed -e "s%$DATA_DIR/%%" | sed -e "s%$OB_DIR/%%"
    done
done
+27
View File
@@ -0,0 +1,27 @@
#!/bin/sh
#
# run_benchmark_index_map.sh
#
set -e

BENCHMARK_NAME=index_map

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

# Index map types to benchmark. The commented-out line is the full list
# including the dense variants.
#MAPS="sparse_mem_map sparse_mem_table sparse_mem_array sparse_mmap_array sparse_file_array dense_mem_array dense_mmap_array dense_file_array"
MAPS="sparse_mem_map sparse_mem_table sparse_mem_array sparse_mmap_array sparse_file_array"

echo "# file size num mem time cpu_kernel cpu_user cpu_percent cmd options"
for data in $OB_DATA_FILES; do
    filename=$(basename "$data")
    filesize=$(stat --format="%s" --dereference "$data")
    for map in $MAPS; do
        for n in $OB_SEQ; do
            # The timing report arrives on stderr; the benchmark's own stdout
            # is discarded and directory prefixes are stripped from the report.
            "$OB_TIME_CMD" -f "$filename $filesize $n $OB_TIME_FORMAT" "$CMD" "$data" "$map" 2>&1 >/dev/null | sed -e "s%$DATA_DIR/%%" | sed -e "s%$OB_DIR/%%"
        done
    done
done
+22
View File
@@ -0,0 +1,22 @@
#!/bin/sh
#
# run_benchmark_mercator.sh
#
set -e

BENCHMARK_NAME=mercator

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

echo "# file size num mem time cpu_kernel cpu_user cpu_percent cmd options"
for data in $OB_DATA_FILES; do
    filename=$(basename "$data")
    filesize=$(stat --format="%s" --dereference "$data")
    for n in $OB_SEQ; do
        # The timing report arrives on stderr; the benchmark's own stdout is
        # discarded and directory prefixes are stripped from the report.
        "$OB_TIME_CMD" -f "$filename $filesize $n $OB_TIME_FORMAT" "$CMD" "$data" 2>&1 >/dev/null | sed -e "s%$DATA_DIR/%%" | sed -e "s%$OB_DIR/%%"
    done
done
@@ -0,0 +1,21 @@
#!/bin/sh
#
# run_benchmark_static_vs_dynamic_index.sh
#
set -e

BENCHMARK_NAME=static_vs_dynamic_index

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

for data in $OB_DATA_FILES; do
    filesize=$(stat --format="%s" --dereference "$data")
    # Only files smaller than 500000000 bytes are benchmarked here.
    if [ "$filesize" -lt 500000000 ]; then
        echo "========================"
        "$CMD" "$data"
    fi
done
+28
View File
@@ -0,0 +1,28 @@
#!/bin/sh
#
# run_benchmark_write_pbf.sh
#
# Will read the input file and write it to /dev/null in PBF format. Because
# this needs the time to read *and* write the file, the reported times cover
# both reading and writing. You can subtract the times needed for the "count"
# benchmark to (roughly) get the write times.
#
set -e

BENCHMARK_NAME=write_pbf

# setup.sh is generated into the same build directory as this script, which
# is not necessarily <top-level build dir>/benchmarks when the library is
# built as part of a larger project.
. "@CMAKE_CURRENT_BINARY_DIR@/setup.sh"

CMD="$OB_DIR/osmium_benchmark_$BENCHMARK_NAME"

echo "# file size num mem time cpu_kernel cpu_user cpu_percent cmd options"
for data in $OB_DATA_FILES; do
    filename=$(basename "$data")
    filesize=$(stat --format="%s" --dereference "$data")
    for n in $OB_SEQ; do
        # The benchmark writes its output file to /dev/null. The timing report
        # arrives on stderr; the benchmark's own stdout is discarded and
        # directory prefixes are stripped from the report.
        "$OB_TIME_CMD" -f "$filename $filesize $n $OB_TIME_FORMAT" "$CMD" "$data" /dev/null 2>&1 >/dev/null | sed -e "s%$DATA_DIR/%%" | sed -e "s%$OB_DIR/%%"
    done
done
+15
View File
@@ -0,0 +1,15 @@
#!/bin/sh
#
# run_benchmarks.sh
#
# Run all benchmarks.
#
set -e

# The per-benchmark scripts are generated into the same build directory as
# this script, which is not necessarily <top-level build dir>/benchmarks when
# the library is built as part of a larger project. The directory part is
# quoted so paths containing spaces work; the glob itself stays unquoted.
for benchmark in "@CMAKE_CURRENT_BINARY_DIR@"/run_benchmark_*.sh; do
    name=$(basename "$benchmark")
    echo "Running $name..."
    "$benchmark"
done
+44
View File
@@ -0,0 +1,44 @@
#!/bin/sh
#
# setup.sh
#
# This file is sourced by the run_benchmark_*.sh scripts. It checks the
# environment, defines the OB_* variables used by those scripts, and prints
# a report header describing the build and the machine.

# Quoted so that the test also behaves when DATA_DIR contains spaces.
if [ -z "$DATA_DIR" ]; then
    echo "Please set DATA_DIR environment variable before running benchmark" >&2
    exit 1
fi

# Directory this file was generated into. The benchmark executables are
# expected to be there as well (assumes no custom runtime output directory
# is configured for the build).
OB_DIR="@CMAKE_CURRENT_BINARY_DIR@"

OB_BUILD_TYPE="@CMAKE_BUILD_TYPE@"
OB_COMPILER="@CMAKE_CXX_COMPILER@"
OB_COMPILER_VERSION=$("$OB_COMPILER" --version | head -n 1)
OB_CXXFLAGS="@_cxx_flags@"

# Number of times each benchmark is repeated per data file.
OB_RUNS=3
OB_SEQ=$(seq -s' ' 1 "$OB_RUNS")

OB_TIME_CMD=/usr/bin/time
OB_TIME_FORMAT="%M %e %S %U %P %C"

# All regular files (following symlinks) directly inside DATA_DIR, sorted.
OB_DATA_FILES=$(find -L "$DATA_DIR" -mindepth 1 -maxdepth 1 -type f | sort)

echo "BENCHMARK: $BENCHMARK_NAME"
echo "---------------------"
echo "BUILD:"
# printf is used for the lines containing a tab because whether echo
# interprets "\t" differs between /bin/sh implementations.
printf 'build type\t: %s\n' "$OB_BUILD_TYPE"
printf 'compiler\t: %s\n' "$OB_COMPILER"
printf 'CXX version\t: %s\n' "$OB_COMPILER_VERSION"
printf 'CXX flags\t: %s\n' "$OB_CXXFLAGS"
echo "---------------------"
echo "CPU:"
grep '^model name' /proc/cpuinfo | tail -n 1
grep '^cpu MHz' /proc/cpuinfo | tail -n 1
grep '^cpu cores' /proc/cpuinfo | tail -n 1
grep '^siblings' /proc/cpuinfo | tail -n 1
echo "---------------------"
echo "MEMORY:"
free
echo "---------------------"
echo "RESULTS:"