Merge commit '788bc67faa7738cf7c6b2a192ecf3e3567d1c20e' into develop

This commit is contained in:
Patrick Niklaus
2015-08-28 12:42:03 +02:00
150 changed files with 12325 additions and 4321 deletions
-776
View File
@@ -1,776 +0,0 @@
/*
*
* Copyright (c) 2004
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE unicode_iterator.hpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: Iterator adapters for converting between different Unicode encodings.
*/
/****************************************************************************
Contents:
~~~~~~~~~
1) Read Only, Input Adapters:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
template <class BaseIterator, class U8Type = ::boost::uint8_t>
class u32_to_u8_iterator;
Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8.
template <class BaseIterator, class U32Type = ::boost::uint32_t>
class u8_to_u32_iterator;
Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32.
template <class BaseIterator, class U16Type = ::boost::uint16_t>
class u32_to_u16_iterator;
Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16.
template <class BaseIterator, class U32Type = ::boost::uint32_t>
class u16_to_u32_iterator;
Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32.
2) Single pass output iterator adapters:
template <class BaseIterator>
class utf8_output_iterator;
Accepts UTF-32 code points and forwards them on as UTF-8 code points.
template <class BaseIterator>
class utf16_output_iterator;
Accepts UTF-32 code points and forwards them on as UTF-16 code points.
****************************************************************************/
#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP
#define BOOST_REGEX_UNICODE_ITERATOR_HPP
#include <boost/cstdint.hpp>
#include <boost/assert.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/static_assert.hpp>
#include <boost/throw_exception.hpp>
#include <stdexcept>
#ifndef BOOST_NO_STD_LOCALE
#include <sstream>
#include <ios>
#endif
#include <limits.h> // CHAR_BIT
namespace boost{
namespace detail{
// Constants used when splitting a code point >= 0x10000 into a UTF-16 pair.
// high_surrogate_base is 0xD800 - (0x10000 >> 10): adding (v >> 10) to it both
// removes the 0x10000 bias and lands in the high surrogate block in one step.
static const ::boost::uint16_t high_surrogate_base = 0xD7C0u;
// First value of the low surrogate block; the low ten bits of v are added to it.
static const ::boost::uint16_t low_surrogate_base = 0xDC00u;
// Selects the ten payload bits carried by the low surrogate.
static const ::boost::uint32_t ten_bit_mask = 0x3FFu;
// True when v is a UTF-16 high (leading) surrogate, i.e. lies in D800..DBFF.
inline bool is_high_surrogate(::boost::uint16_t v)
{
   // For a 16-bit value this range test is the same as checking that the
   // top six bits equal 110110.
   return (v >= 0xD800u) && (v <= 0xDBFFu);
}
// True when v is a UTF-16 low (trailing) surrogate, i.e. lies in DC00..DFFF.
inline bool is_low_surrogate(::boost::uint16_t v)
{
   // For a 16-bit value this range test is the same as checking that the
   // top six bits equal 110111.
   return (v >= 0xDC00u) && (v <= 0xDFFFu);
}
// True when v is any UTF-16 surrogate value (high or low).
template <class T>
inline bool is_surrogate(T v)
{
   // Clearing the low eleven bits maps every value in D800..DFFF to 0xD800.
   const bool in_surrogate_block = ((v & 0xFFFFF800u) == 0xd800);
   return in_surrogate_block;
}
// Length in bytes (1..4) of the UTF-8 sequence introduced by lead byte c.
// The number of leading 1 bits in c gives the length: none means a single
// byte, and anything above four is clamped to four.
inline unsigned utf8_byte_count(boost::uint8_t c)
{
   unsigned leading_ones = 0;
   // walk from the most significant bit down until the first 0 bit:
   for(boost::uint8_t bit = 0x80u; c & bit; bit >>= 1)
      ++leading_ones;
   if(leading_ones == 0)
      return 1;
   return (leading_ones > 4) ? 4 : leading_ones;
}
// Number of bytes that follow lead byte c in its UTF-8 sequence (0..3).
inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
{
   const unsigned total_bytes = utf8_byte_count(c);
   return total_bytes - 1;
}
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4100)
#endif
// Reports an unencodable UTF-32 value by throwing std::out_of_range through
// boost::throw_exception. When stream support is available the offending
// value is included in the message in hexadecimal.
inline void invalid_utf32_code_point(::boost::uint32_t val)
{
#ifndef BOOST_NO_STD_LOCALE
   std::stringstream message;
   message << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence";
   std::out_of_range error(message.str());
#else
   // val is unused in this configuration.
   std::out_of_range error("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence");
#endif
   boost::throw_exception(error);
}
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
} // namespace detail
// Read-only bidirectional iterator adapter that walks a sequence of UTF-32
// code points (through BaseIterator) and presents it as UTF-16 code units.
//
// A code point is converted lazily: the one or two resulting code units are
// cached in m_values and m_current indexes the unit currently referred to.
// m_current == 2 means "nothing has been read at m_position yet".
// Values above 0x10FFFF and surrogate values are reported through
// detail::invalid_utf32_code_point().
template <class BaseIterator, class U16Type = ::boost::uint16_t>
class u32_to_u16_iterator
: public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type>
{
typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
// the underlying sequence must hold 32-bit values and the output type must be 16 bits wide:
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16);
#endif
public:
// Returns the current UTF-16 code unit, converting the underlying code
// point first if that has not happened yet.
typename base_type::reference
dereference()const
{
if(m_current == 2)
extract_current();
return m_values[m_current];
}
// Two iterators are equal when they refer to the same underlying position
// and the same code unit within it; m_current values 0 and 2 both denote
// the first code unit (2 being the not-yet-read state).
bool equal(const u32_to_u16_iterator& that)const
{
if(m_position == that.m_position)
{
// Both m_currents must be equal, or both even
// this is the same as saying their sum must be even:
return (m_current + that.m_current) & 1u ? false : true;
}
return false;
}
// Advances by one UTF-16 code unit.
void increment()
{
// if we have a pending read then read now, so that we know whether
// to skip a position, or move to a low-surrogate:
if(m_current == 2)
{
// pending read:
extract_current();
}
// move to the next surrogate position:
++m_current;
// if we've reached the end skip a position:
// (unused slots of m_values are 0, and m_values[2] is always 0)
if(m_values[m_current] == 0)
{
m_current = 2;
++m_position;
}
}
// Steps back by one UTF-16 code unit.
void decrement()
{
if(m_current != 1)
{
// we are on the first unit of a code point (or nothing has been read),
// so move to the previous code point and select its last code unit.
// decrementing an iterator always leads to a valid position:
--m_position;
extract_current();
m_current = m_values[1] ? 1 : 0;
}
else
{
// on a low surrogate: step back to the high surrogate of the same pair
m_current = 0;
}
}
// Returns the current position in the underlying UTF-32 sequence.
BaseIterator base()const
{
return m_position;
}
// construct:
u32_to_u16_iterator() : m_position(), m_current(0)
{
m_values[0] = 0;
m_values[1] = 0;
m_values[2] = 0;
}
// Starts at b with a pending read (m_current == 2); *b is not read here.
u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2)
{
m_values[0] = 0;
m_values[1] = 0;
m_values[2] = 0;
}
private:
// Converts *m_position into one or two UTF-16 code units stored in
// m_values and resets m_current to 0. It is const because it only touches
// the mutable cache.
void extract_current()const
{
// begin by checking for a code point out of range:
::boost::uint32_t v = *m_position;
if(v >= 0x10000u)
{
if(v > 0x10FFFFu)
detail::invalid_utf32_code_point(*m_position);
// split into two surrogates:
m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base;
m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
m_current = 0;
BOOST_ASSERT(detail::is_high_surrogate(m_values[0]));
BOOST_ASSERT(detail::is_low_surrogate(m_values[1]));
}
else
{
// 16-bit code point:
m_values[0] = static_cast<U16Type>(*m_position);
m_values[1] = 0;
m_current = 0;
// value must not be a surrogate:
if(detail::is_surrogate(m_values[0]))
detail::invalid_utf32_code_point(*m_position);
}
}
// position in the underlying UTF-32 sequence
BaseIterator m_position;
// cached code units for *m_position; the third element is never written
// after construction and stays 0 as a terminator
mutable U16Type m_values[3];
// index into m_values, or 2 when a read is pending
mutable unsigned m_current;
};
// Read-only bidirectional iterator adapter that walks a sequence of UTF-16
// code units (through BaseIterator) and presents it as UTF-32 code points.
//
// The decoded value is cached in m_value; the sentinel pending_read marks
// "not decoded yet". Misplaced surrogates are reported through
// invalid_code_point(), which throws std::out_of_range.
template <class BaseIterator, class U32Type = ::boost::uint32_t>
class u16_to_u32_iterator
: public boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
{
typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
// special values for pending iterator reads:
BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
// the underlying sequence must hold 16-bit values and the output type must be 32 bits wide:
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16);
BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
#endif
public:
// Returns the code point at the current position, decoding it on first use.
typename base_type::reference
dereference()const
{
if(m_value == pending_read)
extract_current();
return m_value;
}
// Iterators compare equal when their underlying positions are equal.
bool equal(const u16_to_u32_iterator& that)const
{
return m_position == that.m_position;
}
// Advances by one code point, i.e. by one or two UTF-16 code units.
// Reads *m_position to decide; no validation is done here.
void increment()
{
// skip high surrogate first if there is one:
if(detail::is_high_surrogate(*m_position)) ++m_position;
++m_position;
m_value = pending_read;
}
// Steps back by one code point, i.e. by one or two UTF-16 code units.
void decrement()
{
--m_position;
// if we have a low surrogate then go back one more:
if(detail::is_low_surrogate(*m_position))
--m_position;
m_value = pending_read;
}
// Returns the current position in the underlying UTF-16 sequence.
BaseIterator base()const
{
return m_position;
}
// construct:
u16_to_u32_iterator() : m_position()
{
m_value = pending_read;
}
// Unchecked: nothing is read from b here.
u16_to_u32_iterator(BaseIterator b) : m_position(b)
{
m_value = pending_read;
}
//
// Range checked version:
//
u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
{
m_value = pending_read;
//
// The range must not start with a low surrogate, or end in a high surrogate,
// otherwise we run the risk of running outside the underlying input range.
// Likewise b must not be located at a low surrogate.
//
boost::uint16_t val;
if(start != end)
{
if((b != start) && (b != end))
{
// b points strictly inside the range: it must not be a low surrogate
val = *b;
if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
invalid_code_point(val);
}
// first code unit of the range must not be a low surrogate
val = *start;
if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u))
invalid_code_point(val);
// last code unit of the range must not be a high surrogate
val = *--end;
if(detail::is_high_surrogate(val))
invalid_code_point(val);
}
}
private:
// Throws std::out_of_range (through boost::throw_exception) describing the
// offending code unit; the value is only included when stream support is
// available.
static void invalid_code_point(::boost::uint16_t val)
{
#ifndef BOOST_NO_STD_LOCALE
std::stringstream ss;
ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence";
std::out_of_range e(ss.str());
#else
std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence");
#endif
boost::throw_exception(e);
}
// Decodes the code point starting at m_position into m_value without
// moving m_position. It is const because it only touches the mutable cache.
void extract_current()const
{
m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position));
// if the value is a high surrogate then combine it with the following code unit:
if(detail::is_high_surrogate(*m_position))
{
// precondition; next value must be a low surrogate:
BaseIterator next(m_position);
::boost::uint16_t t = *++next;
if((t & 0xFC00u) != 0xDC00u)
invalid_code_point(t);
// subtracting high_surrogate_base (0xD7C0) rather than 0xD800 restores
// the 0x10000 offset once the value is shifted up by ten bits:
m_value = (m_value - detail::high_surrogate_base) << 10;
m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask);
}
// postcondition; result must not be a surrogate:
// (this is what rejects a low surrogate that is not preceded by a high one)
if(detail::is_surrogate(m_value))
invalid_code_point(static_cast< ::boost::uint16_t>(m_value));
}
// position in the underlying UTF-16 sequence
BaseIterator m_position;
// cached code point, or pending_read when nothing has been decoded yet
mutable U32Type m_value;
};
// Read-only bidirectional iterator adapter that walks a sequence of UTF-32
// code points (through BaseIterator) and presents it as UTF-8 bytes.
//
// A code point is encoded lazily into m_values (one to four bytes, unused
// slots set to 0) and m_current indexes the byte currently referred to.
// m_current == 4 means "nothing has been read at m_position yet".
// Values above 0x10FFFF are reported through
// detail::invalid_utf32_code_point().
template <class BaseIterator, class U8Type = ::boost::uint8_t>
class u32_to_u8_iterator
: public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type>
{
typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type;
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
// the underlying sequence must hold 32-bit values and the output type must be 8 bits wide:
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8);
#endif
public:
// Returns the current UTF-8 byte, encoding the underlying code point first
// if that has not happened yet.
typename base_type::reference
dereference()const
{
if(m_current == 4)
extract_current();
return m_values[m_current];
}
// Two iterators are equal when they refer to the same underlying position
// and the same byte within it; m_current values 0 and 4 both denote the
// first byte (4 being the not-yet-read state).
bool equal(const u32_to_u8_iterator& that)const
{
if(m_position == that.m_position)
{
// either the m_current's must be equal, or one must be 0 and
// the other 4: which means neither may have either of its two
// low bits set:
return (m_current == that.m_current)
|| (((m_current | that.m_current) & 3) == 0);
}
return false;
}
// Advances by one UTF-8 byte.
void increment()
{
// if we have a pending read then read now, so that we know whether
// to skip a position, or move to the next byte of the same code point:
if(m_current == 4)
{
// pending read:
extract_current();
}
// move to the next byte of the cached sequence:
++m_current;
// if we've reached the end skip a position:
// (unused slots of m_values are 0, and m_values[4] is always 0)
if(m_values[m_current] == 0)
{
m_current = 4;
++m_position;
}
}
// Steps back by one UTF-8 byte.
void decrement()
{
if((m_current & 3) == 0)
{
// m_current is 0 or 4: we are at the first byte of a code point, so
// move to the previous code point and select its last non-zero byte
--m_position;
extract_current();
m_current = 3;
while(m_current && (m_values[m_current] == 0))
--m_current;
}
else
--m_current;
}
// Returns the current position in the underlying UTF-32 sequence.
BaseIterator base()const
{
return m_position;
}
// construct:
u32_to_u8_iterator() : m_position(), m_current(0)
{
m_values[0] = 0;
m_values[1] = 0;
m_values[2] = 0;
m_values[3] = 0;
m_values[4] = 0;
}
// Starts at b with a pending read (m_current == 4); *b is not read here.
u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4)
{
m_values[0] = 0;
m_values[1] = 0;
m_values[2] = 0;
m_values[3] = 0;
m_values[4] = 0;
}
private:
// Encodes *m_position as UTF-8 into m_values[0..3] (unused slots set to 0)
// and resets m_current to 0. It is const because it only touches the
// mutable cache.
// NOTE(review): only the upper bound 0x10FFFF is checked here; values in the
// surrogate range D800..DFFF are encoded like any other three-byte value.
void extract_current()const
{
boost::uint32_t c = *m_position;
if(c > 0x10FFFFu)
detail::invalid_utf32_code_point(c);
if(c < 0x80u)
{
// one byte: 0xxxxxxx
m_values[0] = static_cast<unsigned char>(c);
m_values[1] = static_cast<unsigned char>(0u);
m_values[2] = static_cast<unsigned char>(0u);
m_values[3] = static_cast<unsigned char>(0u);
}
else if(c < 0x800u)
{
// two bytes: 110xxxxx 10xxxxxx
m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6));
m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
m_values[2] = static_cast<unsigned char>(0u);
m_values[3] = static_cast<unsigned char>(0u);
}
else if(c < 0x10000u)
{
// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12));
m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
m_values[3] = static_cast<unsigned char>(0u);
}
else
{
// four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18));
m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
m_values[2] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
}
m_current= 0;
}
// position in the underlying UTF-32 sequence
BaseIterator m_position;
// cached bytes for *m_position; the fifth element is never written after
// construction and stays 0 as a terminator
mutable U8Type m_values[5];
// index into m_values, or 4 when a read is pending
mutable unsigned m_current;
};
// Read-only bidirectional iterator adapter that walks a sequence of UTF-8
// bytes (through BaseIterator) and presents it as UTF-32 code points.
//
// The decoded value is cached in m_value; the sentinel pending_read marks
// "not decoded yet". Malformed input is reported through invalid_sequence(),
// which throws std::out_of_range.
template <class BaseIterator, class U32Type = ::boost::uint32_t>
class u8_to_u32_iterator
: public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
{
typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
// special values for pending iterator reads:
BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu);
#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
// the underlying sequence must hold 8-bit values and the output type must be 32 bits wide:
typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8);
BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
#endif
public:
// Returns the code point at the current position, decoding it on first use.
typename base_type::reference
dereference()const
{
if(m_value == pending_read)
extract_current();
return m_value;
}
// Iterators compare equal when their underlying positions are equal.
bool equal(const u8_to_u32_iterator& that)const
{
return m_position == that.m_position;
}
// Advances by one code point, i.e. by one to four bytes.
void increment()
{
// We must not start with a continuation character:
if((static_cast<boost::uint8_t>(*m_position) & 0xC0) == 0x80)
invalid_sequence();
// work out how many bytes the sequence starting here occupies:
unsigned c = detail::utf8_byte_count(*m_position);
if(m_value == pending_read)
{
// Since we haven't read in a value, we need to validate the code points:
for(unsigned i = 0; i < c; ++i)
{
++m_position;
// We must have a continuation byte:
// (not checked after the final step, where m_position has already
// moved past this sequence)
if((i != c - 1) && ((static_cast<boost::uint8_t>(*m_position) & 0xC0) != 0x80))
invalid_sequence();
}
}
else
{
// the sequence was already checked by extract_current(), just skip it
std::advance(m_position, c);
}
m_value = pending_read;
}
// Steps back by one code point.
void decrement()
{
// Keep backtracking until we don't have a trailing character:
unsigned count = 0;
while((*--m_position & 0xC0u) == 0x80u) ++count;
// now check that the sequence was valid:
// (the lead byte must announce exactly as many trailing bytes as we skipped)
if(count != detail::utf8_trailing_byte_count(*m_position))
invalid_sequence();
m_value = pending_read;
}
// Returns the current position in the underlying UTF-8 sequence.
BaseIterator base()const
{
return m_position;
}
// construct:
u8_to_u32_iterator() : m_position()
{
m_value = pending_read;
}
// Unchecked: nothing is read from b here.
u8_to_u32_iterator(BaseIterator b) : m_position(b)
{
m_value = pending_read;
}
//
// Checked constructor:
//
u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b)
{
m_value = pending_read;
//
// We must not start with a continuation character, or end with a
// truncated UTF-8 sequence otherwise we run the risk of going past
// the start/end of the underlying sequence:
//
if(start != end)
{
unsigned char v = *start;
if((v & 0xC0u) == 0x80u)
invalid_sequence();
// b itself must not point into the middle of a sequence:
if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u))
invalid_sequence();
// walk back from end to the lead byte of the final sequence:
BaseIterator pos = end;
do
{
v = *--pos;
}
while((start != pos) && ((v & 0xC0u) == 0x80u));
// the final sequence must fit completely before end:
std::ptrdiff_t extra = detail::utf8_byte_count(v);
if(std::distance(pos, end) < extra)
invalid_sequence();
}
}
private:
// Throws std::out_of_range through boost::throw_exception.
static void invalid_sequence()
{
std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character");
boost::throw_exception(e);
}
// Decodes the code point starting at m_position into m_value without
// moving m_position. It is const because it only touches the mutable cache.
// NOTE(review): the checks visible here are "lead byte is not a continuation
// byte", "every trailing byte is a continuation byte" and "result is at most
// 0x10FFFF"; overlong encodings and results in the surrogate range are not
// rejected.
void extract_current()const
{
m_value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position));
// we must not have a continuation character:
if((m_value & 0xC0u) == 0x80u)
invalid_sequence();
// see how many extra bytes we have:
unsigned extra = detail::utf8_trailing_byte_count(*m_position);
// extract the extra bits, 6 from each extra byte:
BaseIterator next(m_position);
for(unsigned c = 0; c < extra; ++c)
{
++next;
m_value <<= 6;
// We must have a continuation byte:
if((static_cast<boost::uint8_t>(*next) & 0xC0) != 0x80)
invalid_sequence();
m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
}
// we now need to remove a few of the leftmost bits, but how many depends
// upon how many extra bytes we've extracted:
// (the length marker bits of the lead byte were shifted up along with
// its payload; masks[extra] keeps only the payload bits)
static const boost::uint32_t masks[4] =
{
0x7Fu,
0x7FFu,
0xFFFFu,
0x1FFFFFu,
};
m_value &= masks[extra];
// check the result:
if(m_value > static_cast<U32Type>(0x10FFFFu))
invalid_sequence();
}
// position in the underlying UTF-8 sequence
BaseIterator m_position;
// cached code point, or pending_read when nothing has been decoded yet
mutable U32Type m_value;
};
// Single pass output iterator adapter: every UTF-32 code point assigned
// through it is written to the wrapped output iterator as one or two UTF-16
// code units. Values above 0x10FFFF and surrogate values are reported through
// detail::invalid_utf32_code_point().
template <class BaseIterator>
class utf16_output_iterator
{
public:
   typedef void difference_type;
   typedef void value_type;
   typedef boost::uint32_t* pointer;
   typedef boost::uint32_t& reference;
   typedef std::output_iterator_tag iterator_category;

   utf16_output_iterator(const BaseIterator& b)
      : m_position(b){}
   utf16_output_iterator(const utf16_output_iterator& that)
      : m_position(that.m_position){}
   utf16_output_iterator& operator=(const utf16_output_iterator& that)
   {
      m_position = that.m_position;
      return *this;
   }
   // Dereferencing yields the iterator itself, so that "*it = value" ends up
   // in the operator= overload below.
   const utf16_output_iterator& operator*()const
   {
      return *this;
   }
   // Encodes val and forwards the result to the wrapped iterator.
   void operator=(boost::uint32_t val)const
   {
      push(val);
   }
   // Increments do nothing: the wrapped iterator is advanced while writing.
   utf16_output_iterator& operator++()
   {
      return *this;
   }
   utf16_output_iterator& operator++(int)
   {
      return *this;
   }
   // Returns the current state of the wrapped output iterator.
   BaseIterator base()const
   {
      return m_position;
   }
private:
   // Writes v to the wrapped iterator as UTF-16.
   void push(boost::uint32_t v)const
   {
      if(v < 0x10000u)
      {
         // fits in a single code unit, but must not be a surrogate value:
         if(detail::is_surrogate(v))
            detail::invalid_utf32_code_point(v);
         *m_position++ = static_cast<boost::uint16_t>(v);
         return;
      }
      // needs a surrogate pair; reject anything beyond the last code point first:
      if(v > 0x10FFFFu)
         detail::invalid_utf32_code_point(v);
      *m_position++ = static_cast<boost::uint16_t>(v >> 10) + detail::high_surrogate_base;
      *m_position++ = static_cast<boost::uint16_t>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
   }
   // mutable so that the const assignment operator can advance it
   mutable BaseIterator m_position;
};
// Single pass output iterator adapter: every UTF-32 code point assigned
// through it is written to the wrapped output iterator as one to four UTF-8
// bytes. Values above 0x10FFFF are reported through
// detail::invalid_utf32_code_point().
template <class BaseIterator>
class utf8_output_iterator
{
public:
   typedef void difference_type;
   typedef void value_type;
   typedef boost::uint32_t* pointer;
   typedef boost::uint32_t& reference;
   typedef std::output_iterator_tag iterator_category;

   utf8_output_iterator(const BaseIterator& b)
      : m_position(b){}
   utf8_output_iterator(const utf8_output_iterator& that)
      : m_position(that.m_position){}
   utf8_output_iterator& operator=(const utf8_output_iterator& that)
   {
      m_position = that.m_position;
      return *this;
   }
   // Dereferencing yields the iterator itself, so that "*it = value" ends up
   // in the operator= overload below.
   const utf8_output_iterator& operator*()const
   {
      return *this;
   }
   // Encodes val and forwards the result to the wrapped iterator.
   void operator=(boost::uint32_t val)const
   {
      push(val);
   }
   // Increments do nothing: the wrapped iterator is advanced while writing.
   utf8_output_iterator& operator++()
   {
      return *this;
   }
   utf8_output_iterator& operator++(int)
   {
      return *this;
   }
   // Returns the current state of the wrapped output iterator.
   BaseIterator base()const
   {
      return m_position;
   }
private:
   // Writes c to the wrapped iterator as UTF-8.
   void push(boost::uint32_t c)const
   {
      if(c > 0x10FFFFu)
         detail::invalid_utf32_code_point(c);
      if(c < 0x80u)
      {
         // one byte: 0xxxxxxx
         *m_position++ = static_cast<unsigned char>(c);
         return;
      }
      if(c < 0x800u)
      {
         // two bytes: 110xxxxx 10xxxxxx
         *m_position++ = static_cast<unsigned char>(0xC0u + (c >> 6));
         *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
         return;
      }
      if(c < 0x10000u)
      {
         // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
         *m_position++ = static_cast<unsigned char>(0xE0u + (c >> 12));
         *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
         *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
         return;
      }
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *m_position++ = static_cast<unsigned char>(0xF0u + (c >> 18));
      *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
      *m_position++ = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
      *m_position++ = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
   }
   // mutable so that the const assignment operator can advance it
   mutable BaseIterator m_position;
};
} // namespace boost
#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP
-103
View File
@@ -1,103 +0,0 @@
#ifndef MMAP_FOR_WINDOWS_HPP
#define MMAP_FOR_WINDOWS_HPP
/* mmap() replacement for Windows
*
* Author: Mike Frysinger <vapier@gentoo.org>
* Placed into the public domain
*/
/* References:
* CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx
* CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx
* MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx
* UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx
*/
#include <io.h>
#include <windows.h>
#include <sys/types.h>
#define PROT_READ 0x1
#define PROT_WRITE 0x2
/* This flag is only available in WinXP+ */
#ifdef FILE_MAP_EXECUTE
#define PROT_EXEC 0x4
#else
#define PROT_EXEC 0x0
#define FILE_MAP_EXECUTE 0
#endif
#define MAP_SHARED 0x01
#define MAP_PRIVATE 0x02
#define MAP_ANONYMOUS 0x20
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FAILED ((void *) -1)
/* Upper 32 bits of a 64-bit value, as a DWORD. */
static DWORD dword_hi(uint64_t x) {
    const uint64_t upper_half = x >> 32;
    return static_cast<DWORD>(upper_half);
}
/* Lower 32 bits of a 64-bit value, as a DWORD. */
static DWORD dword_lo(uint64_t x) {
    const uint64_t lower_half = x & 0xffffffff;
    return static_cast<DWORD>(lower_half);
}
static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset)
{
if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
return MAP_FAILED;
if (fd == -1) {
if (!(flags & MAP_ANON) || offset)
return MAP_FAILED;
} else if (flags & MAP_ANON)
return MAP_FAILED;
DWORD flProtect;
if (prot & PROT_WRITE) {
if (prot & PROT_EXEC)
flProtect = PAGE_EXECUTE_READWRITE;
else
flProtect = PAGE_READWRITE;
} else if (prot & PROT_EXEC) {
if (prot & PROT_READ)
flProtect = PAGE_EXECUTE_READ;
else if (prot & PROT_EXEC)
flProtect = PAGE_EXECUTE;
} else
flProtect = PAGE_READONLY;
uint64_t end = static_cast<uint64_t>(length) + offset;
HANDLE mmap_fd;
if (fd == -1)
mmap_fd = INVALID_HANDLE_VALUE;
else
mmap_fd = (HANDLE)_get_osfhandle(fd);
HANDLE h = CreateFileMapping(mmap_fd, NULL, flProtect, dword_hi(end), dword_lo(end), NULL);
if (h == NULL)
return MAP_FAILED;
DWORD dwDesiredAccess;
if (prot & PROT_WRITE)
dwDesiredAccess = FILE_MAP_WRITE;
else
dwDesiredAccess = FILE_MAP_READ;
if (prot & PROT_EXEC)
dwDesiredAccess |= FILE_MAP_EXECUTE;
if (flags & MAP_PRIVATE)
dwDesiredAccess |= FILE_MAP_COPY;
void *ret = MapViewOfFile(h, dwDesiredAccess, dword_hi(offset), dword_lo(offset), length);
if (ret == NULL) {
CloseHandle(h);
ret = MAP_FAILED;
}
return ret;
}
static int munmap(void *addr, size_t length)
{
return UnmapViewOfFile(addr) ? 0 : -1;
/* ruh-ro, we leaked handle from CreateFileMapping() ... */
}
#endif
+9 -9
View File
@@ -171,7 +171,7 @@ namespace osmium {
}
void add_tags_to_area(osmium::builder::AreaBuilder& builder, const osmium::Relation& relation) const {
auto count = std::count_if(relation.tags().begin(), relation.tags().end(), filter());
const auto count = std::count_if(relation.tags().begin(), relation.tags().end(), filter());
if (debug()) {
std::cerr << " found " << count << " tags on relation (without ignored ones)\n";
@@ -331,7 +331,7 @@ namespace osmium {
if (debug()) {
std::cerr << " has_closed_subring_back()\n";
}
auto end = ring.segments().end();
const auto end = ring.segments().end();
for (auto it = ring.segments().begin() + 1; it != end - 1; ++it) {
if (has_same_location(nr, it->first())) {
split_off_subring(ring, it, it, end);
@@ -348,7 +348,7 @@ namespace osmium {
if (debug()) {
std::cerr << " has_closed_subring_front()\n";
}
auto end = ring.segments().end();
const auto end = ring.segments().end();
for (auto it = ring.segments().begin() + 1; it != end - 1; ++it) {
if (has_same_location(nr, it->second())) {
split_off_subring(ring, it, ring.segments().begin(), it+1);
@@ -366,22 +366,22 @@ namespace osmium {
osmium::area::detail::ProtoRing::segments_type segments(ring.segments().size());
std::copy(ring.segments().begin(), ring.segments().end(), segments.begin());
std::sort(segments.begin(), segments.end());
auto it = std::adjacent_find(segments.begin(), segments.end(), [this](const osmium::area::detail::NodeRefSegment& s1, const osmium::area::detail::NodeRefSegment& s2) {
const auto it = std::adjacent_find(segments.begin(), segments.end(), [this](const osmium::area::detail::NodeRefSegment& s1, const osmium::area::detail::NodeRefSegment& s2) {
return has_same_location(s1.first(), s2.first());
});
if (it == segments.end()) {
return false;
}
auto r1 = std::find_first_of(ring.segments().begin(), ring.segments().end(), it, it+1);
const auto r1 = std::find_first_of(ring.segments().begin(), ring.segments().end(), it, it+1);
assert(r1 != ring.segments().end());
auto r2 = std::find_first_of(ring.segments().begin(), ring.segments().end(), it+1, it+2);
const auto r2 = std::find_first_of(ring.segments().begin(), ring.segments().end(), it+1, it+2);
assert(r2 != ring.segments().end());
if (debug()) {
std::cerr << " found subring in ring " << ring << " at " << it->first() << "\n";
}
auto m = std::minmax(r1, r2);
const auto m = std::minmax(r1, r2);
ProtoRing new_ring(m.first, m.second);
ring.remove_segments(m.first, m.second);
@@ -537,7 +537,7 @@ namespace osmium {
}
for (const auto ringptr : m_outer_rings) {
for (const auto segment : ringptr->segments()) {
for (const auto& segment : ringptr->segments()) {
if (!segment.role_outer()) {
++m_inner_outer_mismatches;
if (debug()) {
@@ -550,7 +550,7 @@ namespace osmium {
}
}
for (const auto ringptr : m_inner_rings) {
for (const auto segment : ringptr->segments()) {
for (const auto& segment : ringptr->segments()) {
if (!segment.role_inner()) {
++m_inner_outer_mismatches;
if (debug()) {
@@ -41,6 +41,8 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/memory/buffer.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node_ref.hpp>
#include <osmium/osm/relation.hpp>
#include <osmium/osm/tag.hpp>
#include <osmium/osm/way.hpp>
@@ -49,8 +51,6 @@ DEALINGS IN THE SOFTWARE.
namespace osmium {
struct invalid_location;
namespace relations {
class RelationMeta;
}
@@ -107,8 +107,8 @@ namespace osmium {
}
/**
* We are interested in all relations tagged with type=multipolygon or
* type=boundary.
* We are interested in all relations tagged with type=multipolygon
* or type=boundary.
*
* Overwritten from the base class.
*/
@@ -142,15 +142,22 @@ namespace osmium {
* Overwritten from the base class.
*/
void way_not_in_any_relation(const osmium::Way& way) {
if (way.nodes().size() > 3 && way.ends_have_same_location()) {
// way is closed and has enough nodes, build simple multipolygon
try {
// you need at least 4 nodes to make up a polygon
if (way.nodes().size() <= 3) {
return;
}
try {
if (!way.nodes().front().location() || !way.nodes().back().location()) {
throw osmium::invalid_location("invalid location");
}
if (way.ends_have_same_location()) {
// way is closed and has enough nodes, build simple multipolygon
TAssembler assembler(m_assembler_config);
assembler(way, m_output_buffer);
possibly_flush_output_buffer();
} catch (osmium::invalid_location&) {
// XXX ignore
}
} catch (osmium::invalid_location&) {
// XXX ignore
}
}
+19 -5
View File
@@ -147,6 +147,7 @@ namespace osmium {
* @param length Length of data in bytes. If data is a
* \0-terminated string, length must contain the
* \0 byte.
* @returns The number of bytes appended (length).
*/
osmium::memory::item_size_type append(const char* data, const osmium::memory::item_size_type length) {
unsigned char* target = m_buffer.reserve_space(length);
@@ -156,11 +157,24 @@ namespace osmium {
/**
* Append \0-terminated string to buffer.
*
* @param str \0-terminated string.
* @returns The number of bytes appended (strlen(str) + 1).
*/
osmium::memory::item_size_type append(const char* str) {
return append(str, static_cast<osmium::memory::item_size_type>(std::strlen(str) + 1));
}
/**
* Append '\0' to the buffer.
*
* @returns The number of bytes appended (always 1).
*/
osmium::memory::item_size_type append_zero() {
    // Reserve a single byte in the buffer and store the terminator in it.
    unsigned char* const target = m_buffer.reserve_space(1);
    *target = '\0';
    return 1;
}
/// Return the buffer this builder is using.
osmium::memory::Buffer& buffer() noexcept {
return m_buffer;
@@ -188,11 +202,11 @@ namespace osmium {
* Add user name to buffer.
*
* @param user Pointer to user name.
* @param length Length of user name including \0 byte.
* @param length Length of user name (without \0 termination).
*/
void add_user(const char* user, const string_size_type length) {
object().set_user_size(length);
add_size(append(user, length));
object().set_user_size(length + 1);
add_size(append(user, length) + append_zero());
add_padding(true);
}
@@ -202,7 +216,7 @@ namespace osmium {
* @param user Pointer to \0-terminated user name.
*/
void add_user(const char* user) {
add_user(user, static_cast_with_assert<string_size_type>(std::strlen(user) + 1));
add_user(user, static_cast_with_assert<string_size_type>(std::strlen(user)));
}
/**
@@ -211,7 +225,7 @@ namespace osmium {
* @param user User name.
*/
void add_user(const std::string& user) {
add_user(user.data(), static_cast_with_assert<string_size_type>(user.size() + 1));
add_user(user.data(), static_cast_with_assert<string_size_type>(user.size()));
}
}; // class ObjectBuilder
@@ -72,13 +72,25 @@ namespace osmium {
/**
* Add tag to buffer.
*
* @param key Tag key.
* @param value Tag value.
* @param key Tag key (0-terminated string).
* @param value Tag value (0-terminated string).
*/
void add_tag(const char* key, const char* value) {
    // The two appends write to the buffer, and the operands of '+' may be
    // evaluated in either order. Separate statements guarantee that the key
    // is stored before the value.
    const auto key_size = append(key);
    const auto value_size = append(value);
    add_size(key_size + value_size);
}
/**
* Add tag to buffer.
*
* @param key Pointer to tag key.
* @param key_length Length of key (not including the \0 byte).
* @param value Pointer to tag value.
* @param value_length Length of value (not including the \0 byte).
*/
void add_tag(const char* key, const string_size_type key_length, const char* value, const string_size_type value_length) {
    // Every append writes to the buffer, and the operands of '+' may be
    // evaluated in any order. One statement per append guarantees the
    // layout key, '\0', value, '\0'.
    auto size = append(key, key_length);
    size += append_zero();
    size += append(value, value_length);
    size += append_zero();
    add_size(size);
}
/**
* Add tag to buffer.
*
@@ -128,11 +140,11 @@ namespace osmium {
* @param member Relation member object where the length of the role
* will be set.
* @param role The role.
* @param length Length of role string including \0 termination.
* @param length Length of role (without \0 termination).
*/
void add_role(osmium::RelationMember& member, const char* role, const string_size_type length) {
member.set_role_size(length);
add_size(append(role, length));
member.set_role_size(length + 1);
add_size(append(role, length) + append_zero());
add_padding(true);
}
@@ -144,7 +156,7 @@ namespace osmium {
* @param role \0-terminated role.
*/
void add_role(osmium::RelationMember& member, const char* role) {
add_role(member, role, static_cast_with_assert<string_size_type>(std::strlen(role) + 1));
add_role(member, role, static_cast_with_assert<string_size_type>(std::strlen(role)));
}
/**
@@ -155,7 +167,7 @@ namespace osmium {
* @param role Role.
*/
void add_role(osmium::RelationMember& member, const std::string& role) {
add_role(member, role.data(), static_cast_with_assert<string_size_type>(role.size() + 1));
add_role(member, role.data(), static_cast_with_assert<string_size_type>(role.size()));
}
public:
@@ -174,18 +186,33 @@ namespace osmium {
* @param type The type (node, way, or relation).
* @param ref The ID of the member.
* @param role The role of the member.
* @param role_length Length of the role (without \0 termination).
* @param full_member Optional pointer to the member object. If it
* is available a copy will be added to the
* relation.
*/
void add_member(osmium::item_type type, object_id_type ref, const char* role, const string_size_type role_length, const osmium::OSMObject* full_member = nullptr) {
    const bool has_full_member = (full_member != nullptr);

    // Construct the member in place in space reserved for it, then account for its size.
    osmium::RelationMember* const member = reserve_space_for<osmium::RelationMember>();
    new (member) osmium::RelationMember(ref, type, has_full_member);
    add_size(sizeof(RelationMember));

    add_role(*member, role, role_length);

    // The copy of the full object, when given, is added after the role.
    if (has_full_member) {
        add_item(full_member);
    }
}
/**
* Add a member to the relation.
*
* @param type The type (node, way, or relation).
* @param ref The ID of the member.
* @param role The role of the member (\0 terminated string).
* @param full_member Optional pointer to the member object. If it
* is available a copy will be added to the
* relation.
*/
void add_member(osmium::item_type type, object_id_type ref, const char* role, const osmium::OSMObject* full_member = nullptr) {
osmium::RelationMember* member = reserve_space_for<osmium::RelationMember>();
new (member) osmium::RelationMember(ref, type, full_member != nullptr);
add_size(sizeof(RelationMember));
add_role(*member, role);
if (full_member) {
add_item(full_member);
}
add_member(type, ref, role, strlen(role), full_member);
}
/**
@@ -199,13 +226,7 @@ namespace osmium {
* relation.
*/
void add_member(osmium::item_type type, object_id_type ref, const std::string& role, const osmium::OSMObject* full_member = nullptr) {
osmium::RelationMember* member = reserve_space_for<osmium::RelationMember>();
new (member) osmium::RelationMember(ref, type, full_member != nullptr);
add_size(sizeof(RelationMember));
add_role(*member, role);
if (full_member) {
add_item(full_member);
}
add_member(type, ref, role.data(), role.size(), full_member);
}
}; // class RelationMemberListBuilder
+127 -37
View File
@@ -54,14 +54,43 @@ namespace osmium {
* Exception thrown when an invalid geometry is encountered. An example
* would be a linestring with less than two points.
*/
struct geometry_error : public std::runtime_error {
class geometry_error : public std::runtime_error {
geometry_error(const std::string& what) :
std::runtime_error(what) {
std::string m_message;
osmium::object_id_type m_id;
public:
geometry_error(const std::string& message, const char* object_type = "", osmium::object_id_type id = 0) :
std::runtime_error(message),
m_message(message),
m_id(id) {
if (m_id != 0) {
m_message += " (";
m_message += object_type;
m_message += "_id=";
m_message += std::to_string(m_id);
m_message += ")";
}
}
geometry_error(const char* what) :
std::runtime_error(what) {
void set_id(const char* object_type, osmium::object_id_type id) {
if (m_id == 0 && id != 0) {
m_message += " (";
m_message += object_type;
m_message += "_id=";
m_message += std::to_string(id);
m_message += ")";
}
m_id = id;
}
osmium::object_id_type id() const noexcept {
return m_id;
}
virtual const char* what() const noexcept override {
return m_message.c_str();
}
}; // struct geometry_error
@@ -174,11 +203,21 @@ namespace osmium {
}
point_type create_point(const osmium::Node& node) {
return create_point(node.location());
try {
return create_point(node.location());
} catch (osmium::geometry_error& e) {
e.set_id("node", node.id());
throw;
}
}
point_type create_point(const osmium::NodeRef& node_ref) {
return create_point(node_ref.location());
try {
return create_point(node_ref.location());
} catch (osmium::geometry_error& e) {
e.set_id("node", node_ref.ref());
throw;
}
}
/* LineString */
@@ -240,14 +279,19 @@ namespace osmium {
}
if (num_points < 2) {
throw osmium::geometry_error("not enough points for linestring");
throw osmium::geometry_error("need at least two points for linestring");
}
return linestring_finish(num_points);
}
linestring_type create_linestring(const osmium::Way& way, use_nodes un=use_nodes::unique, direction dir=direction::forward) {
return create_linestring(way.nodes(), un, dir);
try {
return create_linestring(way.nodes(), un, dir);
} catch (osmium::geometry_error& e) {
e.set_id("way", way.id());
throw;
}
}
/* Polygon */
@@ -283,40 +327,86 @@ namespace osmium {
return m_impl.polygon_finish(num_points);
}
/* MultiPolygon */
polygon_type create_polygon(const osmium::WayNodeList& wnl, use_nodes un = use_nodes::unique, direction dir = direction::forward) {
polygon_start();
size_t num_points = 0;
multipolygon_type create_multipolygon(const osmium::Area& area) {
size_t num_polygons = 0;
size_t num_rings = 0;
m_impl.multipolygon_start();
for (auto it = area.cbegin(); it != area.cend(); ++it) {
const osmium::OuterRing& ring = static_cast<const osmium::OuterRing&>(*it);
if (it->type() == osmium::item_type::outer_ring) {
if (num_polygons > 0) {
m_impl.multipolygon_polygon_finish();
}
m_impl.multipolygon_polygon_start();
m_impl.multipolygon_outer_ring_start();
add_points(ring);
m_impl.multipolygon_outer_ring_finish();
++num_rings;
++num_polygons;
} else if (it->type() == osmium::item_type::inner_ring) {
m_impl.multipolygon_inner_ring_start();
add_points(ring);
m_impl.multipolygon_inner_ring_finish();
++num_rings;
if (un == use_nodes::unique) {
osmium::Location last_location;
switch (dir) {
case direction::forward:
num_points = fill_polygon_unique(wnl.cbegin(), wnl.cend());
break;
case direction::backward:
num_points = fill_polygon_unique(wnl.crbegin(), wnl.crend());
break;
}
} else {
switch (dir) {
case direction::forward:
num_points = fill_polygon(wnl.cbegin(), wnl.cend());
break;
case direction::backward:
num_points = fill_polygon(wnl.crbegin(), wnl.crend());
break;
}
}
// if there are no rings, this area is invalid
if (num_rings == 0) {
throw osmium::geometry_error("invalid area");
if (num_points < 4) {
throw osmium::geometry_error("need at least four points for polygon");
}
m_impl.multipolygon_polygon_finish();
return m_impl.multipolygon_finish();
return polygon_finish(num_points);
}
// Create a polygon from the nodes of a way by delegating to the overload
// taking way.nodes(). If that overload throws a geometry_error, the ID of
// the way is attached to the exception before it is rethrown.
polygon_type create_polygon(const osmium::Way& way, use_nodes un=use_nodes::unique, direction dir=direction::forward) {
try {
return create_polygon(way.nodes(), un, dir);
} catch (osmium::geometry_error& e) {
// Caught by non-const reference so set_id() can modify the exception
// in place; the bare `throw;` then rethrows that same exception object.
e.set_id("way", way.id());
throw;
}
}
/* MultiPolygon */
/**
 * Create a multipolygon geometry from an area.
 *
 * The items of the area are visited in order. Every outer ring starts
 * a new polygon (finishing the previous one, if any); every inner ring
 * is added to the polygon that is currently open. Items of any other
 * type are ignored.
 *
 * @param area The area to convert.
 * @returns Whatever the implementation's multipolygon_finish() returns.
 * @throws osmium::geometry_error if the area contains no rings or if
 *         the implementation reports a problem. The ID of the area is
 *         attached to the exception before it is rethrown.
 */
multipolygon_type create_multipolygon(const osmium::Area& area) {
    try {
        size_t num_polygons = 0;
        size_t num_rings = 0;
        m_impl.multipolygon_start();

        for (auto it = area.cbegin(); it != area.cend(); ++it) {
            if (it->type() == osmium::item_type::outer_ring) {
                // Cast only after the item type has been checked, so no
                // reference of the wrong type is formed for other items.
                const osmium::OuterRing& ring = static_cast<const osmium::OuterRing&>(*it);
                if (num_polygons > 0) {
                    m_impl.multipolygon_polygon_finish();
                }
                m_impl.multipolygon_polygon_start();
                m_impl.multipolygon_outer_ring_start();
                add_points(ring);
                m_impl.multipolygon_outer_ring_finish();
                ++num_rings;
                ++num_polygons;
            } else if (it->type() == osmium::item_type::inner_ring) {
                // NOTE(review): inner rings are still viewed through the
                // OuterRing type, as before, because that is the type
                // add_points() was called with here - confirm whether
                // add_points() also accepts an inner ring type directly.
                const osmium::OuterRing& ring = static_cast<const osmium::OuterRing&>(*it);
                m_impl.multipolygon_inner_ring_start();
                add_points(ring);
                m_impl.multipolygon_inner_ring_finish();
                ++num_rings;
            }
        }

        // if there are no rings, this area is invalid
        if (num_rings == 0) {
            throw osmium::geometry_error("area contains no rings");
        }

        // Close the polygon opened by the last outer ring.
        m_impl.multipolygon_polygon_finish();
        return m_impl.multipolygon_finish();
    } catch (osmium::geometry_error& e) {
        // Caught by non-const reference so the ID can be added in place.
        e.set_id("area", area.id());
        throw;
    }
}
}; // class GeometryFactory
+47 -37
View File
@@ -42,6 +42,8 @@ DEALINGS IN THE SOFTWARE.
* @attention If you include this file, you'll need to link with `libgeos`.
*/
#include <memory>
#include <string>
#include <utility>
#include <geos/geom/Coordinate.h>
@@ -69,8 +71,8 @@ namespace osmium {
struct geos_geometry_error : public geometry_error {
geos_geometry_error() :
geometry_error("geometry creation failed in GEOS library, see nested exception for details") {
geos_geometry_error(const char* message) :
geometry_error(std::string("geometry creation failed in GEOS library: ") + message) {
}
}; // struct geos_geometry_error
@@ -81,8 +83,9 @@ namespace osmium {
class GEOSFactoryImpl {
geos::geom::PrecisionModel m_precision_model;
geos::geom::GeometryFactory m_geos_factory;
std::unique_ptr<const geos::geom::PrecisionModel> m_precision_model;
std::unique_ptr<geos::geom::GeometryFactory> m_our_geos_factory;
geos::geom::GeometryFactory* m_geos_factory;
std::unique_ptr<geos::geom::CoordinateSequence> m_coordinate_sequence;
std::vector<std::unique_ptr<geos::geom::LinearRing>> m_rings;
@@ -96,18 +99,25 @@ namespace osmium {
typedef std::unique_ptr<geos::geom::MultiPolygon> multipolygon_type;
typedef std::unique_ptr<geos::geom::LinearRing> ring_type;
explicit GEOSFactoryImpl(geos::geom::GeometryFactory& geos_factory) :
m_precision_model(nullptr),
m_our_geos_factory(nullptr),
m_geos_factory(&geos_factory) {
}
explicit GEOSFactoryImpl(int srid = -1) :
m_precision_model(),
m_geos_factory(&m_precision_model, srid) {
m_precision_model(new geos::geom::PrecisionModel),
m_our_geos_factory(new geos::geom::GeometryFactory(m_precision_model.get(), srid)),
m_geos_factory(m_our_geos_factory.get()) {
}
/* Point */
point_type make_point(const osmium::geom::Coordinates& xy) const {
try {
return point_type(m_geos_factory.createPoint(geos::geom::Coordinate(xy.x, xy.y)));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
return point_type(m_geos_factory->createPoint(geos::geom::Coordinate(xy.x, xy.y)));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
@@ -115,25 +125,25 @@ namespace osmium {
void linestring_start() {
try {
m_coordinate_sequence.reset(m_geos_factory.getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
m_coordinate_sequence.reset(m_geos_factory->getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void linestring_add_location(const osmium::geom::Coordinates& xy) {
try {
m_coordinate_sequence->add(geos::geom::Coordinate(xy.x, xy.y));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
linestring_type linestring_finish(size_t /* num_points */) {
try {
return linestring_type(m_geos_factory.createLineString(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
return linestring_type(m_geos_factory->createLineString(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
@@ -154,50 +164,50 @@ namespace osmium {
std::transform(std::next(m_rings.begin(), 1), m_rings.end(), std::back_inserter(*inner_rings), [](std::unique_ptr<geos::geom::LinearRing>& r) {
return r.release();
});
m_polygons.emplace_back(m_geos_factory.createPolygon(m_rings[0].release(), inner_rings));
m_polygons.emplace_back(m_geos_factory->createPolygon(m_rings[0].release(), inner_rings));
m_rings.clear();
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void multipolygon_outer_ring_start() {
try {
m_coordinate_sequence.reset(m_geos_factory.getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
m_coordinate_sequence.reset(m_geos_factory->getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void multipolygon_outer_ring_finish() {
try {
m_rings.emplace_back(m_geos_factory.createLinearRing(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
m_rings.emplace_back(m_geos_factory->createLinearRing(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void multipolygon_inner_ring_start() {
try {
m_coordinate_sequence.reset(m_geos_factory.getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
m_coordinate_sequence.reset(m_geos_factory->getCoordinateSequenceFactory()->create(static_cast<size_t>(0), 2));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void multipolygon_inner_ring_finish() {
try {
m_rings.emplace_back(m_geos_factory.createLinearRing(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
m_rings.emplace_back(m_geos_factory->createLinearRing(m_coordinate_sequence.release()));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
void multipolygon_add_location(const osmium::geom::Coordinates& xy) {
try {
m_coordinate_sequence->add(geos::geom::Coordinate(xy.x, xy.y));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
@@ -208,9 +218,9 @@ namespace osmium {
return p.release();
});
m_polygons.clear();
return multipolygon_type(m_geos_factory.createMultiPolygon(polygons));
} catch (geos::util::GEOSException&) {
THROW(osmium::geos_geometry_error());
return multipolygon_type(m_geos_factory->createMultiPolygon(polygons));
} catch (geos::util::GEOSException& e) {
THROW(osmium::geos_geometry_error(e.what()));
}
}
@@ -47,6 +47,7 @@ namespace osmium {
namespace detail {
constexpr double earth_radius_for_epsg3857 = 6378137.0;
constexpr double max_coordinate_epsg3857 = 20037508.34;
constexpr inline double lon_to_x(double lon) {
return earth_radius_for_epsg3857 * deg_to_rad(lon);
@@ -0,0 +1,190 @@
#ifndef OSMIUM_GEOM_RAPID_GEOJSON_HPP
#define OSMIUM_GEOM_RAPID_GEOJSON_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <osmium/geom/coordinates.hpp>
#include <osmium/geom/factory.hpp>
namespace osmium {
namespace geom {
namespace detail {
/**
 * Geometry factory implementation that writes GeoJSON geometries
 * through a RapidJSON (https://github.com/miloyip/rapidjson) style
 * JSON writer.
 *
 * All geometry types are void: nothing is returned, the output is
 * produced as a side effect on the writer handed to the constructor.
 * Each geometry is written as a "geometry" key followed by an object
 * with the members "type" and "coordinates".
 */
template <class TWriter>
class RapidGeoJSONFactoryImpl {

    TWriter* m_writer;

    // Write a single position as a two-element array: [x, y].
    void write_position(const osmium::geom::Coordinates& xy) const {
        m_writer->StartArray();
        m_writer->Double(xy.x);
        m_writer->Double(xy.y);
        m_writer->EndArray();
    }

public:

    using point_type        = void;
    using linestring_type   = void;
    using polygon_type      = void;
    using multipolygon_type = void;
    using ring_type         = void;

    RapidGeoJSONFactoryImpl(TWriter& writer) :
        m_writer(&writer) {
    }

    /* Point */

    // { "type": "Point", "coordinates": [100.0, 0.0] }
    point_type make_point(const osmium::geom::Coordinates& xy) const {
        m_writer->String("geometry");
        m_writer->StartObject();
        m_writer->String("type");
        m_writer->String("Point");
        m_writer->String("coordinates");
        write_position(xy);
        m_writer->EndObject();
    }

    /* LineString */

    // { "type": "LineString", "coordinates": [ [100.0, 0.0], [101.0, 1.0] ] }
    void linestring_start() {
        m_writer->String("geometry");
        m_writer->StartObject();
        m_writer->String("type");
        m_writer->String("LineString");
        m_writer->String("coordinates");
        m_writer->StartArray(); // list of positions
    }

    void linestring_add_location(const osmium::geom::Coordinates& xy) {
        write_position(xy);
    }

    linestring_type linestring_finish(size_t /* num_points */) {
        m_writer->EndArray(); // list of positions
        m_writer->EndObject();
    }

    /* Polygon */

    // { "type": "Polygon", "coordinates": [[[100.0, 0.0], [101.0, 1.0]]] }
    void polygon_start() {
        m_writer->String("geometry");
        m_writer->StartObject();
        m_writer->String("type");
        m_writer->String("Polygon");
        m_writer->String("coordinates");
        m_writer->StartArray(); // list of rings
        m_writer->StartArray(); // the one ring of this polygon
    }

    void polygon_add_location(const osmium::geom::Coordinates& xy) {
        write_position(xy);
    }

    polygon_type polygon_finish(size_t /* num_points */) {
        m_writer->EndArray(); // the one ring of this polygon
        m_writer->EndArray(); // list of rings
        m_writer->EndObject();
    }

    /* MultiPolygon */

    void multipolygon_start() {
        m_writer->String("geometry");
        m_writer->StartObject();
        m_writer->String("type");
        m_writer->String("MultiPolygon");
        m_writer->String("coordinates");
        m_writer->StartArray(); // list of polygons
    }

    // Each polygon is an array of rings.
    void multipolygon_polygon_start() {
        m_writer->StartArray();
    }

    void multipolygon_polygon_finish() {
        m_writer->EndArray();
    }

    // Each ring (outer or inner) is an array of positions.
    void multipolygon_outer_ring_start() {
        m_writer->StartArray();
    }

    void multipolygon_outer_ring_finish() {
        m_writer->EndArray();
    }

    void multipolygon_inner_ring_start() {
        m_writer->StartArray();
    }

    void multipolygon_inner_ring_finish() {
        m_writer->EndArray();
    }

    void multipolygon_add_location(const osmium::geom::Coordinates& xy) {
        write_position(xy);
    }

    multipolygon_type multipolygon_finish() {
        m_writer->EndArray(); // list of polygons
        m_writer->EndObject();
    }

}; // class RapidGeoJSONFactoryImpl
} // namespace detail
/**
 * Geometry factory that uses detail::RapidGeoJSONFactoryImpl<TWriter>
 * as its implementation. TProjection defaults to IdentityProjection.
 */
template <class TWriter, class TProjection = IdentityProjection>
using RapidGeoJSONFactory = GeometryFactory<detail::RapidGeoJSONFactoryImpl<TWriter>, TProjection>;
} // namespace geom
} // namespace osmium
#endif // OSMIUM_GEOM_RAPID_GEOJSON_HPP
+101
View File
@@ -0,0 +1,101 @@
#ifndef OSMIUM_GEOM_TILE_HPP
#define OSMIUM_GEOM_TILE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <osmium/geom/mercator_projection.hpp>
namespace osmium {
namespace geom {
namespace detail {
/**
 * Clamp a value to the closed interval [min, max].
 *
 * The lower bound is checked first, so min is returned whenever
 * value < min, even if the bounds are given in the wrong order.
 *
 * @returns min if value < min, otherwise max if value > max,
 *          otherwise value itself.
 */
template <typename T>
inline T restrict_to_range(T value, T min, T max) {
    return (value < min) ? min
         : (value > max) ? max
         : value;
}
} // namespace detail
/**
 * A tile in the usual Mercator projection.
 *
 * A tile is identified by its zoom level z and its column x and row y
 * at that zoom level.
 */
struct Tile {

    uint32_t x;
    uint32_t y;
    uint32_t z;

    /**
     * Create a tile from zoom level and tile coordinates. The values
     * are stored as given; no range check is done.
     */
    explicit Tile(uint32_t zoom, uint32_t tx, uint32_t ty) noexcept : x(tx), y(ty), z(zoom) {
    }

    /**
     * Create the tile at the given zoom level that contains the given
     * location.
     *
     * The location is converted to Mercator coordinates and the
     * resulting tile coordinates are clamped to [0, 2^zoom - 1].
     *
     * @param zoom Zoom level. Levels above 32 cannot be represented
     *             in the 32 bit tile coordinates and are not supported.
     * @param location The location the tile should contain.
     */
    explicit Tile(uint32_t zoom, const osmium::Location& location) :
        z(zoom) {
        const osmium::geom::Coordinates c = lonlat_to_mercator(location);

        // Number of tiles per axis. 64 bit, so that the shift result is
        // not narrowed into a negative 32 bit value for zoom >= 31.
        const int64_t n = int64_t(1) << zoom;
        const double scale = detail::max_coordinate_epsg3857 * 2 / n;
        const double max_index = static_cast<double>(n - 1);

        // Clamp while still in floating point and only then narrow to
        // an integer, so the conversion never sees an out-of-range value.
        x = static_cast<uint32_t>(detail::restrict_to_range<double>((c.x + detail::max_coordinate_epsg3857) / scale, 0.0, max_index));
        y = static_cast<uint32_t>(detail::restrict_to_range<double>((detail::max_coordinate_epsg3857 - c.y) / scale, 0.0, max_index));
    }

}; // struct Tile
/// Tiles are equal if zoom level, x, and y all agree.
inline bool operator==(const Tile& a, const Tile& b) {
    if (a.z != b.z) {
        return false;
    }
    return a.x == b.x && a.y == b.y;
}
/// Tiles differ if any of zoom level, x, or y differ.
inline bool operator!=(const Tile& a, const Tile& b) {
    return a.z != b.z || a.x != b.x || a.y != b.y;
}
/**
 * Arbitrary but consistent ordering of tiles, for use in std::map
 * and similar containers: tiles are ordered by zoom level first,
 * then by x, then by y.
 */
inline bool operator<(const Tile& a, const Tile& b) {
    if (a.z != b.z) {
        return a.z < b.z;
    }
    if (a.x != b.x) {
        return a.x < b.x;
    }
    return a.y < b.y;
}
} // namespace geom
} // namespace osmium
#endif // OSMIUM_GEOM_TILE_HPP
+1 -9
View File
@@ -37,18 +37,10 @@ DEALINGS IN THE SOFTWARE.
#include <cstdint>
#include <string>
// Windows is only available for little endian architectures
// http://stackoverflow.com/questions/6449468/can-i-safely-assume-that-windows-installations-will-always-be-little-endian
#if !defined(_WIN32) && !defined(__APPLE__)
# include <endian.h>
#else
# define __LITTLE_ENDIAN 1234
# define __BYTE_ORDER __LITTLE_ENDIAN
#endif
#include <osmium/geom/coordinates.hpp>
#include <osmium/geom/factory.hpp>
#include <osmium/util/cast.hpp>
#include <osmium/util/endian.hpp>
namespace osmium {
@@ -0,0 +1,83 @@
#ifndef OSMIUM_INDEX_BOOL_VECTOR_HPP
#define OSMIUM_INDEX_BOOL_VECTOR_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <type_traits>
#include <vector>
namespace osmium {
namespace index {
/**
 * Index storing one bit for each Id. The index automatically scales
 * with the Ids stored. Default value is 'false'. Storage uses
 * std::vector<bool> and needs a minimum of memory if the Ids are
 * dense.
 *
 * @tparam T Type of the Ids. Must be an unsigned integral type.
 */
template <typename T>
class BoolVector {

    static_assert(std::is_unsigned<T>::value, "Needs unsigned type");

    std::vector<bool> m_bits;

public:

    BoolVector() = default;
    BoolVector(const BoolVector&) = default;
    BoolVector(BoolVector&&) = default;
    BoolVector& operator=(const BoolVector&) = default;
    BoolVector& operator=(BoolVector&&) = default;
    ~BoolVector() = default;

    /**
     * Set the bit for the given Id, growing the storage if needed.
     *
     * @param id The Id.
     * @param value The value to store (defaults to true).
     */
    void set(T id, bool value = true) {
        if (m_bits.size() <= id) {
            using size_type = std::vector<bool>::size_type;

            // Grow well past the requested Id so that setting a run of
            // increasing Ids does not resize on every call.
            constexpr size_type growth_slack = 1024 * 1024;

            // Do the addition in the size type of the vector, not in T:
            // with a 32 bit T the sum would otherwise wrap around for
            // Ids near the maximum and the vector would end up too
            // small for the write below.
            m_bits.resize(static_cast<size_type>(id) + growth_slack);
        }
        m_bits[id] = value;
    }

    /**
     * Get the bit for the given Id.
     *
     * @returns The stored value, or false if the Id is beyond the
     *          current size of the storage.
     */
    bool get(T id) const {
        return id < m_bits.size() && m_bits[id];
    }

}; // class BoolVector
} // namespace index
} // namespace osmium
#endif // OSMIUM_INDEX_BOOL_VECTOR_HPP
@@ -39,8 +39,6 @@ DEALINGS IN THE SOFTWARE.
#include <fcntl.h>
#include <stdexcept>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
#include <vector>
namespace osmium {
@@ -35,9 +35,6 @@ DEALINGS IN THE SOFTWARE.
#ifdef __linux__
#include <cstddef>
#include <osmium/index/detail/typed_mmap.hpp>
#include <osmium/index/detail/mmap_vector_base.hpp>
namespace osmium {
@@ -45,26 +42,16 @@ namespace osmium {
namespace detail {
/**
* This class looks and behaves like STL vector, but uses mmap internally.
* This class looks and behaves like STL vector, but uses mmap
* internally.
*/
template <typename T>
class mmap_vector_anon : public mmap_vector_base<T, mmap_vector_anon> {
class mmap_vector_anon : public mmap_vector_base<T> {
public:
mmap_vector_anon() :
mmap_vector_base<T, osmium::detail::mmap_vector_anon>(
-1,
osmium::detail::mmap_vector_size_increment,
0,
osmium::detail::typed_mmap<T>::map(osmium::detail::mmap_vector_size_increment)) {
}
void reserve(size_t new_capacity) {
if (new_capacity > this->capacity()) {
this->data(osmium::detail::typed_mmap<T>::remap(this->data(), this->capacity(), new_capacity));
this->m_capacity = new_capacity;
}
mmap_vector_base<T>() {
}
}; // class mmap_vector_anon
@@ -34,11 +34,10 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstddef>
#include <new>
#include <new> // IWYU pragma: keep
#include <stdexcept>
#include <osmium/index/detail/typed_mmap.hpp>
#include <osmium/util/compatibility.hpp>
#include <osmium/util/memory_mapping.hpp>
namespace osmium {
@@ -48,40 +47,29 @@ namespace osmium {
/**
* This is a base class for implementing classes that look like
* STL vector but use mmap internally. This class can not be used
* on it's own. Use the derived classes mmap_vector_anon or
* mmap_vector_file.
* STL vector but use mmap internally. Do not use this class itself,
* use the derived classes mmap_vector_anon or mmap_vector_file.
*/
template <typename T, template <typename> class TDerived>
template <typename T>
class mmap_vector_base {
protected:
int m_fd;
size_t m_capacity;
size_t m_size;
T* m_data;
explicit mmap_vector_base(int fd, size_t capacity, size_t size, T* data) noexcept :
m_fd(fd),
m_capacity(capacity),
m_size(size),
m_data(data) {
}
explicit mmap_vector_base(int fd, size_t capacity, size_t size) :
m_fd(fd),
m_capacity(capacity),
m_size(size),
m_data(osmium::detail::typed_mmap<T>::grow_and_map(capacity, m_fd)) {
}
void data(T* data) {
m_data = data;
}
osmium::util::TypedMemoryMapping<T> m_mapping;
public:
explicit mmap_vector_base(int fd, size_t capacity, size_t size = 0) :
m_size(size),
m_mapping(capacity, osmium::util::MemoryMapping::mapping_mode::write_shared, fd) {
}
explicit mmap_vector_base(size_t capacity = mmap_vector_size_increment) :
m_size(0),
m_mapping(capacity) {
}
typedef T value_type;
typedef T& reference;
typedef const T& const_reference;
@@ -90,12 +78,14 @@ namespace osmium {
typedef T* iterator;
typedef const T* const_iterator;
~mmap_vector_base() {
osmium::detail::typed_mmap<T>::unmap(m_data, m_capacity);
~mmap_vector_base() = default;
void close() {
m_mapping.unmap();
}
size_t capacity() const noexcept {
return m_capacity;
return m_mapping.size();
}
size_t size() const noexcept {
@@ -106,23 +96,23 @@ namespace osmium {
return m_size == 0;
}
const T* data() const noexcept {
return m_data;
const T* data() const {
return m_mapping.begin();
}
T* data() noexcept {
return m_data;
T* data() {
return m_mapping.begin();
}
T& operator[](size_t n) {
return m_data[n];
return data()[n];
}
T at(size_t n) const {
if (n >= m_size) {
throw std::out_of_range("out of range");
}
return m_data[n];
return data()[n];
}
void clear() noexcept {
@@ -134,16 +124,22 @@ namespace osmium {
}
void push_back(const T& value) {
if (m_size >= m_capacity) {
if (m_size >= capacity()) {
resize(m_size+1);
}
m_data[m_size] = value;
data()[m_size] = value;
++m_size;
}
void reserve(size_t new_capacity) {
if (new_capacity > capacity()) {
m_mapping.resize(new_capacity);
}
}
void resize(size_t new_size) {
if (new_size > capacity()) {
static_cast<TDerived<T>*>(this)->reserve(new_size + osmium::detail::mmap_vector_size_increment);
reserve(new_size + osmium::detail::mmap_vector_size_increment);
}
if (new_size > size()) {
new (data() + size()) T[new_size - size()];
@@ -152,27 +148,27 @@ namespace osmium {
}
iterator begin() noexcept {
return m_data;
return data();
}
iterator end() noexcept {
return m_data + m_size;
return data() + m_size;
}
const_iterator begin() const noexcept {
return m_data;
return data();
}
const_iterator end() const noexcept {
return m_data + m_size;
return data() + m_size;
}
const_iterator cbegin() noexcept {
return m_data;
const_iterator cbegin() const noexcept {
return data();
}
const_iterator cend() noexcept {
return m_data + m_size;
const_iterator cend() const noexcept {
return data() + m_size;
}
}; // class mmap_vector_base
@@ -33,11 +33,9 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstddef>
#include <osmium/index/detail/typed_mmap.hpp>
#include <osmium/index/detail/mmap_vector_base.hpp>
#include <osmium/index/detail/tmpfile.hpp>
#include <osmium/util/file.hpp>
namespace osmium {
@@ -48,32 +46,19 @@ namespace osmium {
* internally.
*/
template <typename T>
class mmap_vector_file : public mmap_vector_base<T, mmap_vector_file> {
class mmap_vector_file : public mmap_vector_base<T> {
public:
explicit mmap_vector_file() :
mmap_vector_base<T, osmium::detail::mmap_vector_file>(
explicit mmap_vector_file() : mmap_vector_base<T>(
osmium::detail::create_tmp_file(),
osmium::detail::mmap_vector_size_increment,
0) {
osmium::detail::mmap_vector_size_increment) {
}
explicit mmap_vector_file(int fd) :
mmap_vector_base<T, osmium::detail::mmap_vector_file>(
explicit mmap_vector_file(int fd) : mmap_vector_base<T>(
fd,
osmium::detail::typed_mmap<T>::file_size(fd) == 0 ?
osmium::detail::mmap_vector_size_increment :
osmium::detail::typed_mmap<T>::file_size(fd),
osmium::detail::typed_mmap<T>::file_size(fd)) {
}
void reserve(size_t new_capacity) {
if (new_capacity > this->capacity()) {
typed_mmap<T>::unmap(this->data(), this->capacity());
this->data(typed_mmap<T>::grow_and_map(new_capacity, this->m_fd));
this->m_capacity = new_capacity;
}
osmium::util::file_size(fd) / sizeof(T),
osmium::util::file_size(fd) / sizeof(T)) {
}
}; // class mmap_vector_file
@@ -1,229 +0,0 @@
#ifndef OSMIUM_INDEX_DETAIL_TYPED_MMAP_HPP
#define OSMIUM_INDEX_DETAIL_TYPED_MMAP_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cerrno>
#include <cstddef>
#include <stdexcept>
#include <system_error>
#include <sys/stat.h>
#ifndef _WIN32
# include <sys/mman.h>
#else
# include <mmap_for_windows.hpp>
#endif
#ifndef _MSC_VER
# include <unistd.h>
#else
# define ftruncate _chsize
#endif
// for bsd systems
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
#include <osmium/util/cast.hpp>
namespace osmium {
/**
* @brief Namespace for Osmium internal use
*/
namespace detail {
/**
* This is a helper class for working with memory mapped files and
* anonymous shared memory. It wraps the necessary system calls
* adding:
* - error checking: all functions throw exceptions where needed
* - internal casts and size calculations allow use with user defined
* type T instead of void*
*
* This class only contains static functions. It should never be
* instantiated.
*
* @tparam T Type of objects we want to store.
*/
template <typename T>
class typed_mmap {

    /**
     * Turn the raw result of mmap(2)/mremap(2) into a typed pointer.
     *
     * @param mapping Value returned by the system call
     * @param error_message Text used for the exception if the call failed
     * @returns mapping converted to T*
     * @throws std::system_error With the current errno if mapping is MAP_FAILED
     */
    static T* checked(void* mapping, const char* error_message) {
// MAP_FAILED is a macro from the system headers; the comparison is
// wrapped so that it does not trigger the old-style-cast warning.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
        if (mapping == MAP_FAILED) {
            throw std::system_error(errno, std::system_category(), error_message);
        }
#pragma GCC diagnostic pop
        return reinterpret_cast<T*>(mapping);
    }

public:

    /**
     * Create an anonymous private memory mapping that is readable and
     * writable and large enough for size objects of type T.
     *
     * No constructor is run for any object in the returned memory.
     *
     * @param size Number of objects of type T the mapping must hold
     * @returns Pointer to the mapped memory
     * @throws std::system_error If mmap(2) failed
     */
    static T* map(size_t size) {
        return checked(::mmap(nullptr, sizeof(T) * size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
                       "mmap failed");
    }

    /**
     * Create a shared memory mapping of a file, starting at offset 0 and
     * large enough for size objects of type T. The file must already
     * have at least the required size.
     *
     * No constructor is run for any object in the returned memory.
     *
     * @param size Number of objects of type T the mapping must hold
     * @param fd File descriptor of the file to map
     * @param write True to map the memory writable in addition to readable
     * @returns Pointer to the mapped memory
     * @throws std::system_error If mmap(2) failed
     */
    static T* map(size_t size, int fd, bool write = false) {
        const int protection = write ? (PROT_READ | PROT_WRITE) : PROT_READ;
        return checked(::mmap(nullptr, sizeof(T) * size, protection, MAP_SHARED, fd, 0),
                       "mmap failed");
    }

// mremap(2) is only available on linux systems
#ifdef __linux__
    /**
     * Resize a memory mapping created with map(). The mapping is allowed
     * to move, so always continue with the returned pointer.
     *
     * No constructor is run for any object in the returned memory.
     *
     * @param data Pointer to the current mapping (as returned by map())
     * @param old_size Number of objects the current mapping has space for
     * @param new_size Number of objects the new mapping must have space for
     * @returns Pointer to the resized mapping
     * @throws std::system_error If mremap(2) failed
     */
    static T* remap(T* data, size_t old_size, size_t new_size) {
        return checked(::mremap(reinterpret_cast<void*>(data), sizeof(T) * old_size, sizeof(T) * new_size, MREMAP_MAYMOVE),
                       "mremap failed");
    }
#endif

    /**
     * Release memory obtained from a map() call.
     *
     * No destructor is run for the objects in this memory.
     *
     * @param data Pointer to the mapped memory
     * @param size Number of objects of type T the mapping holds
     * @throws std::system_error If munmap(2) failed
     */
    static void unmap(T* data, size_t size) {
        const int result = ::munmap(reinterpret_cast<void*>(data), sizeof(T) * size);
        if (result != 0) {
            throw std::system_error(errno, std::system_category(), "munmap failed");
        }
    }

    /**
     * Get the number of objects of type T stored in a file.
     *
     * @param fd File descriptor
     * @returns Size of the file divided by sizeof(T)
     * @throws std::system_error If fstat(2) failed
     * @throws std::length_error If the file size isn't a multiple of sizeof(T)
     */
    static size_t file_size(int fd) {
        struct stat file_info;
        if (fstat(fd, &file_info) < 0) {
            throw std::system_error(errno, std::system_category(), "fstat failed");
        }

        const size_t bytes = static_cast<size_t>(file_info.st_size);
        if (bytes % sizeof(T) != 0) {
            throw std::length_error("file size has to be multiple of object size");
        }
        return bytes / sizeof(T);
    }

    /**
     * Make sure the file has space for at least new_size objects of
     * type T. A file that is already large enough is left alone, so the
     * file is never shrunk.
     *
     * @param new_size Number of objects of type T that must fit into the file
     * @param fd File descriptor
     * @throws std::system_error If fstat(2) or ftruncate(2) failed
     * @throws std::length_error If the current file size isn't a multiple of sizeof(T)
     */
    static void grow_file(size_t new_size, int fd) {
        if (file_size(fd) >= new_size) {
            return; // already big enough
        }
        if (::ftruncate(fd, static_cast_with_assert<off_t>(sizeof(T) * new_size)) < 0) {
            throw std::system_error(errno, std::system_category(), "ftruncate failed");
        }
    }

    /**
     * Grow the file to the given size (if it is smaller) and create a
     * writable shared mapping of it.
     *
     * @param size Number of objects of type T that must fit into the file
     * @param fd File descriptor
     * @returns Pointer to the mapped memory
     * @throws Errors thrown by grow_file() or map()
     */
    static T* grow_and_map(size_t size, int fd) {
        grow_file(size, fd);
        return map(size, fd, true);
    }

}; // class typed_mmap
} // namespace detail
} // namespace osmium
#endif // OSMIUM_INDEX_DETAIL_TYPED_MMAP_HPP
@@ -68,7 +68,7 @@ namespace osmium {
m_vector(fd) {
}
~VectorBasedDenseMap() {}
~VectorBasedDenseMap() = default;
void reserve(const size_t size) override final {
m_vector.reserve(size);
@@ -97,6 +97,10 @@ namespace osmium {
return m_vector.size();
}
size_t byte_size() const {
return m_vector.size() * sizeof(element_type);
}
size_t used_memory() const override final {
return sizeof(TValue) * size();
}
@@ -106,6 +110,10 @@ namespace osmium {
m_vector.shrink_to_fit();
}
void dump_as_array(const int fd) override final {
osmium::io::detail::reliable_write(fd, reinterpret_cast<const char*>(m_vector.data()), byte_size());
}
iterator begin() {
return m_vector.begin();
}
@@ -67,6 +67,16 @@ namespace osmium {
public:
VectorBasedSparseMultimap() :
m_vector() {
}
explicit VectorBasedSparseMultimap(int fd) :
m_vector(fd) {
}
~VectorBasedSparseMultimap() = default;
void set(const TId id, const TValue value) override final {
m_vector.push_back(element_type(id, value));
}
@@ -141,6 +151,30 @@ namespace osmium {
osmium::io::detail::reliable_write(fd, reinterpret_cast<const char*>(m_vector.data()), byte_size());
}
iterator begin() {
return m_vector.begin();
}
iterator end() {
return m_vector.end();
}
const_iterator cbegin() const {
return m_vector.cbegin();
}
const_iterator cend() const {
return m_vector.cend();
}
const_iterator begin() const {
return m_vector.cbegin();
}
const_iterator end() const {
return m_vector.cend();
}
}; // class VectorBasedSparseMultimap
} // namespace multimap
+1 -1
View File
@@ -67,7 +67,7 @@ namespace osmium {
template <typename TKey>
OSMIUM_NORETURN void not_found_error(TKey key) {
std::stringstream s;
s << "id " << key << " no found";
s << "id " << key << " not found";
throw not_found(s.str());
}
+14 -2
View File
@@ -148,7 +148,11 @@ namespace osmium {
}
virtual void dump_as_list(const int /*fd*/) {
std::runtime_error("can't dump as list");
throw std::runtime_error("can't dump as list");
}
virtual void dump_as_array(const int /*fd*/) {
throw std::runtime_error("can't dump as array");
}
}; // class Map
@@ -195,6 +199,10 @@ namespace osmium {
return m_callbacks.emplace(map_type_name, func).second;
}
/**
 * Check whether a map type with the given name has been registered.
 *
 * @param map_type_name Name of the map type to look for
 * @returns true if there is an entry for this name
 */
bool has_map_type(const std::string& map_type_name) const {
    const auto registered = m_callbacks.count(map_type_name);
    return registered != 0;
}
std::vector<std::string> map_types() const {
std::vector<std::string> result;
@@ -242,9 +250,13 @@ namespace osmium {
});
}
#define OSMIUM_CONCATENATE_DETAIL_(x, y) x##y
#define OSMIUM_CONCATENATE_(x, y) OSMIUM_CONCATENATE_DETAIL_(x, y)
#define OSMIUM_MAKE_UNIQUE_(x) OSMIUM_CONCATENATE_(x, __COUNTER__)
#define REGISTER_MAP(id, value, klass, name) \
namespace { \
const bool registered_index_map_##name = osmium::index::register_map<id, value, klass>(#name); \
const bool OSMIUM_MAKE_UNIQUE_(registered_index_map_##name) = osmium::index::register_map<id, value, klass>(#name); \
}
} // namespace index
@@ -35,7 +35,7 @@ DEALINGS IN THE SOFTWARE.
#ifdef __linux__
#include <osmium/index/detail/mmap_vector_anon.hpp>
#include <osmium/index/detail/mmap_vector_anon.hpp> // IWYU pragma: keep
#include <osmium/index/detail/vector_map.hpp>
#define OSMIUM_HAS_INDEX_MAP_DENSE_MMAP_ARRAY
@@ -33,7 +33,7 @@ DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <algorithm> // IWYU pragma: keep (for std::copy)
#include <cstddef>
#include <iterator>
#include <map>
+2 -2
View File
@@ -39,8 +39,8 @@ DEALINGS IN THE SOFTWARE.
* Include this file if you want to read all kinds of OSM files.
*
* @attention If you include this file, you'll need to link with
* `libprotobuf-lite`, `libosmpbf`, `ws2_32` (Windows only),
* `libexpat`, `libz`, `libbz2`, and enable multithreading.
* `ws2_32` (Windows only), `libexpat`, `libz`, `libbz2`,
* and enable multithreading.
*/
#include <osmium/io/any_compression.hpp> // IWYU pragma: export
+3 -2
View File
@@ -39,12 +39,13 @@ DEALINGS IN THE SOFTWARE.
* Include this file if you want to write all kinds of OSM files.
*
* @attention If you include this file, you'll need to link with
* `libprotobuf-lite`, `libosmpbf`, `ws2_32` (Windows only),
* `libz`, `libbz2`, and enable multithreading.
* `ws2_32` (Windows only), `libz`, `libbz2`, and enable
* multithreading.
*/
#include <osmium/io/any_compression.hpp> // IWYU pragma: export
#include <osmium/io/debug_output.hpp> // IWYU pragma: export
#include <osmium/io/opl_output.hpp> // IWYU pragma: export
#include <osmium/io/pbf_output.hpp> // IWYU pragma: export
#include <osmium/io/xml_output.hpp> // IWYU pragma: export
@@ -274,11 +274,16 @@ namespace osmium {
namespace {
// we want the register_compression() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_bzip2_compression = osmium::io::CompressionFactory::instance().register_compression(osmium::io::file_compression::bzip2,
[](int fd) { return new osmium::io::Bzip2Compressor(fd); },
[](int fd) { return new osmium::io::Bzip2Decompressor(fd); },
[](const char* buffer, size_t size) { return new osmium::io::Bzip2BufferDecompressor(buffer, size); }
);
#pragma GCC diagnostic pop
} // anonymous namespace
@@ -266,11 +266,16 @@ namespace osmium {
namespace {
// we want the register_compression() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_no_compression = osmium::io::CompressionFactory::instance().register_compression(osmium::io::file_compression::none,
[](int fd) { return new osmium::io::NoCompressor(fd); },
[](int fd) { return new osmium::io::NoDecompressor(fd); },
[](const char* buffer, size_t size) { return new osmium::io::NoDecompressor(buffer, size); }
);
#pragma GCC diagnostic pop
} // anonymous namespace
@@ -0,0 +1,39 @@
#ifndef OSMIUM_IO_DEBUG_OUTPUT_HPP
#define OSMIUM_IO_DEBUG_OUTPUT_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <osmium/io/writer.hpp> // IWYU pragma: export
#include <osmium/io/detail/debug_output_format.hpp> // IWYU pragma: export
#endif // OSMIUM_IO_DEBUG_OUTPUT_HPP
@@ -0,0 +1,482 @@
#ifndef OSMIUM_IO_DETAIL_DEBUG_OUTPUT_FORMAT_HPP
#define OSMIUM_IO_DETAIL_DEBUG_OUTPUT_FORMAT_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <chrono>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <future>
#include <iterator>
#include <memory>
#include <ratio>
#include <string>
#include <thread>
#include <utility>
#include <utf8.h>
#include <osmium/handler.hpp>
#include <osmium/io/detail/output_format.hpp>
#include <osmium/io/file_format.hpp>
#include <osmium/memory/buffer.hpp>
#include <osmium/memory/collection.hpp>
#include <osmium/osm/box.hpp>
#include <osmium/osm/changeset.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/object.hpp>
#include <osmium/osm/relation.hpp>
#include <osmium/osm/tag.hpp>
#include <osmium/osm/timestamp.hpp>
#include <osmium/osm/way.hpp>
#include <osmium/thread/pool.hpp>
#include <osmium/util/minmax.hpp>
#include <osmium/visitor.hpp>
namespace osmium {
namespace io {
class File;
namespace detail {
// Terminal escape sequences (ESC '[' <codes> 'm') used to colorize the
// debug output. They are only written when color output is enabled.
// color_reset switches all attributes back to the terminal default.
constexpr const char* color_bold = "\x1b[1m";
constexpr const char* color_black = "\x1b[30m";
// "gray" is the black foreground code (30) combined with the bold code (1)
constexpr const char* color_gray = "\x1b[30;1m";
constexpr const char* color_red = "\x1b[31m";
constexpr const char* color_green = "\x1b[32m";
constexpr const char* color_yellow = "\x1b[33m";
constexpr const char* color_blue = "\x1b[34m";
constexpr const char* color_magenta = "\x1b[35m";
constexpr const char* color_cyan = "\x1b[36m";
constexpr const char* color_white = "\x1b[37m";
constexpr const char* color_reset = "\x1b[0m";
/**
* Writes out one buffer with OSM data in Debug format.
*/
class DebugOutputBlock : public osmium::handler::Handler {
static constexpr size_t tmp_buffer_size = 50;
std::shared_ptr<osmium::memory::Buffer> m_input_buffer;
std::shared_ptr<std::string> m_out;
char m_tmp_buffer[tmp_buffer_size+1];
bool m_add_metadata;
bool m_use_color;
template <typename... TArgs>
void output_formatted(const char* format, TArgs&&... args) {
#ifndef NDEBUG
int len =
#endif
#ifndef _MSC_VER
snprintf(m_tmp_buffer, tmp_buffer_size, format, std::forward<TArgs>(args)...);
#else
_snprintf(m_tmp_buffer, tmp_buffer_size, format, std::forward<TArgs>(args)...);
#endif
assert(len > 0 && static_cast<size_t>(len) < tmp_buffer_size);
*m_out += m_tmp_buffer;
}
void append_encoded_string(const char* data) {
const char* end = data + std::strlen(data);
while (data != end) {
const char* last = data;
uint32_t c = utf8::next(data, end);
// This is a list of Unicode code points that we let
// through instead of escaping them. It is incomplete
// and can be extended later.
// Generally we don't want to let through any
// non-printing characters.
if ((0x0020 <= c && c <= 0x0021) ||
(0x0023 <= c && c <= 0x003b) ||
(0x003d == c) ||
(0x003f <= c && c <= 0x007e) ||
(0x00a1 <= c && c <= 0x00ac) ||
(0x00ae <= c && c <= 0x05ff)) {
m_out->append(last, data);
} else {
write_color(color_red);
output_formatted("<U+%04X>", c);
write_color(color_blue);
}
}
}
void write_color(const char* color) {
if (m_use_color) {
*m_out += color;
}
}
void write_string(const char* string) {
*m_out += '"';
write_color(color_blue);
append_encoded_string(string);
write_color(color_reset);
*m_out += '"';
}
void write_object_type(const char* object_type, bool visible = true) {
if (visible) {
write_color(color_bold);
} else {
write_color(color_white);
}
*m_out += object_type;
write_color(color_reset);
*m_out += ' ';
}
void write_fieldname(const char* name) {
*m_out += " ";
write_color(color_cyan);
*m_out += name;
write_color(color_reset);
*m_out += ": ";
}
void write_error(const char* msg) {
write_color(color_red);
*m_out += msg;
write_color(color_reset);
}
void write_meta(const osmium::OSMObject& object) {
output_formatted("%" PRId64 "\n", object.id());
if (m_add_metadata) {
write_fieldname("version");
output_formatted(" %d", object.version());
if (object.visible()) {
*m_out += " visible\n";
} else {
write_error(" deleted\n");
}
write_fieldname("changeset");
output_formatted("%d\n", object.changeset());
write_fieldname("timestamp");
*m_out += object.timestamp().to_iso();
output_formatted(" (%d)\n", object.timestamp());
write_fieldname("user");
output_formatted(" %d ", object.uid());
write_string(object.user());
*m_out += '\n';
}
}
void write_tags(const osmium::TagList& tags, const char* padding="") {
if (!tags.empty()) {
write_fieldname("tags");
*m_out += padding;
output_formatted(" %d\n", tags.size());
osmium::max_op<int> max;
for (const auto& tag : tags) {
max.update(std::strlen(tag.key()));
}
for (const auto& tag : tags) {
*m_out += " ";
write_string(tag.key());
int spacing = max() - std::strlen(tag.key());
while (spacing--) {
*m_out += " ";
}
*m_out += " = ";
write_string(tag.value());
*m_out += '\n';
}
}
}
void write_location(const osmium::Location& location) {
write_fieldname("lon/lat");
output_formatted(" %.7f,%.7f", location.lon_without_check(), location.lat_without_check());
if (!location.valid()) {
write_error(" INVALID LOCATION!");
}
*m_out += '\n';
}
void write_box(const osmium::Box& box) {
write_fieldname("box l/b/r/t");
if (!box) {
write_error("BOX NOT SET!\n");
return;
}
const auto& bl = box.bottom_left();
const auto& tr = box.top_right();
output_formatted("%.7f,%.7f %.7f,%.7f", bl.lon_without_check(), bl.lat_without_check(), tr.lon_without_check(), tr.lat_without_check());
if (!box.valid()) {
write_error(" INVALID BOX!");
}
*m_out += '\n';
}
public:
explicit DebugOutputBlock(osmium::memory::Buffer&& buffer, bool add_metadata, bool use_color) :
m_input_buffer(std::make_shared<osmium::memory::Buffer>(std::move(buffer))),
m_out(std::make_shared<std::string>()),
m_tmp_buffer(),
m_add_metadata(add_metadata),
m_use_color(use_color) {
}
DebugOutputBlock(const DebugOutputBlock&) = default;
DebugOutputBlock& operator=(const DebugOutputBlock&) = default;
DebugOutputBlock(DebugOutputBlock&&) = default;
DebugOutputBlock& operator=(DebugOutputBlock&&) = default;
~DebugOutputBlock() = default;
std::string operator()() {
osmium::apply(m_input_buffer->cbegin(), m_input_buffer->cend(), *this);
std::string out;
std::swap(out, *m_out);
return out;
}
void node(const osmium::Node& node) {
write_object_type("node", node.visible());
write_meta(node);
if (node.visible()) {
write_location(node.location());
}
write_tags(node.tags());
*m_out += '\n';
}
void way(const osmium::Way& way) {
write_object_type("way", way.visible());
write_meta(way);
write_tags(way.tags());
write_fieldname("nodes");
output_formatted(" %d", way.nodes().size());
if (way.nodes().size() < 2) {
write_error(" LESS THAN 2 NODES!\n");
} else if (way.nodes().size() > 2000) {
write_error(" MORE THAN 2000 NODES!\n");
} else if (way.nodes().is_closed()) {
*m_out += " (closed)\n";
} else {
*m_out += " (open)\n";
}
int width = int(log10(way.nodes().size())) + 1;
int n = 0;
for (const auto& node_ref : way.nodes()) {
output_formatted(" %0*d: %10" PRId64, width, n++, node_ref.ref());
if (node_ref.location().valid()) {
output_formatted(" (%.7f,%.7f)", node_ref.location().lon_without_check(), node_ref.location().lat_without_check());
}
*m_out += '\n';
}
*m_out += '\n';
}
void relation(const osmium::Relation& relation) {
static const char* short_typename[] = { "node", "way ", "rel " };
write_object_type("relation", relation.visible());
write_meta(relation);
write_tags(relation.tags());
write_fieldname("members");
output_formatted(" %d\n", relation.members().size());
int width = int(log10(relation.members().size())) + 1;
int n = 0;
for (const auto& member : relation.members()) {
output_formatted(" %0*d: ", width, n++);
*m_out += short_typename[item_type_to_nwr_index(member.type())];
output_formatted(" %10" PRId64 " ", member.ref());
write_string(member.role());
*m_out += '\n';
}
*m_out += '\n';
}
void changeset(const osmium::Changeset& changeset) {
write_object_type("changeset");
output_formatted("%d\n", changeset.id());
write_fieldname("num changes");
output_formatted("%d", changeset.num_changes());
if (changeset.num_changes() == 0) {
write_error(" NO CHANGES!");
}
*m_out += '\n';
write_fieldname("created at");
*m_out += ' ';
*m_out += changeset.created_at().to_iso();
output_formatted(" (%d)\n", changeset.created_at());
write_fieldname("closed at");
*m_out += " ";
if (changeset.closed()) {
*m_out += changeset.closed_at().to_iso();
output_formatted(" (%d)\n", changeset.closed_at());
} else {
write_error("OPEN!\n");
}
write_fieldname("user");
output_formatted(" %d ", changeset.uid());
write_string(changeset.user());
*m_out += '\n';
write_box(changeset.bounds());
write_tags(changeset.tags(), " ");
*m_out += '\n';
}
}; // DebugOutputBlock
class DebugOutputFormat : public osmium::io::detail::OutputFormat {
bool m_add_metadata;
bool m_use_color;
public:
DebugOutputFormat(const osmium::io::File& file, data_queue_type& output_queue) :
OutputFormat(file, output_queue),
m_add_metadata(file.get("add_metadata") != "false"),
m_use_color(file.get("color") == "true") {
}
DebugOutputFormat(const DebugOutputFormat&) = delete;
DebugOutputFormat& operator=(const DebugOutputFormat&) = delete;
void write_buffer(osmium::memory::Buffer&& buffer) override final {
m_output_queue.push(osmium::thread::Pool::instance().submit(DebugOutputBlock{std::move(buffer), m_add_metadata, m_use_color}));
}
void write_fieldname(std::string& out, const char* name) {
out += " ";
if (m_use_color) {
out += color_cyan;
}
out += name;
if (m_use_color) {
out += color_reset;
}
out += ": ";
}
void write_header(const osmium::io::Header& header) override final {
std::string out;
if (m_use_color) {
out += color_bold;
}
out += "header\n";
if (m_use_color) {
out += color_reset;
}
write_fieldname(out, "multiple object versions");
out += header.has_multiple_object_versions() ? "yes" : "no";
out += '\n';
write_fieldname(out, "bounding boxes");
out += '\n';
for (const auto& box : header.boxes()) {
out += " ";
box.bottom_left().as_string(std::back_inserter(out), ',');
out += " ";
box.top_right().as_string(std::back_inserter(out), ',');
out += '\n';
}
write_fieldname(out, "options");
out += '\n';
for (const auto& opt : header) {
out += " ";
out += opt.first;
out += " = ";
out += opt.second;
out += '\n';
}
out += "\n=============================================\n\n";
std::promise<std::string> promise;
m_output_queue.push(promise.get_future());
promise.set_value(std::move(out));
}
void close() override final {
std::string out;
std::promise<std::string> promise;
m_output_queue.push(promise.get_future());
promise.set_value(out);
}
}; // class DebugOutputFormat
namespace {
// Registers the debug output format with the OutputFormatFactory while
// this variable is initialized. Only the call to register_output_format()
// matters; the variable just stores its result and is never read, which
// is why the unused-variable warning is switched off around it.
// The registered callback creates a new DebugOutputFormat for the given
// file and output queue.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_debug_output = osmium::io::detail::OutputFormatFactory::instance().register_output_format(osmium::io::file_format::debug,
[](const osmium::io::File& file, data_queue_type& output_queue) {
return new osmium::io::detail::DebugOutputFormat(file, output_queue);
});
#pragma GCC diagnostic pop
} // anonymous namespace
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_DEBUG_OUTPUT_FORMAT_HPP
@@ -46,23 +46,7 @@ DEALINGS IN THE SOFTWARE.
#include <thread>
#include <utility>
#include <boost/version.hpp>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-noreturn"
# pragma clang diagnostic ignored "-Wsign-conversion"
#endif
#if BOOST_VERSION >= 104800
# include <boost/regex/pending/unicode_iterator.hpp>
#else
# include <boost_unicode_iterator.hpp>
#endif
#ifdef __clang__
# pragma clang diagnostic pop
#endif
#include <utf8.h>
#include <osmium/handler.hpp>
#include <osmium/io/detail/output_format.hpp>
@@ -103,6 +87,8 @@ namespace osmium {
char m_tmp_buffer[tmp_buffer_size+1];
bool m_add_metadata;
template <typename... TArgs>
void output_formatted(const char* format, TArgs&&... args) {
#ifndef NDEBUG
@@ -117,13 +103,12 @@ namespace osmium {
*m_out += m_tmp_buffer;
}
void append_encoded_string(const std::string& data) {
boost::u8_to_u32_iterator<std::string::const_iterator> it(data.cbegin(), data.cbegin(), data.cend());
boost::u8_to_u32_iterator<std::string::const_iterator> end(data.cend(), data.cend(), data.cend());
boost::utf8_output_iterator<std::back_insert_iterator<std::string>> oit(std::back_inserter(*m_out));
void append_encoded_string(const char* data) {
const char* end = data + std::strlen(data);
for (; it != end; ++it) {
uint32_t c = *it;
while (data != end) {
const char* last = data;
uint32_t c = utf8::next(data, end);
// This is a list of Unicode code points that we let
// through instead of escaping them. It is incomplete
@@ -138,21 +123,29 @@ namespace osmium {
(0x0041 <= c && c <= 0x007e) ||
(0x00a1 <= c && c <= 0x00ac) ||
(0x00ae <= c && c <= 0x05ff)) {
*oit = c;
m_out->append(last, data);
} else {
*m_out += '%';
output_formatted("%04x", c);
if (c <= 0xff) {
output_formatted("%02x", c);
} else {
output_formatted("%04x", c);
}
*m_out += '%';
}
}
}
void write_meta(const osmium::OSMObject& object) {
output_formatted("%" PRId64 " v%d d", object.id(), object.version());
*m_out += (object.visible() ? 'V' : 'D');
output_formatted(" c%d t", object.changeset());
*m_out += object.timestamp().to_iso();
output_formatted(" i%d u", object.uid());
append_encoded_string(object.user());
output_formatted("%" PRId64, object.id());
if (m_add_metadata) {
output_formatted(" v%d d", object.version());
*m_out += (object.visible() ? 'V' : 'D');
output_formatted(" c%d t", object.changeset());
*m_out += object.timestamp().to_iso();
output_formatted(" i%d u", object.uid());
append_encoded_string(object.user());
}
*m_out += " T";
bool first = true;
for (const auto& tag : object.tags()) {
@@ -180,10 +173,11 @@ namespace osmium {
public:
explicit OPLOutputBlock(osmium::memory::Buffer&& buffer) :
explicit OPLOutputBlock(osmium::memory::Buffer&& buffer, bool add_metadata) :
m_input_buffer(std::make_shared<osmium::memory::Buffer>(std::move(buffer))),
m_out(std::make_shared<std::string>()),
m_tmp_buffer() {
m_tmp_buffer(),
m_add_metadata(add_metadata) {
}
OPLOutputBlock(const OPLOutputBlock&) = default;
@@ -240,7 +234,7 @@ namespace osmium {
}
*m_out += item_type_to_char(member.type());
output_formatted("%" PRId64 "@", member.ref());
*m_out += member.role();
append_encoded_string(member.role());
}
*m_out += '\n';
}
@@ -274,17 +268,20 @@ namespace osmium {
class OPLOutputFormat : public osmium::io::detail::OutputFormat {
OPLOutputFormat(const OPLOutputFormat&) = delete;
OPLOutputFormat& operator=(const OPLOutputFormat&) = delete;
bool m_add_metadata;
public:
OPLOutputFormat(const osmium::io::File& file, data_queue_type& output_queue) :
OutputFormat(file, output_queue) {
OutputFormat(file, output_queue),
m_add_metadata(file.get("add_metadata") != "false") {
}
OPLOutputFormat(const OPLOutputFormat&) = delete;
OPLOutputFormat& operator=(const OPLOutputFormat&) = delete;
void write_buffer(osmium::memory::Buffer&& buffer) override final {
m_output_queue.push(osmium::thread::Pool::instance().submit(OPLOutputBlock{std::move(buffer)}));
m_output_queue.push(osmium::thread::Pool::instance().submit(OPLOutputBlock{std::move(buffer), m_add_metadata}));
}
void close() override final {
@@ -298,6 +295,8 @@ namespace osmium {
namespace {
// we want the register_output_format() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_opl_output = osmium::io::detail::OutputFormatFactory::instance().register_output_format(osmium::io::file_format::opl,
+22 -32
View File
@@ -33,9 +33,7 @@ DEALINGS IN THE SOFTWARE.
*/
#include <stdexcept>
#include <osmpbf/osmpbf.h>
#include <string>
// needed for htonl and ntohl
#ifndef _WIN32
@@ -45,38 +43,10 @@ DEALINGS IN THE SOFTWARE.
#endif
#include <osmium/io/error.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
namespace osmium {
// avoid g++ false positive
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wreturn-type"
inline item_type osmpbf_membertype_to_item_type(const OSMPBF::Relation::MemberType mt) {
switch (mt) {
case OSMPBF::Relation::NODE:
return item_type::node;
case OSMPBF::Relation::WAY:
return item_type::way;
case OSMPBF::Relation::RELATION:
return item_type::relation;
}
}
#pragma GCC diagnostic pop
inline OSMPBF::Relation::MemberType item_type_to_osmpbf_membertype(const item_type type) {
switch (type) {
case item_type::node:
return OSMPBF::Relation::NODE;
case item_type::way:
return OSMPBF::Relation::WAY;
case item_type::relation:
return OSMPBF::Relation::RELATION;
default:
throw std::runtime_error("Unknown relation member type");
}
}
/**
* Exception thrown when there was a problem with parsing the PBF format of
* a file.
@@ -93,6 +63,26 @@ namespace osmium {
}; // struct pbf_error
namespace io {
namespace detail {
// the maximum size of a blob header in bytes
const int max_blob_header_size = 64 * 1024; // 64 kB
// the maximum size of an uncompressed blob in bytes
const uint64_t max_uncompressed_blob_size = 32 * 1024 * 1024; // 32 MB
// resolution for longitude/latitude used for conversion
// between representation as double and as int
const int64_t lonlat_resolution = 1000 * 1000 * 1000;
const int64_t resolution_convert = lonlat_resolution / osmium::Location::coordinate_precision;
}
}
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_HPP
@@ -0,0 +1,760 @@
#ifndef OSMIUM_IO_DETAIL_PBF_DECODER_HPP
#define OSMIUM_IO_DETAIL_PBF_DECODER_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <algorithm>
#include <iterator>
#include <limits>
#include <protozero/pbf_message.hpp>
#include <osmium/builder/osm_object_builder.hpp>
#include <osmium/io/detail/pbf.hpp> // IWYU pragma: export
#include <osmium/io/detail/protobuf_tags.hpp>
#include <osmium/io/detail/zlib.hpp>
#include <osmium/io/header.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/memory/buffer.hpp>
#include <osmium/osm/entity_bits.hpp>
#include <osmium/util/cast.hpp>
#include <osmium/util/delta.hpp>
namespace osmium {
namespace io {
namespace detail {
using ptr_len_type = std::pair<const char*, size_t>;
class PBFPrimitiveBlockDecoder {
// Size passed to the osmium::memory::Buffer constructor for m_buffer.
static constexpr size_t initial_buffer_size = 2 * 1024 * 1024;
// Pointer/length pair describing the encoded PrimitiveBlock to decode.
ptr_len_type m_data;
// Entries of the block's string table as pointer/length pairs, in file
// order; filled by decode_stringtable() and indexed by string id.
std::vector<ptr_len_type> m_stringtable;
// Values of the PrimitiveBlock lon_offset/lat_offset fields (0 if absent).
int64_t m_lon_offset = 0;
int64_t m_lat_offset = 0;
// Value of the date_granularity field (1000 if absent); timestamps are
// multiplied by this factor and then divided by 1000.
int64_t m_date_factor = 1000;
// Value of the granularity field (100 if absent); raw coordinates are
// multiplied by it before the offset is added.
int32_t m_granularity = 100;
// Bit set selecting which entity types are decoded; others are skipped.
osmium::osm_entity_bits::type m_read_types;
// Output buffer that receives the decoded objects.
osmium::memory::Buffer m_buffer { initial_buffer_size };
void decode_stringtable(const ptr_len_type& data) {
if (!m_stringtable.empty()) {
throw osmium::pbf_error("more than one stringtable in pbf file");
}
protozero::pbf_message<OSMFormat::StringTable> pbf_string_table(data);
while (pbf_string_table.next(OSMFormat::StringTable::repeated_bytes_s)) {
m_stringtable.push_back(pbf_string_table.get_data());
}
}
void decode_primitive_block_metadata() {
protozero::pbf_message<OSMFormat::PrimitiveBlock> pbf_primitive_block(m_data);
while (pbf_primitive_block.next()) {
switch (pbf_primitive_block.tag()) {
case OSMFormat::PrimitiveBlock::required_StringTable_stringtable:
decode_stringtable(pbf_primitive_block.get_data());
break;
case OSMFormat::PrimitiveBlock::optional_int32_granularity:
m_granularity = pbf_primitive_block.get_int32();
break;
case OSMFormat::PrimitiveBlock::optional_int32_date_granularity:
m_date_factor = pbf_primitive_block.get_int32();
break;
case OSMFormat::PrimitiveBlock::optional_int64_lat_offset:
m_lat_offset = pbf_primitive_block.get_int64();
break;
case OSMFormat::PrimitiveBlock::optional_int64_lon_offset:
m_lon_offset = pbf_primitive_block.get_int64();
break;
default:
pbf_primitive_block.skip();
}
}
}
void decode_primitive_block_data() {
protozero::pbf_message<OSMFormat::PrimitiveBlock> pbf_primitive_block(m_data);
while (pbf_primitive_block.next(OSMFormat::PrimitiveBlock::repeated_PrimitiveGroup_primitivegroup)) {
protozero::pbf_message<OSMFormat::PrimitiveGroup> pbf_primitive_group = pbf_primitive_block.get_message();
while (pbf_primitive_group.next()) {
switch (pbf_primitive_group.tag()) {
case OSMFormat::PrimitiveGroup::repeated_Node_nodes:
if (m_read_types & osmium::osm_entity_bits::node) {
decode_node(pbf_primitive_group.get_data());
} else {
pbf_primitive_group.skip();
}
break;
case OSMFormat::PrimitiveGroup::optional_DenseNodes_dense:
if (m_read_types & osmium::osm_entity_bits::node) {
decode_dense_nodes(pbf_primitive_group.get_data());
} else {
pbf_primitive_group.skip();
}
break;
case OSMFormat::PrimitiveGroup::repeated_Way_ways:
if (m_read_types & osmium::osm_entity_bits::way) {
decode_way(pbf_primitive_group.get_data());
} else {
pbf_primitive_group.skip();
}
break;
case OSMFormat::PrimitiveGroup::repeated_Relation_relations:
if (m_read_types & osmium::osm_entity_bits::relation) {
decode_relation(pbf_primitive_group.get_data());
} else {
pbf_primitive_group.skip();
}
break;
default:
pbf_primitive_group.skip();
}
}
}
}
/**
 * Decode an Info message and store version, timestamp, changeset, uid
 * and visible flag in the given object.
 *
 * @param data Pointer/length of the encoded Info message.
 * @param object Object that receives the decoded attributes.
 * @returns Pointer/length of the user name looked up in the string
 *          table, or an empty string if the message has no user_sid.
 * @throws osmium::pbf_error if version or changeset id is negative.
 * @throws std::out_of_range if user_sid is not a valid string table index.
 */
ptr_len_type decode_info(const ptr_len_type& data, osmium::OSMObject& object) {
    ptr_len_type user_name { "", 0 };

    protozero::pbf_message<OSMFormat::Info> info(data);
    while (info.next()) {
        switch (info.tag()) {
            case OSMFormat::Info::optional_int32_version: {
                const auto raw_version = info.get_int32();
                if (raw_version < 0) {
                    throw osmium::pbf_error("object version must not be negative");
                }
                object.set_version(static_cast_with_assert<object_version_type>(raw_version));
                break;
            }
            case OSMFormat::Info::optional_int64_timestamp:
                // timestamps are stored in units of date_granularity milliseconds
                object.set_timestamp(info.get_int64() * m_date_factor / 1000);
                break;
            case OSMFormat::Info::optional_int64_changeset: {
                const auto raw_changeset = info.get_int64();
                if (raw_changeset < 0) {
                    throw osmium::pbf_error("object changeset_id must not be negative");
                }
                object.set_changeset(static_cast_with_assert<changeset_id_type>(raw_changeset));
                break;
            }
            case OSMFormat::Info::optional_int32_uid:
                object.set_uid_from_signed(info.get_int32());
                break;
            case OSMFormat::Info::optional_uint32_user_sid:
                user_name = m_stringtable.at(info.get_uint32());
                break;
            case OSMFormat::Info::optional_bool_visible:
                object.set_visible(info.get_bool());
                break;
            default:
                info.skip();
                break;
        }
    }

    return user_name;
}
using kv_type = std::pair<protozero::pbf_reader::const_uint32_iterator, protozero::pbf_reader::const_uint32_iterator>;
/**
 * Add a tag list to the object under construction. Keys and values are
 * given as parallel ranges of string table indexes. Nothing is added if
 * the key range is empty.
 *
 * @param builder Builder of the object the tags belong to.
 * @param keys Iterator range over the string ids of the tag keys.
 * @param vals Iterator range over the string ids of the tag values.
 * @throws osmium::pbf_error if there are fewer values than keys.
 * @throws std::out_of_range if a string id is not in the string table.
 */
void build_tag_list(osmium::builder::Builder& builder, const kv_type& keys, const kv_type& vals) {
    if (keys.first == keys.second) {
        return;
    }

    osmium::builder::TagListBuilder tl_builder(m_buffer, &builder);

    auto value_it = vals.first;
    for (auto key_it = keys.first; key_it != keys.second; ++key_it, ++value_it) {
        if (value_it == vals.second) {
            // this is against the spec, must have same number of elements
            throw osmium::pbf_error("PBF format error");
        }
        const auto& key = m_stringtable.at(*key_it);
        const auto& value = m_stringtable.at(*value_it);
        tl_builder.add_tag(key.first, key.second, value.first, value.second);
    }
}
int32_t convert_pbf_coordinate(int64_t c) const {
return (c * m_granularity + m_lon_offset) / resolution_convert;
}
void decode_node(const ptr_len_type& data) {
osmium::builder::NodeBuilder builder(m_buffer);
osmium::Node& node = builder.object();
kv_type keys;
kv_type vals;
int64_t lon = std::numeric_limits<int64_t>::max();
int64_t lat = std::numeric_limits<int64_t>::max();
ptr_len_type user = { "", 0 };
protozero::pbf_message<OSMFormat::Node> pbf_node(data);
while (pbf_node.next()) {
switch (pbf_node.tag()) {
case OSMFormat::Node::required_sint64_id:
node.set_id(pbf_node.get_sint64());
break;
case OSMFormat::Node::packed_uint32_keys:
keys = pbf_node.get_packed_uint32();
break;
case OSMFormat::Node::packed_uint32_vals:
vals = pbf_node.get_packed_uint32();
break;
case OSMFormat::Node::optional_Info_info:
user = decode_info(pbf_node.get_data(), builder.object());
break;
case OSMFormat::Node::required_sint64_lat:
lat = pbf_node.get_sint64();
break;
case OSMFormat::Node::required_sint64_lon:
lon = pbf_node.get_sint64();
break;
default:
pbf_node.skip();
}
}
if (node.visible()) {
if (lon == std::numeric_limits<int64_t>::max() ||
lat == std::numeric_limits<int64_t>::max()) {
throw osmium::pbf_error("illegal coordinate format");
}
node.set_location(osmium::Location(
convert_pbf_coordinate(lon),
convert_pbf_coordinate(lat)
));
}
builder.add_user(user.first, user.second);
build_tag_list(builder, keys, vals);
m_buffer.commit();
}
void decode_way(const ptr_len_type& data) {
osmium::builder::WayBuilder builder(m_buffer);
kv_type keys;
kv_type vals;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> refs;
ptr_len_type user = { "", 0 };
protozero::pbf_message<OSMFormat::Way> pbf_way(data);
while (pbf_way.next()) {
switch (pbf_way.tag()) {
case OSMFormat::Way::required_int64_id:
builder.object().set_id(pbf_way.get_int64());
break;
case OSMFormat::Way::packed_uint32_keys:
keys = pbf_way.get_packed_uint32();
break;
case OSMFormat::Way::packed_uint32_vals:
vals = pbf_way.get_packed_uint32();
break;
case OSMFormat::Way::optional_Info_info:
user = decode_info(pbf_way.get_data(), builder.object());
break;
case OSMFormat::Way::packed_sint64_refs:
refs = pbf_way.get_packed_sint64();
break;
default:
pbf_way.skip();
}
}
builder.add_user(user.first, user.second);
if (refs.first != refs.second) {
osmium::builder::WayNodeListBuilder wnl_builder(m_buffer, &builder);
osmium::util::DeltaDecode<int64_t> ref;
while (refs.first != refs.second) {
wnl_builder.add_node_ref(ref.update(*refs.first++));
}
}
build_tag_list(builder, keys, vals);
m_buffer.commit();
}
void decode_relation(const ptr_len_type& data) {
osmium::builder::RelationBuilder builder(m_buffer);
kv_type keys;
kv_type vals;
std::pair<protozero::pbf_reader::const_int32_iterator, protozero::pbf_reader::const_int32_iterator> roles;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> refs;
std::pair<protozero::pbf_reader::const_int32_iterator, protozero::pbf_reader::const_int32_iterator> types;
ptr_len_type user = { "", 0 };
protozero::pbf_message<OSMFormat::Relation> pbf_relation(data);
while (pbf_relation.next()) {
switch (pbf_relation.tag()) {
case OSMFormat::Relation::required_int64_id:
builder.object().set_id(pbf_relation.get_int64());
break;
case OSMFormat::Relation::packed_uint32_keys:
keys = pbf_relation.get_packed_uint32();
break;
case OSMFormat::Relation::packed_uint32_vals:
vals = pbf_relation.get_packed_uint32();
break;
case OSMFormat::Relation::optional_Info_info:
user = decode_info(pbf_relation.get_data(), builder.object());
break;
case OSMFormat::Relation::packed_int32_roles_sid:
roles = pbf_relation.get_packed_int32();
break;
case OSMFormat::Relation::packed_sint64_memids:
refs = pbf_relation.get_packed_sint64();
break;
case OSMFormat::Relation::packed_MemberType_types:
types = pbf_relation.get_packed_enum();
break;
default:
pbf_relation.skip();
}
}
builder.add_user(user.first, user.second);
if (refs.first != refs.second) {
osmium::builder::RelationMemberListBuilder rml_builder(m_buffer, &builder);
osmium::util::DeltaDecode<int64_t> ref;
while (roles.first != roles.second && refs.first != refs.second && types.first != types.second) {
const auto& r = m_stringtable.at(*roles.first++);
int type = *types.first++;
if (type < 0 || type > 2) {
throw osmium::pbf_error("unknown relation member type");
}
rml_builder.add_member(
osmium::item_type(type + 1),
ref.update(*refs.first++),
r.first,
r.second
);
}
}
build_tag_list(builder, keys, vals);
m_buffer.commit();
}
void decode_dense_nodes(const ptr_len_type& data) {
bool has_info = false;
bool has_visibles = false;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> ids;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> lats;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> lons;
std::pair<protozero::pbf_reader::const_int32_iterator, protozero::pbf_reader::const_int32_iterator> tags;
std::pair<protozero::pbf_reader::const_int32_iterator, protozero::pbf_reader::const_int32_iterator> versions;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> timestamps;
std::pair<protozero::pbf_reader::const_sint64_iterator, protozero::pbf_reader::const_sint64_iterator> changesets;
std::pair<protozero::pbf_reader::const_sint32_iterator, protozero::pbf_reader::const_sint32_iterator> uids;
std::pair<protozero::pbf_reader::const_sint32_iterator, protozero::pbf_reader::const_sint32_iterator> user_sids;
std::pair<protozero::pbf_reader::const_int32_iterator, protozero::pbf_reader::const_int32_iterator> visibles;
protozero::pbf_message<OSMFormat::DenseNodes> pbf_dense_nodes(data);
while (pbf_dense_nodes.next()) {
switch (pbf_dense_nodes.tag()) {
case OSMFormat::DenseNodes::packed_sint64_id:
ids = pbf_dense_nodes.get_packed_sint64();
break;
case OSMFormat::DenseNodes::optional_DenseInfo_denseinfo:
{
has_info = true;
protozero::pbf_message<OSMFormat::DenseInfo> pbf_dense_info = pbf_dense_nodes.get_message();
while (pbf_dense_info.next()) {
switch (pbf_dense_info.tag()) {
case OSMFormat::DenseInfo::packed_int32_version:
versions = pbf_dense_info.get_packed_int32();
break;
case OSMFormat::DenseInfo::packed_sint64_timestamp:
timestamps = pbf_dense_info.get_packed_sint64();
break;
case OSMFormat::DenseInfo::packed_sint64_changeset:
changesets = pbf_dense_info.get_packed_sint64();
break;
case OSMFormat::DenseInfo::packed_sint32_uid:
uids = pbf_dense_info.get_packed_sint32();
break;
case OSMFormat::DenseInfo::packed_sint32_user_sid:
user_sids = pbf_dense_info.get_packed_sint32();
break;
case OSMFormat::DenseInfo::packed_bool_visible:
has_visibles = true;
visibles = pbf_dense_info.get_packed_bool();
break;
default:
pbf_dense_info.skip();
}
}
}
break;
case OSMFormat::DenseNodes::packed_sint64_lat:
lats = pbf_dense_nodes.get_packed_sint64();
break;
case OSMFormat::DenseNodes::packed_sint64_lon:
lons = pbf_dense_nodes.get_packed_sint64();
break;
case OSMFormat::DenseNodes::packed_int32_keys_vals:
tags = pbf_dense_nodes.get_packed_int32();
break;
default:
pbf_dense_nodes.skip();
}
}
osmium::util::DeltaDecode<int64_t> dense_id;
osmium::util::DeltaDecode<int64_t> dense_latitude;
osmium::util::DeltaDecode<int64_t> dense_longitude;
osmium::util::DeltaDecode<int64_t> dense_uid;
osmium::util::DeltaDecode<int64_t> dense_user_sid;
osmium::util::DeltaDecode<int64_t> dense_changeset;
osmium::util::DeltaDecode<int64_t> dense_timestamp;
auto tag_it = tags.first;
while (ids.first != ids.second) {
if (lons.first == lons.second ||
lats.first == lats.second) {
// this is against the spec, must have same number of elements
throw osmium::pbf_error("PBF format error");
}
bool visible = true;
osmium::builder::NodeBuilder builder(m_buffer);
osmium::Node& node = builder.object();
node.set_id(dense_id.update(*ids.first++));
if (has_info) {
if (versions.first == versions.second ||
changesets.first == changesets.second ||
timestamps.first == timestamps.second ||
uids.first == uids.second ||
user_sids.first == user_sids.second) {
// this is against the spec, must have same number of elements
throw osmium::pbf_error("PBF format error");
}
auto version = *versions.first++;
if (version < 0) {
throw osmium::pbf_error("object version must not be negative");
}
node.set_version(static_cast<osmium::object_version_type>(version));
auto changeset_id = dense_changeset.update(*changesets.first++);
if (changeset_id < 0) {
throw osmium::pbf_error("object changeset_id must not be negative");
}
node.set_changeset(static_cast<osmium::changeset_id_type>(changeset_id));
node.set_timestamp(dense_timestamp.update(*timestamps.first++) * m_date_factor / 1000);
node.set_uid_from_signed(static_cast<osmium::signed_user_id_type>(dense_uid.update(*uids.first++)));
if (has_visibles) {
if (visibles.first == visibles.second) {
// this is against the spec, must have same number of elements
throw osmium::pbf_error("PBF format error");
}
visible = *visibles.first++;
}
node.set_visible(visible);
const auto& u = m_stringtable.at(dense_user_sid.update(*user_sids.first++));
builder.add_user(u.first, u.second);
} else {
builder.add_user("");
}
if (visible) {
builder.object().set_location(osmium::Location(
convert_pbf_coordinate(dense_longitude.update(*lons.first++)),
convert_pbf_coordinate(dense_latitude.update(*lats.first++))
));
}
if (tag_it != tags.second) {
osmium::builder::TagListBuilder tl_builder(m_buffer, &builder);
while (tag_it != tags.second && *tag_it != 0) {
const auto& k = m_stringtable.at(*tag_it++);
if (tag_it == tags.second) {
throw osmium::pbf_error("PBF format error"); // this is against the spec, keys/vals must come in pairs
}
const auto& v = m_stringtable.at(*tag_it++);
tl_builder.add_tag(k.first, k.second, v.first, v.second);
}
if (tag_it != tags.second) {
++tag_it;
}
}
m_buffer.commit();
}
}
public:
/**
 * Set up a decoder for one PrimitiveBlock.
 *
 * Only the pointer/length pair is stored, not a copy of the data.
 * NOTE(review): the referenced bytes presumably have to stay valid
 * until operator()() has returned -- confirm against callers.
 *
 * @param data Pointer/length of the encoded PrimitiveBlock.
 * @param read_types Entity types to decode; all others are skipped.
 */
explicit PBFPrimitiveBlockDecoder(const ptr_len_type& data, osmium::osm_entity_bits::type read_types) :
m_data(data),
m_read_types(read_types) {
}
// Decoders are neither copyable nor movable.
PBFPrimitiveBlockDecoder(const PBFPrimitiveBlockDecoder&) = delete;
PBFPrimitiveBlockDecoder& operator=(const PBFPrimitiveBlockDecoder&) = delete;
PBFPrimitiveBlockDecoder(PBFPrimitiveBlockDecoder&&) = delete;
PBFPrimitiveBlockDecoder& operator=(PBFPrimitiveBlockDecoder&&) = delete;
~PBFPrimitiveBlockDecoder() = default;
/**
 * Decode the whole PrimitiveBlock: first the metadata (string table,
 * granularity, offsets), then the entities.
 *
 * @returns The buffer with all decoded objects; m_buffer is moved out.
 * @throws osmium::pbf_error on malformed data; an out-of-range string
 *         table lookup is reported as pbf_error as well.
 */
osmium::memory::Buffer operator()() {
    try {
        // The metadata pass has to finish before any entity is decoded,
        // because entities refer to the string table by index.
        decode_primitive_block_metadata();
        decode_primitive_block_data();
    } catch (const std::out_of_range&) {
        // thrown by the m_stringtable.at() lookups
        throw osmium::pbf_error("string id out of range");
    }

    // m_buffer is a data member, so it has to be moved explicitly
    return std::move(m_buffer);
}
}; // class PBFPrimitiveBlockDecoder
/**
 * Decode a Blob message and return the payload it carries.
 *
 * A raw payload is returned directly as the pointer/length pair read
 * from the message. A zlib payload is handed to
 * zlib_uncompress_string() together with raw_size and output.
 *
 * @param blob_data The encoded Blob message.
 * @param output String passed on to zlib_uncompress_string() for zlib
 *               blobs; not touched for raw blobs.
 * @returns Pointer/length of the uncompressed payload.
 * @throws osmium::pbf_error if the blob size is illegal, the blob uses
 *         an unsupported or unknown compression, a zlib payload comes
 *         without raw_size, or the blob contains no data.
 */
inline ptr_len_type decode_blob(const std::string& blob_data, std::string& output) {
    // 0 means "no raw_size field seen"; a raw_size of 0 in the file is
    // rejected below, so the value is unambiguous.
    int32_t raw_size = 0;
    std::pair<const char*, protozero::pbf_length_type> zlib_data;

    protozero::pbf_message<FileFormat::Blob> pbf_blob(blob_data);
    while (pbf_blob.next()) {
        switch (pbf_blob.tag()) {
            case FileFormat::Blob::optional_bytes_raw:
                {
                    auto data_len = pbf_blob.get_data();
                    if (data_len.second > max_uncompressed_blob_size) {
                        throw osmium::pbf_error("illegal blob size");
                    }
                    return data_len;
                }
            case FileFormat::Blob::optional_int32_raw_size:
                raw_size = pbf_blob.get_int32();
                if (raw_size <= 0 || uint32_t(raw_size) > max_uncompressed_blob_size) {
                    throw osmium::pbf_error("illegal blob size");
                }
                break;
            case FileFormat::Blob::optional_bytes_zlib_data:
                zlib_data = pbf_blob.get_data();
                break;
            case FileFormat::Blob::optional_bytes_lzma_data:
                throw osmium::pbf_error("lzma blobs not implemented");
            default:
                throw osmium::pbf_error("unknown compression");
        }
    }

    if (zlib_data.second != 0) {
        if (raw_size == 0) {
            // Without raw_size the uncompressed size is unknown; never
            // pass an indeterminate value on to the decompressor.
            throw osmium::pbf_error("PBF format error: Blob.raw_size missing for zlib data");
        }
        return osmium::io::detail::zlib_uncompress_string(
            zlib_data.first,
            static_cast<unsigned long>(zlib_data.second),
            static_cast<unsigned long>(raw_size),
            output
        );
    }

    throw osmium::pbf_error("blob contains no data");
}
/**
 * Decode a HeaderBBox message into an osmium::Box.
 *
 * @param data Pointer/length of the encoded HeaderBBox message.
 * @returns Box extended by the (left, bottom) and (right, top) corners,
 *          each coordinate divided by resolution_convert.
 * @throws osmium::pbf_error if any of the four sides is missing.
 */
inline osmium::Box decode_header_bbox(const ptr_len_type& data) {
    // marker for "side not present in the message"
    const int64_t unset = std::numeric_limits<int64_t>::max();

    int64_t left = unset;
    int64_t right = unset;
    int64_t top = unset;
    int64_t bottom = unset;

    protozero::pbf_message<OSMFormat::HeaderBBox> bbox(data);
    while (bbox.next()) {
        switch (bbox.tag()) {
            case OSMFormat::HeaderBBox::required_sint64_left:
                left = bbox.get_sint64();
                break;
            case OSMFormat::HeaderBBox::required_sint64_right:
                right = bbox.get_sint64();
                break;
            case OSMFormat::HeaderBBox::required_sint64_top:
                top = bbox.get_sint64();
                break;
            case OSMFormat::HeaderBBox::required_sint64_bottom:
                bottom = bbox.get_sint64();
                break;
            default:
                bbox.skip();
                break;
        }
    }

    const bool incomplete = left == unset || right == unset || top == unset || bottom == unset;
    if (incomplete) {
        throw osmium::pbf_error("invalid bbox");
    }

    osmium::Box box;
    box.extend(osmium::Location(left / resolution_convert, bottom / resolution_convert));
    box.extend(osmium::Location(right / resolution_convert, top / resolution_convert));

    return box;
}
/**
 * Decode a HeaderBlock message into an osmium::io::Header.
 *
 * Bounding boxes are added to the header; the generator, the optional
 * features and the osmosis replication fields are stored as header
 * options. Required features must match one of the supported feature
 * names exactly (same length, same bytes).
 *
 * @param data Pointer/length of the encoded HeaderBlock message.
 * @returns The filled-in header.
 * @throws osmium::pbf_error if the file requires an unsupported feature
 *         or contains an invalid bounding box.
 */
inline osmium::io::Header decode_header_block(const ptr_len_type& data) {
    osmium::io::Header header;
    int i = 0;

    protozero::pbf_message<OSMFormat::HeaderBlock> pbf_header_block(data);
    while (pbf_header_block.next()) {
        switch (pbf_header_block.tag()) {
            case OSMFormat::HeaderBlock::optional_HeaderBBox_bbox:
                header.add_box(decode_header_bbox(pbf_header_block.get_data()));
                break;
            case OSMFormat::HeaderBlock::repeated_string_required_features:
                {
                    auto feature = pbf_header_block.get_data();

                    // The feature string is not NUL-terminated. Comparing
                    // only the first feature.second characters would also
                    // accept any prefix of a known name (even the empty
                    // string), so the lengths have to match as well.
                    const auto is_feature = [&feature](const char* name) {
                        const size_t name_length = std::strlen(name);
                        return feature.second == name_length &&
                               std::memcmp(name, feature.first, name_length) == 0;
                    };

                    if (is_feature("OsmSchema-V0.6")) {
                        // intentionally left blank
                    } else if (is_feature("DenseNodes")) {
                        header.set("pbf_dense_nodes", true);
                    } else if (is_feature("HistoricalInformation")) {
                        header.set_has_multiple_object_versions(true);
                    } else {
                        std::string msg("required feature not supported: ");
                        msg.append(feature.first, feature.second);
                        throw osmium::pbf_error(msg);
                    }
                }
                break;
            case OSMFormat::HeaderBlock::repeated_string_optional_features:
                // optional features are numbered in order of appearance
                header.set("pbf_optional_feature_" + std::to_string(i++), pbf_header_block.get_string());
                break;
            case OSMFormat::HeaderBlock::optional_string_writingprogram:
                header.set("generator", pbf_header_block.get_string());
                break;
            case OSMFormat::HeaderBlock::optional_int64_osmosis_replication_timestamp:
                header.set("osmosis_replication_timestamp", osmium::Timestamp(pbf_header_block.get_int64()).to_iso());
                break;
            case OSMFormat::HeaderBlock::optional_int64_osmosis_replication_sequence_number:
                header.set("osmosis_replication_sequence_number", std::to_string(pbf_header_block.get_int64()));
                break;
            case OSMFormat::HeaderBlock::optional_string_osmosis_replication_base_url:
                header.set("osmosis_replication_base_url", pbf_header_block.get_string());
                break;
            default:
                pbf_header_block.skip();
        }
    }

    return header;
}
/**
 * Decode the Blob that wraps a HeaderBlock and then the HeaderBlock
 * itself.
 *
 * @param header_block_data Encoded Blob containing the HeaderBlock.
 * @returns Header object
 * @throws osmium::pbf_error If there was a parsing error
 */
inline osmium::io::Header decode_header(const std::string& header_block_data) {
    // has to outlive the decoding below, it is handed to decode_blob()
    std::string uncompressed;
    const ptr_len_type header_block = decode_blob(header_block_data, uncompressed);
    return decode_header_block(header_block);
}
/**
 * Function object that decodes one OSMData blob into a buffer.
 *
 * The encoded blob is held through a shared_ptr, so copies of the
 * decoder share the same input data.
 */
class PBFDataBlobDecoder {

    std::shared_ptr<std::string> m_input_buffer;
    osmium::osm_entity_bits::type m_read_types;

public:

    /**
     * @param input_buffer Encoded Blob; its contents are moved into the
     *                     decoder.
     * @param read_types Entity types to decode.
     */
    PBFDataBlobDecoder(std::string&& input_buffer, osmium::osm_entity_bits::type read_types) :
        m_input_buffer(std::make_shared<std::string>(std::move(input_buffer))),
        m_read_types(read_types) {
    }

    PBFDataBlobDecoder(const PBFDataBlobDecoder&) = default;
    PBFDataBlobDecoder& operator=(const PBFDataBlobDecoder&) = default;

    PBFDataBlobDecoder(PBFDataBlobDecoder&&) = default;
    PBFDataBlobDecoder& operator=(PBFDataBlobDecoder&&) = default;

    ~PBFDataBlobDecoder() = default;

    /**
     * Unwrap the blob and decode the PrimitiveBlock in it.
     *
     * @returns Buffer with the decoded objects.
     * @throws osmium::pbf_error on malformed input.
     */
    osmium::memory::Buffer operator()() {
        // declared before the block decoder so it is still alive while
        // the decoder runs
        std::string uncompressed;
        const ptr_len_type primitive_block = decode_blob(*m_input_buffer, uncompressed);
        PBFPrimitiveBlockDecoder block_decoder(primitive_block, m_read_types);
        return block_decoder();
    }

}; // class PBFDataBlobDecoder
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_DECODER_HPP
@@ -49,9 +49,12 @@ DEALINGS IN THE SOFTWARE.
#include <thread>
#include <type_traits>
#include <protozero/pbf_message.hpp>
#include <osmium/io/detail/input_format.hpp>
#include <osmium/io/detail/pbf.hpp> // IWYU pragma: export
#include <osmium/io/detail/pbf_parser.hpp>
#include <osmium/io/detail/pbf_decoder.hpp>
#include <osmium/io/detail/protobuf_tags.hpp>
#include <osmium/io/error.hpp>
#include <osmium/io/file.hpp>
#include <osmium/io/file_format.hpp>
@@ -76,13 +79,13 @@ namespace osmium {
namespace detail {
typedef osmium::thread::Queue<std::future<osmium::memory::Buffer>> queue_type;
/**
* Class for parsing PBF files.
*/
class PBFInputFormat : public osmium::io::detail::InputFormat {
typedef osmium::thread::Queue<std::future<osmium::memory::Buffer>> queue_type;
bool m_use_thread_pool;
bool m_eof { false };
queue_type m_queue;
@@ -115,15 +118,10 @@ namespace osmium {
}
/**
* Read BlobHeader by first reading the size and then the
* BlobHeader. The BlobHeader contains a type field (which is
* checked against the expected type) and a size field.
*
* @param expected_type Expected type of data ("OSMHeader" or
* "OSMData").
* @returns Size of the data read from BlobHeader (0 on EOF).
* Read 4 bytes in network byte order from file. They contain
* the length of the following BlobHeader.
*/
size_t read_blob_header(const char* expected_type) {
uint32_t read_blob_header_size_from_file() {
uint32_t size_in_network_byte_order;
try {
@@ -133,37 +131,76 @@ namespace osmium {
return 0; // EOF
}
uint32_t size = ntohl(size_in_network_byte_order);
if (size > static_cast<uint32_t>(OSMPBF::max_blob_header_size)) {
const uint32_t size = ntohl(size_in_network_byte_order);
if (size > static_cast<uint32_t>(max_blob_header_size)) {
throw osmium::pbf_error("invalid BlobHeader size (> max_blob_header_size)");
}
OSMPBF::BlobHeader blob_header;
if (!blob_header.ParseFromString(read_from_input_queue(size))) {
throw osmium::pbf_error("failed to parse BlobHeader");
return size;
}
/**
* Decode the BlobHeader. Make sure it contains the expected
* type. Return the size of the following Blob.
*/
// Reads the type and datasize fields of the BlobHeader, skips all other
// fields, and returns the datasize after checking the type.
size_t decode_blob_header(protozero::pbf_message<FileFormat::BlobHeader>&& pbf_blob_header, const char* expected_type) {
std::pair<const char*, size_t> blob_header_type;
size_t blob_header_datasize = 0;
while (pbf_blob_header.next()) {
switch (pbf_blob_header.tag()) {
case FileFormat::BlobHeader::required_string_type:
blob_header_type = pbf_blob_header.get_data();
break;
case FileFormat::BlobHeader::required_int32_datasize:
// NOTE(review): an int32 is stored in a size_t here; a negative datasize
// would turn into a very large value -- confirm callers bound it.
blob_header_datasize = pbf_blob_header.get_int32();
break;
default:
pbf_blob_header.skip();
}
}
// NOTE(review): `blob_header` is not declared in this function; this line
// and the first `return` below look like leftovers of the previous
// protobuf-based implementation -- confirm and remove.
if (blob_header.type() != expected_type) {
if (blob_header_datasize == 0) {
throw osmium::pbf_error("PBF format error: BlobHeader.datasize missing or zero.");
}
// NOTE(review): strncmp() compares at most blob_header_type.second
// characters, so a type that is only a prefix of expected_type (or a
// missing type, length 0) also passes this check -- confirm intended.
if (strncmp(expected_type, blob_header_type.first, blob_header_type.second)) {
throw osmium::pbf_error("blob does not have expected type (OSMHeader in first blob, OSMData in following blobs)");
}
return static_cast<size_t>(blob_header.datasize());
return blob_header_datasize;
}
/**
 * Read the next BlobHeader from the input, check that it announces a
 * blob of the expected type and return the size of that blob.
 *
 * @param expected_type Expected blob type; must not be null.
 * @returns Size of the following Blob, or 0 on EOF.
 * @throws osmium::pbf_error as thrown by the header reading/decoding.
 */
size_t check_type_and_get_blob_size(const char* expected_type) {
    assert(expected_type);

    const auto header_size = read_blob_header_size_from_file();
    if (header_size == 0) {
        return 0; // EOF
    }

    const std::string header_data = read_from_input_queue(header_size);
    return decode_blob_header(protozero::pbf_message<FileFormat::BlobHeader>(header_data), expected_type);
}
void parse_osm_data(osmium::osm_entity_bits::type read_types) {
osmium::thread::set_thread_name("_osmium_pbf_in");
int n = 0;
while (auto size = read_blob_header("OSMData")) {
while (auto size = check_type_and_get_blob_size("OSMData")) {
std::string input_buffer = read_from_input_queue(size);
if (input_buffer.size() > max_uncompressed_blob_size) {
throw osmium::pbf_error(std::string("invalid blob size: " + std::to_string(input_buffer.size())));
}
if (m_use_thread_pool) {
m_queue.push(osmium::thread::Pool::instance().submit(DataBlobParser{read_from_input_queue(size), read_types}));
m_queue.push(osmium::thread::Pool::instance().submit(PBFDataBlobDecoder{ std::move(input_buffer), read_types }));
} else {
std::promise<osmium::memory::Buffer> promise;
m_queue.push(promise.get_future());
DataBlobParser data_blob_parser{read_from_input_queue(size), read_types};
PBFDataBlobDecoder data_blob_parser{ std::move(input_buffer), read_types };
promise.set_value(data_blob_parser());
}
++n;
if (m_quit_input_thread) {
return;
@@ -197,11 +234,10 @@ namespace osmium {
m_quit_input_thread(false),
m_input_queue(input_queue),
m_input_buffer() {
GOOGLE_PROTOBUF_VERIFY_VERSION;
// handle OSMHeader
auto size = read_blob_header("OSMHeader");
m_header = parse_header_blob(read_from_input_queue(size));
const auto size = check_type_and_get_blob_size("OSMHeader");
m_header = decode_header(read_from_input_queue(size));
if (m_read_which_entities != osmium::osm_entity_bits::nothing) {
m_reader = std::thread(&PBFInputFormat::parse_osm_data, this, m_read_which_entities);
@@ -246,10 +282,15 @@ namespace osmium {
namespace {
// we want the register_input_format() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
// Registers the factory function that creates a PBFInputFormat for files
// in PBF format. Only the call matters; the stored result is never read.
const bool registered_pbf_input = osmium::io::detail::InputFormatFactory::instance().register_input_format(
    osmium::io::file_format::pbf,
    [](const osmium::io::File& input_file, osmium::osm_entity_bits::type entities, osmium::thread::Queue<std::string>& raw_input) {
        return new osmium::io::detail::PBFInputFormat(input_file, entities, raw_input);
    });
#pragma GCC diagnostic pop
} // anonymous namespace
File diff suppressed because it is too large Load Diff
@@ -1,455 +0,0 @@
#ifndef OSMIUM_IO_DETAIL_PBF_PRIMITIVE_BLOCK_PARSER_HPP
#define OSMIUM_IO_DETAIL_PBF_PRIMITIVE_BLOCK_PARSER_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <algorithm>
#include <osmpbf/osmpbf.h>
#include <osmium/builder/osm_object_builder.hpp>
#include <osmium/io/detail/pbf.hpp> // IWYU pragma: export
#include <osmium/io/detail/zlib.hpp>
#include <osmium/io/header.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/memory/buffer.hpp>
#include <osmium/osm/entity_bits.hpp>
#include <osmium/util/cast.hpp>
namespace osmium {
namespace io {
namespace detail {
class PBFPrimitiveBlockParser {
// Size passed to the osmium::memory::Buffer constructor for m_buffer.
static constexpr size_t initial_buffer_size = 2 * 1024 * 1024;
// Encoded PrimitiveBlock. Held by reference, so the string has to
// outlive this parser.
const std::string& m_data;
// String table of the block being parsed; null until operator()() runs.
const OSMPBF::StringTable* m_stringtable;
// Coordinate offsets taken from the PrimitiveBlock.
int64_t m_lon_offset;
int64_t m_lat_offset;
// Factor timestamps are multiplied with; operator()() sets it to
// date_granularity / 1000.
int64_t m_date_factor;
// Granularity taken from the PrimitiveBlock; raw coordinates are
// multiplied by it.
int32_t m_granularity;
// Entity types to parse; groups of other types are ignored.
osmium::osm_entity_bits::type m_read_types;
// Output buffer that receives the parsed objects.
osmium::memory::Buffer m_buffer;
// Parsers are neither copyable nor movable.
PBFPrimitiveBlockParser(const PBFPrimitiveBlockParser&) = delete;
PBFPrimitiveBlockParser(PBFPrimitiveBlockParser&&) = delete;
PBFPrimitiveBlockParser& operator=(const PBFPrimitiveBlockParser&) = delete;
PBFPrimitiveBlockParser& operator=(PBFPrimitiveBlockParser&&) = delete;
public:
/**
 * Set up a parser for one PrimitiveBlock.
 *
 * @param data Encoded PrimitiveBlock; only a reference is stored.
 * @param read_types Entity types to parse.
 */
explicit PBFPrimitiveBlockParser(const std::string& data, osmium::osm_entity_bits::type read_types) :
m_data(data),
m_stringtable(nullptr),
m_lon_offset(0),
m_lat_offset(0),
m_date_factor(1000),
m_granularity(100),
m_read_types(read_types),
m_buffer(initial_buffer_size) {
}
~PBFPrimitiveBlockParser() = default;
/**
 * Parse the PrimitiveBlock and all of its groups.
 *
 * The kind of a group is determined by probing for dense nodes, ways,
 * relations and plain nodes, in that order. A group whose entity type
 * is not selected in m_read_types is recognised but not parsed.
 *
 * @returns The buffer with all parsed objects; m_buffer is moved out.
 * @throws osmium::pbf_error if the block cannot be parsed or a group
 *         contains none of the known entity kinds.
 */
osmium::memory::Buffer operator()() {
    OSMPBF::PrimitiveBlock block;
    if (!block.ParseFromString(m_data)) {
        throw osmium::pbf_error("failed to parse PrimitiveBlock");
    }

    m_stringtable = &block.stringtable();
    m_lon_offset  = block.lon_offset();
    m_lat_offset  = block.lat_offset();
    m_date_factor = block.date_granularity() / 1000;
    m_granularity = block.granularity();

    for (int n = 0; n < block.primitivegroup_size(); ++n) {
        const OSMPBF::PrimitiveGroup& group = block.primitivegroup(n);

        if (group.has_dense()) {
            if (m_read_types & osmium::osm_entity_bits::node) {
                parse_dense_node_group(group);
            }
            continue;
        }
        if (group.ways_size() != 0) {
            if (m_read_types & osmium::osm_entity_bits::way) {
                parse_way_group(group);
            }
            continue;
        }
        if (group.relations_size() != 0) {
            if (m_read_types & osmium::osm_entity_bits::relation) {
                parse_relation_group(group);
            }
            continue;
        }
        if (group.nodes_size() != 0) {
            if (m_read_types & osmium::osm_entity_bits::node) {
                parse_node_group(group);
            }
            continue;
        }

        throw osmium::pbf_error("group of unknown type");
    }

    // m_buffer is a data member, so it has to be moved explicitly
    return std::move(m_buffer);
}
private:
template <class TBuilder, class TPBFObject>
void parse_attributes(TBuilder& builder, const TPBFObject& pbf_object) {
auto& object = builder.object();
object.set_id(pbf_object.id());
if (pbf_object.has_info()) {
object.set_version(static_cast_with_assert<object_version_type>(pbf_object.info().version()))
.set_changeset(static_cast_with_assert<changeset_id_type>(pbf_object.info().changeset()))
.set_timestamp(pbf_object.info().timestamp() * m_date_factor)
.set_uid_from_signed(pbf_object.info().uid());
if (pbf_object.info().has_visible()) {
object.set_visible(pbf_object.info().visible());
}
builder.add_user(m_stringtable->s(static_cast_with_assert<int>(pbf_object.info().user_sid())));
} else {
builder.add_user("", 1);
}
}
void parse_node_group(const OSMPBF::PrimitiveGroup& group) {
for (int i = 0; i < group.nodes_size(); ++i) {
osmium::builder::NodeBuilder builder(m_buffer);
const OSMPBF::Node& pbf_node = group.nodes(i);
parse_attributes(builder, pbf_node);
if (builder.object().visible()) {
builder.object().set_location(osmium::Location(
(pbf_node.lon() * m_granularity + m_lon_offset) / (OSMPBF::lonlat_resolution / osmium::Location::coordinate_precision),
(pbf_node.lat() * m_granularity + m_lat_offset) / (OSMPBF::lonlat_resolution / osmium::Location::coordinate_precision)));
}
if (pbf_node.keys_size() > 0) {
osmium::builder::TagListBuilder tl_builder(m_buffer, &builder);
for (int tag = 0; tag < pbf_node.keys_size(); ++tag) {
tl_builder.add_tag(m_stringtable->s(static_cast<int>(pbf_node.keys(tag))),
m_stringtable->s(static_cast<int>(pbf_node.vals(tag))));
}
}
m_buffer.commit();
}
}
void parse_way_group(const OSMPBF::PrimitiveGroup& group) {
for (int i = 0; i < group.ways_size(); ++i) {
osmium::builder::WayBuilder builder(m_buffer);
const OSMPBF::Way& pbf_way = group.ways(i);
parse_attributes(builder, pbf_way);
if (pbf_way.refs_size() > 0) {
osmium::builder::WayNodeListBuilder wnl_builder(m_buffer, &builder);
int64_t ref = 0;
for (int n = 0; n < pbf_way.refs_size(); ++n) {
ref += pbf_way.refs(n);
wnl_builder.add_node_ref(ref);
}
}
if (pbf_way.keys_size() > 0) {
osmium::builder::TagListBuilder tl_builder(m_buffer, &builder);
for (int tag = 0; tag < pbf_way.keys_size(); ++tag) {
tl_builder.add_tag(m_stringtable->s(static_cast<int>(pbf_way.keys(tag))),
m_stringtable->s(static_cast<int>(pbf_way.vals(tag))));
}
}
m_buffer.commit();
}
}
void parse_relation_group(const OSMPBF::PrimitiveGroup& group) {
for (int i = 0; i < group.relations_size(); ++i) {
osmium::builder::RelationBuilder builder(m_buffer);
const OSMPBF::Relation& pbf_relation = group.relations(i);
parse_attributes(builder, pbf_relation);
if (pbf_relation.types_size() > 0) {
osmium::builder::RelationMemberListBuilder rml_builder(m_buffer, &builder);
int64_t ref = 0;
for (int n = 0; n < pbf_relation.types_size(); ++n) {
ref += pbf_relation.memids(n);
rml_builder.add_member(osmpbf_membertype_to_item_type(pbf_relation.types(n)), ref, m_stringtable->s(pbf_relation.roles_sid(n)));
}
}
if (pbf_relation.keys_size() > 0) {
osmium::builder::TagListBuilder tl_builder(m_buffer, &builder);
for (int tag = 0; tag < pbf_relation.keys_size(); ++tag) {
tl_builder.add_tag(m_stringtable->s(static_cast<int>(pbf_relation.keys(tag))),
m_stringtable->s(static_cast<int>(pbf_relation.vals(tag))));
}
}
m_buffer.commit();
}
}
int add_tags(const OSMPBF::DenseNodes& dense, int n, osmium::builder::NodeBuilder* builder) {
if (n >= dense.keys_vals_size()) {
return n;
}
if (dense.keys_vals(n) == 0) {
return n+1;
}
osmium::builder::TagListBuilder tl_builder(m_buffer, builder);
while (n < dense.keys_vals_size()) {
int tag_key_pos = dense.keys_vals(n++);
if (tag_key_pos == 0) {
break;
}
tl_builder.add_tag(m_stringtable->s(tag_key_pos),
m_stringtable->s(dense.keys_vals(n)));
++n;
}
return n;
}
void parse_dense_node_group(const OSMPBF::PrimitiveGroup& group) {
int64_t last_dense_id = 0;
int64_t last_dense_latitude = 0;
int64_t last_dense_longitude = 0;
int64_t last_dense_uid = 0;
int64_t last_dense_user_sid = 0;
int64_t last_dense_changeset = 0;
int64_t last_dense_timestamp = 0;
int last_dense_tag = 0;
const OSMPBF::DenseNodes& dense = group.dense();
for (int i = 0; i < dense.id_size(); ++i) {
bool visible = true;
last_dense_id += dense.id(i);
last_dense_latitude += dense.lat(i);
last_dense_longitude += dense.lon(i);
if (dense.has_denseinfo()) {
last_dense_changeset += dense.denseinfo().changeset(i);
last_dense_timestamp += dense.denseinfo().timestamp(i);
last_dense_uid += dense.denseinfo().uid(i);
last_dense_user_sid += dense.denseinfo().user_sid(i);
if (dense.denseinfo().visible_size() > 0) {
visible = dense.denseinfo().visible(i);
}
assert(last_dense_changeset >= 0);
assert(last_dense_timestamp >= 0);
assert(last_dense_uid >= -1);
assert(last_dense_user_sid >= 0);
}
osmium::builder::NodeBuilder builder(m_buffer);
osmium::Node& node = builder.object();
node.set_id(last_dense_id);
if (dense.has_denseinfo()) {
auto v = dense.denseinfo().version(i);
assert(v > 0);
node.set_version(static_cast<osmium::object_version_type>(v));
node.set_changeset(static_cast<osmium::changeset_id_type>(last_dense_changeset));
node.set_timestamp(last_dense_timestamp * m_date_factor);
node.set_uid_from_signed(static_cast<osmium::signed_user_id_type>(last_dense_uid));
node.set_visible(visible);
builder.add_user(m_stringtable->s(static_cast<int>(last_dense_user_sid)));
} else {
builder.add_user("", 1);
}
if (visible) {
builder.object().set_location(osmium::Location(
(last_dense_longitude * m_granularity + m_lon_offset) / (OSMPBF::lonlat_resolution / osmium::Location::coordinate_precision),
(last_dense_latitude * m_granularity + m_lat_offset) / (OSMPBF::lonlat_resolution / osmium::Location::coordinate_precision)));
}
last_dense_tag = add_tags(dense, last_dense_tag, &builder);
m_buffer.commit();
}
}
}; // class PBFPrimitiveBlockParser
/**
 * PBF blobs can optionally be packed with the zlib algorithm.
 * This function returns the raw data (if it was unpacked) or
 * the unpacked data (if it was packed).
 *
 * @param input_data Reference to input data.
 * @returns Unpacked data
 * @throws osmium::pbf_error If there was a problem parsing the PBF, the
 *         declared raw size of a zlib blob is out of range, the blob is
 *         lzma compressed (not implemented) or it contains no data.
 */
inline std::unique_ptr<const std::string> unpack_blob(const std::string& input_data) {
    OSMPBF::Blob pbf_blob;
    if (!pbf_blob.ParseFromString(input_data)) {
        throw osmium::pbf_error("failed to parse blob");
    }

    if (pbf_blob.has_raw()) {
        // Take over the string from the message instead of copying it.
        return std::unique_ptr<std::string>(pbf_blob.release_raw());
    } else if (pbf_blob.has_zlib_data()) {
        auto raw_size = pbf_blob.raw_size();
        // raw_size comes from the file, so it has to be checked in
        // release builds too, not only with assert().
        if (raw_size < 0 || raw_size > OSMPBF::max_uncompressed_blob_size) {
            throw osmium::pbf_error("illegal blob size");
        }
        return osmium::io::detail::zlib_uncompress(pbf_blob.zlib_data(), static_cast<unsigned long>(raw_size));
    } else if (pbf_blob.has_lzma_data()) {
        throw osmium::pbf_error("lzma blobs not implemented");
    } else {
        throw osmium::pbf_error("blob contains no data");
    }
}
/**
 * Decode a blob holding a HeaderBlock and turn it into a Header object.
 *
 * @param input_buffer Serialized Blob containing the HeaderBlock
 * @returns Header filled from the HeaderBlock
 * @throws osmium::pbf_error If the blob or the HeaderBlock can not be
 *         parsed or the file requires a feature that is not supported
 */
inline osmium::io::Header parse_header_blob(const std::string& input_buffer) {
    const std::unique_ptr<const std::string> unpacked = unpack_blob(input_buffer);

    OSMPBF::HeaderBlock block;
    if (!block.ParseFromString(*unpacked)) {
        throw osmium::pbf_error("failed to parse HeaderBlock");
    }

    osmium::io::Header header;

    // Required features: known ones are noted in the header (or simply
    // accepted), anything else makes the file unreadable for us.
    const int num_required = block.required_features_size();
    for (int n = 0; n != num_required; ++n) {
        const std::string& name = block.required_features(n);

        if (name == "DenseNodes") {
            header.set("pbf_dense_nodes", true);
        } else if (name == "HistoricalInformation") {
            header.set_has_multiple_object_versions(true);
        } else if (name != "OsmSchema-V0.6") {
            throw osmium::pbf_error(std::string("required feature not supported: ") + name);
        }
    }

    // Optional features are only recorded, keyed by their position.
    const int num_optional = block.optional_features_size();
    for (int n = 0; n != num_optional; ++n) {
        const std::string& name = block.optional_features(n);
        header.set("pbf_optional_feature_" + std::to_string(n), name);
    }

    if (block.has_writingprogram()) {
        header.set("generator", block.writingprogram());
    }

    if (block.has_bbox()) {
        const OSMPBF::HeaderBBox& bbox = block.bbox();
        // Factor between the resolution used in the PBF header and the
        // coordinate precision of osmium::Location.
        const int64_t divisor = OSMPBF::lonlat_resolution / osmium::Location::coordinate_precision;

        const osmium::Location bottom_left(bbox.left() / divisor, bbox.bottom() / divisor);
        const osmium::Location top_right(bbox.right() / divisor, bbox.top() / divisor);

        osmium::Box box;
        box.extend(bottom_left);
        box.extend(top_right);
        header.add_box(box);
    }

    if (block.has_osmosis_replication_timestamp()) {
        const osmium::Timestamp replication_time(block.osmosis_replication_timestamp());
        header.set("osmosis_replication_timestamp", replication_time.to_iso());
    }

    if (block.has_osmosis_replication_sequence_number()) {
        header.set("osmosis_replication_sequence_number", std::to_string(block.osmosis_replication_sequence_number()));
    }

    if (block.has_osmosis_replication_base_url()) {
        header.set("osmosis_replication_base_url", block.osmosis_replication_base_url());
    }

    return header;
}
/**
 * Functor that unpacks one data blob and parses the PrimitiveBlock in it.
 *
 * The blob is held through a shared_ptr, so copies of the parser share
 * the same input string instead of duplicating it.
 */
class DataBlobParser {

    std::shared_ptr<std::string> m_input_buffer;
    osmium::osm_entity_bits::type m_read_types;

public:

    /**
     * @param input_buffer Serialized Blob; its contents are moved into
     *        this object.
     * @param read_types Entity types that should be read.
     * @throws osmium::pbf_error If the blob is larger than
     *         OSMPBF::max_uncompressed_blob_size.
     */
    DataBlobParser(std::string&& input_buffer, osmium::osm_entity_bits::type read_types) :
        m_input_buffer(std::make_shared<std::string>(std::move(input_buffer))),
        m_read_types(read_types) {
        // input_buffer has been moved from at this point, so the size
        // has to be taken from the string we own now.
        if (m_input_buffer->size() > OSMPBF::max_uncompressed_blob_size) {
            throw osmium::pbf_error(std::string("invalid blob size: " + std::to_string(m_input_buffer->size())));
        }
    }

    DataBlobParser(const DataBlobParser&) = default;
    DataBlobParser& operator=(const DataBlobParser&) = default;

    DataBlobParser(DataBlobParser&&) = default;
    DataBlobParser& operator=(DataBlobParser&&) = default;

    ~DataBlobParser() = default;

    /**
     * Unpack the blob and parse the PrimitiveBlock it contains.
     *
     * @returns Buffer with the objects of the requested types.
     */
    osmium::memory::Buffer operator()() {
        const std::unique_ptr<const std::string> data = unpack_blob(*m_input_buffer);
        PBFPrimitiveBlockParser parser(*data, m_read_types);
        return parser();
    }

}; // class DataBlobParser
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_PRIMITIVE_BLOCK_PARSER_HPP
@@ -1,218 +0,0 @@
#ifndef OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP
#define OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include <osmpbf/osmpbf.h>
#include <osmium/util/cast.hpp>
namespace osmium {
namespace io {
namespace detail {
/**
 * StringTable management for PBF writer
 *
 * All strings are stored as indexes to rows in a StringTable. The StringTable contains
 * one row for each used string, so strings that are used multiple times need to be
 * stored only once. The StringTable is sorted by usage-count, so the most often used
 * string is stored at index 1.
 */
class StringTable {

public:

    /// type for string IDs (interim and final)
    typedef uint16_t string_id_type;

private:

    /**
     * this is the struct used to build the StringTable. It is stored as
     * the value-part in the strings-map.
     *
     * when a new string is added to the map, its count is set to 0 and
     * the interim_id is set to the current size of the map. This interim_id
     * is then stored into the pbf-objects.
     *
     * before the PrimitiveBlock is serialized, the map is sorted by count
     * and stored into the pbf-StringTable. Afterwards the interim-ids are
     * mapped to the "real" id in the StringTable.
     *
     * this way often used strings get lower ids in the StringTable. As the
     * protobuf-serializer stores numbers in variable bit-lengths, lower
     * IDs means less used space in the resulting file.
     */
    struct string_info {

        /// number of occurrences of this string
        uint16_t count;

        /// an intermediate-id
        string_id_type interim_id;

    }; // struct string_info

    /**
     * Interim StringTable, storing all strings that should be written to
     * the StringTable once the block is written to disk.
     */
    typedef std::map<std::string, string_info> string2string_info_type;
    string2string_info_type m_strings;

    /**
     * This vector is used to map the interim IDs to real StringTable IDs after
     * writing all strings to the StringTable.
     */
    typedef std::vector<string_id_type> interim_id2id_type;
    interim_id2id_type m_id2id_map;

    /// number of different strings recorded so far
    size_t m_size = 0;

public:

    StringTable() {
    }

    /**
     * Ordering used for sorting by count: entries with a higher count
     * compare as "less", so they come first.
     */
    friend bool operator<(const string_info& lhs, const string_info& rhs) {
        return lhs.count > rhs.count;
    }

    /**
     * record a string in the interim StringTable if it's missing, otherwise just increase its counter,
     * return the interim-id assigned to the string.
     */
    string_id_type record_string(const std::string& string) {
        // operator[] value-initializes a new entry, so an interim_id of 0
        // marks a string that has not been seen before.
        string_info& info = m_strings[string];
        if (info.interim_id == 0) {
            ++m_size;
            info.interim_id = static_cast_with_assert<string_id_type>(m_size);
        } else {
            info.count++;
        }
        return info.interim_id;
    }

    /**
     * Sort the interim StringTable and store it to the real protobuf StringTable.
     * while storing to the real table, this function fills the id2id_map with
     * pairs, mapping the interim-ids to final and real StringTable ids.
     *
     * Note that the m_strings table is a std::map and as such is sorted lexicographically.
     * When the transformation into the sortedby multimap is done, it gets sorted by
     * the count. The end result (at least with the glibc standard container/algorithm
     * implementation) is that the string table is sorted first by reverse count (ie descending)
     * and then by reverse lexicographic order.
     *
     * If sort is false the strings are written in the order of their
     * interim ids and the id2id_map is NOT filled.
     */
    void store_stringtable(OSMPBF::StringTable* st, bool sort) {
        // add empty StringTable entry at index 0
        // StringTable index 0 is reserved as delimiter in the densenodes key/value list
        // this line also ensures that there's always a valid StringTable
        st->add_s("");

        if (sort) {
            std::multimap<string_info, std::string> sortedbycount;

            m_id2id_map.resize(m_size+1);

            // Bind to the map's own value_type (the key is const there).
            // A reference to pair<std::string, string_info> would force
            // a temporary copy of every key.
            std::transform(m_strings.begin(), m_strings.end(),
                        std::inserter(sortedbycount, sortedbycount.begin()),
                        [](const string2string_info_type::value_type& p) {
                            return std::pair<string_info, std::string>(p.second, p.first);
                        });

            string_id_type n = 0;

            for (const auto& mapping : sortedbycount) {
                // add the string of the current item to the pbf StringTable
                st->add_s(mapping.second);

                // store the mapping from the interim-id to the real id
                m_id2id_map[mapping.first.interim_id] = ++n;
            }
        } else {
            // The const char* point into the keys of m_strings, which
            // stay alive for the duration of this function.
            std::vector<std::pair<string_id_type, const char*>> sortedbyid;
            sortedbyid.reserve(m_strings.size());

            for (const auto& p : m_strings) {
                sortedbyid.emplace_back(p.second.interim_id, p.first.c_str());
            }

            std::sort(sortedbyid.begin(), sortedbyid.end());
            for (const auto& mapping : sortedbyid) {
                st->add_s(mapping.second);
            }
        }
    }

    /**
     * Map from an interim ID to a real string ID.
     *
     * Only valid after store_stringtable() was called with sort == true,
     * because only that fills the mapping. The index is not range checked.
     */
    string_id_type map_string_id(const string_id_type interim_id) const {
        return m_id2id_map[interim_id];
    }

    /// Same as above for ids held in other integer types.
    template <typename T>
    string_id_type map_string_id(const T interim_id) const {
        return map_string_id(static_cast_with_assert<string_id_type>(interim_id));
    }

    /**
     * Clear the stringtable, preparing for the next block.
     */
    void clear() {
        m_strings.clear();
        m_id2id_map.clear();
        m_size = 0;
    }

}; // class StringTable
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PBF_STRINGTABLE_HPP
@@ -0,0 +1,170 @@
#ifndef OSMIUM_IO_DETAIL_PROTOBUF_TAGS_HPP
#define OSMIUM_IO_DETAIL_PROTOBUF_TAGS_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <protozero/pbf_types.hpp>
namespace osmium {
namespace io {
namespace detail {
// Tag numbers of the message fields, directly translated from
// https://github.com/scrosby/OSM-binary/blob/master/src/fileformat.proto
//
// The enumerator names appear to follow the pattern
// <rule>_<type>_<field name> of the corresponding line in the .proto file,
// the value is the tag number of that field.
namespace FileFormat {
// Fields of the Blob message.
enum class Blob : protozero::pbf_tag_type {
optional_bytes_raw = 1,
optional_int32_raw_size = 2,
optional_bytes_zlib_data = 3,
optional_bytes_lzma_data = 4
};
// Fields of the BlobHeader message.
enum class BlobHeader : protozero::pbf_tag_type {
required_string_type = 1,
optional_bytes_indexdata = 2,
required_int32_datasize = 3
};
} // namespace FileFormat
// Tag numbers of the message fields, directly translated from
// https://github.com/scrosby/OSM-binary/blob/master/src/osmformat.proto
//
// The enumerator names appear to follow the pattern
// <rule>_<type>_<field name> of the corresponding line in the .proto file
// ("packed" presumably marking packed repeated fields -- confirm against
// the .proto), the value is the tag number of that field.
namespace OSMFormat {
// Fields of the HeaderBlock message.
enum class HeaderBlock : protozero::pbf_tag_type {
optional_HeaderBBox_bbox = 1,
repeated_string_required_features = 4,
repeated_string_optional_features = 5,
optional_string_writingprogram = 16,
optional_string_source = 17,
optional_int64_osmosis_replication_timestamp = 32,
optional_int64_osmosis_replication_sequence_number = 33,
optional_string_osmosis_replication_base_url = 34
};
// Fields of the HeaderBBox message.
enum class HeaderBBox : protozero::pbf_tag_type {
required_sint64_left = 1,
required_sint64_right = 2,
required_sint64_top = 3,
required_sint64_bottom = 4
};
// Fields of the PrimitiveBlock message.
enum class PrimitiveBlock : protozero::pbf_tag_type {
required_StringTable_stringtable = 1,
repeated_PrimitiveGroup_primitivegroup = 2,
optional_int32_granularity = 17,
optional_int32_date_granularity = 18,
optional_int64_lat_offset = 19,
optional_int64_lon_offset = 20
};
// Fields of the PrimitiveGroup message. "unknown" (0) is not one of the
// values translated from a field; presumably it is a placeholder for
// "no group type seen yet" -- confirm against the users of this enum.
enum class PrimitiveGroup : protozero::pbf_tag_type {
unknown = 0,
repeated_Node_nodes = 1,
optional_DenseNodes_dense = 2,
repeated_Way_ways = 3,
repeated_Relation_relations = 4,
repeated_ChangeSet_changesets = 5
};
// Fields of the StringTable message.
enum class StringTable : protozero::pbf_tag_type {
repeated_bytes_s = 1
};
// Fields of the Info message (metadata of a single object).
enum class Info : protozero::pbf_tag_type {
optional_int32_version = 1,
optional_int64_timestamp = 2,
optional_int64_changeset = 3,
optional_int32_uid = 4,
optional_uint32_user_sid = 5,
optional_bool_visible = 6
};
// Fields of the DenseInfo message (metadata columns of dense nodes).
enum class DenseInfo : protozero::pbf_tag_type {
packed_int32_version = 1,
packed_sint64_timestamp = 2,
packed_sint64_changeset = 3,
packed_sint32_uid = 4,
packed_sint32_user_sid = 5,
packed_bool_visible = 6
};
// Fields of the Node message.
enum class Node : protozero::pbf_tag_type {
required_sint64_id = 1,
packed_uint32_keys = 2,
packed_uint32_vals = 3,
optional_Info_info = 4,
required_sint64_lat = 8,
required_sint64_lon = 9
};
// Fields of the DenseNodes message.
enum class DenseNodes : protozero::pbf_tag_type {
packed_sint64_id = 1,
optional_DenseInfo_denseinfo = 5,
packed_sint64_lat = 8,
packed_sint64_lon = 9,
packed_int32_keys_vals = 10
};
// Fields of the Way message.
enum class Way : protozero::pbf_tag_type {
required_int64_id = 1,
packed_uint32_keys = 2,
packed_uint32_vals = 3,
optional_Info_info = 4,
packed_sint64_refs = 8
};
// Fields of the Relation message.
enum class Relation : protozero::pbf_tag_type {
required_int64_id = 1,
packed_uint32_keys = 2,
packed_uint32_vals = 3,
optional_Info_info = 4,
packed_int32_roles_sid = 8,
packed_sint64_memids = 9,
packed_MemberType_types = 10
};
} // namespace OSMFormat
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_PROTOBUF_TAGS_HPP
@@ -122,7 +122,7 @@ namespace osmium {
* @throws std::system_error On error.
*/
inline void reliable_write(const int fd, const unsigned char* output_buffer, const size_t size) {
constexpr size_t max_write = 100 * 1024 * 1024; // Max 100 MByte per write
constexpr size_t max_write = 100L * 1024L * 1024L; // Max 100 MByte per write
size_t offset = 0;
do {
auto write_count = size - offset;
@@ -0,0 +1,250 @@
#ifndef OSMIUM_IO_DETAIL_STRING_TABLE_HPP
#define OSMIUM_IO_DETAIL_STRING_TABLE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <list>
#include <map>
#include <string>
namespace osmium {
namespace io {
namespace detail {
/**
 * class StringStore
 *
 * Storage of lots of strings (const char *). Memory is allocated in chunks.
 * If a string is added and there is no space in the current chunk, a new
 * chunk will be allocated. Strings added to the store must not be larger
 * than the chunk size.
 *
 * Iterating over the store yields the strings in the order in which they
 * were added.
 *
 * All memory is released when the destructor is called. There is no other way
 * to release all or part of the memory.
 *
 */
class StringStore {

    size_t m_chunk_size;

    // Chunks in allocation order: front() is the oldest chunk, back() is
    // the chunk currently being filled. Keeping this order is what makes
    // iteration return the strings in insertion order.
    std::list<std::string> m_chunks;

    void add_chunk() {
        m_chunks.emplace_back();
        m_chunks.back().reserve(m_chunk_size);
    }

public:

    /**
     * @param chunk_size Capacity reserved for each chunk, in bytes. No
     *        string (including its terminating NUL) may be longer.
     */
    StringStore(size_t chunk_size) :
        m_chunk_size(chunk_size),
        m_chunks() {
        add_chunk();
    }

    /**
     * Forget all strings. One (empty) chunk is kept for reuse.
     */
    void clear() noexcept {
        m_chunks.erase(std::next(m_chunks.begin()), m_chunks.end());
        m_chunks.front().clear();
    }

    /**
     * Add a null terminated string to the store. This will
     * automatically get more memory if we are out.
     * Returns a pointer to the copy of the string we have
     * allocated.
     */
    const char* add(const char* string) {
        const size_t len = std::strlen(string) + 1;

        assert(len <= m_chunk_size);

        size_t chunk_len = m_chunks.back().size();
        // Never let a chunk grow beyond its capacity: a reallocation
        // would invalidate the pointers handed out earlier.
        if (chunk_len + len > m_chunks.back().capacity()) {
            add_chunk();
            chunk_len = 0;
        }

        std::string& chunk = m_chunks.back();
        chunk.append(string);
        chunk.append(1, '\0');

        return chunk.c_str() + chunk_len;
    }

    /**
     * Forward iterator over all strings in the store. Dereferencing
     * yields a pointer to the null terminated string.
     */
    class const_iterator : public std::iterator<std::forward_iterator_tag, const char*> {

        typedef std::list<std::string>::const_iterator it_type;

        it_type m_it;
        // Not const, so that the iterator stays copy-assignable.
        it_type m_last;
        const char* m_pos;

    public:

        const_iterator(it_type it, it_type last) :
            m_it(it),
            m_last(last),
            m_pos(it == last ? nullptr : m_it->c_str()) {
        }

        const_iterator& operator++() {
            assert(m_it != m_last);
            auto last_pos = m_it->c_str() + m_it->size();
            // Skip to the NUL that ends the current string and then past it.
            while (m_pos != last_pos && *m_pos) ++m_pos;
            if (m_pos != last_pos) ++m_pos;
            // End of this chunk: continue with the next one, if any.
            if (m_pos == last_pos) {
                ++m_it;
                if (m_it != m_last) {
                    m_pos = m_it->c_str();
                } else {
                    m_pos = nullptr;
                }
            }
            return *this;
        }

        const_iterator operator++(int) {
            const_iterator tmp(*this);
            operator++();
            return tmp;
        }

        bool operator==(const const_iterator& rhs) const {
            return m_it == rhs.m_it && m_pos == rhs.m_pos;
        }

        bool operator!=(const const_iterator& rhs) const {
            return !(*this == rhs);
        }

        const char* operator*() const {
            assert(m_it != m_last);
            assert(m_pos != nullptr);
            return m_pos;
        }

    }; // class const_iterator

    const_iterator begin() const {
        // The first chunk is only empty if nothing was added at all.
        if (m_chunks.front().empty()) {
            return end();
        }
        return const_iterator(m_chunks.begin(), m_chunks.end());
    }

    const_iterator end() const {
        return const_iterator(m_chunks.end(), m_chunks.end());
    }

    // These functions get you some idea how much memory was
    // used.
    int get_chunk_size() const noexcept {
        return static_cast<int>(m_chunk_size);
    }

    int get_chunk_count() const noexcept {
        return static_cast<int>(m_chunks.size());
    }

    int get_used_bytes_in_last_chunk() const noexcept {
        return static_cast<int>(m_chunks.back().size());
    }

}; // class StringStore
/**
 * Strict weak ordering for C strings: compares the characters the
 * pointers refer to, not the pointer values.
 */
struct StrComp {

    bool operator()(const char* lhs, const char* rhs) const {
        const int order = std::strcmp(lhs, rhs);
        return order < 0;
    }

}; // struct StrComp
class StringTable {
StringStore m_strings;
std::map<const char*, size_t, StrComp> m_index;
size_t m_size;
public:
StringTable() :
m_strings(1024 * 1024),
m_index(),
m_size(0) {
m_strings.add("");
}
void clear() {
m_strings.clear();
m_index.clear();
m_size = 0;
m_strings.add("");
}
size_t size() const noexcept {
return m_size + 1;
}
size_t add(const char* s) {
auto f = m_index.find(s);
if (f != m_index.end()) {
return f->second;
}
const char* cs = m_strings.add(s);
m_index[cs] = ++m_size;
return m_size;
}
StringStore::const_iterator begin() const {
return m_strings.begin();
}
StringStore::const_iterator end() const {
return m_strings.end();
}
}; // class StringTable
} // namespace detail
} // namespace io
} // namespace osmium
#endif // OSMIUM_IO_DETAIL_STRING_TABLE_HPP
@@ -66,6 +66,7 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/osm/location.hpp>
#include <osmium/osm/object.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/osm/types_from_string.hpp>
#include <osmium/thread/queue.hpp>
#include <osmium/thread/util.hpp>
#include <osmium/util/cast.hpp>
@@ -191,6 +192,8 @@ namespace osmium {
std::atomic<bool>& m_done;
bool m_header_is_done;
/**
* A C++ wrapper for the Expat parser that makes sure no memory is leaked.
*/
@@ -246,16 +249,25 @@ namespace osmium {
T& m_data;
std::promise<T>& m_promise;
bool m_done;
public:
PromiseKeeper(T& data, std::promise<T>& promise) :
m_data(data),
m_promise(promise) {
m_promise(promise),
m_done(false) {
}
void fullfill_promise() {
if (!m_done) {
m_promise.set_value(m_data);
m_done = true;
}
}
~PromiseKeeper() {
m_promise.set_value(m_data);
fullfill_promise();
}
}; // class PromiseKeeper
@@ -279,7 +291,8 @@ namespace osmium {
m_queue(queue),
m_header_promise(header_promise),
m_read_types(read_types),
m_done(done) {
m_done(done),
m_header_is_done(false) {
}
/**
@@ -305,7 +318,8 @@ namespace osmium {
m_queue(other.m_queue),
m_header_promise(other.m_header_promise),
m_read_types(other.m_read_types),
m_done(other.m_done) {
m_done(other.m_done),
m_header_is_done(other.m_header_is_done) {
}
XMLParser(XMLParser&&) = default;
@@ -326,6 +340,9 @@ namespace osmium {
last = data.empty();
try {
parser(data, last);
if (m_header_is_done) {
promise_keeper.fullfill_promise();
}
} catch (ParserIsDone&) {
return true;
} catch (...) {
@@ -343,8 +360,7 @@ namespace osmium {
private:
const char* init_object(osmium::OSMObject& object, const XML_Char** attrs) {
static const char* empty = "";
const char* user = empty;
const char* user = "";
if (m_in_delete_section) {
object.set_visible(false);
@@ -371,8 +387,7 @@ namespace osmium {
}
void init_changeset(osmium::builder::ChangesetBuilder* builder, const XML_Char** attrs) {
static const char* empty = "";
const char* user = empty;
const char* user = "";
osmium::Changeset& new_changeset = builder->object();
osmium::Location min;
@@ -421,6 +436,7 @@ namespace osmium {
}
void header_is_done() {
m_header_is_done = true;
if (m_read_types == osmium::osm_entity_bits::nothing) {
throw ParserIsDone();
}
@@ -722,10 +738,15 @@ namespace osmium {
namespace {
// we want the register_input_format() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_xml_input = osmium::io::detail::InputFormatFactory::instance().register_input_format(osmium::io::file_format::xml,
[](const osmium::io::File& file, osmium::osm_entity_bits::type read_which_entities, osmium::thread::Queue<std::string>& input_queue) {
return new osmium::io::detail::XMLInputFormat(file, read_which_entities, input_queue);
});
#pragma GCC diagnostic pop
} // anonymous namespace
@@ -85,6 +85,9 @@ namespace osmium {
case '\'': out += "&apos;"; break;
case '<': out += "&lt;"; break;
case '>': out += "&gt;"; break;
case '\n': out += "&#xA;"; break;
case '\r': out += "&#xD;"; break;
case '\t': out += "&#x9;"; break;
default: out += *in; break;
}
}
@@ -126,6 +129,7 @@ namespace osmium {
operation m_last_op {operation::op_none};
const bool m_add_metadata;
const bool m_write_visible_flag;
const bool m_write_change_ops;
@@ -146,31 +150,33 @@ namespace osmium {
void write_meta(const osmium::OSMObject& object) {
oprintf(*m_out, " id=\"%" PRId64 "\"", object.id());
if (object.version()) {
oprintf(*m_out, " version=\"%d\"", object.version());
}
if (m_add_metadata) {
if (object.version()) {
oprintf(*m_out, " version=\"%d\"", object.version());
}
if (object.timestamp()) {
*m_out += " timestamp=\"";
*m_out += object.timestamp().to_iso();
*m_out += "\"";
}
if (object.timestamp()) {
*m_out += " timestamp=\"";
*m_out += object.timestamp().to_iso();
*m_out += "\"";
}
if (!object.user_is_anonymous()) {
oprintf(*m_out, " uid=\"%d\" user=\"", object.uid());
xml_string(*m_out, object.user());
*m_out += "\"";
}
if (!object.user_is_anonymous()) {
oprintf(*m_out, " uid=\"%d\" user=\"", object.uid());
xml_string(*m_out, object.user());
*m_out += "\"";
}
if (object.changeset()) {
oprintf(*m_out, " changeset=\"%d\"", object.changeset());
}
if (object.changeset()) {
oprintf(*m_out, " changeset=\"%d\"", object.changeset());
}
if (m_write_visible_flag) {
if (object.visible()) {
*m_out += " visible=\"true\"";
} else {
*m_out += " visible=\"false\"";
if (m_write_visible_flag) {
if (object.visible()) {
*m_out += " visible=\"true\"";
} else {
*m_out += " visible=\"false\"";
}
}
}
}
@@ -224,9 +230,10 @@ namespace osmium {
public:
explicit XMLOutputBlock(osmium::memory::Buffer&& buffer, bool write_visible_flag, bool write_change_ops) :
explicit XMLOutputBlock(osmium::memory::Buffer&& buffer, bool add_metadata, bool write_visible_flag, bool write_change_ops) :
m_input_buffer(std::make_shared<osmium::memory::Buffer>(std::move(buffer))),
m_out(std::make_shared<std::string>()),
m_add_metadata(add_metadata),
m_write_visible_flag(write_visible_flag && !write_change_ops),
m_write_change_ops(write_change_ops) {
}
@@ -392,12 +399,14 @@ namespace osmium {
class XMLOutputFormat : public osmium::io::detail::OutputFormat, public osmium::handler::Handler {
bool m_add_metadata;
bool m_write_visible_flag;
public:
XMLOutputFormat(const osmium::io::File& file, data_queue_type& output_queue) :
OutputFormat(file, output_queue),
m_add_metadata(file.get("add_metadata") != "false"),
m_write_visible_flag(file.has_multiple_object_versions() || m_file.is_true("force_visible_flag")) {
}
@@ -408,7 +417,7 @@ namespace osmium {
}
void write_buffer(osmium::memory::Buffer&& buffer) override final {
m_output_queue.push(osmium::thread::Pool::instance().submit(XMLOutputBlock{std::move(buffer), m_write_visible_flag, m_file.is_true("xml_change_format")}));
m_output_queue.push(osmium::thread::Pool::instance().submit(XMLOutputBlock{std::move(buffer), m_add_metadata, m_write_visible_flag, m_file.is_true("xml_change_format")}));
}
void write_header(const osmium::io::Header& header) override final {
@@ -468,10 +477,15 @@ namespace osmium {
namespace {
// we want the register_output_format() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_xml_output = osmium::io::detail::OutputFormatFactory::instance().register_output_format(osmium::io::file_format::xml,
[](const osmium::io::File& file, data_queue_type& output_queue) {
return new osmium::io::detail::XMLOutputFormat(file, output_queue);
});
#pragma GCC diagnostic pop
} // anonymous namespace
+8 -7
View File
@@ -85,23 +85,24 @@ namespace osmium {
*
* @param input Compressed input data.
* @param raw_size Size of uncompressed data.
* @returns Uncompressed data.
* @param output Uncompressed result data.
* @returns Pointer to and size of the uncompressed data.
*/
inline std::unique_ptr<std::string> zlib_uncompress(const std::string& input, unsigned long raw_size) {
auto output = std::unique_ptr<std::string>(new std::string(raw_size, '\0'));
inline std::pair<const char*, size_t> zlib_uncompress_string(const char* input, unsigned long input_size, unsigned long raw_size, std::string& output) {
output.resize(raw_size);
auto result = ::uncompress(
reinterpret_cast<unsigned char*>(const_cast<char *>(output->data())),
reinterpret_cast<unsigned char*>(&*output.begin()),
&raw_size,
reinterpret_cast<const unsigned char*>(input.data()),
osmium::static_cast_with_assert<unsigned long>(input.size())
reinterpret_cast<const unsigned char*>(input),
input_size
);
if (result != Z_OK) {
throw std::runtime_error(std::string("failed to uncompress data: ") + zError(result));
}
return output;
return std::make_pair(output.data(), output.size());
}
} // namespace detail
+25 -43
View File
@@ -97,7 +97,9 @@ namespace osmium {
* of the file will be taken from the suffix.
* An empty filename or "-" means stdin or stdout.
* @param format File format as string. See the description of the
* parse_format() function for details.
* parse_format() function for details. If this is
* empty the format will be deduced from the suffix
* of the filename.
*/
explicit File(const std::string& filename = "", const std::string& format = "") :
Options(),
@@ -107,20 +109,19 @@ namespace osmium {
m_format_string(format) {
// stdin/stdout
if (filename == "" || filename == "-") {
if (m_filename == "-") {
m_filename = "";
default_settings_for_stdinout();
}
// filename is actually a URL
// if filename is a URL, default to XML format
std::string protocol = m_filename.substr(0, m_filename.find_first_of(':'));
if (protocol == "http" || protocol == "https") {
default_settings_for_url();
m_file_format = file_format::xml;
}
detect_format_from_suffix(m_filename);
if (format != "") {
if (format.empty()) {
detect_format_from_suffix(m_filename);
} else {
parse_format(format);
}
}
@@ -140,9 +141,6 @@ namespace osmium {
m_buffer(buffer),
m_buffer_size(size),
m_format_string(format) {
default_settings_for_stdinout();
if (format != "") {
parse_format(format);
}
@@ -220,6 +218,20 @@ namespace osmium {
} else if (suffixes.back() == "opl") {
m_file_format = file_format::opl;
suffixes.pop_back();
} else if (suffixes.back() == "json") {
m_file_format = file_format::json;
suffixes.pop_back();
} else if (suffixes.back() == "o5m") {
m_file_format = file_format::o5m;
suffixes.pop_back();
} else if (suffixes.back() == "o5c") {
m_file_format = file_format::o5m;
m_has_multiple_object_versions = true;
set("o5c_change_format", true);
suffixes.pop_back();
} else if (suffixes.back() == "debug") {
m_file_format = file_format::debug;
suffixes.pop_back();
}
if (suffixes.empty()) return;
@@ -240,8 +252,8 @@ namespace osmium {
}
/**
* Check file format etc. for consistency and throw exception if there
* is a problem.
* Check file format etc. for consistency and throw exception if
* there is a problem.
*
* @throws std::runtime_error
*/
@@ -265,36 +277,6 @@ namespace osmium {
}
}
/**
* Set default settings for type and encoding when the filename is
* empty or "-". If you want to have a different default setting
* override this in a subclass.
*/
void default_settings_for_stdinout() {
m_file_format = file_format::unknown;
m_file_compression = file_compression::none;
}
/**
* Set default settings for type and encoding when the filename is
* a normal file. If you want to have a different default setting
* override this in a subclass.
*/
void default_settings_for_file() {
m_file_format = file_format::unknown;
m_file_compression = file_compression::none;
}
/**
* Set default settings for type and encoding when the filename is a URL.
* If you want to have a different default setting override this in a
* subclass.
*/
void default_settings_for_url() {
m_file_format = file_format::xml;
m_file_compression = file_compression::none;
}
file_format format() const noexcept {
return m_file_format;
}
+7 -1
View File
@@ -44,7 +44,9 @@ namespace osmium {
xml = 1,
pbf = 2,
opl = 3,
json = 4
json = 4,
o5m = 5,
debug = 6
};
// avoid g++ false positive
@@ -62,6 +64,10 @@ namespace osmium {
return "OPL";
case file_format::json:
return "JSON";
case file_format::o5m:
return "O5M";
case file_format::debug:
return "DEBUG";
}
}
#pragma GCC diagnostic pop
@@ -231,11 +231,16 @@ namespace osmium {
namespace {
// we want the register_compression() function to run, setting the variable
// is only a side-effect, it will never be used
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
const bool registered_gzip_compression = osmium::io::CompressionFactory::instance().register_compression(osmium::io::file_compression::gzip,
[](int fd) { return new osmium::io::GzipCompressor(fd); },
[](int fd) { return new osmium::io::GzipDecompressor(fd); },
[](const char* buffer, size_t size) { return new osmium::io::GzipBufferDecompressor(buffer, size); }
);
#pragma GCC diagnostic pop
} // anonymous namespace
-1
View File
@@ -39,7 +39,6 @@ DEALINGS IN THE SOFTWARE.
* Include this file if you want to read OSM PBF files.
*
* @attention If you include this file, you'll need to link with
* `libprotobuf-lite`, `libosmpbf`, `ws2_32` (Windows only),
* `libz`, and enable multithreading.
*/
@@ -39,7 +39,6 @@ DEALINGS IN THE SOFTWARE.
* Include this file if you want to write OSM PBF files.
*
* @attention If you include this file, you'll need to link with
* `libprotobuf-lite`, `libosmpbf`, `ws2_32` (Windows only),
* `libz`, and enable multithreading.
*/
+19 -2
View File
@@ -37,7 +37,6 @@ DEALINGS IN THE SOFTWARE.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <exception>
#include <functional>
#include <iterator>
#include <stdexcept>
@@ -83,7 +82,7 @@ namespace osmium {
* Buffers exist in two flavours, those with external memory management and
* those with internal memory management. If you already have some memory
* with data in it (for instance read from disk), you create a Buffer with
* external memory managment. It is your job then to free the memory once
* external memory management. It is your job then to free the memory once
* the buffer isn't used any more. If you don't have memory already, you can
* create a Buffer object and have it manage the memory internally. It will
* dynamically allocate memory and free it again after use.
@@ -413,6 +412,15 @@ namespace osmium {
return iterator(m_data, m_data + m_committed);
}
/**
* Get a typed iterator positioned at m_data + offset; the end of the
* iterated range is m_data + m_committed.
*
* The offset is used as given, no range check is done here.
*
* @tparam T Item type passed on to t_iterator.
* @param offset Offset added to the start of the buffer data.
*/
template <class T>
t_iterator<T> get_iterator(size_t offset) {
return t_iterator<T>(m_data + offset, m_data + m_committed);
}
/**
* Get an iterator positioned at m_data + offset; the end of the iterated
* range is m_data + m_committed.
*
* The offset is used as given, no range check is done here.
*
* @param offset Offset added to the start of the buffer data.
*/
iterator get_iterator(size_t offset) {
return iterator(m_data + offset, m_data + m_committed);
}
template <class T>
t_iterator<T> end() {
return t_iterator<T>(m_data + m_committed, m_data + m_committed);
@@ -431,6 +439,15 @@ namespace osmium {
return const_iterator(m_data, m_data + m_committed);
}
/**
* Get a typed const iterator positioned at m_data + offset; the end of
* the iterated range is m_data + m_committed.
*
* The offset is used as given, no range check is done here.
*
* @tparam T Item type passed on to t_const_iterator.
* @param offset Offset added to the start of the buffer data.
*/
template <class T>
t_const_iterator<T> get_iterator(size_t offset) const {
return t_const_iterator<T>(m_data + offset, m_data + m_committed);
}
/**
* Get a const iterator positioned at m_data + offset; the end of the
* iterated range is m_data + m_committed.
*
* The offset is used as given, no range check is done here.
*
* @param offset Offset added to the start of the buffer data.
*/
const_iterator get_iterator(size_t offset) const {
return const_iterator(m_data + offset, m_data + m_committed);
}
template <class T>
t_const_iterator<T> cend() const {
return t_const_iterator<T>(m_data + m_committed, m_data + m_committed);
@@ -38,7 +38,6 @@ DEALINGS IN THE SOFTWARE.
#include <type_traits>
#include <osmium/memory/item.hpp>
#include <osmium/util/compatibility.hpp>
namespace osmium {
-1
View File
@@ -33,7 +33,6 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstddef>
#include <cstdint>
#include <type_traits>
+1 -1
View File
@@ -33,7 +33,6 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <cstring>
#include <osmium/memory/collection.hpp>
@@ -44,6 +43,7 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/osm/tag.hpp>
#include <osmium/osm/timestamp.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/osm/types_from_string.hpp>
namespace osmium {
+223
View File
@@ -0,0 +1,223 @@
#ifndef OSMIUM_OSM_CRC_HPP
#define OSMIUM_OSM_CRC_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <osmium/osm/area.hpp>
#include <osmium/osm/changeset.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/node.hpp>
#include <osmium/osm/node_ref_list.hpp>
#include <osmium/osm/relation.hpp>
#include <osmium/osm/way.hpp>
#include <osmium/util/endian.hpp>
namespace osmium {
/**
 * Feeds OSM data into a checksum calculator of type TCRC.
 *
 * TCRC has to provide the member functions process_byte() and
 * process_bytes(); these are the only operations called on the wrapped
 * calculator. The calculator itself is accessible through operator().
 *
 * Multi-byte integers are handed to the calculator unchanged on
 * little-endian hosts and byte-swapped on all other hosts.
 */
template <class TCRC>
class CRC {

    /// Reverse the byte order of a 16 bit value.
    static inline uint16_t byte_swap_16(uint16_t value) noexcept {
# if defined(__GNUC__) || defined(__clang__)
        return __builtin_bswap16(value);
# else
        return (value >> 8) | (value << 8);
# endif
    }

    /// Reverse the byte order of a 32 bit value.
    static inline uint32_t byte_swap_32(uint32_t value) noexcept {
# if defined(__GNUC__) || defined(__clang__)
        return __builtin_bswap32(value);
# else
        return  (value >> 24) |
               ((value >>  8) & 0x0000FF00) |
               ((value <<  8) & 0x00FF0000) |
                (value << 24);
# endif
    }

    /// Reverse the byte order of a 64 bit value.
    static inline uint64_t byte_swap_64(uint64_t value) noexcept {
# if defined(__GNUC__) || defined(__clang__)
        return __builtin_bswap64(value);
# else
        // Swap both 32 bit halves separately, then exchange their
        // positions. The halves occupy disjoint bits, so they must be
        // combined with OR (an AND would always yield 0).
        uint64_t val1 = byte_swap_32(value & 0xFFFFFFFF);
        uint64_t val2 = byte_swap_32(value >> 32);
        return (val1 << 32) | val2;
# endif
    }

    TCRC m_crc;

public:

    /// Access the wrapped checksum calculator.
    TCRC& operator()() {
        return m_crc;
    }

    /// Access the wrapped checksum calculator (read-only).
    const TCRC& operator()() const {
        return m_crc;
    }

    /// Add a bool as a single byte.
    void update_bool(bool value) {
        m_crc.process_byte(value);
    }

    /// Add a single byte.
    void update_int8(uint8_t value) {
        m_crc.process_byte(value);
    }

    /// Add a 16 bit integer (byte-swapped first on non-little-endian hosts).
    void update_int16(uint16_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
        m_crc.process_bytes(&value, sizeof(uint16_t));
#else
        uint16_t v = byte_swap_16(value);
        m_crc.process_bytes(&v, sizeof(uint16_t));
#endif
    }

    /// Add a 32 bit integer (byte-swapped first on non-little-endian hosts).
    void update_int32(uint32_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
        m_crc.process_bytes(&value, sizeof(uint32_t));
#else
        uint32_t v = byte_swap_32(value);
        m_crc.process_bytes(&v, sizeof(uint32_t));
#endif
    }

    /// Add a 64 bit integer (byte-swapped first on non-little-endian hosts).
    void update_int64(uint64_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
        m_crc.process_bytes(&value, sizeof(uint64_t));
#else
        uint64_t v = byte_swap_64(value);
        m_crc.process_bytes(&v, sizeof(uint64_t));
#endif
    }

    /// Add all characters of a C string; the terminating NUL is not added.
    void update_string(const char* str) {
        while (*str) {
            m_crc.process_byte(*str++);
        }
    }

    /// Add a timestamp, converted to a 32 bit integer.
    void update(const Timestamp& timestamp) {
        update_int32(uint32_t(timestamp));
    }

    /// Add the x and y values of a location as two 32 bit integers.
    void update(const osmium::Location& location) {
        update_int32(location.x());
        update_int32(location.y());
    }

    /// Add a box: bottom-left location followed by top-right location.
    void update(const osmium::Box& box) {
        update(box.bottom_left());
        update(box.top_right());
    }

    /// Add the ref of a node reference as 64 bit integer.
    void update(const NodeRef& node_ref) {
        update_int64(node_ref.ref());
    }

    /// Add all node references of the list in iteration order.
    void update(const NodeRefList& node_refs) {
        for (const NodeRef& node_ref : node_refs) {
            update(node_ref);
        }
    }

    /// Add the raw bytes (data() / byte_size()) of a tag list.
    void update(const TagList& tags) {
        m_crc.process_bytes(tags.data(), tags.byte_size());
    }

    /// Add ref, type (as 16 bit integer) and role of a relation member.
    void update(const osmium::RelationMember& member) {
        update_int64(member.ref());
        update_int16(uint16_t(member.type()));
        update_string(member.role());
    }

    /// Add all members of the list in iteration order.
    void update(const osmium::RelationMemberList& members) {
        for (const RelationMember& member : members) {
            update(member);
        }
    }

    /**
     * Add the attributes shared by all OSM objects: id, visible flag,
     * version, timestamp, uid, user name and tags, in that order. The
     * changeset id is not added.
     */
    void update(const osmium::OSMObject& object) {
        update_int64(object.id());
        update_bool(object.visible());
        update_int32(object.version());
        update(object.timestamp());
        update_int32(object.uid());
        update_string(object.user());
        update(object.tags());
    }

    /// Add a node: common object attributes followed by its location.
    void update(const osmium::Node& node) {
        update(static_cast<const osmium::OSMObject&>(node));
        update(node.location());
    }

    /// Add a way: common object attributes followed by its node list.
    void update(const osmium::Way& way) {
        update(static_cast<const osmium::OSMObject&>(way));
        update(way.nodes());
    }

    /// Add a relation: common object attributes followed by its members.
    void update(const osmium::Relation& relation) {
        update(static_cast<const osmium::OSMObject&>(relation));
        update(relation.members());
    }

    /**
     * Add an area: common object attributes followed by the node lists
     * of all its outer and inner rings. Subitems of any other type are
     * skipped.
     */
    void update(const osmium::Area& area) {
        update(static_cast<const osmium::OSMObject&>(area));
        for (auto it = area.cbegin(); it != area.cend(); ++it) {
            if (it->type() == osmium::item_type::outer_ring ||
                it->type() == osmium::item_type::inner_ring) {
                update(static_cast<const osmium::NodeRefList&>(*it));
            }
        }
    }

    /**
     * Add a changeset: id, created_at, closed_at, bounds, number of
     * changes, uid and user name, in that order.
     */
    void update(const osmium::Changeset& changeset) {
        update_int64(changeset.id());
        update(changeset.created_at());
        update(changeset.closed_at());
        update(changeset.bounds());
        update_int32(changeset.num_changes());
        update_int32(changeset.uid());
        update_string(changeset.user());
    }

}; // class CRC
} // namespace osmium
#endif // OSMIUM_OSM_CRC_HPP
+28 -1
View File
@@ -112,8 +112,35 @@ namespace osmium {
return m_curr->timestamp();
}
/**
* Return the timestamp when the current version of the object is
* not valid any more, ie the time when the next version of the object
* is valid. If this is the last version of the object, this will
* return a special "end of time" timestamp that is guaranteed to
* be larger than any normal timestamp.
*/
const osmium::Timestamp end_time() const noexcept {
return last() ? osmium::Timestamp() : m_next->timestamp();
return last() ? osmium::end_of_time() : m_next->timestamp();
}
/**
* Check whether the current object version is valid at some point in
* the time range from "from" (inclusive) to "to" (not inclusive).
*
* This is a bit more complex than you'd think, because we have to
* handle the case properly where the start_time() == end_time(): for
* such a version the comparison of end_time() against "from" is
* inclusive (>=), for all other versions it is exclusive (>).
*
* @param from Start of the time range.
* @param to End of the time range; start_time() must be before this.
* @returns true if both conditions described above hold.
*/
bool is_between(const osmium::Timestamp& from, const osmium::Timestamp& to) const noexcept {
return start_time() < to &&
((start_time() != end_time() && end_time() > from) ||
(start_time() == end_time() && end_time() >= from));
}
/**
 * Check whether the current object version is the one in effect at
 * the given timestamp and is flagged as visible.
 *
 * @param timestamp Point in time to check.
 * @returns true if start_time() <= timestamp < end_time() and the
 *          current version is visible.
 */
bool is_visible_at(const osmium::Timestamp& timestamp) const noexcept {
    // This version did not exist yet at the given time.
    if (!(start_time() <= timestamp)) {
        return false;
    }
    // This version was already superseded at the given time.
    if (!(end_time() > timestamp)) {
        return false;
    }
    return m_curr->visible();
}
}; // class DiffObject
+1
View File
@@ -35,6 +35,7 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/memory/item.hpp>
#include <osmium/osm/entity_bits.hpp>
#include <osmium/osm/item_type.hpp>
namespace osmium {
+20
View File
@@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cstdint> // IWYU pragma: keep
#include <iosfwd>
#include <stdexcept>
@@ -56,6 +57,25 @@ namespace osmium {
}; // enum class item_type
/**
 * Map a zero-based index to the matching item_type:
 * 0 -> node, 1 -> way, 2 -> relation
 *
 * @param i Index, must not be larger than 2 (checked by assert).
 */
inline item_type nwr_index_to_item_type(unsigned int i) noexcept {
    assert(i <= 2);
    const unsigned int enum_value = i + 1;
    return static_cast<item_type>(enum_value);
}
/**
 * Map an item_type to its zero-based index:
 * node -> 0, way -> 1, relation -> 2
 *
 * @param type Item type whose numeric value must be in the range 1 to 3
 *             (checked by assert).
 */
inline unsigned int item_type_to_nwr_index(item_type type) noexcept {
    const auto enum_value = static_cast<unsigned int>(type);
    assert(enum_value >= 1 && enum_value <= 3);
    return enum_value - 1;
}
inline item_type char_to_item_type(const char c) noexcept {
switch (c) {
case 'X':
+1 -1
View File
@@ -33,11 +33,11 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <cstdlib>
#include <iosfwd>
#include <osmium/memory/item.hpp>
#include <osmium/osm/item_type.hpp>
#include <osmium/osm/location.hpp>
#include <osmium/osm/types.hpp>
+1
View File
@@ -48,6 +48,7 @@ DEALINGS IN THE SOFTWARE.
#include <osmium/osm/tag.hpp>
#include <osmium/osm/timestamp.hpp>
#include <osmium/osm/types.hpp>
#include <osmium/osm/types_from_string.hpp>
namespace osmium {
+5
View File
@@ -120,6 +120,11 @@ namespace osmium {
return static_cast<unsigned_object_id_type>(std::abs(m_ref));
}
/**
* Set the ref (id of the referenced object) of this member.
*
* @param ref New value for the ref.
* @returns Reference to this member, so that calls can be chained.
*/
RelationMember& set_ref(const osmium::object_id_type ref) noexcept {
m_ref = ref;
return *this;
}
item_type type() const noexcept {
return m_type;
}
+11 -1
View File
@@ -39,9 +39,9 @@ DEALINGS IN THE SOFTWARE.
#include <limits>
#include <stdexcept>
#include <string>
#include <time.h>
#include <osmium/util/compatibility.hpp>
#include <osmium/util/minmax.hpp> // IWYU pragma: keep
namespace osmium {
@@ -170,6 +170,16 @@ namespace osmium {
return out;
}
/**
* Specialization of min_op_start_value() for timestamps: returns
* end_of_time().
*
* NOTE(review): presumably this is the initial value of a running
* minimum, so that any real timestamp compares smaller - confirm
* against osmium/util/minmax.hpp.
*/
template <>
inline osmium::Timestamp min_op_start_value<osmium::Timestamp>() {
return end_of_time();
}
/**
* Specialization of max_op_start_value() for timestamps: returns
* start_of_time().
*
* NOTE(review): presumably this is the initial value of a running
* maximum, so that any real timestamp compares larger - confirm
* against osmium/util/minmax.hpp.
*/
template <>
inline osmium::Timestamp max_op_start_value<osmium::Timestamp>() {
return start_of_time();
}
} // namespace osmium
#endif // OSMIUM_OSM_TIMESTAMP_HPP
-21
View File
@@ -34,7 +34,6 @@ DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <cstdlib>
namespace osmium {
@@ -58,26 +57,6 @@ namespace osmium {
*/
typedef uint16_t string_size_type;
inline object_id_type string_to_object_id(const char* string) {
return std::atoll(string);
}
inline object_version_type string_to_object_version(const char* string) {
return static_cast<object_version_type>(std::atol(string));
}
inline changeset_id_type string_to_changeset_id(const char* string) {
return static_cast<changeset_id_type>(std::atol(string));
}
inline signed_user_id_type string_to_user_id(const char* string) {
return static_cast<signed_user_id_type>(std::atol(string));
}
inline num_changes_type string_to_num_changes(const char* string) {
return static_cast<num_changes_type>(std::atol(string));
}
} // namespace osmium
#endif // OSMIUM_OSM_TYPES_HPP
@@ -0,0 +1,116 @@
#ifndef OSMIUM_OSM_TYPES_FROM_STRING_HPP
#define OSMIUM_OSM_TYPES_FROM_STRING_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <string>
#include <utility>
#include <osmium/osm/entity_bits.hpp>
#include <osmium/osm/types.hpp>
namespace osmium {
/**
 * Convert a string into an object id.
 *
 * The whole string must be a decimal number as understood by
 * std::strtoll(); leading whitespace and trailing characters are not
 * allowed.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @returns The parsed id.
 * @throws std::range_error if the input is empty, starts with
 *         whitespace, has trailing characters, or if std::strtoll()
 *         returned the smallest or largest long long value.
 */
inline object_id_type string_to_object_id(const char* input) {
    assert(input);
    // The <cctype> functions have undefined behaviour for negative
    // arguments other than EOF, so plain char must go through
    // unsigned char first (input may contain bytes >= 0x80).
    if (*input != '\0' && !std::isspace(static_cast<unsigned char>(*input))) {
        char* end;
        auto id = std::strtoll(input, &end, 10);
        if (id != std::numeric_limits<long long>::min() && id != std::numeric_limits<long long>::max() && *end == '\0') {
            return id;
        }
    }
    throw std::range_error(std::string("illegal id: '") + input + "'");
}
/**
 * Convert a string into an item type and an object id.
 *
 * If the input starts with a digit, the whole input is parsed as id and
 * the returned type is item_type::undefined. Otherwise the first
 * character is converted with char_to_item_type(); if the resulting
 * type is contained in "types", the rest of the input is parsed as id.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @param types Entity types allowed as prefix, must not be "nothing".
 * @returns Pair of item type and id.
 * @throws std::range_error if the input is empty, if the prefix is not
 *         one of the allowed types, or if the id can not be parsed.
 */
inline std::pair<osmium::item_type, osmium::object_id_type> string_to_object_id(const char* input, osmium::osm_entity_bits::type types) {
    assert(input);
    assert(types != osmium::osm_entity_bits::nothing);
    if (*input != '\0') {
        // The <cctype> functions have undefined behaviour for negative
        // arguments other than EOF, so plain char must go through
        // unsigned char first (input may contain bytes >= 0x80).
        if (std::isdigit(static_cast<unsigned char>(*input))) {
            return std::make_pair(osmium::item_type::undefined, string_to_object_id(input));
        }
        osmium::item_type t = osmium::char_to_item_type(*input);
        if (osmium::osm_entity_bits::from_item_type(t) & types) {
            return std::make_pair(t, string_to_object_id(input+1));
        }
    }
    throw std::range_error(std::string("not a valid id: '") + input + "'");
}
namespace detail {

    /**
     * Parse a non-negative decimal number.
     *
     * The whole string must be a number as understood by
     * std::strtoul(); a leading minus sign, leading whitespace and
     * trailing characters are not allowed.
     *
     * @param input Zero-terminated input string.
     * @param name Description of the value, only used in the message
     *             of the exception.
     * @returns The parsed value, converted to long.
     * @throws std::range_error if the input is empty, starts with '-'
     *         or whitespace, has trailing characters, or if
     *         std::strtoul() returned the largest unsigned long value.
     */
    inline long string_to_ulong(const char* input, const char *name) {
        // The <cctype> functions have undefined behaviour for negative
        // arguments other than EOF, so plain char must go through
        // unsigned char first (input may contain bytes >= 0x80).
        if (*input != '\0' && *input != '-' && !std::isspace(static_cast<unsigned char>(*input))) {
            char* end;
            auto value = std::strtoul(input, &end, 10);
            if (value != std::numeric_limits<unsigned long>::max() && *end == '\0') {
                // The return type is kept as long for existing callers,
                // the conversion is spelled out to make it visible.
                return static_cast<long>(value);
            }
        }
        throw std::range_error(std::string("illegal ") + name + ": '" + input + "'");
    }

} // namespace detail
/**
 * Convert a string into an object version.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @throws std::range_error if detail::string_to_ulong() rejects the input.
 */
inline object_version_type string_to_object_version(const char* input) {
    assert(input);
    const long parsed = detail::string_to_ulong(input, "version");
    return static_cast<object_version_type>(parsed);
}
/**
 * Convert a string into a changeset id.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @throws std::range_error if detail::string_to_ulong() rejects the input.
 */
inline changeset_id_type string_to_changeset_id(const char* input) {
    assert(input);
    const long parsed = detail::string_to_ulong(input, "changeset");
    return static_cast<changeset_id_type>(parsed);
}
/**
 * Convert a string into a user id.
 *
 * The exact string "-1" is accepted and returned as -1; every other
 * input has to be a non-negative number.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @throws std::range_error if detail::string_to_ulong() rejects the input.
 */
inline signed_user_id_type string_to_user_id(const char* input) {
    assert(input);
    const bool is_minus_one = input[0] == '-' && input[1] == '1' && input[2] == '\0';
    if (!is_minus_one) {
        const long parsed = detail::string_to_ulong(input, "user id");
        return static_cast<signed_user_id_type>(parsed);
    }
    return -1;
}
/**
 * Convert a string into a number of changes.
 *
 * @param input Zero-terminated input string, must not be nullptr.
 * @throws std::range_error if detail::string_to_ulong() rejects the input.
 */
inline num_changes_type string_to_num_changes(const char* input) {
    assert(input);
    const long parsed = detail::string_to_ulong(input, "value for num changes");
    return static_cast<num_changes_type>(parsed);
}
} // namespace osmium
#endif // OSMIUM_OSM_TYPES_FROM_STRING_HPP
@@ -512,7 +512,7 @@ namespace osmium {
double percent = static_cast<double>(size_before - size_after);
percent /= size_before;
percent *= 100;
std::cerr << "PURGE (size before=" << size_before << " after=" << size_after << " purged=" << (size_before - size_after) << " / " << static_cast<int>(percent) << "%)\n";
// std::cerr << "PURGE (size before=" << size_before << " after=" << size_after << " purged=" << (size_before - size_after) << " / " << static_cast<int>(percent) << "%)\n";
m_count_complete = 0;
}
}
+1
View File
@@ -149,6 +149,7 @@ namespace osmium {
~Pool() {
m_done = true;
m_work_queue.shutdown();
}
size_t queue_size() const {
+21 -10
View File
@@ -41,9 +41,7 @@ DEALINGS IN THE SOFTWARE.
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <osmium/util/compatibility.hpp>
#include <utility> // IWYU pragma: keep (for std::move)
namespace osmium {
@@ -71,6 +69,8 @@ namespace osmium {
/// Used to signal readers when data is available in the queue.
std::condition_variable m_data_available;
std::atomic<bool> m_done;
#ifdef OSMIUM_DEBUG_QUEUE_SIZE
/// The largest size the queue has been so far.
size_t m_largest_size;
@@ -94,7 +94,8 @@ namespace osmium {
m_name(name),
m_mutex(),
m_queue(),
m_data_available()
m_data_available(),
m_done(false)
#ifdef OSMIUM_DEBUG_QUEUE_SIZE
,
m_largest_size(0),
@@ -104,6 +105,7 @@ namespace osmium {
}
~Queue() {
shutdown();
#ifdef OSMIUM_DEBUG_QUEUE_SIZE
std::cerr << "queue '" << m_name << "' with max_size=" << m_max_size << " had largest size " << m_largest_size << " and was full " << m_full_counter << " times\n";
#endif
@@ -132,24 +134,33 @@ namespace osmium {
m_data_available.notify_one();
}
void shutdown() {
m_done = true;
m_data_available.notify_all();
}
void wait_and_pop(T& value) {
std::unique_lock<std::mutex> lock(m_mutex);
m_data_available.wait(lock, [this] {
return !m_queue.empty();
return !m_queue.empty() || m_done;
});
value = std::move(m_queue.front());
m_queue.pop();
if (!m_queue.empty()) {
value = std::move(m_queue.front());
m_queue.pop();
}
}
void wait_and_pop_with_timeout(T& value) {
std::unique_lock<std::mutex> lock(m_mutex);
if (!m_data_available.wait_for(lock, std::chrono::seconds(1), [this] {
return !m_queue.empty();
return !m_queue.empty() || m_done;
})) {
return;
}
value = std::move(m_queue.front());
m_queue.pop();
if (!m_queue.empty()) {
value = std::move(m_queue.front());
m_queue.pop();
}
}
bool try_pop(T& value) {
+1 -1
View File
@@ -58,7 +58,7 @@ namespace osmium {
/**
* Wait until the given future becomes ready. Will block if the future
* is not ready. Can be called more than once unless future.get().
* is not ready. Can be called more than once unlike future.get().
*/
template <class T>
inline void wait_until_done(std::future<T>& future) {
+194
View File
@@ -0,0 +1,194 @@
#ifndef OSMIUM_UTIL_DATA_FILE_HPP
#define OSMIUM_UTIL_DATA_FILE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <system_error>
#ifdef _WIN32
# include <io.h>
# include <windows.h>
#endif
#include <osmium/util/file.hpp>
namespace osmium {
namespace util {
/**
* Class wrapper for convenient access to some low-level file
* functions.
*/
class DataFile {

    /// Underlying stdio stream, nullptr if the file is not open.
    FILE* m_file;

public:

    /**
     * Create and open a temporary file using tmpfile(3).
     *
     * @throws std::system_error if something went wrong.
     */
    DataFile() :
        m_file(::tmpfile()) {
        if (!m_file) {
            throw std::system_error(errno, std::system_category(), "tmpfile failed");
        }
    }

    /**
     * Create and open a temporary file using tmpfile(3) and grow it
     * to the specified size.
     *
     * @param size Size the file should have at least.
     * @throws std::system_error if something went wrong.
     */
    explicit DataFile(size_t size) :
        DataFile() {
        grow(size);
    }

    /**
     * Create and open a named file.
     *
     * The file is opened in mode "wb+" if writable is true and in
     * mode "rb" otherwise.
     *
     * @param filename the name of the file
     * @param writable should the file be writable?
     * @throws std::system_error if something went wrong.
     */
    DataFile(const char* filename, bool writable) :
        m_file(::fopen(filename, writable ? "wb+" : "rb" )) {
        if (!m_file) {
            throw std::system_error(errno, std::system_category(), "fopen failed");
        }
    }

    /**
     * Create and open a named file.
     *
     * @param filename the name of the file
     * @param writable should the file be writable?
     * @throws std::system_error if something went wrong.
     */
    DataFile(const std::string& filename, bool writable) :
        DataFile(filename.c_str(), writable) {
    }

    // This class owns the FILE*. A copy would share the pointer and
    // both objects would call fclose() on it, so copying is disabled.
    DataFile(const DataFile&) = delete;
    DataFile& operator=(const DataFile&) = delete;

    /**
     * Take over the open file from another DataFile, which is left
     * in the closed state.
     */
    DataFile(DataFile&& other) noexcept :
        m_file(other.m_file) {
        other.m_file = nullptr;
    }

    /**
     * Close the file currently held (errors are ignored, as in the
     * destructor) and take over the open file from another DataFile,
     * which is left in the closed state.
     */
    DataFile& operator=(DataFile&& other) noexcept {
        if (this != &other) {
            try {
                close();
            } catch (const std::system_error&) {
                // ignore
            }
            m_file = other.m_file;
            other.m_file = nullptr;
        }
        return *this;
    }

    /**
     * In boolean context the DataFile class returns true if the file
     * is open.
     */
    operator bool() const noexcept {
        return m_file != nullptr;
    }

    /**
     * Close the file.
     *
     * Does nothing if the file is already closed. The file counts as
     * closed afterwards even if an exception is thrown.
     *
     * @throws std::system_error if file could not be closed
     */
    void close() {
        if (m_file) {
            // The stream must not be used any more after fclose(),
            // whether it succeeded or not. Forget the pointer first,
            // so that a failed close is never followed by a second
            // fclose() on the same stream (e.g. from the destructor).
            FILE* const file = m_file;
            m_file = nullptr;
            if (::fclose(file) != 0) {
                throw std::system_error(errno, std::system_category(), "fclose failed");
            }
        }
    }

    /// Close the file, errors are ignored.
    ~DataFile() noexcept {
        try {
            close();
        } catch (const std::system_error&) {
            // ignore
        }
    }

    /**
     * Get file descriptor of underlying file.
     *
     * @throws std::runtime_error if file is not open
     * @throws std::system_error if fileno(3) call failed
     */
    int fd() const {
        if (!m_file) {
            throw std::runtime_error("no open file");
        }

        int fd = ::fileno(m_file);

        if (fd == -1) {
            throw std::system_error(errno, std::system_category(), "fileno failed");
        }

        return fd;
    }

    /**
     * Ask the operating system for the size of this file.
     *
     * @throws std::system_error if fstat(2) call failed
     */
    size_t size() const {
        return osmium::util::file_size(fd());
    }

    /**
     * Grow file to given size.
     *
     * If the file is large enough already, nothing is done.
     * The file is never shrunk.
     *
     * @throws std::system_error if ftruncate(2) call failed
     */
    void grow(size_t new_size) const {
        if (size() < new_size) {
            osmium::util::resize_file(fd(), new_size);
        }
    }

}; // class DataFile
} // namespace util
} // namespace osmium
#endif // OSMIUM_UTIL_DATA_FILE_HPP
+147
View File
@@ -0,0 +1,147 @@
#ifndef OSMIUM_UTIL_DELTA_HPP
#define OSMIUM_UTIL_DELTA_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <iterator>
#include <type_traits>
#include <utility>
namespace osmium {
namespace util {
/**
* Helper class for delta encoding.
*/
template <typename T>
class DeltaEncode {

    /// Reference value: the start value or the last value given to update().
    T m_value;

public:

    /// Start encoding relative to the given value (0 by default).
    DeltaEncode(T value = 0) :
        m_value(value) {
    }

    /// Reset the reference value to 0.
    void clear() {
        m_value = 0;
    }

    /**
     * Make new_value the reference value.
     *
     * @param new_value The next value of the sequence.
     * @returns new_value minus the previous reference value.
     */
    T update(T new_value) {
        const T previous = m_value;
        m_value = new_value;
        return m_value - previous;
    }

}; // class DeltaEncode
/**
* Helper class for delta decoding.
*/
template <typename T>
class DeltaDecode {

    /// Running sum of all deltas seen since construction or clear().
    T m_value;

public:

    /// Start decoding with a running value of 0.
    DeltaDecode() :
        m_value(0) {
    }

    /// Reset the running value to 0.
    void clear() {
        m_value = 0;
    }

    /**
     * Add a delta to the running value.
     *
     * @param delta Difference to the previous value.
     * @returns the updated running value.
     */
    T update(T delta) {
        return m_value += delta;
    }

}; // class DeltaDecode
/**
 * Input iterator adapter that yields delta encoded values.
 *
 * The transform functor is called with the base iterator itself (not with
 * the dereferenced element) and must return the value to be encoded.
 * Dereferencing this iterator returns the difference between that value
 * and the value of the previous position; the first delta is the first
 * value itself.
 *
 * @tparam TBaseIterator Type of the underlying iterator.
 * @tparam TTransform Callable taking a TBaseIterator and returning a value.
 * @tparam TValue Type of the values and deltas.
 */
template <typename TBaseIterator, typename TTransform, typename TValue>
class DeltaEncodeIterator : public std::iterator<std::input_iterator_tag, TValue> {

    TBaseIterator m_it;
    TBaseIterator m_end;
    TValue m_delta;
    DeltaEncode<TValue> m_value;
    TTransform m_trans;

public:

    // Public so that the typedef does not hide the (public) one inherited
    // from std::iterator behind a private name.
    typedef TValue value_type;

    /**
     * @param first Start of the underlying range.
     * @param last End of the underlying range.
     * @param trans Functor used to get the value for an iterator position.
     */
    DeltaEncodeIterator(TBaseIterator first, TBaseIterator last, TTransform& trans) :
        m_it(first),
        m_end(last),
        // Members are initialized in declaration order, so m_trans is not
        // usable yet: call the constructor argument instead. An empty
        // range must not be handed to the transform at all.
        m_delta(m_it != m_end ? static_cast<TValue>(trans(m_it)) : TValue(0)),
        m_value(m_delta),
        m_trans(trans) {
    }

    /// Advance to the next element. Does nothing at the end of the range.
    DeltaEncodeIterator& operator++() {
        // Only compute a new delta if the new position is a real element,
        // the end iterator must never be passed to the transform.
        if (m_it != m_end && ++m_it != m_end) {
            m_delta = m_value.update(m_trans(m_it));
        }
        return *this;
    }

    DeltaEncodeIterator operator++(int) {
        DeltaEncodeIterator tmp(*this);
        operator++();
        return tmp;
    }

    /// The delta for the current position.
    value_type operator*() const {
        return m_delta;
    }

    bool operator==(const DeltaEncodeIterator& rhs) const {
        return m_it == rhs.m_it && m_end == rhs.m_end;
    }

    bool operator!=(const DeltaEncodeIterator& rhs) const {
        return !(*this == rhs);
    }

}; // class DeltaEncodeIterator
} // namespace util
} // namespace osmium
#endif // OSMIUM_UTIL_DELTA_HPP
+45
View File
@@ -0,0 +1,45 @@
#ifndef OSMIUM_UTIL_ENDIAN_HPP
#define OSMIUM_UTIL_ENDIAN_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
// Windows is only available for little endian architectures
// http://stackoverflow.com/questions/6449468/can-i-safely-assume-that-windows-installations-will-always-be-little-endian
#if !defined(_WIN32) && !defined(__APPLE__)
# include <endian.h>
#else
# define __LITTLE_ENDIAN 1234
# define __BYTE_ORDER __LITTLE_ENDIAN
#endif
#endif // OSMIUM_UTIL_ENDIAN_HPP
+119
View File
@@ -0,0 +1,119 @@
#ifndef OSMIUM_UTIL_FILE_HPP
#define OSMIUM_UTIL_FILE_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <system_error>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef _WIN32
# include <io.h>
# include <windows.h>
#endif
#ifndef _MSC_VER
# include <unistd.h>
#else
// https://msdn.microsoft.com/en-us/library/whx354w1.aspx
# define ftruncate _chsize_s
#endif
namespace osmium {
namespace util {
/**
* Get file size.
* This is a small wrapper around a system call.
*
* @param fd File descriptor
* @returns file size
* @throws std::system_error If system call failed
*/
inline size_t file_size(int fd) {
#ifdef _MSC_VER
    // Windows implementation
    // https://msdn.microsoft.com/en-us/library/dfbc2kec.aspx
    const auto length = ::_filelengthi64(fd);
    if (length == -1L) {
        throw std::system_error(errno, std::system_category(), "_filelengthi64 failed");
    }
    return static_cast<size_t>(length);
#else
    // Unix implementation
    struct stat file_info;
    const int rc = ::fstat(fd, &file_info);
    if (rc != 0) {
        throw std::system_error(errno, std::system_category(), "fstat failed");
    }
    return static_cast<size_t>(file_info.st_size);
#endif
}
/**
* Resize file.
* Small wrapper around ftruncate(2) system call.
*
* @param fd File descriptor
* @param new_size New size
* @throws std::system_error If ftruncate(2) call failed
*/
inline void resize_file(int fd, size_t new_size) {
    // With MSVC "ftruncate" is a macro for _chsize_s (see top of file).
    const int rc = ::ftruncate(fd, new_size);
    if (rc != 0) {
        throw std::system_error(errno, std::system_category(), "ftruncate failed");
    }
}
/**
* Get the page size for this system.
*/
inline size_t get_pagesize() {
#ifdef _WIN32
    // Windows implementation
    SYSTEM_INFO system_info;
    GetSystemInfo(&system_info);
    return system_info.dwPageSize;
#else
    // Unix implementation
    const long page_size = ::sysconf(_SC_PAGESIZE);
    return static_cast<size_t>(page_size);
#endif
}
} // namespace util
} // namespace osmium
#endif // OSMIUM_UTIL_FILE_HPP
@@ -0,0 +1,750 @@
#ifndef OSMIUM_UTIL_MEMORY_MAPPING_HPP
#define OSMIUM_UTIL_MEMORY_MAPPING_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cerrno>
#include <stdexcept>
#include <system_error>
#include <osmium/util/file.hpp>
#ifndef _WIN32
# include <sys/mman.h>
#else
# include <io.h>
# include <windows.h>
# include <sys/types.h>
#endif
namespace osmium {
namespace util {
/**
* Class for wrapping memory mapping system calls.
*
* Usage for anonymous mapping:
* @code
* MemoryMapping mapping(1024); // create anonymous mapping with size
* auto ptr = mapping.get_addr<char>(); // get pointer (char*) to memory
* mapping.unmap(); // release mapping by calling unmap() (or at end of scope)
* @endcode
*
* Or for file-backed mapping:
* @code
* int fd = ::open(...);
* {
* MemoryMapping mapping(1024, MemoryMapping::mapping_mode::write_shared, fd, offset);
* // use mapping
* }
* ::close(fd);
* @endcode
*
* If the file backing a file-backed mapping is not large enough, it
* will be resized. This works, of course, only for writable files,
* so for read-only files you have to make sure they are large enough
* for any mapping you want.
*
* If you ask for a zero-sized mapping, a mapping of the system's page
* size will be created instead. For file-backed mapping this will only
* work if the file is writable.
*
* There are different implementations for Unix and Windows systems.
* On Unix systems this wraps the mmap(), munmap(), and the mremap()
* system calls. On Windows it wraps the CreateFileMapping(),
* CloseHandle(), MapViewOfFile(), and UnmapViewOfFile() functions.
*/
class MemoryMapping {

public:

    /// How the mapped memory may be accessed.
    enum class mapping_mode {
        readonly      = 0,
        write_private = 1,
        write_shared  = 2
    };

private:

    /// The size of the mapping
    size_t m_size;

    /// Offset into the file
    off_t m_offset;

    /// File handle we got the mapping from
    int m_fd;

    /// Mapping mode
    mapping_mode m_mapping_mode;

#ifdef _WIN32
    /// Handle of the file mapping object (Windows only)
    HANDLE m_handle;
#endif

    /// The address where the memory is mapped
    void* m_addr;

    bool is_valid() const noexcept;

    void make_invalid() noexcept;

#ifdef _WIN32
    typedef DWORD flag_type;
#else
    typedef int flag_type;
#endif

    flag_type get_protection() const noexcept;

    flag_type get_flags() const noexcept;

    // A zero-sized mapping is not allowed by the operating system.
    // So if the user asks for a mapping of size 0, we map a full
    // page instead. This way we don't have a special case in the rest
    // of the code.
    static size_t initial_size(size_t size) {
        if (size == 0) {
            return osmium::util::get_pagesize();
        }
        return size;
    }

#ifdef _WIN32
    HANDLE get_handle() const noexcept;
    // Declared without class qualification: a qualified name is not
    // allowed in a member declaration inside the class itself.
    HANDLE create_file_mapping() const noexcept;
    void* map_view_of_file() const noexcept;
#endif

    // Make sure the file behind fd can hold m_offset + m_size bytes.
    // Reads m_size and m_offset, so both must be set before this is
    // called. Returns fd unchanged so it can be used in an initializer.
    int resize_fd(int fd) {
        // Anonymous mapping doesn't need resizing.
        if (fd == -1) {
            return -1;
        }

        // Make sure the file backing this mapping is large enough.
        if (osmium::util::file_size(fd) < m_size + m_offset) {
            osmium::util::resize_file(fd, m_size + m_offset);
        }
        return fd;
    }

public:

    /**
     * Create memory mapping of given size.
     *
     * If fd is not set (or fd == -1), an anonymous mapping will be
     * created, otherwise a mapping based on the file descriptor will
     * be created.
     *
     * @pre size > 0 or mode == write_shared or write_private
     *
     * @param size Size of the mapping in bytes
     * @param mode Mapping mode: readonly, or writable (shared or private)
     * @param fd Open file descriptor of a file we want to map
     * @param offset Offset into the file where the mapping should start
     * @throws std::system_error if the mapping fails
     */
    MemoryMapping(size_t size, mapping_mode mode, int fd=-1, off_t offset=0);

    /// DEPRECATED: For backwards compatibility
    MemoryMapping(size_t size, bool writable=true, int fd=-1, off_t offset=0) :
        MemoryMapping(size, writable ? mapping_mode::write_shared : mapping_mode::readonly, fd, offset) {
    }

    /// You can not copy construct a MemoryMapping.
    MemoryMapping(const MemoryMapping&) = delete;

    /// You can not copy a MemoryMapping.
    MemoryMapping& operator=(const MemoryMapping&) = delete;

    /**
     * Move construct a mapping from another one. The other mapping
     * will be marked as invalid.
     */
    MemoryMapping(MemoryMapping&& other);

    /**
     * Move a mapping. The other mapping will be marked as invalid.
     */
    MemoryMapping& operator=(MemoryMapping&& other);

    /**
     * Releases the mapping by calling unmap(). Will never throw.
     * Call unmap() instead if you want to be notified of any error.
     */
    ~MemoryMapping() noexcept {
        try {
            unmap();
        } catch (std::system_error&) {
            // ignore
        }
    }

    /**
     * Unmap a mapping. If the mapping is not valid, it will do
     * nothing.
     *
     * @throws std::system_error if the unmapping fails
     */
    void unmap();

    /**
     * Resize a mapping to the given new size.
     *
     * On Linux systems this will use the mremap() function. On other
     * systems it will unmap and remap the memory. This can only be
     * done for file-based mappings, not anonymous mappings!
     *
     * @param new_size Number of bytes to resize to
     * @throws std::system_error if the remapping fails
     */
    void resize(size_t new_size);

    /**
     * In a boolean context a MemoryMapping is true when it is a valid
     * existing mapping.
     */
    operator bool() const noexcept {
        return is_valid();
    }

    /**
     * The number of bytes mapped. This is the same size you created
     * the mapping with. The actual mapping will probably be larger
     * because the system will round it to the page size.
     */
    size_t size() const noexcept {
        return m_size;
    }

    /**
     * The file descriptor this mapping was created from.
     *
     * @returns file descriptor, -1 for anonymous mappings
     */
    int fd() const noexcept {
        return m_fd;
    }

    /**
     * Was this mapping created as a writable mapping?
     */
    bool writable() const noexcept {
        return m_mapping_mode != mapping_mode::readonly;
    }

    /**
     * Get the address of the mapping as any pointer type you like.
     *
     * @tparam T Pointee type, the returned pointer has type T*
     * @throws std::runtime_error if the mapping is invalid
     */
    template <typename T = void>
    T* get_addr() const {
        if (is_valid()) {
            return reinterpret_cast<T*>(m_addr);
        }
        throw std::runtime_error("invalid memory mapping");
    }

}; // class MemoryMapping
/**
* Anonymous memory mapping.
*
* Usage for anonymous mapping:
* @code
* AnonymousMemoryMapping mapping(1024); // create anonymous mapping with size
* auto ptr = mapping.get_addr<char>(); // get pointer (char*) to memory
* mapping.unmap(); // release mapping by calling unmap() (or at end of scope)
* @endcode
*/
class AnonymousMemoryMapping : public MemoryMapping {
public:
/**
* Create an anonymous mapping. No file descriptor is passed on, so the
* MemoryMapping default (fd == -1) is used, and the mode is always
* write_private.
*
* @param size Size of the mapping in bytes
*/
AnonymousMemoryMapping(size_t size) :
MemoryMapping(size, mapping_mode::write_private) {
}
#ifndef __linux__
/**
* On systems other than Linux anonymous mappings can not be
* resized!
*/
void resize(size_t) = delete;
#endif
}; // class AnonymousMemoryMapping
/**
* A thin wrapper around the MemoryMapping class used when all the
* data in the mapped memory is of the same type. Instead of thinking
* about the number of bytes mapped, this counts sizes in the number
* of objects of that type.
*
* Note that no effort is made to actually initialize the objects in
* this memory. This has to be done by the caller!
*/
template <typename T>
class TypedMemoryMapping {
/// The untyped mapping all calls are forwarded to (sizes in bytes).
MemoryMapping m_mapping;
public:
/**
* Create anonymous typed memory mapping of given size.
*
* @param size Number of objects of type T to be mapped
* @throws std::system_error if the mapping fails
*/
TypedMemoryMapping(size_t size) :
m_mapping(sizeof(T) * size, MemoryMapping::mapping_mode::write_private) {
}
/**
* Create file-backed memory mapping of given size. The file must
* contain at least `sizeof(T) * size` bytes!
*
* @param size Number of objects of type T to be mapped
* @param mode Mapping mode: readonly, or writable (shared or private)
* @param fd Open file descriptor of a file we want to map
* @param offset Offset into the file where the mapping should start,
*               counted in objects of type T (it is multiplied by
*               sizeof(T) to get the byte offset)
* @throws std::system_error if the mapping fails
*/
TypedMemoryMapping(size_t size, MemoryMapping::mapping_mode mode, int fd, off_t offset = 0) :
m_mapping(sizeof(T) * size, mode, fd, sizeof(T) * offset) {
}
/// DEPRECATED: For backwards compatibility
/// (writable == true maps to write_shared, false to readonly)
TypedMemoryMapping(size_t size, bool writable, int fd, off_t offset = 0) :
m_mapping(sizeof(T) * size, writable ? MemoryMapping::mapping_mode::write_shared : MemoryMapping::mapping_mode::readonly, fd, sizeof(T) * offset) {
}
/// You can not copy construct a TypedMemoryMapping.
TypedMemoryMapping(const TypedMemoryMapping&) = delete;
/// You can not copy a TypedMemoryMapping.
TypedMemoryMapping& operator=(const TypedMemoryMapping&) = delete;
/**
* Move construct a mapping from another one. The other mapping
* will be marked as invalid.
*/
TypedMemoryMapping(TypedMemoryMapping&& other) = default;
/**
* Move a mapping. The other mapping will be marked as invalid.
*/
TypedMemoryMapping& operator=(TypedMemoryMapping&& other) = default;
/**
* Releases the mapping (done by the destructor of the wrapped
* MemoryMapping). Will never throw.
* Call unmap() instead if you want to be notified of any error.
*/
~TypedMemoryMapping() = default;
/**
* Unmap a mapping. If the mapping is not valid, it will do
* nothing.
*
* @throws std::system_error if the unmapping fails
*/
void unmap() {
m_mapping.unmap();
}
/**
* Resize a mapping to the given new size.
*
* On Linux systems this will use the mremap() function. On other
* systems it will unmap and remap the memory. This can only be
* done for file-based mappings, not anonymous mappings!
*
* @param new_size Number of objects of type T to resize to
* @throws std::system_error if the remapping fails
*/
void resize(size_t new_size) {
m_mapping.resize(sizeof(T) * new_size);
}
/**
* In a boolean context a TypedMemoryMapping is true when it is
* a valid existing mapping.
*/
operator bool() const noexcept {
return !!m_mapping;
}
/**
* The number of objects of class T mapped. This is the same size
* you created the mapping with. The actual mapping will probably
* be larger because the system will round it to the page size.
*/
size_t size() const noexcept {
// The byte size is expected to be a whole number of objects.
assert(m_mapping.size() % sizeof(T) == 0);
return m_mapping.size() / sizeof(T);
}
/**
* The file descriptor this mapping was created from.
*
* @returns file descriptor, -1 for anonymous mappings
*/
int fd() const noexcept {
return m_mapping.fd();
}
/**
* Was this mapping created as a writable mapping?
*/
bool writable() const noexcept {
return m_mapping.writable();
}
/**
* Get the address of the beginning of the mapping.
*
* @throws std::runtime_error if the mapping is invalid
*/
T* begin() {
return m_mapping.get_addr<T>();
}
/**
* Get the address one past the end of the mapping.
*
* @throws std::runtime_error if the mapping is invalid
*/
T* end() {
return m_mapping.get_addr<T>() + size();
}
/**
* Get the address of the beginning of the mapping (read-only).
*
* @throws std::runtime_error if the mapping is invalid
*/
const T* cbegin() const {
return m_mapping.get_addr<T>();
}
/**
* Get the address one past the end of the mapping (read-only).
*
* @throws std::runtime_error if the mapping is invalid
*/
const T* cend() const {
return m_mapping.get_addr<T>() + size();
}
/**
* Const overload of begin().
*
* @throws std::runtime_error if the mapping is invalid
*/
const T* begin() const {
return m_mapping.get_addr<T>();
}
/**
* Const overload of end().
*
* @throws std::runtime_error if the mapping is invalid
*/
const T* end() const {
return m_mapping.get_addr<T>() + size();
}
}; // class TypedMemoryMapping
/**
* Anonymous typed memory mapping: a TypedMemoryMapping that is always
* created through the anonymous (size-only) constructor.
*/
template <typename T>
class AnonymousTypedMemoryMapping : public TypedMemoryMapping<T> {
public:
/**
* @param size Number of objects of type T to be mapped
*/
AnonymousTypedMemoryMapping(size_t size) :
TypedMemoryMapping<T>(size) {
}
#ifndef __linux__
/**
* On systems other than Linux anonymous mappings can not be
* resized!
*/
void resize(size_t) = delete;
#endif
}; // class AnonymousTypedMemoryMapping
} // namespace util
} // namespace osmium
#ifndef _WIN32
// =========== Unix implementation =============
// MAP_FAILED is often a macro containing an old style cast
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
// MAP_FAILED doubles as the "no mapping" marker: it is what mmap() returns
// on failure and what make_invalid() stores.
inline bool osmium::util::MemoryMapping::is_valid() const noexcept {
return m_addr != MAP_FAILED;
}
// Only marks the object as invalid, the memory itself is not unmapped here.
inline void osmium::util::MemoryMapping::make_invalid() noexcept {
m_addr = MAP_FAILED;
}
#pragma GCC diagnostic pop
// for BSD systems
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
// Page protection for mmap(): always readable, writable unless the mapping
// mode is readonly.
inline int osmium::util::MemoryMapping::get_protection() const noexcept {
    const bool read_only = (m_mapping_mode == mapping_mode::readonly);
    return read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
}
// Flags for mmap(): anonymous mappings are always private, file-backed
// mappings are shared only in write_shared mode.
inline int osmium::util::MemoryMapping::get_flags() const noexcept {
    const bool anonymous = (m_fd == -1);
    if (anonymous) {
        return MAP_PRIVATE | MAP_ANONYMOUS;
    }
    return (m_mapping_mode == mapping_mode::write_shared) ? MAP_SHARED : MAP_PRIVATE;
}
inline osmium::util::MemoryMapping::MemoryMapping(size_t size, mapping_mode mode, int fd, off_t offset) :
// NOTE: the initializers depend on each other, so the order of the
// member declarations in the class matters here.
// A requested size of 0 is replaced by one page, see initial_size().
m_size(initial_size(size)),
m_offset(offset),
// resize_fd() reads m_size and m_offset and grows the file if needed.
m_fd(resize_fd(fd)),
m_mapping_mode(mode),
// get_protection() and get_flags() read m_mapping_mode and m_fd.
m_addr(::mmap(nullptr, m_size, get_protection(), get_flags(), m_fd, m_offset)) {
// A read-only anonymous mapping is not allowed (checked in debug builds only).
assert(!(fd == -1 && mode == mapping_mode::readonly));
if (!is_valid()) {
throw std::system_error(errno, std::system_category(), "mmap failed");
}
}
// Move constructor: take over all fields of the other mapping.
inline osmium::util::MemoryMapping::MemoryMapping(MemoryMapping&& other) :
m_size(other.m_size),
m_offset(other.m_offset),
m_fd(other.m_fd),
m_mapping_mode(other.m_mapping_mode),
m_addr(other.m_addr) {
// The memory now belongs to this object; invalidating the source keeps
// its destructor from unmapping it.
other.make_invalid();
}
// Move assignment: release the current mapping, then take over the other one.
inline osmium::util::MemoryMapping& osmium::util::MemoryMapping::operator=(osmium::util::MemoryMapping&& other) {
    // Without this check a self-move would unmap the memory and then
    // mark the object invalid, destroying the mapping.
    if (this != &other) {
        unmap();
        m_size         = other.m_size;
        m_offset       = other.m_offset;
        m_fd           = other.m_fd;
        m_mapping_mode = other.m_mapping_mode;
        m_addr         = other.m_addr;
        // The memory now belongs to this object.
        other.make_invalid();
    }
    return *this;
}
inline void osmium::util::MemoryMapping::unmap() {
    // Nothing to do for a mapping that is already released or moved-from.
    if (!is_valid()) {
        return;
    }

    const int rc = ::munmap(m_addr, m_size);
    if (rc != 0) {
        throw std::system_error(errno, std::system_category(), "munmap failed");
    }

    make_invalid();
}
inline void osmium::util::MemoryMapping::resize(size_t new_size) {
assert(new_size > 0 && "can not resize to zero size");
if (m_fd == -1) { // anonymous mapping
#ifdef __linux__
m_addr = ::mremap(m_addr, m_size, new_size, MREMAP_MAYMOVE);
if (!is_valid()) {
throw std::system_error(errno, std::system_category(), "mremap failed");
}
m_size = new_size;
#else
assert(false && "can't resize anonymous mappings on non-linux systems");
#endif
} else { // file-based mapping
unmap();
m_size = new_size;
resize_fd(m_fd);
m_addr = ::mmap(nullptr, new_size, get_protection(), get_flags(), m_fd, m_offset);
if (!is_valid()) {
throw std::system_error(errno, std::system_category(), "mmap (remap) failed");
}
}
}
#else
// =========== Windows implementation =============
/* References:
* CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx
* CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx
* MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx
* UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx
*/
namespace osmium {
namespace util {
/// Upper 32 bits of a 64 bit value.
inline DWORD dword_hi(uint64_t x) {
    const uint64_t upper = x >> 32;
    return static_cast<DWORD>(upper);
}
/// Lower 32 bits of a 64 bit value.
inline DWORD dword_lo(uint64_t x) {
    const uint64_t lower = x & 0xffffffff;
    return static_cast<DWORD>(lower);
}
} // namespace util
} // namespace osmium
// Page protection for CreateFileMapping() matching the mapping mode.
inline DWORD osmium::util::MemoryMapping::get_protection() const noexcept {
    switch (m_mapping_mode) {
        case mapping_mode::readonly:
            return PAGE_READONLY;
        case mapping_mode::write_private:
            return PAGE_WRITECOPY;
        default: // mapping_mode::write_shared
            break;
    }
    // Returning here (instead of inside the switch) makes sure that every
    // control path of this non-void function returns a value.
    return PAGE_READWRITE;
}
// Desired access for MapViewOfFile() matching the mapping mode.
inline DWORD osmium::util::MemoryMapping::get_flags() const noexcept {
    switch (m_mapping_mode) {
        case mapping_mode::readonly:
            return FILE_MAP_READ;
        case mapping_mode::write_private:
            return FILE_MAP_COPY;
        default: // mapping_mode::write_shared
            break;
    }
    // Returning here (instead of inside the switch) makes sure that every
    // control path of this non-void function returns a value.
    return FILE_MAP_WRITE;
}
// Windows file handle for m_fd, INVALID_HANDLE_VALUE for anonymous mappings.
inline HANDLE osmium::util::MemoryMapping::get_handle() const noexcept {
    if (m_fd != -1) {
        return reinterpret_cast<HANDLE>(_get_osfhandle(m_fd));
    }
    return INVALID_HANDLE_VALUE;
}
// Create the file mapping object. Its maximum size is offset + size,
// passed to CreateFileMapping() as two 32 bit halves.
inline HANDLE osmium::util::MemoryMapping::create_file_mapping() const noexcept {
    const uint64_t max_size = static_cast<uint64_t>(m_size) + m_offset;
    return CreateFileMapping(get_handle(),
                             nullptr,
                             get_protection(),
                             osmium::util::dword_hi(max_size),
                             osmium::util::dword_lo(max_size),
                             nullptr);
}
inline void* osmium::util::MemoryMapping::map_view_of_file() const noexcept {
return MapViewOfFile(m_handle, get_flags(), osmium::util::dword_hi(m_offset), osmium::util::dword_lo(m_offset), m_size);
}
// On Windows a null address marks an invalid mapping (see make_invalid()).
inline bool osmium::util::MemoryMapping::is_valid() const noexcept {
return m_addr != nullptr;
}
// Only marks the object as invalid: the view is not unmapped and m_handle
// is not touched here.
inline void osmium::util::MemoryMapping::make_invalid() noexcept {
m_addr = nullptr;
}
inline osmium::util::MemoryMapping::MemoryMapping(size_t size, MemoryMapping::mapping_mode mode, int fd, off_t offset) :
    // NOTE: the initializers depend on each other, so the order of the
    // member declarations in the class matters here.
    m_size(initial_size(size)),
    m_offset(offset),
    // resize_fd() reads m_size and m_offset and grows the file if needed.
    m_fd(resize_fd(fd)),
    m_mapping_mode(mode),
    // create_file_mapping() reads all of the members set above.
    m_handle(create_file_mapping()),
    m_addr(nullptr) {

    if (!m_handle) {
        throw std::system_error(GetLastError(), std::system_category(), "CreateFileMapping failed");
    }

    m_addr = map_view_of_file();
    if (!is_valid()) {
        // The destructor does not run when a constructor throws, so the
        // mapping handle has to be closed here or it would leak. Fetch
        // the error code first, CloseHandle() could overwrite it.
        const DWORD error = GetLastError();
        CloseHandle(m_handle);
        m_handle = nullptr;
        throw std::system_error(error, std::system_category(), "MapViewOfFile failed");
    }
}
// Move constructor: take over the view and the mapping handle of the other
// mapping and leave it without either.
inline osmium::util::MemoryMapping::MemoryMapping(MemoryMapping&& other) :
    m_size(other.m_size),
    m_offset(other.m_offset),
    m_fd(other.m_fd),
    m_mapping_mode(other.m_mapping_mode),
    m_handle(other.m_handle),
    m_addr(other.m_addr) {
    other.m_handle = nullptr;
    other.make_invalid();
}
// Move assignment: release the current mapping, then take over the view
// and the mapping handle of the other one.
inline osmium::util::MemoryMapping& osmium::util::MemoryMapping::operator=(osmium::util::MemoryMapping&& other) {
    // Without this check a self-move would unmap the view, close the
    // handle and then mark the object invalid, destroying the mapping.
    if (this != &other) {
        unmap();
        m_size         = other.m_size;
        m_offset       = other.m_offset;
        m_fd           = other.m_fd;
        m_mapping_mode = other.m_mapping_mode;
        m_handle       = other.m_handle;
        m_addr         = other.m_addr;
        // View and handle now belong to this object.
        other.make_invalid();
        other.m_handle = nullptr;
    }
    return *this;
}
// Release the view first, then the file mapping object. Each step is
// skipped if there is nothing to release.
inline void osmium::util::MemoryMapping::unmap() {
    if (is_valid()) {
        const bool unmapped = UnmapViewOfFile(m_addr);
        if (!unmapped) {
            throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile failed");
        }
        make_invalid();
    }

    if (m_handle) {
        const bool closed = CloseHandle(m_handle);
        if (!closed) {
            throw std::system_error(GetLastError(), std::system_category(), "CloseHandle failed");
        }
        m_handle = nullptr;
    }
}
// Windows has no in-place remapping here: the mapping is always released
// and created again with the new size.
inline void osmium::util::MemoryMapping::resize(size_t new_size) {
// Releases the view and closes the mapping handle.
unmap();
// m_size must be updated first: resize_fd() and create_file_mapping()
// both read it.
m_size = new_size;
resize_fd(m_fd);
m_handle = create_file_mapping();
if (!m_handle) {
throw std::system_error(GetLastError(), std::system_category(), "CreateFileMapping failed");
}
m_addr = map_view_of_file();
if (!is_valid()) {
throw std::system_error(GetLastError(), std::system_category(), "MapViewOfFile failed");
}
}
#endif
#endif // OSMIUM_UTIL_MEMORY_MAPPING_HPP
+120
View File
@@ -0,0 +1,120 @@
#ifndef OSMIUM_UTIL_MINMAX_HPP
#define OSMIUM_UTIL_MINMAX_HPP
/*
This file is part of Osmium (http://osmcode.org/libosmium).
Copyright 2013-2015 Jochen Topf <jochen@topf.org> and others (see README).
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#include <limits>
namespace osmium {
/**
 * Start value for min_op: the largest value of type T, so that any value
 * passed to update() is less than or equal to it.
 */
template <typename T>
inline T min_op_start_value() {
    typedef std::numeric_limits<T> limits;
    return (limits::max)();
}
/**
* Class for calculating the minimum of a bunch of values.
* Works with any numeric type.
*
* Usage:
*
* min_op<int> x;
* x.update(27);
* x.update(12);
* auto min = x(); // 12
*/
template <typename T>
class min_op {

    /// Smallest value seen so far (or the start value).
    T m_value;

public:

    explicit min_op(T start_value = min_op_start_value<T>()) :
        m_value(start_value) {
    }

    /// Remember value if it is smaller than the current minimum.
    void update(T value) noexcept {
        if (!(value < m_value)) {
            return;
        }
        m_value = value;
    }

    /// Get the current minimum.
    T operator()() const noexcept {
        return m_value;
    }

};
/**
 * Start value for max_op: the lowest value of type T, so that any value
 * passed to update() is greater than or equal to it.
 *
 * This has to be lowest() and not min(): for floating point types min()
 * is the smallest positive normalized value, which would be larger than
 * any negative input. For integer types both are the same.
 */
template <typename T>
inline T max_op_start_value() {
    return std::numeric_limits<T>::lowest();
}
/**
* Class for calculating the maximum of a bunch of values.
* Works with any numeric type.
*
* Usage:
*
* max_op<int> x;
* x.update(27);
* x.update(12);
* auto max = x(); // 27
*/
template <typename T>
class max_op {

    /// Largest value seen so far (or the start value).
    T m_value;

public:

    explicit max_op(T start_value = max_op_start_value<T>()) :
        m_value(start_value) {
    }

    /// Remember value if it is larger than the current maximum.
    void update(T value) noexcept {
        if (!(value > m_value)) {
            return;
        }
        m_value = value;
    }

    /// Get the current maximum.
    T operator()() const noexcept {
        return m_value;
    }

};
} // namespace osmium
#endif // OSMIUM_UTIL_MINMAX_HPP
+38 -4
View File
@@ -43,21 +43,55 @@ namespace osmium {
* Split string on the separator character.
*
* @param str The string to be split.
* @param sep The separastor character.
* @param sep The separator character.
* @param compact Set this to true to remove empty strings from result
* @returns Vector with the parts of the string split up.
*/
inline std::vector<std::string> split_string(const std::string& str, const char sep) {
inline std::vector<std::string> split_string(const std::string& str, const char sep, bool compact = false) {
std::vector<std::string> tokens;
if (!str.empty()) {
size_t pos = 0;
size_t nextpos = str.find_first_of(sep);
while (nextpos != std::string::npos) {
tokens.push_back(str.substr(pos, nextpos-pos));
if (!compact || (nextpos - pos != 0)) {
tokens.push_back(str.substr(pos, nextpos-pos));
}
pos = nextpos + 1;
nextpos = str.find_first_of(sep, pos);
}
tokens.push_back(str.substr(pos));
if (!compact || pos != str.size()) {
tokens.push_back(str.substr(pos));
}
}
return tokens;
}
/**
* Split string on the separator character(s).
*
* @param str The string to be split.
* @param sep The separator character(s).
* @param compact Set this to true to remove empty strings from result
* @returns Vector with the parts of the string split up.
*/
inline std::vector<std::string> split_string(const std::string& str, const char* sep, bool compact = false) {
std::vector<std::string> tokens;
if (!str.empty()) {
size_t pos = 0;
size_t nextpos = str.find_first_of(sep);
while (nextpos != std::string::npos) {
if (!compact || (nextpos - pos != 0)) {
tokens.push_back(str.substr(pos, nextpos-pos));
}
pos = nextpos + 1;
nextpos = str.find_first_of(sep, pos);
}
if (!compact || pos != str.size()) {
tokens.push_back(str.substr(pos));
}
}
return tokens;
+49
View File
@@ -0,0 +1,49 @@
#ifndef PROTOZERO_BYTESWAP_HPP
#define PROTOZERO_BYTESWAP_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
#include <cassert>
namespace protozero {
/**
 * Copy N bytes from data to result in reversed byte order.
 *
 * This primary template handles any positive size with a simple loop.
 * Hand-unrolled specializations for the sizes 1, 4, and 8 follow below.
 *
 * @tparam N Number of bytes to swap. Must be positive.
 * @param data Pointer to the N input bytes.
 * @param result Pointer to a buffer that receives the N reversed bytes.
 */
template <int N>
inline void byteswap(const char* data, char* result) {
    static_assert(N > 0, "byteswap needs a positive number of bytes");
    // Unlike an assert(false), this does the right thing in release
    // builds too: every size is actually swapped.
    for (int i = 0; i < N; ++i) {
        result[N - 1 - i] = data[i];
    }
}

/// Specialization for a single byte: nothing to reorder, just copy.
template <>
inline void byteswap<1>(const char* data, char* result) {
    result[0] = data[0];
}

/// Specialization for 4 byte values.
template <>
inline void byteswap<4>(const char* data, char* result) {
    result[3] = data[0];
    result[2] = data[1];
    result[1] = data[2];
    result[0] = data[3];
}

/// Specialization for 8 byte values.
template <>
inline void byteswap<8>(const char* data, char* result) {
    result[7] = data[0];
    result[6] = data[1];
    result[5] = data[2];
    result[4] = data[3];
    result[3] = data[4];
    result[2] = data[5];
    result[1] = data[6];
    result[0] = data[7];
}
} // end namespace protozero
#endif // PROTOZERO_BYTESWAP_HPP
+68
View File
@@ -0,0 +1,68 @@
#ifndef PROTOZERO_EXCEPTION_HPP
#define PROTOZERO_EXCEPTION_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
/**
* @file exception.hpp
*
* @brief Contains the exceptions used in the protozero library.
*/
#include <exception>
/**
* @brief All parts of the protozero header-only library are in this namespace.
*/
namespace protozero {
/**
 * All exceptions explicitly thrown by the functions of the protozero library
 * derive from this exception.
 */
struct exception : std::exception {
    /// Returns the explanatory string.
    const char *what() const noexcept override { return "pbf exception"; }
};

/**
 * This exception is thrown when parsing a varint that's larger than allowed.
 * This should never happen unless the data is corrupted.
 */
struct varint_too_long_exception : exception {
    /// Returns the explanatory string.
    const char *what() const noexcept override { return "varint too long exception"; }
};

/**
 * This exception is thrown when the wire type of a pbf field is unknown.
 * This should never happen unless the data is corrupted.
 */
struct unknown_pbf_wire_type_exception : exception {
    /// Returns the explanatory string.
    const char *what() const noexcept override { return "unknown pbf field type exception"; }
};

/**
 * This exception is thrown when we are trying to read a field and there
 * are not enough bytes left in the buffer to read it. Almost all functions
 * of the pbf_reader class can throw this exception.
 *
 * This should never happen unless the data is corrupted or you have
 * initialized the pbf_reader object with incomplete data.
 */
struct end_of_buffer_exception : exception {
    /// Returns the explanatory string.
    const char *what() const noexcept override { return "end of buffer exception"; }
};
} // end namespace protozero
#endif // PROTOZERO_EXCEPTION_HPP
+111
View File
@@ -0,0 +1,111 @@
#ifndef PROTOZERO_PBF_BUILDER_HPP
#define PROTOZERO_PBF_BUILDER_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
#include <type_traits>
#include <protozero/pbf_types.hpp>
#include <protozero/pbf_writer.hpp>
namespace protozero {
/**
 * Thin typed wrapper around pbf_writer: every add_*() function takes the
 * tag as a value of the enum type T instead of a plain pbf_tag_type and
 * forwards to the pbf_writer function of the same name.
 *
 * @tparam T Enum type with underlying type protozero::pbf_tag_type.
 */
template <typename T>
class pbf_builder : public pbf_writer {

    static_assert(std::is_same<pbf_tag_type, typename std::underlying_type<T>::type>::value, "T must be enum with underlying type protozero::pbf_tag_type");

public:

    /// The enum type used for the tags of this builder.
    using enum_type = T;

    /**
     * Create a builder writing into the given string.
     *
     * @param data String used as data store, see pbf_writer.
     */
    pbf_builder(std::string& data) noexcept :
        pbf_writer(data) {
    }

    /**
     * Create a builder for a submessage of the message written by
     * parent_writer.
     *
     * Not noexcept: the pbf_writer constructor used here writes the field
     * header into the buffer, which can throw if the buffer has to grow.
     *
     * @tparam P Type of the tag; it is converted to pbf_tag_type.
     * @param parent_writer The writer of the parent message.
     * @param tag Tag (field number) of the submessage field.
     */
    template <typename P>
    pbf_builder(pbf_writer& parent_writer, P tag) :
        pbf_writer(parent_writer, pbf_tag_type(tag)) {
    }

// Generates add_<name>(T tag, <type> value) forwarding to pbf_writer.
#define PROTOZERO_WRITER_WRAP_ADD_SCALAR(name, type) \
    inline void add_##name(T tag, type value) { \
        pbf_writer::add_##name(pbf_tag_type(tag), value); \
    }

    PROTOZERO_WRITER_WRAP_ADD_SCALAR(bool, bool)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(enum, int32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(int32, int32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint32, int32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint32, uint32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(int64, int64_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(sint64, int64_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(uint64, uint64_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed32, uint32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed32, int32_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(fixed64, uint64_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(sfixed64, int64_t)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(float, float)
    PROTOZERO_WRITER_WRAP_ADD_SCALAR(double, double)

// The helper macro is an implementation detail; don't leak it to includers.
#undef PROTOZERO_WRITER_WRAP_ADD_SCALAR

    /// Typed version of pbf_writer::add_bytes(tag, value, size).
    inline void add_bytes(T tag, const char* value, size_t size) {
        pbf_writer::add_bytes(pbf_tag_type(tag), value, size);
    }

    /// Typed version of pbf_writer::add_bytes(tag, value).
    inline void add_bytes(T tag, const std::string& value) {
        pbf_writer::add_bytes(pbf_tag_type(tag), value);
    }

    /// Typed version of pbf_writer::add_string(tag, value, size).
    inline void add_string(T tag, const char* value, size_t size) {
        pbf_writer::add_string(pbf_tag_type(tag), value, size);
    }

    /// Typed version of pbf_writer::add_string(tag, value).
    inline void add_string(T tag, const std::string& value) {
        pbf_writer::add_string(pbf_tag_type(tag), value);
    }

    /// Typed version of pbf_writer::add_string(tag, value) for C strings.
    inline void add_string(T tag, const char* value) {
        pbf_writer::add_string(pbf_tag_type(tag), value);
    }

    /// Typed version of pbf_writer::add_message(tag, value, size).
    inline void add_message(T tag, const char* value, size_t size) {
        pbf_writer::add_message(pbf_tag_type(tag), value, size);
    }

    /// Typed version of pbf_writer::add_message(tag, value).
    inline void add_message(T tag, const std::string& value) {
        pbf_writer::add_message(pbf_tag_type(tag), value);
    }

// Generates add_packed_<name>(T tag, first, last) forwarding to pbf_writer.
#define PROTOZERO_WRITER_WRAP_ADD_PACKED(name) \
    template <typename InputIterator> \
    inline void add_packed_##name(T tag, InputIterator first, InputIterator last) { \
        pbf_writer::add_packed_##name(pbf_tag_type(tag), first, last); \
    }

    PROTOZERO_WRITER_WRAP_ADD_PACKED(bool)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(enum)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(int32)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(sint32)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(uint32)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(int64)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(sint64)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(uint64)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed32)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed32)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(fixed64)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(sfixed64)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(float)
    PROTOZERO_WRITER_WRAP_ADD_PACKED(double)

// The helper macro is an implementation detail; don't leak it to includers.
#undef PROTOZERO_WRITER_WRAP_ADD_PACKED

};
} // end namespace protozero
#endif // PROTOZERO_PBF_BUILDER_HPP
+50
View File
@@ -0,0 +1,50 @@
#ifndef PROTOZERO_PBF_MESSAGE_HPP
#define PROTOZERO_PBF_MESSAGE_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
#include <type_traits>
#include <protozero/pbf_reader.hpp>
#include <protozero/pbf_types.hpp>
namespace protozero {
template <typename T>
class pbf_message : public pbf_reader {
static_assert(std::is_same<pbf_tag_type, typename std::underlying_type<T>::type>::value, "T must be enum with underlying type protozero::pbf_tag_type");
public:
using enum_type = T;
template <typename... Args>
pbf_message(Args&&... args) noexcept :
pbf_reader(std::forward<Args>(args)...) {
}
inline bool next() {
return pbf_reader::next();
}
inline bool next(T tag) {
return pbf_reader::next(pbf_tag_type(tag));
}
inline T tag() const noexcept {
return T(pbf_reader::tag());
}
};
} // end namespace protozero
#endif // PROTOZERO_PBF_MESSAGE_HPP
File diff suppressed because it is too large Load Diff
+49
View File
@@ -0,0 +1,49 @@
#ifndef PROTOZERO_PBF_TYPES_HPP
#define PROTOZERO_PBF_TYPES_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
/**
* @file pbf_types.hpp
*
* @brief Contains the declaration of low-level types used in the pbf format.
*/
#include <cstdint>
namespace protozero {
/**
 * Integer type holding the tag (field number) of a field.
 */
using pbf_tag_type = uint32_t;

/**
 * Wire types, i.e. the encodings a field can have in the pbf format.
 * The table these values come from is on
 * https://developers.google.com/protocol-buffers/docs/encoding
 */
enum class pbf_wire_type : uint32_t {
    // int32/64, uint32/64, sint32/64, bool, enum
    varint = 0,
    // fixed64, sfixed64, double
    fixed64 = 1,
    // string, bytes, embedded messages, packed repeated fields
    length_delimited = 2,
    // fixed32, sfixed32, float
    fixed32 = 5,
    // used for default setting in this library
    unknown = 99
};

/**
 * Integer type holding length values, such as the length of a field.
 */
using pbf_length_type = uint32_t;
} // end namespace protozero
#endif // PROTOZERO_PBF_TYPES_HPP
+664
View File
@@ -0,0 +1,664 @@
#ifndef PROTOZERO_PBF_WRITER_HPP
#define PROTOZERO_PBF_WRITER_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
/**
* @file pbf_writer.hpp
*
* @brief Contains the pbf_writer class.
*/
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include <protozero/pbf_types.hpp>
#include <protozero/varint.hpp>
#if __BYTE_ORDER != __LITTLE_ENDIAN
# include <protozero/byteswap.hpp>
#endif
/// Wrapper for assert() used for testing
#ifndef protozero_assert
# define protozero_assert(x) assert(x)
#endif
namespace protozero {
/**
 * The pbf_writer is used to write PBF formatted messages into a buffer.
 *
 * Almost all methods in this class can throw an std::bad_alloc exception if
 * the std::string used as a buffer wants to resize.
 */
class pbf_writer {

    // Buffer all output is appended to. nullptr for a default-constructed
    // writer, which can not be used.
    std::string* m_data;

    // Writer of the enclosing message if this writer was constructed for a
    // submessage, nullptr otherwise. The destructor uses it to close the
    // submessage.
    pbf_writer* m_parent_writer;

    // Non-zero while a submessage is open on this writer. It is then the
    // offset in *m_data directly behind the bytes reserved for the length.
    size_t m_pos = 0;

    // Append value as varint to the buffer.
    inline void add_varint(uint64_t value) {
        protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage");
        protozero_assert(m_data);
        write_varint(std::back_inserter(*m_data), value);
    }

    // Append the field header: tag and wire type packed into one varint.
    inline void add_field(pbf_tag_type tag, pbf_wire_type type) {
        protozero_assert(((tag > 0 && tag < 19000) || (tag > 19999 && tag <= ((1 << 29) - 1))) && "tag out of range");
        uint32_t b = (tag << 3) | uint32_t(type);
        add_varint(b);
    }

    // Append a complete varint field (header and value).
    inline void add_tagged_varint(pbf_tag_type tag, uint64_t value) {
        add_field(tag, pbf_wire_type::varint);
        add_varint(value);
    }

    // Append the bytes of value in little endian order (no field header).
    template <typename T>
    inline void add_fixed(T value) {
        protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage");
        protozero_assert(m_data);
#if __BYTE_ORDER == __LITTLE_ENDIAN
        m_data->append(reinterpret_cast<const char*>(&value), sizeof(T));
#else
        auto size = m_data->size();
        m_data->resize(size + sizeof(T));
        byteswap<sizeof(T)>(reinterpret_cast<const char*>(&value), const_cast<char*>(m_data->data() + size));
#endif
    }

    // Packed fixed-size values from single-pass iterators: the number of
    // elements is not known in advance, so a submessage writer is used
    // which fills in the length when it is destroyed.
    template <typename T, typename It>
    inline void add_packed_fixed(pbf_tag_type tag, It first, It last, std::input_iterator_tag) {
        if (first == last) {
            return;
        }

        pbf_writer sw(*this, tag);

        while (first != last) {
            sw.add_fixed<T>(*first++);
        }
    }

    // Packed fixed-size values from multi-pass iterators: the length can be
    // calculated up front, so it is written directly.
    template <typename T, typename It>
    inline void add_packed_fixed(pbf_tag_type tag, It first, It last, std::forward_iterator_tag) {
        if (first == last) {
            return;
        }

        add_length_varint(tag, sizeof(T) * pbf_length_type(std::distance(first, last)));

        while (first != last) {
            add_fixed<T>(*first++);
        }
    }

    // Packed varints; nothing is written for an empty range.
    template <typename It>
    inline void add_packed_varint(pbf_tag_type tag, It first, It last) {
        if (first == last) {
            return;
        }

        pbf_writer sw(*this, tag);

        while (first != last) {
            sw.add_varint(uint64_t(*first++));
        }
    }

    // Packed zigzag-encoded varints; nothing is written for an empty range.
    template <typename It>
    inline void add_packed_svarint(pbf_tag_type tag, It first, It last) {
        if (first == last) {
            return;
        }

        pbf_writer sw(*this, tag);

        while (first != last) {
            sw.add_varint(encode_zigzag64(*first++));
        }
    }

    // The number of bytes to reserve for the varint holding the length of
    // a length-delimited field. The length has to fit into pbf_length_type,
    // and a varint needs 8 bit for every 7 bit.
    static const int reserve_bytes = sizeof(pbf_length_type) * 8 / 7 + 1;

    // Write the header of a length-delimited field and reserve room for its
    // length, which is only known when the submessage is closed.
    inline void open_submessage(pbf_tag_type tag) {
        protozero_assert(m_pos == 0);
        protozero_assert(m_data);
        add_field(tag, pbf_wire_type::length_delimited);
        m_data->append(size_t(reserve_bytes), '\0');
        m_pos = m_data->size();
    }

    // Write the now known length into the reserved bytes and remove the
    // reserved bytes that were not needed for it.
    inline void close_submessage() {
        protozero_assert(m_pos != 0);
        protozero_assert(m_data);
        auto length = pbf_length_type(m_data->size() - m_pos);

        protozero_assert(m_data->size() >= m_pos - reserve_bytes);
        auto n = write_varint(m_data->begin() + long(m_pos) - reserve_bytes, length);

        m_data->erase(m_data->begin() + long(m_pos) - reserve_bytes + n, m_data->begin() + long(m_pos));
        m_pos = 0;
    }

    // Append the header of a length-delimited field followed by its length.
    inline void add_length_varint(pbf_tag_type tag, pbf_length_type length) {
        add_field(tag, pbf_wire_type::length_delimited);
        add_varint(length);
    }

public:

    /**
     * Create a writer using the given string as a data store. The pbf_writer
     * stores a reference to that string and adds all data to it.
     */
    inline explicit pbf_writer(std::string& data) noexcept :
        m_data(&data),
        m_parent_writer(nullptr),
        m_pos(0) {
    }

    /**
     * Create a writer without a data store. In this form the writer can not
     * be used!
     */
    inline pbf_writer() noexcept :
        m_data(nullptr),
        m_parent_writer(nullptr),
        m_pos(0) {
    }

    /**
     * Construct a pbf_writer for a submessage from the pbf_writer of the
     * parent message.
     *
     * @param parent_writer The pbf_writer
     * @param tag Tag (field number) of the field that will be written
     */
    inline pbf_writer(pbf_writer& parent_writer, pbf_tag_type tag) :
        m_data(parent_writer.m_data),
        m_parent_writer(&parent_writer),
        m_pos(0) {
        m_parent_writer->open_submessage(tag);
    }

    /// A pbf_writer object can be copied. Note that a copy of a writer
    /// created for a submessage refers to the same parent writer, and
    /// each of the two objects closes the submessage in its destructor.
    pbf_writer(const pbf_writer&) noexcept = default;

    /// A pbf_writer object can be copied
    pbf_writer& operator=(const pbf_writer&) noexcept = default;

    /// A pbf_writer object can be moved. The moved-from writer gives up
    /// its parent, so only the new object closes the submessage.
    inline pbf_writer(pbf_writer&& other) noexcept :
        m_data(other.m_data),
        m_parent_writer(other.m_parent_writer),
        m_pos(other.m_pos) {
        other.m_parent_writer = nullptr;
    }

    /// A pbf_writer object can be moved. The moved-from writer gives up
    /// its parent, so only the assigned-to object closes the submessage.
    inline pbf_writer& operator=(pbf_writer&& other) noexcept {
        if (this != &other) {
            m_data = other.m_data;
            m_parent_writer = other.m_parent_writer;
            m_pos = other.m_pos;
            other.m_parent_writer = nullptr;
        }
        return *this;
    }

    /// Closes the submessage in the parent writer, if there is a parent.
    inline ~pbf_writer() {
        if (m_parent_writer) {
            m_parent_writer->close_submessage();
        }
    }

    ///@{
    /**
     * @name Scalar field writer functions
     */

    /**
     * Add "bool" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_bool(pbf_tag_type tag, bool value) {
        add_field(tag, pbf_wire_type::varint);
        // a bool is 0 or 1, so its varint encoding is that single byte
        add_fixed<char>(value);
    }

    /**
     * Add "enum" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_enum(pbf_tag_type tag, int32_t value) {
        add_tagged_varint(tag, uint64_t(value));
    }

    /**
     * Add "int32" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_int32(pbf_tag_type tag, int32_t value) {
        add_tagged_varint(tag, uint64_t(value));
    }

    /**
     * Add "sint32" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_sint32(pbf_tag_type tag, int32_t value) {
        add_tagged_varint(tag, encode_zigzag32(value));
    }

    /**
     * Add "uint32" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_uint32(pbf_tag_type tag, uint32_t value) {
        add_tagged_varint(tag, value);
    }

    /**
     * Add "int64" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_int64(pbf_tag_type tag, int64_t value) {
        add_tagged_varint(tag, uint64_t(value));
    }

    /**
     * Add "sint64" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_sint64(pbf_tag_type tag, int64_t value) {
        add_tagged_varint(tag, encode_zigzag64(value));
    }

    /**
     * Add "uint64" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_uint64(pbf_tag_type tag, uint64_t value) {
        add_tagged_varint(tag, value);
    }

    /**
     * Add "fixed32" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_fixed32(pbf_tag_type tag, uint32_t value) {
        add_field(tag, pbf_wire_type::fixed32);
        add_fixed<uint32_t>(value);
    }

    /**
     * Add "sfixed32" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_sfixed32(pbf_tag_type tag, int32_t value) {
        add_field(tag, pbf_wire_type::fixed32);
        add_fixed<int32_t>(value);
    }

    /**
     * Add "fixed64" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_fixed64(pbf_tag_type tag, uint64_t value) {
        add_field(tag, pbf_wire_type::fixed64);
        add_fixed<uint64_t>(value);
    }

    /**
     * Add "sfixed64" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_sfixed64(pbf_tag_type tag, int64_t value) {
        add_field(tag, pbf_wire_type::fixed64);
        add_fixed<int64_t>(value);
    }

    /**
     * Add "float" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_float(pbf_tag_type tag, float value) {
        add_field(tag, pbf_wire_type::fixed32);
        add_fixed<float>(value);
    }

    /**
     * Add "double" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_double(pbf_tag_type tag, double value) {
        add_field(tag, pbf_wire_type::fixed64);
        add_fixed<double>(value);
    }

    /**
     * Add "bytes" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Pointer to value to be written
     * @param size Number of bytes to be written
     */
    inline void add_bytes(pbf_tag_type tag, const char* value, size_t size) {
        protozero_assert(m_pos == 0 && "you can't add fields to a parent pbf_writer if there is an existing pbf_writer for a submessage");
        protozero_assert(m_data);
        // the length is stored as pbf_length_type, so it has to fit
        protozero_assert(size <= std::numeric_limits<pbf_length_type>::max());
        add_length_varint(tag, pbf_length_type(size));
        m_data->append(value, size);
    }

    /**
     * Add "bytes" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_bytes(pbf_tag_type tag, const std::string& value) {
        add_bytes(tag, value.data(), value.size());
    }

    /**
     * Add "string" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Pointer to value to be written
     * @param size Number of bytes to be written
     */
    inline void add_string(pbf_tag_type tag, const char* value, size_t size) {
        add_bytes(tag, value, size);
    }

    /**
     * Add "string" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written
     */
    inline void add_string(pbf_tag_type tag, const std::string& value) {
        add_bytes(tag, value.data(), value.size());
    }

    /**
     * Add "string" field to data. Bytes from the value are written until
     * a null byte is encountered. The null byte is not added.
     *
     * @param tag Tag (field number) of the field
     * @param value Pointer to value to be written
     */
    inline void add_string(pbf_tag_type tag, const char* value) {
        add_bytes(tag, value, std::strlen(value));
    }

    /**
     * Add "message" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Pointer to message to be written
     * @param size Length of the message
     */
    inline void add_message(pbf_tag_type tag, const char* value, size_t size) {
        add_bytes(tag, value, size);
    }

    /**
     * Add "message" field to data.
     *
     * @param tag Tag (field number) of the field
     * @param value Value to be written. The value must be a complete message.
     */
    inline void add_message(pbf_tag_type tag, const std::string& value) {
        add_bytes(tag, value.data(), value.size());
    }

    ///@}

    ///@{
    /**
     * @name Repeated packed field writer functions
     */

    /**
     * Add "repeated packed bool" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to bool.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_bool(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed enum" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_enum(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed int32" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_int32(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed sint32" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_sint32(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_svarint(tag, first, last);
    }

    /**
     * Add "repeated packed uint32" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to uint32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_uint32(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed int64" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int64_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_int64(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed sint64" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int64_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_sint64(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_svarint(tag, first, last);
    }

    /**
     * Add "repeated packed uint64" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to uint64_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_uint64(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_varint(tag, first, last);
    }

    /**
     * Add "repeated packed fixed32" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to uint32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_fixed32(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<uint32_t, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    /**
     * Add "repeated packed sfixed32" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int32_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_sfixed32(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<int32_t, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    /**
     * Add "repeated packed fixed64" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to uint64_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_fixed64(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<uint64_t, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    /**
     * Add "repeated packed sfixed64" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to int64_t.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_sfixed64(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<int64_t, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    /**
     * Add "repeated packed float" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to float.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_float(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<float, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    /**
     * Add "repeated packed double" field to data.
     *
     * @tparam InputIterator A type satisfying the InputIterator concept.
     *         Dereferencing the iterator must yield a type assignable to double.
     * @param tag Tag (field number) of the field
     * @param first Iterator pointing to the beginning of the data
     * @param last Iterator pointing one past the end of data
     */
    template <typename InputIterator>
    inline void add_packed_double(pbf_tag_type tag, InputIterator first, InputIterator last) {
        add_packed_fixed<double, InputIterator>(tag, first, last,
            typename std::iterator_traits<InputIterator>::iterator_category());
    }

    ///@}

}; // class pbf_writer
} // end namespace protozero
#endif // PROTOZERO_PBF_WRITER_HPP
+136
View File
@@ -0,0 +1,136 @@
#ifndef PROTOZERO_VARINT_HPP
#define PROTOZERO_VARINT_HPP
/*****************************************************************************
protozero - Minimalistic protocol buffer decoder and encoder in C++.
This file is from https://github.com/mapbox/protozero where you can find more
documentation.
*****************************************************************************/
/**
* @file varint.hpp
*
* @brief Contains low-level varint and zigzag encoding and decoding functions.
*/
#if __BYTE_ORDER != __LITTLE_ENDIAN
# error "This code only works on little endian machines."
#endif
#include <cstdint>
#include <protozero/exception.hpp>
namespace protozero {
/**
 * Upper bound for the number of bytes a varint-encoded 64bit integer
 * occupies: every byte carries 7 bits of payload.
 */
constexpr int8_t max_varint_length = (sizeof(uint64_t) * 8) / 7 + 1;
// from https://github.com/facebook/folly/blob/master/folly/Varint.h
/**
 * Decode a 64bit varint.
 *
 * Strong exception guarantee: if there is an exception the data pointer will
 * not be changed.
 *
 * @param[in,out] data Pointer to pointer to the input data. After the function
 *        returns this will point to the next data to be read.
 * @param[in] end Pointer one past the end of the input data.
 * @returns The decoded integer
 * @throws varint_too_long_exception if the varint is longer than the maximum
 *         length that would fit in a 64bit int. Usually this means your data
 *         is corrupted or you are trying to read something as a varint that
 *         isn't.
 * @throws end_of_buffer_exception if the *end* of the buffer was reached
 *         before the end of the varint.
 */
inline uint64_t decode_varint(const char** data, const char* end) {
    // Bytes are read as signed values: a set continuation bit (0x80) then
    // shows up as a negative number.
    const int8_t* begin = reinterpret_cast<const int8_t*>(*data);
    const int8_t* iend = reinterpret_cast<const int8_t*>(end);
    const int8_t* p = begin;
    uint64_t val = 0;

    if (iend - begin >= max_varint_length) {  // fast path
        // Enough input for the longest possible varint, so no end-of-buffer
        // checks are needed. Masking and shifting is done on unsigned
        // values; shifting a signed value by 63 bits would overflow.
        do {
            int64_t b;
            b = *p++; val  = (uint64_t(b) & 0x7f)      ; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) <<  7; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 14; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 21; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 28; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 35; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 42; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 49; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 56; if (b >= 0) break;
            b = *p++; val |= (uint64_t(b) & 0x7f) << 63; if (b >= 0) break;
            throw varint_too_long_exception();
        } while (false);
    } else {
        // Fewer bytes left than the longest varint: check for the end of
        // the buffer before every byte.
        int shift = 0;
        while (p != iend && *p < 0) {
            val |= uint64_t(*p++ & 0x7f) << shift;
            shift += 7;
        }
        if (p == iend) {
            throw end_of_buffer_exception();
        }
        // last byte of the varint, continuation bit not set
        val |= uint64_t(*p++) << shift;
    }

    // only reached if nothing was thrown
    *data = reinterpret_cast<const char*>(p);
    return val;
}
/**
 * Write a 64bit integer in varint encoding to an output iterator.
 *
 * @tparam OutputIterator Iterator type the encoded bytes are written to.
 * @param data Iterator the bytes are written to, least significant group
 *        of 7 bits first.
 * @param value The integer to encode.
 * @returns The number of bytes written.
 */
template <typename OutputIterator>
inline int write_varint(OutputIterator data, uint64_t value) {
    int continuation_bytes = 0;

    // every byte but the last one has the continuation bit (0x80) set
    for (; value > 0x7f; value >>= 7) {
        *data++ = char((value & 0x7f) | 0x80);
        ++continuation_bytes;
    }

    // the remaining bits fit into one final byte
    *data++ = char(value);

    return continuation_bytes + 1;
}
/**
 * ZigZag encodes a 32 bit integer: the value shifted left by one bit is
 * combined (xor) with the value shifted right by 31 bits.
 */
inline uint32_t encode_zigzag32(int32_t value) noexcept {
    const uint32_t doubled = static_cast<uint32_t>(value) << 1;
    const uint32_t sign_fill = static_cast<uint32_t>(value >> 31);
    return doubled ^ sign_fill;
}
/**
 * ZigZag encodes a 64 bit integer: the value shifted left by one bit is
 * combined (xor) with the value shifted right by 63 bits.
 */
inline uint64_t encode_zigzag64(int64_t value) noexcept {
    const uint64_t doubled = static_cast<uint64_t>(value) << 1;
    const uint64_t sign_fill = static_cast<uint64_t>(value >> 63);
    return doubled ^ sign_fill;
}
/**
 * Decodes a 32 bit ZigZag-encoded integer: the lowest bit selects the
 * sign, the remaining bits hold the magnitude.
 */
inline int32_t decode_zigzag32(uint32_t value) noexcept {
    const int32_t magnitude = int32_t(value >> 1);
    const int32_t sign_fill = -int32_t(value & 1);
    return magnitude ^ sign_fill;
}
/**
 * Decodes a 64 bit ZigZag-encoded integer: the lowest bit selects the
 * sign, the remaining bits hold the magnitude.
 */
inline int64_t decode_zigzag64(uint64_t value) noexcept {
    const int64_t magnitude = int64_t(value >> 1);
    const int64_t sign_fill = -int64_t(value & 1);
    return magnitude ^ sign_fill;
}
} // end namespace protozero
#endif // PROTOZERO_VARINT_HPP
+34
View File
@@ -0,0 +1,34 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "utf8/checked.h"
#include "utf8/unchecked.h"
#endif // header guard
+327
View File
@@ -0,0 +1,327 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include <stdexcept>
namespace utf8
{
// Common base class of every exception thrown by the library, so that
// callers can catch all of them with a single handler.
class exception : public ::std::exception {
};

// Exceptions that may be thrown from the library functions.

// A value is not a valid Unicode code point. Carries the offending value.
class invalid_code_point : public exception {
    public:
    invalid_code_point(uint32_t code_point_value) : cp(code_point_value) {}
    virtual const char* what() const throw() { return "Invalid code point"; }
    uint32_t code_point() const { return cp; }
    private:
    uint32_t cp;
};

// An octet sequence is not valid UTF-8. Carries the octet that was reported.
class invalid_utf8 : public exception {
    public:
    invalid_utf8 (uint8_t octet) : u8(octet) {}
    virtual const char* what() const throw() { return "Invalid UTF-8"; }
    uint8_t utf8_octet() const { return u8; }
    private:
    uint8_t u8;
};

// A 16-bit sequence is not valid UTF-16. Carries the word that was reported.
class invalid_utf16 : public exception {
    public:
    invalid_utf16 (uint16_t word) : u16(word) {}
    virtual const char* what() const throw() { return "Invalid UTF-16"; }
    uint16_t utf16_word() const { return u16; }
    private:
    uint16_t u16;
};

// The input range ended before a complete sequence could be read.
class not_enough_room : public exception {
    public:
    virtual const char* what() const throw() { return "Not enough space"; }
};
/// The library API - functions intended to be called by the users
// Encodes the code point cp as UTF-8 and writes the one to four resulting
// octets through result. Returns the iterator past the last octet written.
// Throws invalid_code_point if cp is rejected by is_code_point_valid.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    if (!utf8::internal::is_code_point_valid(cp))
        throw invalid_code_point(cp);

    // one octet
    if (cp < 0x80) {
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }
    // two octets
    if (cp < 0x800) {
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    // three octets
    if (cp < 0x10000) {
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    // four octets
    *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
template <typename octet_iterator, typename output_iterator>
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
{
while (start != end) {
octet_iterator sequence_start = start;
internal::utf_error err_code = utf8::internal::validate_next(start, end);
switch (err_code) {
case internal::UTF8_OK :
for (octet_iterator it = sequence_start; it != start; ++it)
*out++ = *it;
break;
case internal::NOT_ENOUGH_ROOM:
throw not_enough_room();
case internal::INVALID_LEAD:
out = utf8::append (replacement, out);
++start;
break;
case internal::INCOMPLETE_SEQUENCE:
case internal::OVERLONG_SEQUENCE:
case internal::INVALID_CODE_POINT:
out = utf8::append (replacement, out);
++start;
// just one replacement mark for the sequence
while (start != end && utf8::internal::is_trail(*start))
++start;
break;
}
}
return out;
}
// Convenience overload of replace_invalid that substitutes the code point
// 0xfffd for every invalid sequence.
template <typename octet_iterator, typename output_iterator>
inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
{
    const uint32_t default_marker = utf8::internal::mask16(0xfffd);
    return utf8::replace_invalid(start, end, out, default_marker);
}
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
uint32_t cp = 0;
internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
switch (err_code) {
case internal::UTF8_OK :
break;
case internal::NOT_ENOUGH_ROOM :
throw not_enough_room();
case internal::INVALID_LEAD :
case internal::INCOMPLETE_SEQUENCE :
case internal::OVERLONG_SEQUENCE :
throw invalid_utf8(*it);
case internal::INVALID_CODE_POINT :
throw invalid_code_point(cp);
}
return cp;
}
// Returns the code point that starts at it without moving the caller's
// iterator: it is taken by value, so only the local copy is advanced.
// Throws the same exceptions as utf8::next.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it, octet_iterator end)
{
    const uint32_t cp = utf8::next(it, end);
    return cp;
}
// Moves it back to the lead octet of the sequence that precedes it and
// returns that sequence's code point.
// Throws not_enough_room if it == start, and invalid_utf8 if start is
// reached while only trail octets have been seen.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it, octet_iterator start)
{
    // can't do much if it == start
    if (it == start)
        throw not_enough_room();

    octet_iterator sequence_end = it;
    // Go back until we hit either a lead octet or start
    --it;
    while (utf8::internal::is_trail(*it)) {
        if (it == start)
            throw invalid_utf8(*it); // error - no lead byte in the sequence
        --it;
    }
    return utf8::peek_next(it, sequence_end);
}
/// Deprecated in versions that include "prior"
// Steps it back over trail octets to the preceding lead octet and returns
// the code point decoded from there up to the original position.
// Throws invalid_utf8 if pass_start is reached while still on a trail octet.
template <typename octet_iterator>
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
{
    octet_iterator sequence_end = it;
    --it;
    while (utf8::internal::is_trail(*it)) {
        if (it == pass_start)
            throw invalid_utf8(*it); // error - no lead byte in the sequence
        --it;
    }
    // Decode on a copy so that it stays on the lead octet
    octet_iterator cursor = it;
    return utf8::next(cursor, sequence_end);
}
// Advances it by n code points, validating every sequence on the way.
// Propagates whatever utf8::next throws.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n, octet_iterator end)
{
    distance_type stepped = 0;
    while (stepped < n) {
        utf8::next(it, end);
        ++stepped;
    }
}
// Counts the code points in the UTF-8 range [first, last), validating each
// sequence. Propagates whatever utf8::next throws.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typedef typename std::iterator_traits<octet_iterator>::difference_type count_type;
    count_type code_points = 0;
    while (first < last) {
        utf8::next(first, last);
        ++code_points;
    }
    return code_points;
}
// Converts the UTF-16 range [start, end) to UTF-8 written through result.
// Throws invalid_utf16 for a lead surrogate that is not followed by a trail
// surrogate and for a lone trail surrogate.
// Returns the output iterator past the last octet written.
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
    while (start != end) {
        uint32_t cp = utf8::internal::mask16(*start++);
        // Take care of surrogate pairs first
        if (utf8::internal::is_lead_surrogate(cp)) {
            // A lead surrogate at the very end of the input has no partner
            if (start == end)
                throw invalid_utf16(static_cast<uint16_t>(cp));
            const uint32_t trail = utf8::internal::mask16(*start++);
            if (!utf8::internal::is_trail_surrogate(trail))
                throw invalid_utf16(static_cast<uint16_t>(trail));
            cp = (cp << 10) + trail + internal::SURROGATE_OFFSET;
        }
        // Lone trail surrogate
        else if (utf8::internal::is_trail_surrogate(cp))
            throw invalid_utf16(static_cast<uint16_t>(cp));

        result = utf8::append(cp, result);
    }
    return result;
}
// Converts the UTF-8 range [start, end) to UTF-16 written through result.
// Code points above 0xffff are emitted as a surrogate pair.
// Propagates whatever utf8::next throws on invalid input.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
    while (start != end) {
        const uint32_t cp = utf8::next(start, end);
        if (cp <= 0xffff) {
            *result++ = static_cast<uint16_t>(cp);
            continue;
        }
        // make a surrogate pair
        *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
        *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
    }
    return result;
}
// Converts the UTF-32 range [start, end) to UTF-8 written through result.
// Propagates invalid_code_point from utf8::append.
// Returns the output iterator past the last octet written.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start)
        result = utf8::append(*start, result);
    return result;
}
// Converts the UTF-8 range [start, end) to UTF-32 written through result.
// Propagates whatever utf8::next throws on invalid input.
// Returns the output iterator past the last code point written.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start != end) {
        const uint32_t cp = utf8::next(start, end);
        *result++ = cp;
    }
    return result;
}
// The iterator class
// Bidirectional adaptor over a range of UTF-8 octets that yields one decoded
// code point per position. Decoding and movement go through utf8::next and
// utf8::prior, bounded by the range given at construction.
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
    octet_iterator it;
    octet_iterator range_start;
    octet_iterator range_end;
    public:
    iterator () {}
    // Throws std::out_of_range if octet_it lies outside [first, last]
    explicit iterator (const octet_iterator& octet_it,
                       const octet_iterator& first,
                       const octet_iterator& last) :
        it(octet_it), range_start(first), range_end(last)
    {
        if (it < range_start || it > range_end)
            throw std::out_of_range("Invalid utf-8 iterator position");
    }
    // the default "big three" are OK

    // The underlying octet iterator at the current position
    octet_iterator base () const { return it; }

    // Decodes the code point at the current position without moving
    uint32_t operator * () const
    {
        return utf8::peek_next(it, range_end);
    }

    // Iterators built over different ranges must not be compared
    bool operator == (const iterator& rhs) const
    {
        if (range_start != rhs.range_start || range_end != rhs.range_end)
            throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
        return (it == rhs.it);
    }
    bool operator != (const iterator& rhs) const
    {
        return !(*this == rhs);
    }

    iterator& operator ++ ()
    {
        utf8::next(it, range_end);
        return *this;
    }
    iterator operator ++ (int)
    {
        iterator before = *this;
        ++(*this);
        return before;
    }
    iterator& operator -- ()
    {
        utf8::prior(it, range_start);
        return *this;
    }
    iterator operator -- (int)
    {
        iterator before = *this;
        --(*this);
        return before;
    }
}; // class iterator
} // namespace utf8
#endif //header guard
+329
View File
@@ -0,0 +1,329 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include <iterator>
namespace utf8
{
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers used throughout
// the library. They are declared inside namespace utf8 and assume that
// unsigned char / unsigned short / unsigned int have exactly these widths;
// you may need to change them to match your system.
// These typedefs have the same names as ones from cstdint, or boost/cstdint
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
// Unicode constants
// Leading (high) surrogates: 0xd800 - 0xdbff
// Trailing (low) surrogates: 0xdc00 - 0xdfff
const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
// Added to (cp >> 10) to form the lead surrogate of a code point above 0xffff (used by utf8to16)
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
// Added to (lead << 10) + trail to rebuild the code point from a surrogate pair (used by utf16to8);
// the subtraction is done in unsigned arithmetic and relies on wrap-around
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
// Maximum valid value for a Unicode code point
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
// Returns the low eight bits of oc as an unsigned octet.
template<typename octet_type>
inline uint8_t mask8(octet_type oc)
{
    const uint8_t low_octet = static_cast<uint8_t>(oc & 0xff);
    return low_octet;
}
// Returns the low sixteen bits of oc as an unsigned 16-bit value.
template<typename u16_type>
inline uint16_t mask16(u16_type oc)
{
    const uint16_t low_word = static_cast<uint16_t>(oc & 0xffff);
    return low_word;
}
// True if oc is a UTF-8 trail (continuation) octet, i.e. its two most
// significant bits are 10.
template<typename octet_type>
inline bool is_trail(octet_type oc)
{
    return ((utf8::internal::mask8(oc) & 0xc0) == 0x80);
}
// True if cp lies in the leading (high) surrogate range
// [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
template <typename u16>
inline bool is_lead_surrogate(u16 cp)
{
    if (cp >= LEAD_SURROGATE_MIN)
        return (cp <= LEAD_SURROGATE_MAX);
    return false;
}
// True if cp lies in the trailing (low) surrogate range
// [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_trail_surrogate(u16 cp)
{
    if (cp >= TRAIL_SURROGATE_MIN)
        return (cp <= TRAIL_SURROGATE_MAX);
    return false;
}
// True if cp is any surrogate, leading or trailing:
// [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_surrogate(u16 cp)
{
    if (cp >= LEAD_SURROGATE_MIN)
        return (cp <= TRAIL_SURROGATE_MAX);
    return false;
}
// True if cp may be encoded: not above CODE_POINT_MAX and not a surrogate.
template <typename u32>
inline bool is_code_point_valid(u32 cp)
{
    if (cp <= CODE_POINT_MAX)
        return !utf8::internal::is_surrogate(cp);
    return false;
}
// Returns the length (1 to 4) of the UTF-8 sequence announced by the lead
// octet at lead_it, or 0 if that octet cannot start a sequence.
// lead_it is dereferenced unconditionally.
template <typename octet_iterator>
inline typename std::iterator_traits<octet_iterator>::difference_type
sequence_length(octet_iterator lead_it)
{
    const uint8_t lead = utf8::internal::mask8(*lead_it);
    if (lead < 0x80)               // 0xxxxxxx
        return 1;
    if ((lead & 0xe0) == 0xc0)     // 110xxxxx
        return 2;
    if ((lead & 0xf0) == 0xe0)     // 1110xxxx
        return 3;
    if ((lead & 0xf8) == 0xf0)     // 11110xxx
        return 4;
    return 0;
}
// True if cp was encoded with a number of octets (length) other than the
// one required for its value. Code points of 0x10000 and above are never
// reported as overlong.
template <typename octet_difference_type>
inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
{
    if (cp < 0x80)
        return length != 1;
    if (cp < 0x800)
        return length != 2;
    if (cp < 0x10000)
        return length != 3;
    return false;
}
// Result codes returned by the validation and decoding helpers
enum utf_error {
    UTF8_OK,              // sequence decoded and accepted
    NOT_ENOUGH_ROOM,      // the range ended before the sequence was complete
    INVALID_LEAD,         // first octet cannot start a sequence
    INCOMPLETE_SEQUENCE,  // an expected trail octet was not a trail octet
    OVERLONG_SEQUENCE,    // value encoded with a length other than the required one
    INVALID_CODE_POINT    // decoded value rejected by is_code_point_valid
};
/// Helper for get_sequence_x
// Advances it by one octet and checks that it now points at a trail octet.
// Returns NOT_ENOUGH_ROOM if end was reached, INCOMPLETE_SEQUENCE if the
// octet is not a trail octet, UTF8_OK otherwise.
template <typename octet_iterator>
utf_error increase_safely(octet_iterator& it, octet_iterator end)
{
    ++it;
    if (it == end)
        return NOT_ENOUGH_ROOM;
    return utf8::internal::is_trail(*it) ? UTF8_OK : INCOMPLETE_SEQUENCE;
}
// Advances IT with increase_safely and makes the *enclosing function* return
// the error code if that step failed. Only usable inside functions returning
// utf_error; the macro is #undef'd after the get_sequence_x functions.
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
/// get_sequence_x functions decode utf-8 sequences of the length x
// Decodes a one-octet sequence: the code point is the octet itself.
// it is left on that octet. Returns NOT_ENOUGH_ROOM if it == end.
template <typename octet_iterator>
utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it != end) {
        code_point = utf8::internal::mask8(*it);
        return UTF8_OK;
    }
    return NOT_ENOUGH_ROOM;
}
// Decodes a two-octet sequence starting at it. On success it is left on the
// last octet of the sequence and code_point holds the decoded value.
// On failure the error from the failing step is returned; code_point may
// hold a partial value.
template <typename octet_iterator>
utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Lead octet
    code_point = utf8::internal::mask8(*it);

    // Trail octet contributes the low six bits
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    const uint32_t lead_bits = (code_point << 6) & 0x7ff;
    code_point = lead_bits + ((*it) & 0x3f);

    return UTF8_OK;
}
// Decodes a three-octet sequence starting at it. On success it is left on
// the last octet of the sequence and code_point holds the decoded value.
// On failure the error from the failing step is returned; code_point may
// hold a partial value.
template <typename octet_iterator>
utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Lead octet
    code_point = utf8::internal::mask8(*it);

    // First trail octet: bits 6..11
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    const uint32_t lead_bits = (code_point << 12) & 0xffff;
    code_point = lead_bits + ((utf8::internal::mask8(*it) << 6) & 0xfff);

    // Second trail octet: bits 0..5
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    code_point = code_point + ((*it) & 0x3f);

    return UTF8_OK;
}
// Decodes a four-octet sequence starting at it. On success it is left on
// the last octet of the sequence and code_point holds the decoded value.
// On failure the error from the failing step is returned; code_point may
// hold a partial value.
template <typename octet_iterator>
utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Lead octet
    code_point = utf8::internal::mask8(*it);

    // First trail octet: bits 12..17
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    const uint32_t lead_bits = (code_point << 18) & 0x1fffff;
    code_point = lead_bits + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);

    // Second trail octet: bits 6..11
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    code_point = code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff);

    // Third trail octet: bits 0..5
    UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
    code_point = code_point + ((*it) & 0x3f);

    return UTF8_OK;
}
#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
// Decodes and validates the UTF-8 sequence that starts at it.
//
// On success the decoded value is stored in code_point, it is moved past the
// sequence and UTF8_OK is returned. On any failure it is restored to its
// original position and the reason is returned:
//   NOT_ENOUGH_ROOM     - it == end, or the range ended inside the sequence
//   INVALID_LEAD        - the first octet cannot start a sequence
//   INCOMPLETE_SEQUENCE - an expected trail octet was not a trail octet
//   OVERLONG_SEQUENCE   - the value was encoded with more octets than needed
//   INVALID_CODE_POINT  - the decoded value is not a valid code point;
//                         code_point receives the rejected value so that the
//                         caller can report it
// For the other failures code_point is left untouched.
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
    // An empty range has no lead octet to look at. This must be checked
    // before sequence_length() below, which dereferences it unconditionally.
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Save the original value of it so we can go back in case of failure
    // Of course, it does not make much sense with e.g. stream iterators
    octet_iterator original_it = it;

    uint32_t cp = 0;
    // Determine the sequence length based on the lead octet
    typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
    const octet_difference_type length = utf8::internal::sequence_length(it);

    // Get trail octets and calculate the code point
    utf_error err = UTF8_OK;
    switch (length) {
        case 0:
            return INVALID_LEAD;
        case 1:
            err = utf8::internal::get_sequence_1(it, end, cp);
            break;
        case 2:
            err = utf8::internal::get_sequence_2(it, end, cp);
            break;
        case 3:
            err = utf8::internal::get_sequence_3(it, end, cp);
            break;
        case 4:
            err = utf8::internal::get_sequence_4(it, end, cp);
            break;
    }

    if (err == UTF8_OK) {
        // Decoding succeeded. Now, security checks...
        if (utf8::internal::is_code_point_valid(cp)) {
            if (!utf8::internal::is_overlong_sequence(cp, length)){
                // Passed! Return here.
                code_point = cp;
                ++it;
                return UTF8_OK;
            }
            else
                err = OVERLONG_SEQUENCE;
        }
        else {
            // Hand the rejected value back; otherwise the caller would
            // report whatever it initialised code_point with.
            code_point = cp;
            err = INVALID_CODE_POINT;
        }
    }

    // Failure branch - restore the original value of the iterator
    it = original_it;
    return err;
}
// Overload of validate_next for callers that only need the error code;
// the decoded code point is thrown away.
template <typename octet_iterator>
inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
    uint32_t discarded;
    const utf_error status = utf8::internal::validate_next(it, end, discarded);
    return status;
}
} // namespace internal
/// The library API - functions intended to be called by the users
// Byte order mark: the three octets 0xef 0xbb 0xbf that starts_with_bom and
// is_bom compare the beginning of a sequence against
const uint8_t bom[] = {0xef, 0xbb, 0xbf};
// Returns an iterator to the first invalid UTF-8 sequence in [start, end),
// or end if the whole range validates.
template <typename octet_iterator>
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
{
    octet_iterator cursor = start;
    // validate_next advances cursor on success and leaves it on the start of
    // the offending sequence on failure
    while (cursor != end &&
           utf8::internal::validate_next(cursor, end) == internal::UTF8_OK) {
    }
    return cursor;
}
// True if every sequence in [start, end) is valid UTF-8, i.e. find_invalid
// runs through to end.
template <typename octet_iterator>
inline bool is_valid(octet_iterator start, octet_iterator end)
{
    octet_iterator first_invalid = utf8::find_invalid(start, end);
    return (first_invalid == end);
}
// True if the range [it, end) begins with the three octets of bom.
// A range shorter than three octets yields false. it is taken by value, so
// the caller's iterator is not moved.
template <typename octet_iterator>
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
{
    if (it == end || (utf8::internal::mask8(*it++)) != bom[0])
        return false;
    if (it == end || (utf8::internal::mask8(*it++)) != bom[1])
        return false;
    return (it != end) && (utf8::internal::mask8(*it)) == bom[2];
}
//Deprecated in release 2.3
// True if the octets at it match bom. There is no end parameter: octets are
// read without any bounds check, stopping at the first mismatch.
template <typename octet_iterator>
inline bool is_bom (octet_iterator it)
{
    if ((utf8::internal::mask8(*it++)) != bom[0])
        return false;
    if ((utf8::internal::mask8(*it++)) != bom[1])
        return false;
    return (utf8::internal::mask8(*it)) == bom[2];
}
} // namespace utf8
#endif // header guard
+228
View File
@@ -0,0 +1,228 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
namespace utf8
{
namespace unchecked
{
// Encodes the code point cp as UTF-8 and writes the one to four resulting
// octets through result. No validation of cp is performed.
// Returns the iterator past the last octet written.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    // one octet
    if (cp < 0x80) {
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }
    // two octets
    if (cp < 0x800) {
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    // three octets
    if (cp < 0x10000) {
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }
    // four octets
    *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
    *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
// Decodes the UTF-8 sequence at it, advances it past that sequence and
// returns the code point. No validation and no bounds checking is done.
template <typename octet_iterator>
uint32_t next(octet_iterator& it)
{
    uint32_t cp = utf8::internal::mask8(*it);
    const typename std::iterator_traits<octet_iterator>::difference_type length =
        utf8::internal::sequence_length(it);

    if (length == 2) {
        ++it;
        cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
    }
    else if (length == 3) {
        ++it;
        cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
        ++it;
        cp += (*it) & 0x3f;
    }
    else if (length == 4) {
        ++it;
        cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
        ++it;
        cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
        ++it;
        cp += (*it) & 0x3f;
    }
    // Any other length: the first octet is returned as it is

    // Step off the last octet of the sequence
    ++it;
    return cp;
}
// Returns the code point that starts at it without moving the caller's
// iterator: it is taken by value, so only the local copy is advanced.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
    const uint32_t cp = utf8::unchecked::next(it);
    return cp;
}
// Moves it back to the nearest preceding octet that is not a trail octet
// and returns the code point decoded from there. No bounds checking is done.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it)
{
    do {
        --it;
    } while (utf8::internal::is_trail(*it));

    // Decode on a copy so that it stays on the lead octet
    octet_iterator cursor = it;
    return utf8::unchecked::next(cursor);
}
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
// Simply forwards to utf8::unchecked::prior.
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t cp = utf8::unchecked::prior(it);
    return cp;
}
// Advances it by n code points. No validation and no bounds checking.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{
    distance_type stepped = 0;
    while (stepped < n) {
        utf8::unchecked::next(it);
        ++stepped;
    }
}
// Counts the code points in the UTF-8 range [first, last) without
// validating the input.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typedef typename std::iterator_traits<octet_iterator>::difference_type count_type;
    count_type code_points = 0;
    while (first < last) {
        utf8::unchecked::next(first);
        ++code_points;
    }
    return code_points;
}
// Converts the UTF-16 range [start, end) to UTF-8 written through result.
// The input is not validated: after a lead surrogate the next word is
// consumed as its trail surrogate without any check.
// Returns the output iterator past the last octet written.
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
    while (start != end) {
        uint32_t cp = utf8::internal::mask16(*start++);
        // Take care of surrogate pairs first
        if (utf8::internal::is_lead_surrogate(cp)) {
            const uint32_t trail = utf8::internal::mask16(*start++);
            cp = (cp << 10) + trail + internal::SURROGATE_OFFSET;
        }
        result = utf8::unchecked::append(cp, result);
    }
    return result;
}
// Converts the UTF-8 range [start, end) to UTF-16 written through result,
// without validating the input. Code points above 0xffff are emitted as a
// surrogate pair.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = utf8::unchecked::next(start);
        if (cp <= 0xffff) {
            *result++ = static_cast<uint16_t>(cp);
            continue;
        }
        // make a surrogate pair
        *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
        *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
    }
    return result;
}
// Converts the UTF-32 range [start, end) to UTF-8 written through result,
// without validating the code points.
// Returns the output iterator past the last octet written.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start)
        result = utf8::unchecked::append(*start, result);
    return result;
}
// Converts the UTF-8 range [start, end) to UTF-32 written through result,
// without validating the input.
// Returns the output iterator past the last code point written.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = utf8::unchecked::next(start);
        *result++ = cp;
    }
    return result;
}
// The iterator class
// Bidirectional adaptor over UTF-8 octets that yields one decoded code point
// per position. It holds only the current octet iterator, so neither the
// input nor the bounds are checked.
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
    octet_iterator it;
    public:
    iterator () {}
    explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
    // the default "big three" are OK

    // The underlying octet iterator at the current position
    octet_iterator base () const { return it; }

    // Decodes the code point at the current position without moving
    uint32_t operator * () const
    {
        return utf8::unchecked::peek_next(it);
    }

    bool operator == (const iterator& rhs) const
    {
        return (it == rhs.it);
    }
    bool operator != (const iterator& rhs) const
    {
        return !(*this == rhs);
    }

    // Skips as many octets as the current lead octet announces
    iterator& operator ++ ()
    {
        ::std::advance(it, utf8::internal::sequence_length(it));
        return *this;
    }
    iterator operator ++ (int)
    {
        iterator before = *this;
        ++(*this);
        return before;
    }
    iterator& operator -- ()
    {
        utf8::unchecked::prior(it);
        return *this;
    }
    iterator operator -- (int)
    {
        iterator before = *this;
        --(*this);
        return before;
    }
}; // class iterator
} // namespace utf8::unchecked
} // namespace utf8
#endif // header guard