git-subtree-dir: third_party/libosmium git-subtree-split: ce865381fb752323ff1e66181f5a49b7f500ffa3
235 lines
9.7 KiB
C++
235 lines
9.7 KiB
C++
#ifndef OSMIUM_IO_DETAIL_STRING_UTIL_HPP
|
|
#define OSMIUM_IO_DETAIL_STRING_UTIL_HPP
|
|
|
|
/*
|
|
|
|
This file is part of Osmium (http://osmcode.org/libosmium).
|
|
|
|
Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).
|
|
|
|
Boost Software License - Version 1.0 - August 17th, 2003
|
|
|
|
Permission is hereby granted, free of charge, to any person or organization
|
|
obtaining a copy of the software and accompanying documentation covered by
|
|
this license (the "Software") to use, reproduce, display, distribute,
|
|
execute, and transmit the Software, and to prepare derivative works of the
|
|
Software, and to permit third-parties to whom the Software is furnished to
|
|
do so, all subject to the following:
|
|
|
|
The copyright notices in the Software and this entire statement, including
|
|
the above license grant, this restriction and the following disclaimer,
|
|
must be included in all copies of the Software, in whole or in part, and
|
|
all derivative works of the Software, unless such copies or derivative
|
|
works are solely in the form of machine-executable object code generated by
|
|
a source language processor.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
#include <cassert>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include <utf8.h>
|
|
|
|
namespace osmium {
|
|
|
|
namespace io {
|
|
|
|
namespace detail {
|
|
|
|
#ifndef _MSC_VER
# define SNPRINTF std::snprintf
#else
# define SNPRINTF _snprintf
#endif

/**
 * Make room at the end of a std::string and let snprintf format
 * directly into that room.
 *
 * The string is resized to exactly old_size + max_size characters and
 * the snprintf function is asked to write at most max_size bytes
 * (including the terminating NUL byte) beginning at offset old_size.
 * The string is NOT trimmed afterwards, the caller has to resize it
 * according to the return value.
 *
 * @param out The string to write into.
 * @param old_size Offset in out at which the formatted data starts.
 * @param max_size Number of bytes made available to snprintf. If this
 *                 is 0, a null pointer is passed as buffer, so nothing
 *                 is written.
 * @param format A string with formatting instructions a la printf.
 * @param args Any further arguments like in printf.
 * @returns The return value of the underlying snprintf function.
 * @throws std::bad_alloc If the string needed to grow and there
 *         wasn't enough memory.
 */
template <typename... TArgs>
inline int string_snprintf(std::string& out,
                           size_t old_size,
                           size_t max_size,
                           const char* format,
                           TArgs&&... args) {
    out.resize(old_size + max_size);

    // Use the non-const operator[] to get a writable pointer. Writing
    // through a pointer obtained from c_str() is not allowed by the
    // standard. The index is only evaluated if max_size > 0, in which
    // case old_size is a valid position inside the resized string.
    char* const buffer = max_size ? &out[old_size] : nullptr;

    return SNPRINTF(buffer,
                    max_size,
                    format,
                    std::forward<TArgs>(args)...);
}

#undef SNPRINTF
|
|
|
|
/**
 * This is a helper function for writing printf-like formatted
 * data into a std::string.
 *
 * If the formatting function reports an error (negative return
 * value), the string is left exactly as it was before the call.
 *
 * @param out The data will be appended to this string.
 * @param format A string with formatting instructions a la printf.
 * @param args Any further arguments like in printf.
 * @throws std::bad_alloc If the string needed to grow and there
 *         wasn't enough memory.
 */
template <typename... TArgs>
inline void append_printf_formatted_string(std::string& out,
                                           const char* format,
                                           TArgs&&... args) {

    // First try to write string with the max_size, if that doesn't
    // work snprintf will tell us how much space it needs. We
    // reserve that much space and try again. So this will always
    // work, even if the output is larger than the given max_size.
    //
    // Unfortunately this trick doesn't work on Windows, because
    // the _snprintf() function there only returns the length it
    // needs if max_size==0 and the buffer pointer is the null
    // pointer. So we have to take this into account.

#ifndef _MSC_VER
    static const size_t max_size = 100;
#else
    static const size_t max_size = 0;
#endif

    const size_t old_size = out.size();

    const int len = string_snprintf(out,
                                    old_size,
                                    max_size,
                                    format,
                                    std::forward<TArgs>(args)...);

    // A length of zero is fine (the formatted output can be empty),
    // only a negative value signals an error.
    assert(len >= 0);
    if (len < 0) {
        // Never feed a negative length into the size calculations
        // below, it would wrap around. Undo the resize done by
        // string_snprintf() and append nothing.
        out.resize(old_size);
        return;
    }

    if (static_cast<size_t>(len) >= max_size) {
        // Output (plus terminating NUL) didn't fit, try again with
        // exactly the space needed.
#ifndef NDEBUG
        const int len2 =
#endif
        string_snprintf(out,
                        old_size,
                        static_cast<size_t>(len) + 1,
                        format,
                        std::forward<TArgs>(args)...);
        assert(len2 == len);
    }

    // Cut off the terminating NUL and any unused space.
    out.resize(old_size + static_cast<size_t>(len));
}
|
|
|
|
// Append the lowest eight bits of value to out as exactly two hex
// digits, most significant digit first. The characters for the
// digits 0 to 15 are taken from hex_digits.
inline void append_2_hex_digits(std::string& out, uint32_t value, const char* const hex_digits) {
    const char high_digit = hex_digits[(value >> 4) & 0xf];
    const char low_digit  = hex_digits[value & 0xf];

    out.push_back(high_digit);
    out.push_back(low_digit);
}
|
|
|
|
// Write out the value with four or more hex digits.
//
// The lower 16 bits are always written as four digits. The upper 16
// bits are written without leading zeros, i.e. only from the most
// significant non-zero digit on. Zero digits following a non-zero
// digit are kept, so 0x100000 is written as "100000". The characters
// for the digits 0 to 15 are taken from hex_digits.
inline void append_min_4_hex_digits(std::string& out, uint32_t value, const char* const hex_digits) {
    // Upper four digits: skip leading zeros only.
    bool seen_nonzero = false;
    for (int shift = 28; shift >= 16; shift -= 4) {
        const uint32_t digit = (value >> shift) & 0xfU;
        if (digit != 0) {
            seen_nonzero = true;
        }
        if (seen_nonzero) {
            out += hex_digits[digit];
        }
    }

    // Lower four digits: always written.
    for (int shift = 12; shift >= 0; shift -= 4) {
        out += hex_digits[(value >> shift) & 0xfU];
    }
}
|
|
|
|
inline void append_utf8_encoded_string(std::string& out, const char* data) {
|
|
static const char* lookup_hex = "0123456789abcdef";
|
|
const char* end = data + std::strlen(data);
|
|
|
|
while (data != end) {
|
|
const char* last = data;
|
|
const uint32_t c = utf8::next(data, end);
|
|
|
|
// This is a list of Unicode code points that we let
|
|
// through instead of escaping them. It is incomplete
|
|
// and can be extended later.
|
|
// Generally we don't want to let through any character
|
|
// that has special meaning in the OPL format such as
|
|
// space, comma, @, etc. and any non-printing characters.
|
|
if ((0x0021 <= c && c <= 0x0024) ||
|
|
(0x0026 <= c && c <= 0x002b) ||
|
|
(0x002d <= c && c <= 0x003c) ||
|
|
(0x003e <= c && c <= 0x003f) ||
|
|
(0x0041 <= c && c <= 0x007e) ||
|
|
(0x00a1 <= c && c <= 0x00ac) ||
|
|
(0x00ae <= c && c <= 0x05ff)) {
|
|
out.append(last, data);
|
|
} else {
|
|
out += '%';
|
|
if (c <= 0xff) {
|
|
append_2_hex_digits(out, c, lookup_hex);
|
|
} else {
|
|
append_min_4_hex_digits(out, c, lookup_hex);
|
|
}
|
|
out += '%';
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Append the NUL-terminated string data to out, replacing all
 * characters that can not appear as they are in XML text or
 * attribute values with the corresponding entity or character
 * reference. Newline, carriage return and tab are written as
 * character references. All other bytes are copied unchanged.
 *
 * @param out The data will be appended to this string.
 * @param data NUL-terminated input string.
 */
inline void append_xml_encoded_string(std::string& out, const char* data) {
    for (; *data != '\0'; ++data) {
        switch (*data) {
            case '&':  out += "&amp;";  break;
            case '\"': out += "&quot;"; break;
            case '\'': out += "&apos;"; break;
            case '<':  out += "&lt;";   break;
            case '>':  out += "&gt;";   break;
            case '\n': out += "&#xA;";  break;
            case '\r': out += "&#xD;";  break;
            case '\t': out += "&#x9;";  break;
            default:   out += *data;    break;
        }
    }
}
|
|
|
|
inline void append_debug_encoded_string(std::string& out, const char* data, const char* prefix, const char* suffix) {
|
|
static const char* lookup_hex = "0123456789ABCDEF";
|
|
const char* end = data + std::strlen(data);
|
|
|
|
while (data != end) {
|
|
const char* last = data;
|
|
uint32_t c = utf8::next(data, end);
|
|
|
|
// This is a list of Unicode code points that we let
|
|
// through instead of escaping them. It is incomplete
|
|
// and can be extended later.
|
|
// Generally we don't want to let through any
|
|
// non-printing characters.
|
|
if ((0x0020 <= c && c <= 0x0021) ||
|
|
(0x0023 <= c && c <= 0x003b) ||
|
|
(0x003d == c) ||
|
|
(0x003f <= c && c <= 0x007e) ||
|
|
(0x00a1 <= c && c <= 0x00ac) ||
|
|
(0x00ae <= c && c <= 0x05ff)) {
|
|
out.append(last, data);
|
|
} else {
|
|
out.append(prefix);
|
|
out.append("<U+");
|
|
append_min_4_hex_digits(out, c, lookup_hex);
|
|
out.append(">");
|
|
out.append(suffix);
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
} // namespace io
|
|
|
|
} // namespace osmium
|
|
|
|
#endif // OSMIUM_IO_DETAIL_STRING_UTIL_HPP
|