// osrm-backend/include/osmium/io/detail/string_util.hpp

#ifndef OSMIUM_IO_DETAIL_STRING_UTIL_HPP
#define OSMIUM_IO_DETAIL_STRING_UTIL_HPP

/*

This file is part of Osmium (http://osmcode.org/libosmium).

Copyright 2013-2017 Jochen Topf <jochen@topf.org> and others (see README).

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

*/

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <utility>

#include <utf8.h>

namespace osmium {

    namespace io {

        namespace detail {

#ifndef _MSC_VER
# define SNPRINTF std::snprintf
#else
# define SNPRINTF _snprintf
#endif

            /**
             * Format data with snprintf() into a std::string, starting at a
             * given offset.
             *
             * The string is first resized to old_size + max_size. The
             * formatted output is then written into the max_size bytes
             * beginning at offset old_size. If max_size is 0, a null pointer
             * is handed to snprintf() instead of a buffer, so nothing is
             * written.
             *
             * @param out String to write into. Its size is
             *        old_size + max_size when this function returns.
             * @param old_size Offset in out at which the output starts.
             * @param max_size Buffer size passed on to snprintf().
             * @param format A string with formatting instructions a la printf.
             * @param args Any further arguments like in printf.
             * @returns The return value of the underlying snprintf() call.
             */
            template <typename... TArgs>
            inline int string_snprintf(std::string& out,
                                       size_t old_size,
                                       size_t max_size,
                                       const char* format,
                                       TArgs&&... args) {
                out.resize(old_size + max_size);

                // Get a writable pointer through the non-const operator[]
                // instead of casting away the constness of c_str(): the
                // character array returned by c_str() must not be modified.
                // When max_size is non-zero the resize() above guarantees
                // that index old_size is inside the string.
                char* const buffer = (max_size != 0) ? &out[old_size] : nullptr;

                return SNPRINTF(buffer,
                                max_size,
                                format,
                                std::forward<TArgs>(args)...);
            }

#undef SNPRINTF

            /**
             * This is a helper function for writing printf-like formatted
             * data into a std::string.
             *
             * If the underlying snprintf() call reports an error (negative
             * return value), an assertion fails in debug builds; in builds
             * with assertions disabled the string is left exactly as it was
             * before the call.
             *
             * @param out The data will be appended to this string.
             * @param format A string with formatting instructions a la printf.
             * @param args Any further arguments like in printf.
             * @throws std::bad_alloc If the string needed to grow and there
             *         wasn't enough memory.
             */
            template <typename... TArgs>
            inline void append_printf_formatted_string(std::string& out,
                                                   const char* format,
                                                   TArgs&&... args) {

                // First try to write string with the max_size, if that doesn't
                // work snprintf will tell us how much space it needs. We
                // reserve that much space and try again. So this will always
                // work, even if the output is larger than the given max_size.
                //
                // Unfortunately this trick doesn't work on Windows, because
                // the _snprintf() function there only returns the length it
                // needs if max_size==0 and the buffer pointer is the null
                // pointer. So we have to take this into account.

#ifndef _MSC_VER
                static const size_t max_size = 100;
#else
                static const size_t max_size = 0;
#endif

                const size_t old_size = out.size();

                // The arguments are needed for a possible second formatting
                // pass, so they are passed on as lvalues here instead of
                // being forwarded twice.
                const int len = string_snprintf(out,
                                                old_size,
                                                max_size,
                                                format,
                                                args...);

                // An empty result (len == 0) is perfectly valid output, only
                // a negative value signals a formatting error.
                assert(len >= 0);

                if (len < 0) {
                    // Converting a negative length to size_t below would
                    // wrap around and corrupt the string size. Undo the
                    // temporary growth and keep the original content.
                    out.resize(old_size);
                    return;
                }

                if (size_t(len) >= max_size) {
                    // Output was truncated (or only measured): retry with
                    // room for all characters plus the terminating NUL.
                    const int len2 = string_snprintf(out,
                                                     old_size,
                                                     size_t(len) + 1,
                                                     format,
                                                     args...);
                    assert(len2 == len);
                    (void)len2; // only used by the assert
                }

                // Cut off the terminating NUL and any unused buffer space.
                out.resize(old_size + size_t(len));
            }

            /**
             * Append the lowest eight bits of value to out as exactly two
             * hex digits, most significant digit first.
             *
             * @param out The digits will be appended to this string.
             * @param value Number to write; bits above the lowest eight are
             *        ignored.
             * @param hex_digits Lookup table with the 16 characters to use
             *        for the digit values 0 to 15.
             */
            inline void append_2_hex_digits(std::string& out, uint32_t value, const char* const hex_digits) {
                const uint32_t high_nibble = (value >> 4) & 0xfU;
                const uint32_t low_nibble  = value & 0xfU;

                out.push_back(hex_digits[high_nibble]);
                out.push_back(hex_digits[low_nibble]);
            }

            /**
             * Append value to out as hex digits, most significant digit
             * first. At least four digits are written (padded with leading
             * zeros); larger values get as many digits as they need, up to
             * eight.
             *
             * @param out The digits will be appended to this string.
             * @param value Number to write.
             * @param hex_digits Lookup table with the 16 characters to use
             *        for the digit values 0 to 15.
             */
            inline void append_min_4_hex_digits(std::string& out, uint32_t value, const char* const hex_digits) {
                // Find the position of the most significant non-zero hex
                // digit, but never go below the fourth digit (bit 12).
                int shift = 28;
                while (shift > 12 && (value >> shift) == 0) {
                    shift -= 4;
                }

                // From there on every digit has to be written, including
                // zeros in the middle of the number (0x10ffff must come out
                // as "10ffff", not "1ffff").
                for (; shift >= 0; shift -= 4) {
                    out += hex_digits[(value >> shift) & 0xfU];
                }
            }

            /**
             * Append the NUL-terminated string data to out, escaping some
             * characters on the way.
             *
             * The input is read code point by code point using utf8::next().
             * Code points on the allow list below are copied unchanged. All
             * others are written as their lower case hex value between two
             * '%' characters: two digits for values up to 0xff, four or more
             * digits for larger values.
             *
             * NOTE(review): utf8::next() presumably throws on malformed
             * UTF-8 input; confirm against the utf8 library documentation.
             *
             * @param out The data will be appended to this string.
             * @param data NUL-terminated input string.
             */
            inline void append_utf8_encoded_string(std::string& out, const char* data) {
                static const char* hex_table = "0123456789abcdef";
                const char* const data_end = data + std::strlen(data);

                while (data != data_end) {
                    const char* const seq_begin = data;
                    const uint32_t cp = utf8::next(data, data_end);

                    const auto in_range = [cp](uint32_t first, uint32_t last) {
                        return first <= cp && cp <= last;
                    };

                    // Allow list of Unicode code points that are written
                    // out as they are. The list is incomplete and can be
                    // extended later. Characters with a special meaning in
                    // the OPL format (space, comma, @, etc.) and
                    // non-printing characters must stay off this list.
                    const bool copy_verbatim = in_range(0x0021, 0x0024) ||
                                               in_range(0x0026, 0x002b) ||
                                               in_range(0x002d, 0x003c) ||
                                               in_range(0x003e, 0x003f) ||
                                               in_range(0x0041, 0x007e) ||
                                               in_range(0x00a1, 0x00ac) ||
                                               in_range(0x00ae, 0x05ff);

                    if (copy_verbatim) {
                        // Copy the original bytes of this code point.
                        out.append(seq_begin, data);
                        continue;
                    }

                    out += '%';
                    if (cp > 0xff) {
                        append_min_4_hex_digits(out, cp, hex_table);
                    } else {
                        append_2_hex_digits(out, cp, hex_table);
                    }
                    out += '%';
                }
            }

            /**
             * Append the NUL-terminated string data to out, replacing the
             * characters & " ' < > as well as newline, carriage return and
             * tab with XML entities / character references. All other
             * characters are copied unchanged.
             *
             * @param out The data will be appended to this string.
             * @param data NUL-terminated input string.
             */
            inline void append_xml_encoded_string(std::string& out, const char* data) {
                for (const char* pos = data; *pos != '\0'; ++pos) {
                    const char ch = *pos;
                    const char* entity = nullptr;

                    if (ch == '&') {
                        entity = "&amp;";
                    } else if (ch == '\"') {
                        entity = "&quot;";
                    } else if (ch == '\'') {
                        entity = "&apos;";
                    } else if (ch == '<') {
                        entity = "&lt;";
                    } else if (ch == '>') {
                        entity = "&gt;";
                    } else if (ch == '\n') {
                        entity = "&#xA;";
                    } else if (ch == '\r') {
                        entity = "&#xD;";
                    } else if (ch == '\t') {
                        entity = "&#x9;";
                    }

                    if (entity != nullptr) {
                        out.append(entity);
                    } else {
                        out.push_back(ch);
                    }
                }
            }

            /**
             * Append the NUL-terminated string data to out in a form
             * suitable for debug output.
             *
             * The input is read code point by code point using utf8::next().
             * Code points on the allow list below are copied unchanged. All
             * others are written as prefix, followed by "<U+", the upper
             * case hex value of the code point (at least four digits), ">"
             * and finally suffix.
             *
             * NOTE(review): utf8::next() presumably throws on malformed
             * UTF-8 input; confirm against the utf8 library documentation.
             *
             * @param out The data will be appended to this string.
             * @param data NUL-terminated input string.
             * @param prefix NUL-terminated string written before each
             *        escaped code point.
             * @param suffix NUL-terminated string written after each
             *        escaped code point.
             */
            inline void append_debug_encoded_string(std::string& out, const char* data, const char* prefix, const char* suffix) {
                static const char* hex_table = "0123456789ABCDEF";
                const char* const data_end = data + std::strlen(data);

                while (data != data_end) {
                    const char* const seq_begin = data;
                    const uint32_t cp = utf8::next(data, data_end);

                    const auto in_range = [cp](uint32_t first, uint32_t last) {
                        return first <= cp && cp <= last;
                    };

                    // Allow list of Unicode code points that are written
                    // out as they are. The list is incomplete and can be
                    // extended later. Non-printing characters must stay
                    // off this list.
                    const bool copy_verbatim = in_range(0x0020, 0x0021) ||
                                               in_range(0x0023, 0x003b) ||
                                               cp == 0x003d ||
                                               in_range(0x003f, 0x007e) ||
                                               in_range(0x00a1, 0x00ac) ||
                                               in_range(0x00ae, 0x05ff);

                    if (copy_verbatim) {
                        // Copy the original bytes of this code point.
                        out.append(seq_begin, data);
                        continue;
                    }

                    out.append(prefix).append("<U+");
                    append_min_4_hex_digits(out, cp, hex_table);
                    out.append(">").append(suffix);
                }
            }

        } // namespace detail

    } // namespace io

} // namespace osmium

#endif // OSMIUM_IO_DETAIL_STRING_UTIL_HPP