From 0f0db4c823184eb9d05811210333ff6e17f4a17e Mon Sep 17 00:00:00 2001
From: "Daniel J. Hofmann" <daniel@trvx.org>
Date: Thu, 24 Mar 2016 12:31:05 +0100
Subject: [PATCH] Provide correct Base64 implementation.

Phew, this was painful. Turns out most hints out there on how to use the
Boost serialization iterators are wrong. Here's why:

    transform_width<6, 8>

needs an input stream whose length in bits is a common multiple of 6
and 8, i.e. a multiple of 24 bits (3 bytes).

That is, the padding needs to happen _before_ using the provided
iterators, otherwise the behavior is undefined!

See: http://www.boost.org/doc/libs/1_60_0/boost/archive/iterators/transform_width.hpp

Thanks @mokob for pointing that out to me!

We also need to manually append one padding char "=" to the encoded
result for every byte we had to append to the input to conform to the
rule above.

Decoding then determines the number of padding chars by counting the
"=" chars and uses that count to split off the trailing bytes from the
decoded result.
---
 include/engine/base64.hpp | 57 ++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 21 deletions(-)
diff --git a/include/engine/base64.hpp b/include/engine/base64.hpp
index 1fc63b48d..648296183 100644
--- a/include/engine/base64.hpp
+++ b/include/engine/base64.hpp
@@ -2,6 +2,7 @@
 #define OSRM_BASE64_HPP
 
 #include <string>
+#include <vector>
 #include <iterator>
 #include <type_traits>
 
@@ -16,12 +17,26 @@
 
 // RFC 4648 "The Base16, Base32, and Base64 Data Encodings"
 // See: https://tools.ietf.org/html/rfc4648
-// Implementation adapted from: http://stackoverflow.com/a/28471421
 
+namespace detail
+{
 // The C++ standard guarantees none of this by default, but we need it in the following.
 static_assert(CHAR_BIT == 8u, "we assume a byte holds 8 bits");
 static_assert(sizeof(char) == 1u, "we assume a char is one byte large");
 
+using Base64FromBinary = boost::archive::iterators::base64_from_binary<
+    boost::archive::iterators::transform_width<const char *, // sequence of chars
+                                               6,            // get view of 6 bit
+                                               8             // from sequence of 8 bit
+                                               >>;
+
+using BinaryFromBase64 = boost::archive::iterators::transform_width<
+    boost::archive::iterators::binary_from_base64<std::string::const_iterator>,
+    8, // get a view of 8 bit
+    6  // from a sequence of 6 bit
+    >;
+} // ns detail
+
 namespace osrm
 {
 namespace engine
@@ -32,18 +47,24 @@ namespace engine
 // Encodes a chunk of memory to Base64.
 inline std::string encodeBase64(const unsigned char *first, std::size_t size)
 {
-    using namespace boost::archive::iterators;
+    std::vector<unsigned char> bytes{first, first + size};
+    BOOST_ASSERT(!bytes.empty());
 
-    const std::string bytes{first, first + size};
+    std::size_t bytes_to_pad{0};
 
-    using Iter = base64_from_binary<transform_width<std::string::const_iterator, 6, 8>>;
+    while (bytes.size() % 3 != 0)
+    {
+        bytes_to_pad += 1;
+        bytes.push_back(0);
+    }
 
-    Iter view_first{begin(bytes)};
-    Iter view_last{end(bytes)};
+    BOOST_ASSERT(bytes_to_pad == 0 || bytes_to_pad == 1 || bytes_to_pad == 2);
+    BOOST_ASSERT_MSG(0 == bytes.size() % 3, "base64 input data size is not a multiple of 3");
 
-    std::string encoded{view_first, view_last};
+    std::string encoded{detail::Base64FromBinary{bytes.data()},
+                        detail::Base64FromBinary{bytes.data() + (bytes.size() - bytes_to_pad)}};
 
-    return encoded.append((3 - size % 3) % 3, '=');
+    return encoded.append(bytes_to_pad, '=');
 }
 
 // C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types
@@ -78,22 +99,16 @@ template <typename T> std::string encodeBase64Bytewise(const T &x)
 // Decodes into a chunk of memory that is at least as large as the input.
 template <typename OutputIter> void decodeBase64(const std::string &encoded, OutputIter out)
 {
-    using namespace boost::archive::iterators;
-    using namespace boost::algorithm;
+    auto unpadded = encoded;
 
-    using Iter = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
+    const auto num_padded = std::count(begin(encoded), end(encoded), '=');
+    std::replace(begin(unpadded), end(unpadded), '=', 'A'); // A_64 == \0
 
-    Iter view_first{begin(encoded)};
-    Iter view_last{end(encoded)};
+    std::string decoded{detail::BinaryFromBase64{begin(unpadded)},
+                        detail::BinaryFromBase64{begin(unpadded) + unpadded.length()}};
 
-    const auto null = [](const unsigned char c)
-    {
-        return c == '\0';
-    };
-
-    const auto bytes = trim_right_copy_if(std::string{view_first, view_last}, null);
-
-    boost::copy(bytes, out);
+    decoded.erase(end(decoded) - num_padded, end(decoded));
+    std::copy(begin(decoded), end(decoded), out);
 }
 
 // Convenience specialization, filling string instead of byte-dumping into it.