Merge commit 'f1087e81ecdca5a59ba5ffca684c955c5b38f7c2' as 'third_party/unordered_dense'

This commit is contained in:
Siarhei Fedartsou
2024-05-30 19:06:16 +02:00
2383 changed files with 16243 additions and 0 deletions
+205
View File
@@ -0,0 +1,205 @@
#pragma once
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <string>
#include <type_traits>
namespace fuzz {
// Helper to provide a little bit more convenient interface than FuzzedDataProvider itself
class provider {
private:
    uint8_t const* m_data;
    size_t m_remaining_bytes;

    // Consumes a single byte and maps its least significant bit to a bool.
    // Once the input is exhausted the answer is always false.
    [[nodiscard]] inline auto consume_bool() -> bool {
        auto const byte = consume_integral<uint8_t>();
        return (byte & 1U) != 0U;
    }

    // Produces a value anywhere in T's full range. The distribution is not
    // guaranteed to be uniform. With no input left the result is T's minimum.
    template <typename T>
    [[nodiscard]] auto consume_integral() -> T {
        using limits = std::numeric_limits<T>;
        return consume_integral_in_range(limits::min(), limits::max());
    }

    // Produces a value in the inclusive range [min, max], built from bytes taken
    // off the END of the input. The distribution is not guaranteed to be
    // uniform. With no input left the result is |min|. Aborts when min > max.
    template <typename T>
    [[nodiscard]] auto consume_integral_in_range(T min, T max) -> T {
        static_assert(std::is_integral<T>::value, "An integral type is required.");
        static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

        if (max < min) {
            std::abort();
        }

        // All arithmetic happens in uint64_t, the widest supported type, so the
        // span of a signed range is computed without signed overflow.
        auto const lowest = static_cast<uint64_t>(min);
        uint64_t const span = static_cast<uint64_t>(max) - lowest;

        // Bytes are taken from the back of the buffer while strings are taken
        // from the front, so values that steer control flow (lengths, choices)
        // stay separate from the payload that is read as string content.
        // Stop as soon as enough bits for |span| were gathered, T is full, or
        // the input ran dry.
        constexpr size_t max_bits = sizeof(T) * CHAR_BIT;
        uint64_t accumulated = 0;
        for (size_t bits_read = 0; bits_read < max_bits && (span >> bits_read) != 0 && m_remaining_bytes != 0;
             bits_read += CHAR_BIT) {
            --m_remaining_bytes;
            accumulated = (accumulated << CHAR_BIT) | m_data[m_remaining_bytes];
        }

        // |span + 1| would wrap to 0 for the full 64 bit range; no reduction is
        // needed in that case anyway.
        if (span != std::numeric_limits<uint64_t>::max()) {
            accumulated %= span + 1;
        }
        return static_cast<T>(lowest + accumulated);
    }

    // Drops |num_bytes| from the front of the input without any bounds check.
    inline void advance_unchecked(size_t num_bytes) {
        m_remaining_bytes -= num_bytes;
        m_data += num_bytes;
    }

    // Builds a string of at most |max_length| characters from the FRONT of the
    // input. A backslash acts as an escape: "\\" yields a single backslash,
    // while a backslash followed by any other byte terminates the string (both
    // bytes are consumed). A backslash that is the very last input byte is
    // taken literally. Because the length is encoded in the content itself, a
    // fuzzer can insert characters and simply get a longer string.
    [[nodiscard]] inline auto consume_random_length_string(size_t max_length) -> std::string {
        std::string text;
        // One up-front allocation; the string can never get longer than this.
        text.reserve(max_length < m_remaining_bytes ? max_length : m_remaining_bytes);

        while (text.size() < max_length && m_remaining_bytes != 0) {
            uint8_t current = *m_data;
            advance_unchecked(1);

            if (current == '\\' && m_remaining_bytes != 0) {
                current = *m_data;
                advance_unchecked(1);
                if (current != '\\') {
                    break;
                }
            }
            text += static_cast<char>(current);
        }

        text.shrink_to_fit();
        return text;
    }

    // Runs exactly one of the given operations. With a single operation it is
    // called directly and no input is consumed for the choice; otherwise the
    // index of the operation to run is drawn from the input.
    template <typename... Ops>
    void invoke_one_of(Ops&... op) {
        if constexpr (sizeof...(Ops) == 1) {
            (op(), ...);
        } else {
            auto const chosen_op_idx = range<size_t>(0, sizeof...(Ops) - 1);
            auto cursor = size_t{};
            ((cursor++ == chosen_op_idx ? op() : void()), ...);
        }
    }

    // Copying is private so that duplicating the read position is always an
    // explicit decision made through copy().
    provider(provider const&) = default;
    auto operator=(provider const&) -> provider& = default;

public:
    provider(provider&&) = default;
    auto operator=(provider&&) -> provider& = default;
    ~provider() = default;

    // Returns an independent provider over the same buffer with the same
    // read position.
    [[nodiscard]] auto copy() const -> provider {
        return *this;
    }

    // Wraps |size| bytes starting at |data|. The buffer is not copied.
    inline explicit provider(void const* data, size_t size)
        : m_data(static_cast<uint8_t const*>(data))
        , m_remaining_bytes(size) {}

    // random number in inclusive range [min, max]
    template <typename T>
    auto range(T min, T max) -> T {
        return consume_integral_in_range<T>(min, max);
    }

    // random number in half-open range [0, max_exclusive); 0 when max_exclusive is 0
    template <typename T>
    auto bounded(T max_exclusive) -> T {
        if (max_exclusive != 0) {
            return consume_integral_in_range<T>(0, max_exclusive - 1);
        }
        return T{};
    }

    // random value of integral type T; bool is drawn from a single byte
    template <typename T>
    auto integral() -> T {
        if constexpr (!std::is_same_v<bool, T>) {
            return consume_integral<T>();
        } else {
            return consume_bool();
        }
    }

    // string of at most |max_length| characters, see consume_random_length_string()
    inline auto string(size_t max_length) -> std::string {
        return consume_random_length_string(max_length);
    }

    // Returns a reference to one of the arguments, chosen based on the input.
    template <typename... Args>
    auto pick(Args&&... args) -> std::common_type_t<decltype(args)...>& {
        using common_t = std::common_type_t<decltype(args)...>;
        constexpr size_t last_idx = sizeof...(args) - 1;

        auto const chosen_idx = consume_integral_in_range<size_t>(0, last_idx);
        common_t* selected = nullptr;
        auto position = size_t{};
        // The fold over || stops evaluating at the one position that matches.
        ((position++ == chosen_idx && (selected = &args, true)) || ...);
        return *selected;
    }

    // Runs one randomly chosen operation, and keeps doing so for as long as
    // input remains. At least one operation is always run.
    template <typename... Ops>
    void repeat_oneof(Ops&&... op) {
        do {
            invoke_one_of(op...);
        } while (has_remaining_bytes());
    }

    // Draws a repetition count from [min, max] and runs one randomly chosen
    // operation that many times, stopping early once the input is used up.
    template <typename... Ops>
    void limited_repeat_oneof(size_t min, size_t max, Ops&&... op) {
        size_t rounds_left = consume_integral_in_range(min, max);
        while (rounds_left != 0) {
            --rounds_left;
            invoke_one_of(op...);
            if (!has_remaining_bytes()) {
                break;
            }
        }
    }

    [[nodiscard]] auto has_remaining_bytes() const -> bool {
        return m_remaining_bytes != 0U;
    }

    // Aborts the process when |b| is false.
    static inline void require(bool b) {
        if (b) {
            return;
        }
        std::abort();
    }
};
} // namespace fuzz
+133
View File
@@ -0,0 +1,133 @@
#include <fuzz/run.h>
#include <app/doctest.h>
#include <fmt/format.h>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
namespace fuzz::detail {
namespace {
// True for the 52 ASCII letters only; does not consult the locale.
[[nodiscard]] constexpr auto is_alpha(char c) -> bool {
    bool const is_lower = 'a' <= c && c <= 'z';
    bool const is_upper = 'A' <= c && c <= 'Z';
    return is_lower || is_upper;
}
// True for the ASCII decimal digits '0' through '9'.
[[nodiscard]] constexpr auto is_digit(char c) -> bool {
    return !(c < '0' || c > '9');
}
// True when `c` is an ASCII letter or an ASCII decimal digit.
[[nodiscard]] constexpr auto is_alnum(char c) -> bool {
    if (is_digit(c)) {
        return true;
    }
    return is_alpha(c);
}
// True when the character `needle` occurs anywhere in `haystack`.
[[nodiscard]] constexpr auto contains(std::string_view haystack, char needle) -> bool {
    auto const position = haystack.find(needle);
    return position != std::string_view::npos;
}
// Checks whether `name` can be used verbatim as the name of a corpus directory.
//
// A name is accepted only when it is non-empty and every character is an ASCII
// letter, an ASCII digit, or one of '_', '-', '+'. The empty string is rejected
// explicitly: it is not a usable filename, and appending it to the corpus base
// directory would make the resulting path refer to the base directory itself.
[[nodiscard]] constexpr auto is_valid_filename(std::string_view name) -> bool {
    if (name.empty()) {
        return false;
    }
    for (auto c : name) {
        if (!is_alnum(c) && !contains("_-+", c)) {
            return false;
        }
    }
    return true;
}
// Looks up the environment variable `varname` and returns a copy of its value,
// or an empty optional when the variable is not set.
auto env(char const* varname) -> std::optional<std::string> {
#ifdef _MSC_VER
    // _dupenv_s hands back its own buffer; it is released with free() as soon
    // as the value has been copied into a std::string.
    char* buffer = nullptr;
    size_t buffer_len = 0;
    if (0 != _dupenv_s(&buffer, &buffer_len, varname) || nullptr == buffer) {
        return std::nullopt;
    }
    std::string value(buffer);
    free(buffer);
    return value;
#else
    // NOLINTNEXTLINE(concurrency-mt-unsafe,clang-analyzer-cplusplus.StringChecker)
    if (char const* raw = std::getenv(varname); raw != nullptr) {
        return std::string(raw);
    }
    return std::nullopt;
#endif
}
// Reads the whole file at `p` into a string. Returns an empty optional when the
// file cannot be opened or when the stream ends up in a bad state after
// reading.
[[nodiscard]] auto read_file(std::filesystem::path const& p) -> std::optional<std::string> {
    std::ifstream stream(p);
    if (stream.fail()) {
        return std::nullopt;
    }

    using char_iterator = std::istreambuf_iterator<char>;
    auto content = std::string(char_iterator(stream), char_iterator());
    if (stream.bad()) {
        return std::nullopt;
    }
    return content;
}
// Determines the directory that holds the fuzz corpora of all tests.
//
// Lookup order:
//  1. The FUZZ_CORPUS_BASE_DIR environment variable, used as-is when set.
//  2. A ".fuzz-corpus-base-dir" marker file, searched for in the current
//     working directory and then in each parent directory up to the root. Its
//     content is a path that is resolved relative to the directory containing
//     the marker file.
//
// Returns an empty optional when neither is found. Throws std::runtime_error
// when a marker file exists but cannot be read.
[[nodiscard]] auto find_fuzz_corpus_base_dir() -> std::optional<std::filesystem::path> {
    auto corpus_base_dir = env("FUZZ_CORPUS_BASE_DIR");
    if (corpus_base_dir) {
        return corpus_base_dir.value();
    }

    auto p = std::filesystem::current_path();
    while (true) {
        auto const filename = p / ".fuzz-corpus-base-dir";
        if (std::filesystem::exists(filename)) {
            auto file_content = read_file(filename);
            if (!file_content) {
                throw std::runtime_error(fmt::format("could not read '{}'", filename.string()));
            }

            // Marker files usually end with a line break (editors and `echo`
            // add one). Left in place it would become part of the directory
            // name, so strip trailing whitespace before building the path.
            auto& text = file_content.value();
            auto const last_kept = text.find_last_not_of(" \t\r\n");
            text.erase(last_kept == std::string::npos ? 0 : last_kept + 1);

            auto f = std::filesystem::path(text).make_preferred();
            return p / f;
        }

        // Reached the filesystem root without finding a marker file.
        if (p == p.root_path()) {
            return {};
        }
        p = p.parent_path();
    }
}
} // namespace
// Replays the stored corpus of the currently running doctest test case.
//
// The corpus directory is <corpus base dir>/<test case name>. For every entry
// in that directory the content is read and `op` is called with a provider
// over those bytes.
//
// Throws std::runtime_error when the test case name is not a valid filename or
// when no corpus base directory can be found. Finally requires (doctest
// REQUIRE) that more than one directory entry was visited.
void evaluate_corpus(std::function<void(provider)> const& op) {
    if (!is_valid_filename(doctest::current_test_name())) {
        throw std::runtime_error("test case name needs to be a valid filename. only [a-zA-Z0-9_-+] are allowed");
    }

    // The base directory comes from an environment variable or a marker file,
    // see find_fuzz_corpus_base_dir().
    auto corpus_base_dir = find_fuzz_corpus_base_dir();
    if (!corpus_base_dir) {
        throw std::runtime_error("could not find corpus base dir :-(");
    }

    auto path = std::filesystem::path(corpus_base_dir.value()) / doctest::current_test_name();
    INFO("path=\"" << path.string() << "\"");

    auto num_files = size_t();
    for (auto const& dir_entry : std::filesystem::directory_iterator(path)) {
        ++num_files;
        auto const& test_file = dir_entry.path();
        CAPTURE(test_file);

        // Corpus files are arbitrary bytes. Open them in binary mode so that
        // no platform applies text-mode translation (line-ending conversion,
        // end-of-file markers) and `op` sees the stored bytes unchanged.
        auto f = std::ifstream(test_file, std::ios::binary);
        auto content = std::string((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
        op(provider(content.data(), content.size()));
    }
    REQUIRE(num_files > 1);
}
} // namespace fuzz::detail
+47
View File
@@ -0,0 +1,47 @@
#pragma once
#include <fuzz/provider.h>
#include <functional>
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
// Only declared in fuzzing builds. fuzz::run() calls this once per loop
// iteration and afterwards reads the next input buffer and its length from the
// two out-parameters.
// NOTE(review): presumably provided by the honggfuzz runtime at link time - confirm.
extern "C" {
void HF_ITER(const uint8_t** buf_ptr, size_t* len_ptr);
}
#endif
namespace fuzz {
namespace detail {
// Corpus replay used by run() when the build is not a fuzzing build: calls `op`
// with a provider for each entry of the current test case's corpus directory.
void evaluate_corpus(std::function<void(provider)> const& op);
} // namespace detail
/**
* There are two modes in which op() can be executed:
*
* Driven by honggfuzz: this is enabled when compiling with -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION.
* This is done to fuzz a particular test.
*
* Otherwise, this is run in "corpus" mode, where all files in a directory named by the testname are evaluated
* This should be done in normal unit testing. The location of the corpus base directory is determined in this order:
* 1. Use FUZZ_CORPUS_BASE_DIR environment variable
* 2. If this is not set, look in the working directory for a ".fuzz-corpus-base-dir" file which should contain
* the path to the base directory (relative to that particular file)
*/
template <typename Op>
void run(Op const& op) {
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
    // Fuzzing build: keep fetching the next input and hand it to op(). This
    // loop never returns.
    uint8_t const* input = nullptr;
    size_t input_size = 0;
    for (;;) {
        ::HF_ITER(&input, &input_size);
        op(provider(input, input_size));
    }
#else
    // Regular build: replay the stored corpus of the current test case.
    detail::evaluate_corpus(op);
#endif
}
} // namespace fuzz