Merge commit 'f1087e81ecdca5a59ba5ffca684c955c5b38f7c2' as 'third_party/unordered_dense'
This commit is contained in:
+205
@@ -0,0 +1,205 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <string>
#include <type_traits>
|
||||
|
||||
namespace fuzz {
|
||||
|
||||
// Helper to provide a little bit more convenient interface than FuzzedDataProvider itself.
//
// A provider is a non-owning cursor over a byte buffer: it only stores a pointer and the number of bytes that are
// still unread, so the buffer has to stay alive for as long as the provider (or any copy of it) is used.
// Integral values are taken from the END of the unread bytes, strings from the FRONT.
//
// Moving is public, copying is only possible through the explicit copy() member.
class provider {
    uint8_t const* m_data;    // first unread byte
    size_t m_remaining_bytes; // number of unread bytes starting at m_data

    // Reads one byte and returns a bool, or false when no data remains.
    [[nodiscard]] inline auto consume_bool() -> bool {
        return (1U & consume_integral<uint8_t>()) != 0U;
    }

    // Returns a number in the range [Type's min, Type's max]. The value might
    // not be uniformly distributed in the given range. If there's no input data
    // left, always returns |min|.
    template <typename T>
    [[nodiscard]] auto consume_integral() -> T {
        return consume_integral_in_range(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    }

    // Returns a number in the range [min, max] by consuming bytes from the
    // input data. The value might not be uniformly distributed in the given
    // range. If there's no input data left, always returns |min|. |min| must
    // be less than or equal to |max|, otherwise the process is aborted.
    template <typename T>
    [[nodiscard]] auto consume_integral_in_range(T min, T max) -> T {
        static_assert(std::is_integral<T>::value, "An integral type is required.");
        static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

        if (min > max) {
            std::abort();
        }

        // Use the biggest type possible to hold the range and the result.
        // Unsigned arithmetic wraps, so this is also correct for a negative |min|.
        uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
        uint64_t result = 0;
        size_t offset = 0;

        // Consume only as many bytes as are needed to cover |range| (at most sizeof(T) bytes).
        while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && m_remaining_bytes != 0) {
            // Pull bytes off the end of the seed data. Experimentally, this seems to
            // allow the fuzzer to more easily explore the input space. This makes
            // sense, since it works by modifying inputs that caused new code to run,
            // and this data is often used to encode the length of data that is read
            // from the front of the input. Separating out read lengths makes it
            // easier to modify the contents of the data that is actually read.
            --m_remaining_bytes;
            result = (result << CHAR_BIT) | m_data[m_remaining_bytes];
            offset += CHAR_BIT;
        }

        // Avoid division by 0, in case |range + 1| results in overflow.
        if (range != std::numeric_limits<decltype(range)>::max()) {
            result = result % (range + 1);
        }

        return static_cast<T>(static_cast<uint64_t>(min) + result);
    }

    // Skips |num_bytes| bytes at the front of the input. The caller has to make sure
    // that at least |num_bytes| bytes remain; nothing is checked here.
    inline void advance_unchecked(size_t num_bytes) {
        m_data += num_bytes;
        m_remaining_bytes -= num_bytes;
    }

    // Returns a std::string of length from 0 to |max_length|. When it runs out of
    // input data, returns what remains of the input. Designed to be more stable
    // with respect to a fuzzer inserting characters than just picking a random
    // length and then consuming that many bytes.
    [[nodiscard]] inline auto consume_random_length_string(size_t max_length) -> std::string {
        // Reads bytes from the start of |m_data|. Maps "\\" to "\", and maps "\"
        // followed by anything else to the end of the string. As a result of this
        // logic, a fuzzer can insert characters into the string, and the string
        // will be lengthened to include those new characters, resulting in a more
        // stable fuzzer than picking the length of a string independently from
        // picking its contents.
        std::string result;

        // Reserve the anticipated capacity to prevent several reallocations.
        result.reserve(std::min(max_length, m_remaining_bytes));
        for (size_t i = 0; i < max_length && m_remaining_bytes != 0; ++i) {
            auto next = m_data[0];
            advance_unchecked(1);
            // A backslash that is the very last input byte is kept as an ordinary character.
            if (next == '\\' && m_remaining_bytes != 0) {
                next = m_data[0];
                advance_unchecked(1);
                if (next != '\\') {
                    break;
                }
            }
            result += static_cast<char>(next);
        }

        result.shrink_to_fit();
        return result;
    }

    // Copying is private on purpose: use copy() to make a duplicate explicit at the call site.
    provider(provider const&) = default;
    auto operator=(provider const&) -> provider& = default;

public:
    provider(provider&&) = default;
    auto operator=(provider&&) -> provider& = default;
    ~provider() = default;

    // Returns an independent provider that continues from the same position in the same buffer.
    [[nodiscard]] auto copy() const -> provider {
        return *this;
    }

    // Creates a provider that reads |size| bytes starting at |data|. The bytes are not copied.
    inline explicit provider(void const* data, size_t size)
        : m_data(static_cast<uint8_t const*>(data))
        , m_remaining_bytes(size) {}

    // random number in inclusive range [min, max]; aborts when min > max
    template <typename T>
    auto range(T min, T max) -> T {
        return consume_integral_in_range<T>(min, max);
    }

    // random number in the half-open range [0, max_exclusive). Returns 0 without
    // consuming any input when |max_exclusive| is 0.
    template <typename T>
    auto bounded(T max_exclusive) -> T {
        if (0 == max_exclusive) {
            return {};
        }
        return consume_integral_in_range<T>(0, max_exclusive - 1);
    }

    // random value covering the whole range of T; bool is decided by the lowest bit of one byte
    template <typename T>
    auto integral() -> T {
        if constexpr (std::is_same_v<bool, T>) {
            return consume_bool();
        } else {
            return consume_integral<T>();
        }
    }

    // string of at most |max_length| characters, read from the front of the input
    inline auto string(size_t max_length) -> std::string {
        return consume_random_length_string(max_length);
    }

    // Consumes input to choose one of |args| and returns a reference to the chosen argument.
    // The address of every argument has to convert to a pointer to the arguments' common type.
    // Pass lvalues: a reference to a temporary argument dangles once the call expression ends.
    template <typename... Args>
    auto pick(Args&&... args) -> std::common_type_t<decltype(args)...>& {
        static constexpr auto num_ops = sizeof...(args);

        auto idx = size_t{};
        auto const chosen_idx = consume_integral_in_range<size_t>(0, num_ops - 1);
        std::common_type_t<decltype(args)...>* result = nullptr;
        // Walk the pack until the chosen index is reached; || short-circuits after the match.
        ((idx++ == chosen_idx ? (result = &args, true) : false) || ...);
        return *result;
    }

    // Calls one of |op| at least once, and then again and again until all input has been consumed.
    // With more than one op, each round consumes input to decide which op is called.
    // Each op is invoked without arguments and has to return void.
    template <typename... Ops>
    void repeat_oneof(Ops&&... op) {
        static constexpr auto num_ops = sizeof...(op);

        do {
            if constexpr (num_ops == 1) {
                (op(), ...);
            } else {
                auto chosen_op_idx = range<size_t>(0, num_ops - 1);
                auto op_idx = size_t{};
                ((op_idx++ == chosen_op_idx ? op() : void()), ...);
            }
        } while (0 != m_remaining_bytes);
    }

    // Like repeat_oneof(), but the number of rounds is a consumed number in [min, max], and the
    // rounds stop early as soon as a round leaves no input behind.
    template <typename... Ops>
    void limited_repeat_oneof(size_t min, size_t max, Ops&&... op) {
        static constexpr auto num_ops = sizeof...(op);

        size_t const num_evaluations = consume_integral_in_range(min, max);
        for (size_t i = 0; i < num_evaluations; ++i) {
            if constexpr (num_ops == 1) {
                (op(), ...);
            } else {
                auto chosen_op_idx = range<size_t>(0, num_ops - 1);
                auto op_idx = size_t{};
                ((op_idx++ == chosen_op_idx ? op() : void()), ...);
            }
            if (m_remaining_bytes == 0) {
                return;
            }
        }
    }

    // true while there is unread input left
    [[nodiscard]] auto has_remaining_bytes() const -> bool {
        return 0U != m_remaining_bytes;
    }

    // Aborts the process when |b| is false.
    static inline void require(bool b) {
        if (!b) {
            std::abort();
        }
    }
};
|
||||
|
||||
} // namespace fuzz
|
||||
+133
@@ -0,0 +1,133 @@
|
||||
#include <fuzz/run.h>
|
||||
|
||||
#include <app/doctest.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace fuzz::detail {
|
||||
|
||||
namespace {
|
||||
|
||||
// True for the ASCII letters a-z and A-Z, false for every other character.
[[nodiscard]] constexpr auto is_alpha(char c) -> bool {
    bool const is_lower = 'a' <= c && c <= 'z';
    bool const is_upper = 'A' <= c && c <= 'Z';
    return is_lower || is_upper;
}
|
||||
|
||||
// True for the ASCII digits 0-9, false for every other character.
[[nodiscard]] constexpr auto is_digit(char c) -> bool {
    return !(c < '0' || c > '9');
}
|
||||
|
||||
[[nodiscard]] constexpr auto is_alnum(char c) -> bool {
|
||||
return is_alpha(c) || is_digit(c);
|
||||
}
|
||||
|
||||
// True when the character needle occurs anywhere in haystack.
[[nodiscard]] constexpr auto contains(std::string_view haystack, char needle) -> bool {
    auto const position = haystack.find(needle);
    return position != std::string_view::npos;
}
|
||||
|
||||
[[nodiscard]] constexpr auto is_valid_filename(std::string_view name) -> bool {
|
||||
using namespace std::literals;
|
||||
for (auto c : name) {
|
||||
if (!is_alnum(c) && !contains("_-+", c)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up the environment variable varname.
// Returns its value, or an empty optional when the variable could not be obtained.
auto env(char const* varname) -> std::optional<std::string> {
#ifdef _MSC_VER
    // _dupenv_s hands out a buffer that has to be released with free().
    char* buffer = nullptr;
    size_t buffer_size = 0;
    errno_t const status = _dupenv_s(&buffer, &buffer_size, varname);
    if (0 != status || nullptr == buffer) {
        return std::nullopt;
    }
    std::string value(buffer);
    free(buffer);
    return value;
#else
    // NOLINTNEXTLINE(concurrency-mt-unsafe,clang-analyzer-cplusplus.StringChecker)
    if (char const* raw = std::getenv(varname); nullptr != raw) {
        return std::string(raw);
    }
    return std::nullopt;
#endif
}
|
||||
|
||||
// Reads the whole file p into a string.
// Returns an empty optional when the file cannot be opened or the stream reports a bad state after reading.
[[nodiscard]] auto read_file(std::filesystem::path const& p) -> std::optional<std::string> {
    std::ifstream input(p);
    if (input.fail()) {
        return std::nullopt;
    }

    using char_iterator = std::istreambuf_iterator<char>;
    auto text = std::string(char_iterator{input}, char_iterator{});
    if (input.bad()) {
        return std::nullopt;
    }
    return text;
}
|
||||
|
||||
[[nodiscard]] auto find_fuzz_corpus_base_dir() -> std::optional<std::filesystem::path> {
|
||||
auto corpus_base_dir = env("FUZZ_CORPUS_BASE_DIR");
|
||||
if (corpus_base_dir) {
|
||||
return corpus_base_dir.value();
|
||||
}
|
||||
|
||||
auto p = std::filesystem::current_path();
|
||||
while (true) {
|
||||
auto const filename = p / ".fuzz-corpus-base-dir";
|
||||
// INFO(fmt::format("trying '{}'", filename.string()));
|
||||
if (std::filesystem::exists(filename)) {
|
||||
if (auto file_content = read_file(p / ".fuzz-corpus-base-dir"); file_content) {
|
||||
auto f = std::filesystem::path(file_content.value()).make_preferred();
|
||||
// INFO(fmt::format("got it! p='{}, f='{}', p/f='{}'\n", p.string(), f.string(), (p / f).string()));
|
||||
return p / f;
|
||||
}
|
||||
// could not read file
|
||||
throw std::runtime_error(fmt::format("could not read '{}'", filename.string()));
|
||||
}
|
||||
if (p == p.root_path()) {
|
||||
return {};
|
||||
}
|
||||
p = p.parent_path();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void evaluate_corpus(std::function<void(provider)> const& op) {
|
||||
if (!is_valid_filename(doctest::current_test_name())) {
|
||||
throw std::runtime_error("test case name needs to be a valid filename. only [a-zA-Z0-9_-+] are allowed");
|
||||
}
|
||||
|
||||
// 2 ways
|
||||
|
||||
auto corpus_base_dir = find_fuzz_corpus_base_dir();
|
||||
if (!corpus_base_dir) {
|
||||
throw std::runtime_error("could not find corpus base dir :-(");
|
||||
}
|
||||
|
||||
auto path = std::filesystem::path(corpus_base_dir.value()) / doctest::current_test_name();
|
||||
INFO("path=\"" << path.string() << "\"");
|
||||
auto num_files = size_t();
|
||||
for (auto const& dir_entry : std::filesystem::directory_iterator(path)) {
|
||||
++num_files;
|
||||
auto const& test_file = dir_entry.path();
|
||||
CAPTURE(test_file);
|
||||
|
||||
auto f = std::ifstream(test_file);
|
||||
auto content = std::string((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
|
||||
op(provider(content.data(), content.size()));
|
||||
}
|
||||
REQUIRE(num_files > 1);
|
||||
}
|
||||
|
||||
} // namespace fuzz::detail
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <fuzz/provider.h>
|
||||
|
||||
#include <functional>
|
||||
|
||||
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
|
||||
extern "C" {
|
||||
// Only declared when FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined. fuzz::run() calls it at the start of
// every loop iteration and wraps the resulting buffer pointer and length in a provider.
// NOTE(review): presumably the honggfuzz hook that supplies the next fuzz input - confirm against its docs.
void HF_ITER(const uint8_t** buf_ptr, size_t* len_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace fuzz {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Calls op with a provider for each entry of the corpus directory named after the current test case.
// fuzz::run() forwards to this function when the build is not a fuzzing build.
void evaluate_corpus(std::function<void(provider)> const& op);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* There are 2 modes how this the op() will be executed:
|
||||
*
|
||||
* Driven by honggfuzz: this is enabled when compiling with -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION.
|
||||
* This is done to fuzz a particular test.
|
||||
*
|
||||
* Otherwise, this is run in "corpus" mode, where all files in a directory named by the testname are evaluated
|
||||
* This should be done in normal unit testing. The location of the corpus base directory is determined in this order:
|
||||
* 1. Use FUZZ_CORPUS_BASE_DIR environment variable
|
||||
* 2. If this is not set, look in the working directory for a ".fuzz-corpus-base-dir" file which should contain
|
||||
* the path to the base directory (relative to that particular file)
|
||||
*/
|
||||
template <typename Op>
|
||||
void run(Op const& op) {
|
||||
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
|
||||
size_t len = 0;
|
||||
uint8_t const* buf = nullptr;
|
||||
while (true) {
|
||||
::HF_ITER(&buf, &len);
|
||||
op(provider(buf, len));
|
||||
}
|
||||
#else
|
||||
detail::evaluate_corpus(op);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace fuzz
|
||||
Reference in New Issue
Block a user