From 6e1c4bfecd6d3b993208c13e8b2afe9a1388121e Mon Sep 17 00:00:00 2001 From: Michael Krasnyk Date: Thu, 19 Jan 2017 15:14:30 +0100 Subject: [PATCH] Added indexed array data type with variable and fixed group blocks --- CHANGELOG.md | 1 + features/testbot/utf.feature | 3 +- .../contiguous_internalmem_datafacade.hpp | 64 +-- include/storage/io.hpp | 16 + include/storage/shared_datatype.hpp | 8 +- include/util/exception.hpp | 3 + include/util/indexed_data.hpp | 366 ++++++++++++++++++ include/util/name_table.hpp | 24 +- src/extractor/extraction_containers.cpp | 41 +- src/extractor/extractor_callbacks.cpp | 30 +- src/extractor/guidance/turn_handler.cpp | 2 +- src/storage/storage.cpp | 41 +- src/util/name_table.cpp | 65 ++-- unit_tests/util/indexed_data.cpp | 193 +++++++++ unit_tests/util/name_table.cpp | 117 ++++++ 15 files changed, 781 insertions(+), 193 deletions(-) create mode 100644 include/util/indexed_data.hpp create mode 100644 unit_tests/util/indexed_data.cpp create mode 100644 unit_tests/util/name_table.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 28572d8b5..752164074 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Fix #3475 removed an invalid `exit` field from the `arrive` maneuver - Fix #3515 adjusted number of `nodes` in `annotation` - Fix #3605 Fixed a bug that could lead to turns at the end of the road to be suppressed + - Fix #2844 handle up to 16777215 code units in OSM names - Infrastructure - Support building rpm packages. 
- Guidance diff --git a/features/testbot/utf.feature b/features/testbot/utf.feature index 36800db61..81049f73b 100644 --- a/features/testbot/utf.feature +++ b/features/testbot/utf.feature @@ -23,7 +23,6 @@ Feature: Handling of UTF characters | c | d | Cyrillic Москва,Cyrillic Москва | - @todo Scenario: Up to 255 Unicode Code Points (255 x Panda Code Point) Given the node map """ @@ -35,6 +34,6 @@ Feature: Handling of UTF characters | ab | ab | primary | | bc | 🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼 | primary | - When I route 100 times I should get + When I route 2 times I should get | from | to | route | | a | c | ab,🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼,🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼 | diff --git a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp index 353d357e6..f78c742a7 100644 --- a/include/engine/datafacade/contiguous_internalmem_datafacade.hpp +++ b/include/engine/datafacade/contiguous_internalmem_datafacade.hpp @@ -18,6 +18,7 @@ #include "util/exception_utils.hpp" #include "util/guidance/turn_bearing.hpp" #include "util/log.hpp" +#include "util/name_table.hpp" #include "util/packed_vector.hpp" #include "util/range_table.hpp" #include "util/rectangle.hpp" @@ -79,7 +80,7 @@ class ContiguousInternalMemoryDataFacade : public 
BaseDataFacade util::ShM::vector m_travel_mode_list; util::ShM::vector m_pre_turn_bearing; util::ShM::vector m_post_turn_bearing; - util::ShM::vector m_names_char_list; + util::NameTable m_names_table; util::ShM::vector m_name_begin_indices; util::ShM::vector m_geometry_indices; util::ShM::vector m_geometry_node_list; @@ -103,7 +104,7 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade std::unique_ptr m_geospatial_query; boost::filesystem::path file_index_path; - std::shared_ptr> m_name_table; + util::NameTable m_name_table; // bearing classes by node based node util::ShM::vector m_bearing_class_id_table; // entry class IDs @@ -267,23 +268,10 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade void InitializeNamePointers(storage::DataLayout &data_layout, char *memory_block) { - auto offsets_ptr = - data_layout.GetBlockPtr(memory_block, storage::DataLayout::NAME_OFFSETS); - auto blocks_ptr = - data_layout.GetBlockPtr(memory_block, storage::DataLayout::NAME_BLOCKS); - util::ShM::vector name_offsets( - offsets_ptr, data_layout.num_entries[storage::DataLayout::NAME_OFFSETS]); - util::ShM::vector name_blocks( - blocks_ptr, data_layout.num_entries[storage::DataLayout::NAME_BLOCKS]); - - auto names_list_ptr = - data_layout.GetBlockPtr(memory_block, storage::DataLayout::NAME_CHAR_LIST); - util::ShM::vector names_char_list( - names_list_ptr, data_layout.num_entries[storage::DataLayout::NAME_CHAR_LIST]); - m_name_table = std::make_unique>( - name_offsets, name_blocks, static_cast(names_char_list.size())); - - m_names_char_list = std::move(names_char_list); + auto name_data_ptr = + data_layout.GetBlockPtr(memory_block, storage::DataLayout::NAME_CHAR_DATA); + const auto name_data_size = data_layout.num_entries[storage::DataLayout::NAME_CHAR_DATA]; + m_name_table.reset(name_data_ptr, name_data_ptr + name_data_size); } void InitializeTurnLaneDescriptionsPointers(storage::DataLayout &data_layout, @@ -823,52 +811,22 @@ class 
ContiguousInternalMemoryDataFacade : public BaseDataFacade StringView GetNameForID(const NameID id) const override final { - if (std::numeric_limits::max() == id) - { - return ""; - } - - auto range = m_name_table->GetRange(id); - - if (range.begin() == range.end()) - { - return ""; - } - - auto first = m_names_char_list.begin() + range.front(); - auto last = m_names_char_list.begin() + range.back() + 1u; - // These iterators are useless: they're InputIterators onto a contiguous block of memory. - // Deref to get to the first element, then Addressof to get the memory address of the it. - const std::size_t len = &*last - &*first; - - return StringView{&*first, len}; + return m_name_table.GetNameForID(id); } StringView GetRefForID(const NameID id) const override final { - // We store the ref after the name, destination and pronunciation of a street. - // We do this to get around the street length limit of 255 which would hit - // if we concatenate these. Order (see extractor_callbacks): - // name (0), destination (1), pronunciation (2), ref (3) - return GetNameForID(id + 3); + return m_name_table.GetRefForID(id); } StringView GetPronunciationForID(const NameID id) const override final { - // We store the pronunciation after the name and destination of a street. - // We do this to get around the street length limit of 255 which would hit - // if we concatenate these. Order (see extractor_callbacks): - // name (0), destination (1), pronunciation (2), ref (3) - return GetNameForID(id + 2); + return m_name_table.GetPronunciationForID(id); } StringView GetDestinationsForID(const NameID id) const override final { - // We store the destination after the name of a street. - // We do this to get around the street length limit of 255 which would hit - // if we concatenate these. 
Order (see extractor_callbacks): - // name (0), destination (1), pronunciation (2), ref (3) - return GetNameForID(id + 1); + return m_name_table.GetDestinationsForID(id); } bool IsCoreNode(const NodeID id) const override final diff --git a/include/storage/io.hpp b/include/storage/io.hpp index 07a2a0244..464083a08 100644 --- a/include/storage/io.hpp +++ b/include/storage/io.hpp @@ -61,6 +61,22 @@ class FileReader } } + std::size_t GetSize() + { + const boost::filesystem::ifstream::pos_type positon = input_stream.tellg(); + input_stream.seekg(0, std::ios::end); + const boost::filesystem::ifstream::pos_type file_size = input_stream.tellg(); + + if (file_size == boost::filesystem::ifstream::pos_type(-1)) + { + throw util::exception("File size for " + filepath.string() + " failed " + SOURCE_REF); + } + + // restore the current position + input_stream.seekg(positon, std::ios::beg); + return file_size; + } + /* Read count objects of type T into pointer dest */ template void ReadInto(T *dest, const std::size_t count) { diff --git a/include/storage/shared_datatype.hpp b/include/storage/shared_datatype.hpp index 3f281c79a..416d36e90 100644 --- a/include/storage/shared_datatype.hpp +++ b/include/storage/shared_datatype.hpp @@ -18,9 +18,7 @@ namespace storage // Added at the start and end of each block as sanity check const constexpr char CANARY[4] = {'O', 'S', 'R', 'M'}; -const constexpr char *block_id_to_name[] = {"NAME_OFFSETS", - "NAME_BLOCKS", - "NAME_CHAR_LIST", +const constexpr char *block_id_to_name[] = {"NAME_CHAR_DATA", "NAME_ID_LIST", "VIA_NODE_LIST", "GRAPH_NODE_LIST", @@ -64,9 +62,7 @@ struct DataLayout { enum BlockID { - NAME_OFFSETS = 0, - NAME_BLOCKS, - NAME_CHAR_LIST, + NAME_CHAR_DATA = 0, NAME_ID_LIST, VIA_NODE_LIST, GRAPH_NODE_LIST, diff --git a/include/util/exception.hpp b/include/util/exception.hpp index b18099586..5ba9fc567 100644 --- a/include/util/exception.hpp +++ b/include/util/exception.hpp @@ -32,6 +32,8 @@ SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. #include #include +#include + namespace osrm { namespace util @@ -42,6 +44,7 @@ class exception final : public std::exception public: explicit exception(const char *message) : message(message) {} explicit exception(std::string message) : message(std::move(message)) {} + explicit exception(boost::format message) : message(message.str()) {} const char *what() const noexcept override { return message.c_str(); } private: diff --git a/include/util/indexed_data.hpp b/include/util/indexed_data.hpp new file mode 100644 index 000000000..abb869a10 --- /dev/null +++ b/include/util/indexed_data.hpp @@ -0,0 +1,366 @@ +#ifndef OSRM_INDEXED_DATA_HPP +#define OSRM_INDEXED_DATA_HPP + +#include "util/exception.hpp" +#include "util/string_view.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace osrm +{ +namespace util +{ + +template struct VariableGroupBlock +{ + static constexpr std::uint32_t BLOCK_SIZE = N; + + using ResultType = T; + using ValueType = typename T::value_type; + + static_assert(0 <= BLOCK_SIZE && BLOCK_SIZE <= 16, "incorrect block size"); + static_assert(sizeof(ValueType) == 1, "data basic type must char"); + + struct BlockReference + { + std::uint32_t offset; + std::uint32_t descriptor; + }; + + VariableGroupBlock() {} + + /// Returns ceiling(log_256(value + 1)) + inline std::uint32_t log256(std::uint32_t value) const + { + BOOST_ASSERT(value < 0x1000000); + return value == 0 ? 0 : value < 0x100 ? 1 : value < 0x10000 ? 2 : 3; + } + + /// Advance data iterator by the value of byte_length bytes at length iterator. + /// Advance length iterator by byte_length. 
+ template + inline void + var_advance(DataIterator &data, DataIterator &length, std::uint32_t byte_length) const + { + if (byte_length == 0) + { + } + else if (byte_length == 1) + { + data += static_cast(*length++); + } + else if (byte_length == 2) + { + data += static_cast(*length++); + data += static_cast(*length++) << 8; + } + else + { + BOOST_ASSERT(byte_length == 3); + data += static_cast(*length++); + data += static_cast(*length++) << 8; + data += static_cast(*length++) << 16; + } + } + + /// Summation of 16 2-bit values using SWAR + inline std::uint32_t sum2bits(std::uint32_t value) const + { + value = (value >> 2 & 0x33333333) + (value & 0x33333333); + value = (value >> 4 & 0x0f0f0f0f) + (value & 0x0f0f0f0f); + value = (value >> 8 & 0x00ff00ff) + (value & 0x00ff00ff); + return (value >> 16 & 0x0000ffff) + (value & 0x0000ffff); + } + + /// Write a block reference {offset, descriptor}, where offset + /// is a global block offset and descriptor is a 32-bit value + /// of prefix length. sum(descriptor) equals to the block + /// prefix length. + /// Returns the block prefix length. 
+ template + Offset WriteBlockReference(std::ostream &out, + Offset data_offset, + OffsetIterator first, + OffsetIterator last) const + { + BOOST_ASSERT(data_offset <= std::numeric_limits::max()); + + Offset prefix_length = 0; + BlockReference refernce{static_cast(data_offset), 0}; + for (; first != last; --last) + { + const std::uint32_t data_length = *last - *std::prev(last); + if (data_length >= 0x1000000) + throw util::exception(boost::format("too large data length %1%") % data_length); + + const std::uint32_t byte_length = log256(data_length); + refernce.descriptor = (refernce.descriptor << 2) | byte_length; + prefix_length += byte_length; + } + + out.write((const char *)&refernce, sizeof(refernce)); + + return prefix_length; + } + + /// Write a block prefix that is an array of variable encoded data lengths: + /// 0 is omitted; + /// 1..255 is 1 byte; + /// 256..65535 is 2 bytes; + /// 65536..16777215 is 3 bytes. + /// [first..last] is an inclusive range of block data. + /// The length of the last item in the block is not stored. + template + void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const + { + for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) + { + const std::uint32_t data_length = *next - *curr; + const std::uint32_t byte_length = log256(data_length); + if (byte_length == 0) + continue; + + out.write((const char *)&data_length, byte_length); + } + } + + /// Advances the range to an item stored in the referenced block. + /// Input [first..last) is a range of the complete block data with prefix. + /// Output [first..last) is a range of the referenced data at local_index. 
+ template + void ReadRefrencedBlock(const BlockReference &reference, + std::uint32_t local_index, + DataIterator &first, + DataIterator &last) const + { + std::uint32_t descriptor = reference.descriptor; + DataIterator var_lengths = first; // iterator to the variable lengths part + std::advance(first, sum2bits(descriptor)); // advance first to the block data part + for (std::uint32_t i = 0; i < local_index; ++i, descriptor >>= 2) + { + var_advance(first, var_lengths, descriptor & 0x3); + } + + if (local_index < BLOCK_SIZE) + { + last = first; + var_advance(last, var_lengths, descriptor & 0x3); + } + } +}; + +template struct FixedGroupBlock +{ + static constexpr std::uint32_t BLOCK_SIZE = N; + + using ResultType = T; + using ValueType = typename T::value_type; + + static_assert(sizeof(ValueType) == 1, "data basic type must char"); + + struct BlockReference + { + std::uint32_t offset; + }; + + FixedGroupBlock() {} + + /// Write a block reference {offset}, where offset is a global block offset + /// Returns the fixed block prefix length. + template + Offset + WriteBlockReference(std::ostream &out, Offset data_offset, OffsetIterator, OffsetIterator) const + { + BOOST_ASSERT(data_offset <= std::numeric_limits::max()); + + BlockReference refernce{static_cast(data_offset)}; + out.write((const char *)&refernce, sizeof(refernce)); + + return BLOCK_SIZE; + } + + /// Write a fixed length block prefix. 
+ template + void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const + { + std::uint32_t index = 0; + std::array block_prefix; + for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) + { + const std::uint32_t data_length = *next - *curr; + if (data_length >= 0x100) + throw util::exception(boost::format("too large data length %1%") % data_length); + + block_prefix[index++] = static_cast(data_length); + } + out.write((const char *)block_prefix.data(), block_prefix.size()); + } + + /// Advances the range to an item stored in the referenced block. + /// Input [first..last) is a range of the complete block data with prefix. + /// Output [first..last) is a range of the referenced data at local_index. + template + void ReadRefrencedBlock(const BlockReference &, + std::uint32_t local_index, + DataIterator &first, + DataIterator &last) const + { + DataIterator fixed_lengths = first; // iterator to the fixed lengths part + std::advance(first, BLOCK_SIZE); // advance first to the block data part + for (std::uint32_t i = 0; i < local_index; ++i) + { + first += static_cast(*fixed_lengths++); + } + + if (local_index < BLOCK_SIZE) + { + last = first + static_cast(*fixed_lengths); + } + } +}; + +template struct IndexedData +{ + static constexpr std::uint32_t BLOCK_SIZE = GroupBlock::BLOCK_SIZE; + + using BlocksNumberType = std::uint32_t; + using DataSizeType = std::uint64_t; + + using BlockReference = typename GroupBlock::BlockReference; + using ResultType = typename GroupBlock::ResultType; + using ValueType = typename GroupBlock::ValueType; + + static_assert(sizeof(ValueType) == 1, "data basic type must char"); + + IndexedData() : blocks_number{0}, block_references{nullptr}, begin{nullptr}, end{nullptr} {} + + bool empty() const { return blocks_number == 0; } + + template + void + write(std::ostream &out, OffsetIterator first, OffsetIterator last, DataIterator data) const + { + static_assert(sizeof(typename 
DataIterator::value_type) == 1, "data basic type must char"); + + using diff_type = typename OffsetIterator::difference_type; + + BOOST_ASSERT(first < last); + const OffsetIterator sentinel = std::prev(last); + + // Write number of blocks + const auto number_of_elements = std::distance(first, sentinel); + const BlocksNumberType number_of_blocks = + number_of_elements == 0 ? 0 + : 1 + (std::distance(first, sentinel) - 1) / (BLOCK_SIZE + 1); + out.write((const char *)&number_of_blocks, sizeof(number_of_blocks)); + + // Write block references and compute the total data size that includes prefix and data + const GroupBlock block; + DataSizeType data_size = 0; + for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) + { + std::advance(next, std::min(BLOCK_SIZE, std::distance(next, sentinel))); + data_size += block.WriteBlockReference(out, data_size, curr, next); + std::advance(next, std::min(1, std::distance(next, sentinel))); + data_size += *next - *curr; + } + + // Write the total data size + out.write((const char *)&data_size, sizeof(data_size)); + + // Write data blocks that are (prefix, data) + for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) + { + std::advance(next, std::min(BLOCK_SIZE, std::distance(next, sentinel))); + block.WriteBlockPrefix(out, curr, next); + std::advance(next, std::min(1, std::distance(next, sentinel))); + std::copy(data + *curr, data + *next, std::ostream_iterator(out)); + } + } + + /// Set internal pointers from the buffer [first, last). + /// The data buffer [first, last) must exist during IndexedData life-time. + /// No ownership is transferred. 
+ void reset(const ValueType *first, const ValueType *last) + { + // Read blocks number + if (first + sizeof(BlocksNumberType) > last) + throw util::exception("incorrect memory block"); + + blocks_number = *reinterpret_cast(first); + first += sizeof(BlocksNumberType); + + // Get block references pointer + if (first + sizeof(BlockReference) * blocks_number > last) + throw util::exception("incorrect memory block"); + + block_references = reinterpret_cast(first); + first += sizeof(BlockReference) * blocks_number; + + // Read total data size + if (first + sizeof(DataSizeType) > last) + throw util::exception("incorrect memory block"); + + auto data_size = *reinterpret_cast(first); + first += sizeof(DataSizeType); + + // Get data blocks begin and end iterators + begin = reinterpret_cast(first); + first += sizeof(ValueType) * data_size; + + if (first > last) + throw util::exception("incorrect memory block"); + + end = reinterpret_cast(first); + } + + // Return value at the given index + ResultType at(std::uint32_t index) const + { + // Get block external and internal indices + const BlocksNumberType block_idx = index / (BLOCK_SIZE + 1); + const std::uint32_t internal_idx = index % (BLOCK_SIZE + 1); + + if (block_idx >= blocks_number) + return ResultType(); + + // Get block first and last iterators + auto first = begin + block_references[block_idx].offset; + auto last = + block_idx + 1 == blocks_number ? 
end : begin + block_references[block_idx + 1].offset; + + const GroupBlock block; + block.ReadRefrencedBlock(block_references[block_idx], internal_idx, first, last); + + return adapt(first, last); + } + + private: + template + typename std::enable_if::value, T>::type + adapt(const ValueType *first, const ValueType *last) const + { + return ResultType(first, last); + } + + template + typename std::enable_if::value, T>::type + adapt(const ValueType *first, const ValueType *last) const + { + return ResultType(first, std::distance(first, last)); + } + + BlocksNumberType blocks_number; + const BlockReference *block_references; + const ValueType *begin, *end; +}; +} +} +#endif // OSRM_INDEXED_DATA_HPP diff --git a/include/util/name_table.hpp b/include/util/name_table.hpp index 07d10db53..06aa8cb5e 100644 --- a/include/util/name_table.hpp +++ b/include/util/name_table.hpp @@ -1,7 +1,7 @@ #ifndef OSRM_UTIL_NAME_TABLE_HPP #define OSRM_UTIL_NAME_TABLE_HPP -#include "util/range_table.hpp" +#include "util/indexed_data.hpp" #include "util/shared_memory_vector_wrapper.hpp" #include "util/string_view.hpp" #include "util/typedefs.hpp" @@ -18,21 +18,33 @@ namespace util // processing based on name indices. class NameTable { - private: - // FIXME should this use shared memory - util::RangeTable<16, false> m_name_table; - ShM::vector m_names_char_list; - public: + using IndexedData = util::IndexedData>; + using ResultType = IndexedData::ResultType; + using ValueType = IndexedData::ValueType; + + NameTable() {} + + // Read filename and store own data in m_buffer NameTable(const std::string &filename); + // Keep pointers only in m_name_table and don't own data in m_buffer + void reset(ValueType *begin, ValueType *end); + // This class provides a limited view over all the string data we serialize out. // The following functions are a subset of what is available. // See the data facades for they provide full access to this serialized string data. 
// (at time of writing this: get{Name,Ref,Pronunciation,Destinations}ForID(name_id);) util::StringView GetNameForID(const NameID id) const; + util::StringView GetDestinationsForID(const NameID id) const; util::StringView GetRefForID(const NameID id) const; util::StringView GetPronunciationForID(const NameID id) const; + + private: + using BufferType = std::unique_ptr>; + + BufferType m_buffer; + IndexedData m_name_table; }; } // namespace util } // namespace osrm diff --git a/src/extractor/extraction_containers.cpp b/src/extractor/extraction_containers.cpp index 2c2b6718a..6676c695a 100644 --- a/src/extractor/extraction_containers.cpp +++ b/src/extractor/extraction_containers.cpp @@ -3,13 +3,13 @@ #include "extractor/extraction_way.hpp" #include "util/coordinate_calculation.hpp" -#include "util/range_table.hpp" #include "util/exception.hpp" #include "util/exception_utils.hpp" #include "util/fingerprint.hpp" #include "util/io.hpp" #include "util/log.hpp" +#include "util/name_table.hpp" #include "util/timing_util.hpp" #include @@ -176,43 +176,10 @@ void ExtractionContainers::WriteCharData(const std::string &file_name) util::UnbufferedLog log; log << "writing street name index ... 
"; TIMER_START(write_index); - boost::filesystem::ofstream file_stream(file_name, std::ios::binary); + boost::filesystem::ofstream file(file_name, std::ios::binary); - // transforms in-place name offsets to name lengths - BOOST_ASSERT(!name_offsets.empty()); - for (auto curr = name_offsets.begin(), next = name_offsets.begin() + 1; - next != name_offsets.end(); - ++curr, ++next) - { - *curr = *next - *curr; - } - - // removes the total length sentinel - unsigned total_length = name_offsets.back(); - name_offsets.pop_back(); - - // builds and writes the index - util::RangeTable<> index_range(name_offsets); - file_stream << index_range; - - file_stream.write((char *)&total_length, sizeof(unsigned)); - - // write all chars consecutively - char write_buffer[WRITE_BLOCK_BUFFER_SIZE]; - unsigned buffer_len = 0; - - for (const auto c : name_char_data) - { - write_buffer[buffer_len++] = c; - - if (buffer_len >= WRITE_BLOCK_BUFFER_SIZE) - { - file_stream.write(write_buffer, WRITE_BLOCK_BUFFER_SIZE); - buffer_len = 0; - } - } - - file_stream.write(write_buffer, buffer_len); + const util::NameTable::IndexedData indexed_data; + indexed_data.write(file, name_offsets.begin(), name_offsets.end(), name_char_data.begin()); TIMER_STOP(write_index); log << "ok, after " << TIMER_SEC(write_index) << "s"; diff --git a/src/extractor/extractor_callbacks.cpp b/src/extractor/extractor_callbacks.cpp index abfb3999f..f48e368a4 100644 --- a/src/extractor/extractor_callbacks.cpp +++ b/src/extractor/extractor_callbacks.cpp @@ -269,40 +269,34 @@ void ExtractorCallbacks::ProcessWay(const osmium::Way &input_way, const Extracti const auto road_classification = parsed_way.road_classification; - const constexpr std::size_t MAX_STRING_LENGTH = 255u; // Get the unique identifier for the street name, destination, and ref const auto name_iterator = string_map.find( MapKey(parsed_way.name, parsed_way.destinations, parsed_way.ref, parsed_way.pronunciation)); - auto name_id = EMPTY_NAMEID; + NameID name_id = 
EMPTY_NAMEID; if (string_map.end() == name_iterator) { - const auto name_length = std::min(MAX_STRING_LENGTH, parsed_way.name.size()); - const auto destinations_length = - std::min(MAX_STRING_LENGTH, parsed_way.destinations.size()); - const auto pronunciation_length = - std::min(MAX_STRING_LENGTH, parsed_way.pronunciation.size()); - const auto ref_length = std::min(MAX_STRING_LENGTH, parsed_way.ref.size()); - - // name_offsets already has an offset of a new name, take the offset index as the name id + // name_offsets has a sentinel element with the total name data size + // take the sentinels index as the name id of the new name data pack + // (name [name_id], destination [+1], pronunciation [+2], ref [+3]) name_id = external_memory.name_offsets.size() - 1; - std::copy(parsed_way.name.c_str(), - parsed_way.name.c_str() + name_length, + std::copy(parsed_way.name.begin(), + parsed_way.name.end(), std::back_inserter(external_memory.name_char_data)); external_memory.name_offsets.push_back(external_memory.name_char_data.size()); - std::copy(parsed_way.destinations.c_str(), - parsed_way.destinations.c_str() + destinations_length, + std::copy(parsed_way.destinations.begin(), + parsed_way.destinations.end(), std::back_inserter(external_memory.name_char_data)); external_memory.name_offsets.push_back(external_memory.name_char_data.size()); - std::copy(parsed_way.pronunciation.c_str(), - parsed_way.pronunciation.c_str() + pronunciation_length, + std::copy(parsed_way.pronunciation.begin(), + parsed_way.pronunciation.end(), std::back_inserter(external_memory.name_char_data)); external_memory.name_offsets.push_back(external_memory.name_char_data.size()); - std::copy(parsed_way.ref.c_str(), - parsed_way.ref.c_str() + ref_length, + std::copy(parsed_way.ref.begin(), + parsed_way.ref.end(), std::back_inserter(external_memory.name_char_data)); external_memory.name_offsets.push_back(external_memory.name_char_data.size()); diff --git a/src/extractor/guidance/turn_handler.cpp 
b/src/extractor/guidance/turn_handler.cpp index 57c168c89..664b2e607 100644 --- a/src/extractor/guidance/turn_handler.cpp +++ b/src/extractor/guidance/turn_handler.cpp @@ -574,7 +574,7 @@ TurnHandler::findForkCandidatesByGeometry(Intersection &intersection) const // // // left left - // / \ + // / \  // /____ right \ ______ right // | | // | | diff --git a/src/storage/storage.cpp b/src/storage/storage.cpp index bdb96efc6..4611aa578 100644 --- a/src/storage/storage.cpp +++ b/src/storage/storage.cpp @@ -194,19 +194,10 @@ void Storage::PopulateLayout(DataLayout &layout) } { - // collect number of elements to store in shared memory object util::Log() << "load names from: " << config.names_data_path; // number of entries in name index io::FileReader name_file(config.names_data_path, io::FileReader::HasNoFingerprint); - - const auto name_blocks = name_file.ReadElementCount32(); - layout.SetBlockSize(DataLayout::NAME_OFFSETS, name_blocks); - layout.SetBlockSize::BlockT>(DataLayout::NAME_BLOCKS, - name_blocks); - BOOST_ASSERT_MSG(0 != name_blocks, "name file broken"); - - const auto number_of_chars = name_file.ReadElementCount32(); - layout.SetBlockSize(DataLayout::NAME_CHAR_LIST, number_of_chars); + layout.SetBlockSize(DataLayout::NAME_CHAR_DATA, name_file.GetSize()); } { @@ -451,35 +442,13 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) // Name data { io::FileReader name_file(config.names_data_path, io::FileReader::HasNoFingerprint); - const auto name_blocks_count = name_file.ReadElementCount32(); - name_file.Skip(1); // name_char_list_count - - BOOST_ASSERT(name_blocks_count * sizeof(unsigned) == - layout.GetBlockSize(DataLayout::NAME_OFFSETS)); - BOOST_ASSERT(name_blocks_count * sizeof(typename util::RangeTable<16, true>::BlockT) == - layout.GetBlockSize(DataLayout::NAME_BLOCKS)); - - // Loading street names - const auto name_offsets_ptr = - layout.GetBlockPtr(memory_ptr, DataLayout::NAME_OFFSETS); - name_file.ReadInto(name_offsets_ptr, 
name_blocks_count); - - const auto name_blocks_ptr = - layout.GetBlockPtr(memory_ptr, DataLayout::NAME_BLOCKS); - name_file.ReadInto(reinterpret_cast(name_blocks_ptr), - layout.GetBlockSize(DataLayout::NAME_BLOCKS)); - - // The file format contains the element count a second time. Don't know why, - // but we need to read it here to progress the file pointer to the correct spot - const auto temp_count = name_file.ReadElementCount32(); + std::size_t name_file_size = name_file.GetSize(); + BOOST_ASSERT(name_file_size == layout.GetBlockSize(DataLayout::NAME_CHAR_DATA)); const auto name_char_ptr = - layout.GetBlockPtr(memory_ptr, DataLayout::NAME_CHAR_LIST); + layout.GetBlockPtr(memory_ptr, DataLayout::NAME_CHAR_DATA); - BOOST_ASSERT_MSG(temp_count == layout.GetBlockSize(DataLayout::NAME_CHAR_LIST), - "Name file corrupted!"); - - name_file.ReadInto(name_char_ptr, temp_count); + name_file.ReadInto(name_char_ptr, name_file_size); } // Turn lane data diff --git a/src/util/name_table.cpp b/src/util/name_table.cpp index 2b3b2b216..6d080c2d7 100644 --- a/src/util/name_table.cpp +++ b/src/util/name_table.cpp @@ -1,64 +1,58 @@ #include "util/name_table.hpp" #include "storage/io.hpp" -#include "util/exception.hpp" #include "util/log.hpp" -#include -#include -#include - -#include - namespace osrm { namespace util { -NameTable::NameTable(const std::string &filename) +NameTable::NameTable(const std::string &file_name) { - storage::io::FileReader name_stream_file_reader(filename, - storage::io::FileReader::HasNoFingerprint); + using FileReader = storage::io::FileReader; - m_name_table.ReadARangeTable(name_stream_file_reader); + FileReader name_stream_file_reader(file_name, FileReader::HasNoFingerprint); + const auto file_size = name_stream_file_reader.GetSize(); - const auto number_of_chars = name_stream_file_reader.ReadElementCount32(); + m_buffer = BufferType(static_cast(::operator new(file_size)), + [](void *ptr) { ::operator delete(ptr); }); + 
name_stream_file_reader.ReadInto(m_buffer.get(), file_size); + m_name_table.reset(m_buffer.get(), m_buffer.get() + file_size); - m_names_char_list.resize(number_of_chars + 1); //+1 gives sentinel element - m_names_char_list.back() = 0; - if (number_of_chars > 0) - { - name_stream_file_reader.ReadInto(&m_names_char_list[0], number_of_chars); - } - else + if (m_name_table.empty()) { util::Log() << "list of street names is empty in construction of name table from: \"" - << filename << "\""; + << file_name << "\""; } } +void NameTable::reset(ValueType *begin, ValueType *end) +{ + m_buffer.reset(); + m_name_table.reset(begin, end); +} + StringView NameTable::GetNameForID(const NameID id) const { - if (std::numeric_limits::max() == id) - { + if (id == INVALID_NAMEID) return {}; - } - auto range = m_name_table.GetRange(id); + return m_name_table.at(id); +} - if (range.begin() == range.end()) - { +StringView NameTable::GetDestinationsForID(const NameID id) const +{ + if (id == INVALID_NAMEID) return {}; - } - auto first = begin(m_names_char_list) + range.front(); - auto last = begin(m_names_char_list) + range.back() + 1; - const std::size_t len = last - first; - - return StringView{&*first, len}; + return m_name_table.at(id + 1); } StringView NameTable::GetRefForID(const NameID id) const { + if (id == INVALID_NAMEID) + return {}; + // Way string data is stored in blocks based on `id` as follows: // // | name | destination | pronunciation | ref | @@ -71,11 +65,14 @@ StringView NameTable::GetRefForID(const NameID id) const // Offset 0 is name, 1 is destination, 2 is pronunciation, 3 is ref. // See datafacades and extractor callbacks for details. 
const constexpr auto OFFSET_REF = 3u; - return GetNameForID(id + OFFSET_REF); + return m_name_table.at(id + OFFSET_REF); } StringView NameTable::GetPronunciationForID(const NameID id) const { + if (id == INVALID_NAMEID) + return {}; + // Way string data is stored in blocks based on `id` as follows: // // | name | destination | pronunciation | ref | @@ -88,7 +85,7 @@ StringView NameTable::GetPronunciationForID(const NameID id) const // Offset 0 is name, 1 is destination, 2 is pronunciation, 3 is ref. // See datafacades and extractor callbacks for details. const constexpr auto OFFSET_PRONUNCIATION = 2u; - return GetNameForID(id + OFFSET_PRONUNCIATION); + return m_name_table.at(id + OFFSET_PRONUNCIATION); } } // namespace util diff --git a/unit_tests/util/indexed_data.cpp b/unit_tests/util/indexed_data.cpp new file mode 100644 index 000000000..c94a40543 --- /dev/null +++ b/unit_tests/util/indexed_data.cpp @@ -0,0 +1,193 @@ +#include "util/indexed_data.hpp" +#include "util/exception.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +BOOST_AUTO_TEST_SUITE(indexed_data) + +using namespace osrm; +using namespace osrm::util; + +BOOST_AUTO_TEST_CASE(check_variable_group_block_bitops) +{ + VariableGroupBlock<16> variable_group_block; + BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0xe4), 6); + BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0x11111111), 8); + BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0x55555555), 16); + BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0xffffffff), 48); + + BOOST_CHECK_EQUAL(variable_group_block.log256(0), 0); + BOOST_CHECK_EQUAL(variable_group_block.log256(1), 1); + BOOST_CHECK_EQUAL(variable_group_block.log256(255), 1); + BOOST_CHECK_EQUAL(variable_group_block.log256(256), 2); + BOOST_CHECK_EQUAL(variable_group_block.log256(1024), 2); + BOOST_CHECK_EQUAL(variable_group_block.log256(16777215), 3); +} + +template +void test_rw(const Offsets &offsets, const Data &data) +{ + std::stringstream sstr; + 
IndexedData indexed_data; + indexed_data.write(sstr, offsets.begin(), offsets.end(), data.begin()); + + const std::string str = sstr.str(); + +#if 0 + std::cout << "\n" << typeid(IndexedData).name() << "\nsaved size = " << str.size() << "\n"; + for (auto c : str) + std::cout << std::hex << std::setw(2) << std::setfill('0') + << (int)((unsigned char)c) << " "; + std::cout << std::dec << "\n"; +#endif + + indexed_data.reset(str.c_str(), str.c_str() + str.size()); + + for (std::size_t index = 0; index < offsets.size() - 1; ++index) + { + typename IndexedData::ResultType expected_result(&data[offsets[index]], + &data[offsets[index + 1]]); + BOOST_CHECK_EQUAL(expected_result, indexed_data.at(index)); + } +} + +BOOST_AUTO_TEST_CASE(check_group_blocks_with_different_sizes) +{ + + std::vector str = { + "", "A", "bb", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "", "kkk", + "llll", "M", "nn", "ooo", "pppp", "q", "r", "S", "T", "", "u", "V", "W", + "X", "Y", "Z", "", "", "", "", "", "", "", "0", ""}; + + std::vector name_char_data; + std::vector name_offsets; + for (auto s : str) + { + name_offsets.push_back(name_char_data.size()); + std::copy(s.begin(), s.end(), std::back_inserter(name_char_data)); + } + name_offsets.push_back(name_char_data.size()); + + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); +} + +BOOST_AUTO_TEST_CASE(check_1001_pandas) +{ + std::vector name_char_data; + std::vector name_offsets; + + const std::string panda = "🐼"; + name_offsets.push_back(0); + for (std::size_t i = 0; i < 1000; ++i) + std::copy(panda.begin(), panda.end(), std::back_inserter(name_char_data)); + name_offsets.push_back(name_char_data.size()); + std::copy(panda.begin(), panda.end(), 
std::back_inserter(name_char_data)); + name_offsets.push_back(name_char_data.size()); + + test_rw>>(name_offsets, name_char_data); +} + +BOOST_AUTO_TEST_CASE(check_different_sizes) +{ + for (std::size_t num_strings = 0; num_strings < 256; ++num_strings) + { + std::vector name_char_data; + std::vector name_offsets; + + const std::string canoe = "🛶"; + name_offsets.push_back(0); + for (std::size_t i = 0; i < num_strings; ++i) + { + std::copy(canoe.begin(), canoe.end(), std::back_inserter(name_char_data)); + name_offsets.push_back(name_char_data.size()); + } + + test_rw>>(name_offsets, name_char_data); + test_rw>>(name_offsets, name_char_data); + } +} + +BOOST_AUTO_TEST_CASE(check_max_size) +{ + std::vector name_data(0x1000000, '#'); + std::vector name_offsets; + + auto test_variable = [&name_offsets, &name_data]() { + test_rw>>(name_offsets, name_data); + }; + auto test_fixed = [&name_offsets, &name_data]() { + test_rw>>(name_offsets, name_data); + }; + + name_offsets = {0, 0x1000000}; + BOOST_CHECK_THROW(test_variable(), osrm::util::exception); + name_offsets = {0, 0x1000000 - 1}; + test_variable(); + + name_offsets = {0, 256}; + BOOST_CHECK_THROW(test_fixed(), osrm::util::exception); + name_offsets = {0, 255}; + test_fixed(); +} + +BOOST_AUTO_TEST_CASE(check_corrupted_memory) +{ + std::vector buf; + + auto test_variable = [&buf]() { + IndexedData>> indexed_data; + indexed_data.reset(&buf[0], &buf[buf.size()]); + const auto result = indexed_data.at(0); + return std::string(reinterpret_cast(&result[0]), result.size()); + }; + + // Use LE internal representation + buf = {0, 42}; + BOOST_CHECK_THROW(test_variable(), osrm::util::exception); + + buf = {1, 0, 0, 0, 0}; + BOOST_CHECK_THROW(test_variable(), osrm::util::exception); + + buf = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42}; + BOOST_CHECK_THROW(test_variable(), osrm::util::exception); + + buf = {1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 42}; + BOOST_CHECK_THROW(test_variable(), 
osrm::util::exception); + + buf = {1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 4, 0xF0, 0x9F, 0x90, 0xBC}; + BOOST_CHECK_EQUAL(test_variable(), "🐼"); +} + +BOOST_AUTO_TEST_CASE(check_string_view) +{ + std::stringstream sstr; + std::string name_data = "hellostringview"; + std::vector name_offsets = {0, 5, 11, 15}; + + IndexedData> indexed_data; + indexed_data.write(sstr, name_offsets.begin(), name_offsets.end(), name_data.begin()); + + const std::string str = sstr.str(); + indexed_data.reset(str.c_str(), str.c_str() + str.size()); + + BOOST_CHECK_EQUAL(indexed_data.at(0), "hello"); + BOOST_CHECK_EQUAL(indexed_data.at(1), "string"); + BOOST_CHECK_EQUAL(indexed_data.at(2), "view"); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/unit_tests/util/name_table.cpp b/unit_tests/util/name_table.cpp new file mode 100644 index 000000000..d7b2a6137 --- /dev/null +++ b/unit_tests/util/name_table.cpp @@ -0,0 +1,117 @@ +#include "util/name_table.hpp" +#include "util/exception.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +//#include + +BOOST_AUTO_TEST_SUITE(name_table) + +using namespace osrm; +using namespace osrm::util; + +std::string PrapareNameTableData(std::vector &data, bool fill_all) +{ + std::stringstream sstr; + NameTable::IndexedData indexed_data; + std::vector name_char_data; + std::vector name_offsets; + + for (auto s : data) + { + name_offsets.push_back(name_char_data.size()); + std::copy(s.begin(), s.end(), std::back_inserter(name_char_data)); + + if (fill_all) + { + std::string tmp; + + tmp = s + "_des"; + name_offsets.push_back(name_char_data.size()); + std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); + + tmp = s + "_pro"; + name_offsets.push_back(name_char_data.size()); + std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); + + tmp = s + "_ref"; + name_offsets.push_back(name_char_data.size()); + std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); + } + else 
+ { + name_offsets.push_back(name_char_data.size()); + name_offsets.push_back(name_char_data.size()); + name_offsets.push_back(name_char_data.size()); + } + } + name_offsets.push_back(name_char_data.size()); + + indexed_data.write(sstr, name_offsets.begin(), name_offsets.end(), name_char_data.begin()); + + return sstr.str(); +} + +BOOST_AUTO_TEST_CASE(check_name_table_fill) +{ + std::vector expected_names = { + "", "A", "check_name", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "", "kkk", + "llll", "M", "nn", "ooo", "pppp", "q", "r", "S", "T", "", "u", "V", "W", + "X", "Y", "Z", "", "", "", "", "", "", "", "0", ""}; + + auto data = PrapareNameTableData(expected_names, true); + + NameTable name_table; + name_table.reset(&data[0], &data[data.size()]); + + for (std::size_t index = 0; index < expected_names.size(); ++index) + { + const NameID id = 4 * index; + BOOST_CHECK_EQUAL(name_table.GetNameForID(id), expected_names[index]); + BOOST_CHECK_EQUAL(name_table.GetRefForID(id), expected_names[index] + "_ref"); + BOOST_CHECK_EQUAL(name_table.GetDestinationsForID(id), expected_names[index] + "_des"); + BOOST_CHECK_EQUAL(name_table.GetPronunciationForID(id), expected_names[index] + "_pro"); + } +} + +BOOST_AUTO_TEST_CASE(check_name_table_nofill) +{ + std::vector expected_names = { + "", "A", "check_name", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "", "kkk", + "llll", "M", "nn", "ooo", "pppp", "q", "r", "S", "T", "", "u", "V", "W", + "X", "Y", "Z", "", "", "", "", "", "", "", "0", ""}; + + auto data = PrapareNameTableData(expected_names, false); + + NameTable name_table; + name_table.reset(&data[0], &data[data.size()]); + + // CALLGRIND_START_INSTRUMENTATION; + for (std::size_t index = 0; index < expected_names.size(); ++index) + { + const NameID id = 4 * index; + BOOST_CHECK_EQUAL(name_table.GetNameForID(id), expected_names[index]); + BOOST_CHECK(name_table.GetRefForID(id).empty()); + BOOST_CHECK(name_table.GetDestinationsForID(id).empty()); + 
BOOST_CHECK(name_table.GetPronunciationForID(id).empty()); + } + // CALLGRIND_STOP_INSTRUMENTATION; +} + +BOOST_AUTO_TEST_CASE(check_invalid_ids) +{ + NameTable name_table; + BOOST_CHECK_EQUAL(name_table.GetNameForID(INVALID_NAMEID), ""); + BOOST_CHECK_EQUAL(name_table.GetRefForID(INVALID_NAMEID), ""); + BOOST_CHECK_EQUAL(name_table.GetDestinationsForID(INVALID_NAMEID), ""); + BOOST_CHECK_EQUAL(name_table.GetPronunciationForID(INVALID_NAMEID), ""); +} + +BOOST_AUTO_TEST_SUITE_END()