Added indexed array data type with variable and fixed group blocks
This commit is contained in:
		
							parent
							
								
									cedeb15ade
								
							
						
					
					
						commit
						6e1c4bfecd
					
				| @ -4,6 +4,7 @@ | ||||
|       - Fix #3475 removed an invalid `exit` field from the `arrive` maneuver | ||||
|       - Fix #3515 adjusted number of `nodes` in `annotation` | ||||
|       - Fix #3605 Fixed a bug that could lead to turns at the end of the road to be suppressed | ||||
|       - Fix #2844 handle up to 16777215 code units in OSM names | ||||
|     - Infrastructure | ||||
|       - Support building rpm packages. | ||||
|     - Guidance | ||||
|  | ||||
| @ -23,7 +23,6 @@ Feature: Handling of UTF characters | ||||
|             | c    | d  | Cyrillic Москва,Cyrillic Москва               | | ||||
| 
 | ||||
| 
 | ||||
|     @todo | ||||
|     Scenario: Up to 255 Unicode Code Points (255 x Panda Code Point) | ||||
|         Given the node map | ||||
|             """ | ||||
| @ -35,6 +34,6 @@ Feature: Handling of UTF characters | ||||
|             | ab    | ab   | primary | | ||||
|             | bc    | 🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼 | primary | | ||||
| 
 | ||||
|         When I route 100 times I should get | ||||
|         When I route 2 times I should get | ||||
|             | from | to | route | | ||||
|             | a    | c  | ab,🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼,🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼🐼 | | ||||
|  | ||||
| @ -18,6 +18,7 @@ | ||||
| #include "util/exception_utils.hpp" | ||||
| #include "util/guidance/turn_bearing.hpp" | ||||
| #include "util/log.hpp" | ||||
| #include "util/name_table.hpp" | ||||
| #include "util/packed_vector.hpp" | ||||
| #include "util/range_table.hpp" | ||||
| #include "util/rectangle.hpp" | ||||
| @ -79,7 +80,7 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade | ||||
|     util::ShM<extractor::TravelMode, true>::vector m_travel_mode_list; | ||||
|     util::ShM<util::guidance::TurnBearing, true>::vector m_pre_turn_bearing; | ||||
|     util::ShM<util::guidance::TurnBearing, true>::vector m_post_turn_bearing; | ||||
|     util::ShM<char, true>::vector m_names_char_list; | ||||
|     util::NameTable m_names_table; | ||||
|     util::ShM<unsigned, true>::vector m_name_begin_indices; | ||||
|     util::ShM<unsigned, true>::vector m_geometry_indices; | ||||
|     util::ShM<NodeID, true>::vector m_geometry_node_list; | ||||
| @ -103,7 +104,7 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade | ||||
|     std::unique_ptr<SharedGeospatialQuery> m_geospatial_query; | ||||
|     boost::filesystem::path file_index_path; | ||||
| 
 | ||||
|     std::shared_ptr<util::RangeTable<16, true>> m_name_table; | ||||
|     util::NameTable m_name_table; | ||||
|     // bearing classes by node based node
 | ||||
|     util::ShM<BearingClassID, true>::vector m_bearing_class_id_table; | ||||
|     // entry class IDs
 | ||||
| @ -267,23 +268,10 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade | ||||
| 
 | ||||
|     void InitializeNamePointers(storage::DataLayout &data_layout, char *memory_block) | ||||
|     { | ||||
|         auto offsets_ptr = | ||||
|             data_layout.GetBlockPtr<unsigned>(memory_block, storage::DataLayout::NAME_OFFSETS); | ||||
|         auto blocks_ptr = | ||||
|             data_layout.GetBlockPtr<IndexBlock>(memory_block, storage::DataLayout::NAME_BLOCKS); | ||||
|         util::ShM<unsigned, true>::vector name_offsets( | ||||
|             offsets_ptr, data_layout.num_entries[storage::DataLayout::NAME_OFFSETS]); | ||||
|         util::ShM<IndexBlock, true>::vector name_blocks( | ||||
|             blocks_ptr, data_layout.num_entries[storage::DataLayout::NAME_BLOCKS]); | ||||
| 
 | ||||
|         auto names_list_ptr = | ||||
|             data_layout.GetBlockPtr<char>(memory_block, storage::DataLayout::NAME_CHAR_LIST); | ||||
|         util::ShM<char, true>::vector names_char_list( | ||||
|             names_list_ptr, data_layout.num_entries[storage::DataLayout::NAME_CHAR_LIST]); | ||||
|         m_name_table = std::make_unique<util::RangeTable<16, true>>( | ||||
|             name_offsets, name_blocks, static_cast<unsigned>(names_char_list.size())); | ||||
| 
 | ||||
|         m_names_char_list = std::move(names_char_list); | ||||
|         auto name_data_ptr = | ||||
|             data_layout.GetBlockPtr<char>(memory_block, storage::DataLayout::NAME_CHAR_DATA); | ||||
|         const auto name_data_size = data_layout.num_entries[storage::DataLayout::NAME_CHAR_DATA]; | ||||
|         m_name_table.reset(name_data_ptr, name_data_ptr + name_data_size); | ||||
|     } | ||||
| 
 | ||||
|     void InitializeTurnLaneDescriptionsPointers(storage::DataLayout &data_layout, | ||||
| @ -823,52 +811,22 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade | ||||
| 
 | ||||
|     StringView GetNameForID(const NameID id) const override final | ||||
|     { | ||||
|         if (std::numeric_limits<NameID>::max() == id) | ||||
|         { | ||||
|             return ""; | ||||
|         } | ||||
| 
 | ||||
|         auto range = m_name_table->GetRange(id); | ||||
| 
 | ||||
|         if (range.begin() == range.end()) | ||||
|         { | ||||
|             return ""; | ||||
|         } | ||||
| 
 | ||||
|         auto first = m_names_char_list.begin() + range.front(); | ||||
|         auto last = m_names_char_list.begin() + range.back() + 1u; | ||||
|         // These iterators are useless: they're InputIterators onto a contiguous block of memory.
 | ||||
|         // Dereference to get the first element, then take its address to get the underlying memory address.
 | ||||
|         const std::size_t len = &*last - &*first; | ||||
| 
 | ||||
|         return StringView{&*first, len}; | ||||
|         return m_name_table.GetNameForID(id); | ||||
|     } | ||||
| 
 | ||||
|     StringView GetRefForID(const NameID id) const override final | ||||
|     { | ||||
|         // We store the ref after the name, destination and pronunciation of a street.
 | ||||
|         // We do this to get around the street length limit of 255 which would hit
 | ||||
|         // if we concatenate these. Order (see extractor_callbacks):
 | ||||
|         // name (0), destination (1), pronunciation (2), ref (3)
 | ||||
|         return GetNameForID(id + 3); | ||||
|         return m_name_table.GetRefForID(id); | ||||
|     } | ||||
| 
 | ||||
|     StringView GetPronunciationForID(const NameID id) const override final | ||||
|     { | ||||
|         // We store the pronunciation after the name and destination of a street.
 | ||||
|         // We do this to get around the street length limit of 255 which would hit
 | ||||
|         // if we concatenate these. Order (see extractor_callbacks):
 | ||||
|         // name (0), destination (1), pronunciation (2), ref (3)
 | ||||
|         return GetNameForID(id + 2); | ||||
|         return m_name_table.GetPronunciationForID(id); | ||||
|     } | ||||
| 
 | ||||
|     StringView GetDestinationsForID(const NameID id) const override final | ||||
|     { | ||||
|         // We store the destination after the name of a street.
 | ||||
|         // We do this to get around the street length limit of 255 which would hit
 | ||||
|         // if we concatenate these. Order (see extractor_callbacks):
 | ||||
|         // name (0), destination (1), pronunciation (2), ref (3)
 | ||||
|         return GetNameForID(id + 1); | ||||
|         return m_name_table.GetDestinationsForID(id); | ||||
|     } | ||||
| 
 | ||||
|     bool IsCoreNode(const NodeID id) const override final | ||||
|  | ||||
| @ -61,6 +61,22 @@ class FileReader | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::size_t GetSize() | ||||
|     { | ||||
|         const boost::filesystem::ifstream::pos_type positon = input_stream.tellg(); | ||||
|         input_stream.seekg(0, std::ios::end); | ||||
|         const boost::filesystem::ifstream::pos_type file_size = input_stream.tellg(); | ||||
| 
 | ||||
|         if (file_size == boost::filesystem::ifstream::pos_type(-1)) | ||||
|         { | ||||
|             throw util::exception("File size for " + filepath.string() + " failed " + SOURCE_REF); | ||||
|         } | ||||
| 
 | ||||
|         // restore the current position
 | ||||
|         input_stream.seekg(positon, std::ios::beg); | ||||
|         return file_size; | ||||
|     } | ||||
| 
 | ||||
|     /* Read count objects of type T into pointer dest */ | ||||
|     template <typename T> void ReadInto(T *dest, const std::size_t count) | ||||
|     { | ||||
|  | ||||
| @ -18,9 +18,7 @@ namespace storage | ||||
| // Added at the start and end of each block as sanity check
 | ||||
| const constexpr char CANARY[4] = {'O', 'S', 'R', 'M'}; | ||||
| 
 | ||||
| const constexpr char *block_id_to_name[] = {"NAME_OFFSETS", | ||||
|                                             "NAME_BLOCKS", | ||||
|                                             "NAME_CHAR_LIST", | ||||
| const constexpr char *block_id_to_name[] = {"NAME_CHAR_DATA", | ||||
|                                             "NAME_ID_LIST", | ||||
|                                             "VIA_NODE_LIST", | ||||
|                                             "GRAPH_NODE_LIST", | ||||
| @ -64,9 +62,7 @@ struct DataLayout | ||||
| { | ||||
|     enum BlockID | ||||
|     { | ||||
|         NAME_OFFSETS = 0, | ||||
|         NAME_BLOCKS, | ||||
|         NAME_CHAR_LIST, | ||||
|         NAME_CHAR_DATA = 0, | ||||
|         NAME_ID_LIST, | ||||
|         VIA_NODE_LIST, | ||||
|         GRAPH_NODE_LIST, | ||||
|  | ||||
| @ -32,6 +32,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| #include <string> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include <boost/format.hpp> | ||||
| 
 | ||||
| namespace osrm | ||||
| { | ||||
| namespace util | ||||
| @ -42,6 +44,7 @@ class exception final : public std::exception | ||||
|   public: | ||||
|     explicit exception(const char *message) : message(message) {} | ||||
|     explicit exception(std::string message) : message(std::move(message)) {} | ||||
|     explicit exception(boost::format message) : message(message.str()) {} | ||||
|     const char *what() const noexcept override { return message.c_str(); } | ||||
| 
 | ||||
|   private: | ||||
|  | ||||
							
								
								
									
										366
									
								
								include/util/indexed_data.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										366
									
								
								include/util/indexed_data.hpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,366 @@ | ||||
| #ifndef OSRM_INDEXED_DATA_HPP | ||||
| #define OSRM_INDEXED_DATA_HPP | ||||
| 
 | ||||
#include "util/exception.hpp"
#include "util/string_view.hpp"

#include <boost/assert.hpp>
#include <boost/format.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <ostream>
#include <string>
#include <type_traits>
| 
 | ||||
| namespace osrm | ||||
| { | ||||
| namespace util | ||||
| { | ||||
| 
 | ||||
| template <int N, typename T = std::string> struct VariableGroupBlock | ||||
| { | ||||
|     static constexpr std::uint32_t BLOCK_SIZE = N; | ||||
| 
 | ||||
|     using ResultType = T; | ||||
|     using ValueType = typename T::value_type; | ||||
| 
 | ||||
|     static_assert(0 <= BLOCK_SIZE && BLOCK_SIZE <= 16, "incorrect block size"); | ||||
|     static_assert(sizeof(ValueType) == 1, "data basic type must char"); | ||||
| 
 | ||||
|     struct BlockReference | ||||
|     { | ||||
|         std::uint32_t offset; | ||||
|         std::uint32_t descriptor; | ||||
|     }; | ||||
| 
 | ||||
|     VariableGroupBlock() {} | ||||
| 
 | ||||
|     /// Returns ceiling(log_256(value + 1))
 | ||||
|     inline std::uint32_t log256(std::uint32_t value) const | ||||
|     { | ||||
|         BOOST_ASSERT(value < 0x1000000); | ||||
|         return value == 0 ? 0 : value < 0x100 ? 1 : value < 0x10000 ? 2 : 3; | ||||
|     } | ||||
| 
 | ||||
|     /// Advance data iterator by the value of byte_length bytes at length iterator.
 | ||||
|     /// Advance length iterator by byte_length.
 | ||||
|     template <typename DataIterator> | ||||
|     inline void | ||||
|     var_advance(DataIterator &data, DataIterator &length, std::uint32_t byte_length) const | ||||
|     { | ||||
|         if (byte_length == 0) | ||||
|         { | ||||
|         } | ||||
|         else if (byte_length == 1) | ||||
|         { | ||||
|             data += static_cast<unsigned char>(*length++); | ||||
|         } | ||||
|         else if (byte_length == 2) | ||||
|         { | ||||
|             data += static_cast<unsigned char>(*length++); | ||||
|             data += static_cast<unsigned char>(*length++) << 8; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             BOOST_ASSERT(byte_length == 3); | ||||
|             data += static_cast<unsigned char>(*length++); | ||||
|             data += static_cast<unsigned char>(*length++) << 8; | ||||
|             data += static_cast<unsigned char>(*length++) << 16; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Summation of 16 2-bit values using SWAR
 | ||||
|     inline std::uint32_t sum2bits(std::uint32_t value) const | ||||
|     { | ||||
|         value = (value >> 2 & 0x33333333) + (value & 0x33333333); | ||||
|         value = (value >> 4 & 0x0f0f0f0f) + (value & 0x0f0f0f0f); | ||||
|         value = (value >> 8 & 0x00ff00ff) + (value & 0x00ff00ff); | ||||
|         return (value >> 16 & 0x0000ffff) + (value & 0x0000ffff); | ||||
|     } | ||||
| 
 | ||||
|     /// Write a block reference {offset, descriptor}, where offset
 | ||||
|     /// is a global block offset and descriptor is a 32-bit value
 | ||||
|     /// of prefix length. sum(descriptor) equals to the block
 | ||||
|     /// prefix length.
 | ||||
|     /// Returns the block prefix length.
 | ||||
|     template <typename Offset, typename OffsetIterator> | ||||
|     Offset WriteBlockReference(std::ostream &out, | ||||
|                                Offset data_offset, | ||||
|                                OffsetIterator first, | ||||
|                                OffsetIterator last) const | ||||
|     { | ||||
|         BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max()); | ||||
| 
 | ||||
|         Offset prefix_length = 0; | ||||
|         BlockReference refernce{static_cast<decltype(BlockReference::offset)>(data_offset), 0}; | ||||
|         for (; first != last; --last) | ||||
|         { | ||||
|             const std::uint32_t data_length = *last - *std::prev(last); | ||||
|             if (data_length >= 0x1000000) | ||||
|                 throw util::exception(boost::format("too large data length %1%") % data_length); | ||||
| 
 | ||||
|             const std::uint32_t byte_length = log256(data_length); | ||||
|             refernce.descriptor = (refernce.descriptor << 2) | byte_length; | ||||
|             prefix_length += byte_length; | ||||
|         } | ||||
| 
 | ||||
|         out.write((const char *)&refernce, sizeof(refernce)); | ||||
| 
 | ||||
|         return prefix_length; | ||||
|     } | ||||
| 
 | ||||
|     /// Write a block prefix that is an array of variable encoded data lengths:
 | ||||
|     ///   0 is omitted;
 | ||||
|     ///   1..255 is 1 byte;
 | ||||
|     ///   256..65535 is 2 bytes;
 | ||||
|     ///   65536..16777215 is 3 bytes.
 | ||||
|     /// [first..last] is an inclusive range of block data.
 | ||||
|     /// The length of the last item in the block is not stored.
 | ||||
|     template <typename OffsetIterator> | ||||
|     void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const | ||||
|     { | ||||
|         for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) | ||||
|         { | ||||
|             const std::uint32_t data_length = *next - *curr; | ||||
|             const std::uint32_t byte_length = log256(data_length); | ||||
|             if (byte_length == 0) | ||||
|                 continue; | ||||
| 
 | ||||
|             out.write((const char *)&data_length, byte_length); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Advances the range to an item stored in the referenced block.
 | ||||
|     /// Input [first..last) is a range of the complete block data with prefix.
 | ||||
|     /// Output [first..last) is a range of the referenced data at local_index.
 | ||||
|     template <typename DataIterator> | ||||
|     void ReadRefrencedBlock(const BlockReference &reference, | ||||
|                             std::uint32_t local_index, | ||||
|                             DataIterator &first, | ||||
|                             DataIterator &last) const | ||||
|     { | ||||
|         std::uint32_t descriptor = reference.descriptor; | ||||
|         DataIterator var_lengths = first;          // iterator to the variable lengths part
 | ||||
|         std::advance(first, sum2bits(descriptor)); // advance first to the block data part
 | ||||
|         for (std::uint32_t i = 0; i < local_index; ++i, descriptor >>= 2) | ||||
|         { | ||||
|             var_advance(first, var_lengths, descriptor & 0x3); | ||||
|         } | ||||
| 
 | ||||
|         if (local_index < BLOCK_SIZE) | ||||
|         { | ||||
|             last = first; | ||||
|             var_advance(last, var_lengths, descriptor & 0x3); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| template <int N, typename T = std::string> struct FixedGroupBlock | ||||
| { | ||||
|     static constexpr std::uint32_t BLOCK_SIZE = N; | ||||
| 
 | ||||
|     using ResultType = T; | ||||
|     using ValueType = typename T::value_type; | ||||
| 
 | ||||
|     static_assert(sizeof(ValueType) == 1, "data basic type must char"); | ||||
| 
 | ||||
|     struct BlockReference | ||||
|     { | ||||
|         std::uint32_t offset; | ||||
|     }; | ||||
| 
 | ||||
|     FixedGroupBlock() {} | ||||
| 
 | ||||
|     /// Write a block reference {offset}, where offset is a global block offset
 | ||||
|     /// Returns the fixed block prefix length.
 | ||||
|     template <typename Offset, typename OffsetIterator> | ||||
|     Offset | ||||
|     WriteBlockReference(std::ostream &out, Offset data_offset, OffsetIterator, OffsetIterator) const | ||||
|     { | ||||
|         BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max()); | ||||
| 
 | ||||
|         BlockReference refernce{static_cast<decltype(BlockReference::offset)>(data_offset)}; | ||||
|         out.write((const char *)&refernce, sizeof(refernce)); | ||||
| 
 | ||||
|         return BLOCK_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     /// Write a fixed length block prefix.
 | ||||
|     template <typename OffsetIterator> | ||||
|     void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const | ||||
|     { | ||||
|         std::uint32_t index = 0; | ||||
|         std::array<ValueType, BLOCK_SIZE> block_prefix; | ||||
|         for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next) | ||||
|         { | ||||
|             const std::uint32_t data_length = *next - *curr; | ||||
|             if (data_length >= 0x100) | ||||
|                 throw util::exception(boost::format("too large data length %1%") % data_length); | ||||
| 
 | ||||
|             block_prefix[index++] = static_cast<ValueType>(data_length); | ||||
|         } | ||||
|         out.write((const char *)block_prefix.data(), block_prefix.size()); | ||||
|     } | ||||
| 
 | ||||
|     /// Advances the range to an item stored in the referenced block.
 | ||||
|     /// Input [first..last) is a range of the complete block data with prefix.
 | ||||
|     /// Output [first..last) is a range of the referenced data at local_index.
 | ||||
|     template <typename DataIterator> | ||||
|     void ReadRefrencedBlock(const BlockReference &, | ||||
|                             std::uint32_t local_index, | ||||
|                             DataIterator &first, | ||||
|                             DataIterator &last) const | ||||
|     { | ||||
|         DataIterator fixed_lengths = first; // iterator to the fixed lengths part
 | ||||
|         std::advance(first, BLOCK_SIZE);    // advance first to the block data part
 | ||||
|         for (std::uint32_t i = 0; i < local_index; ++i) | ||||
|         { | ||||
|             first += static_cast<unsigned char>(*fixed_lengths++); | ||||
|         } | ||||
| 
 | ||||
|         if (local_index < BLOCK_SIZE) | ||||
|         { | ||||
|             last = first + static_cast<unsigned char>(*fixed_lengths); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| template <typename GroupBlock> struct IndexedData | ||||
| { | ||||
|     static constexpr std::uint32_t BLOCK_SIZE = GroupBlock::BLOCK_SIZE; | ||||
| 
 | ||||
|     using BlocksNumberType = std::uint32_t; | ||||
|     using DataSizeType = std::uint64_t; | ||||
| 
 | ||||
|     using BlockReference = typename GroupBlock::BlockReference; | ||||
|     using ResultType = typename GroupBlock::ResultType; | ||||
|     using ValueType = typename GroupBlock::ValueType; | ||||
| 
 | ||||
|     static_assert(sizeof(ValueType) == 1, "data basic type must char"); | ||||
| 
 | ||||
|     IndexedData() : blocks_number{0}, block_references{nullptr}, begin{nullptr}, end{nullptr} {} | ||||
| 
 | ||||
|     bool empty() const { return blocks_number == 0; } | ||||
| 
 | ||||
|     template <typename OffsetIterator, typename DataIterator> | ||||
|     void | ||||
|     write(std::ostream &out, OffsetIterator first, OffsetIterator last, DataIterator data) const | ||||
|     { | ||||
|         static_assert(sizeof(typename DataIterator::value_type) == 1, "data basic type must char"); | ||||
| 
 | ||||
|         using diff_type = typename OffsetIterator::difference_type; | ||||
| 
 | ||||
|         BOOST_ASSERT(first < last); | ||||
|         const OffsetIterator sentinel = std::prev(last); | ||||
| 
 | ||||
|         // Write number of blocks
 | ||||
|         const auto number_of_elements = std::distance(first, sentinel); | ||||
|         const BlocksNumberType number_of_blocks = | ||||
|             number_of_elements == 0 ? 0 | ||||
|                                     : 1 + (std::distance(first, sentinel) - 1) / (BLOCK_SIZE + 1); | ||||
|         out.write((const char *)&number_of_blocks, sizeof(number_of_blocks)); | ||||
| 
 | ||||
|         // Write block references and compute the total data size that includes prefix and data
 | ||||
|         const GroupBlock block; | ||||
|         DataSizeType data_size = 0; | ||||
|         for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) | ||||
|         { | ||||
|             std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel))); | ||||
|             data_size += block.WriteBlockReference(out, data_size, curr, next); | ||||
|             std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel))); | ||||
|             data_size += *next - *curr; | ||||
|         } | ||||
| 
 | ||||
|         // Write the total data size
 | ||||
|         out.write((const char *)&data_size, sizeof(data_size)); | ||||
| 
 | ||||
|         // Write data blocks that are (prefix, data)
 | ||||
|         for (OffsetIterator curr = first, next = first; next != sentinel; curr = next) | ||||
|         { | ||||
|             std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel))); | ||||
|             block.WriteBlockPrefix(out, curr, next); | ||||
|             std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel))); | ||||
|             std::copy(data + *curr, data + *next, std::ostream_iterator<unsigned char>(out)); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Set internal pointers from the buffer [first, last).
 | ||||
|     /// Data buffer pointed by ptr must exists during IndexedData life-time.
 | ||||
|     /// No ownership is transferred.
 | ||||
|     void reset(const ValueType *first, const ValueType *last) | ||||
|     { | ||||
|         // Read blocks number
 | ||||
|         if (first + sizeof(BlocksNumberType) > last) | ||||
|             throw util::exception("incorrect memory block"); | ||||
| 
 | ||||
|         blocks_number = *reinterpret_cast<const BlocksNumberType *>(first); | ||||
|         first += sizeof(BlocksNumberType); | ||||
| 
 | ||||
|         // Get block references pointer
 | ||||
|         if (first + sizeof(BlockReference) * blocks_number > last) | ||||
|             throw util::exception("incorrect memory block"); | ||||
| 
 | ||||
|         block_references = reinterpret_cast<const BlockReference *>(first); | ||||
|         first += sizeof(BlockReference) * blocks_number; | ||||
| 
 | ||||
|         // Read total data size
 | ||||
|         if (first + sizeof(DataSizeType) > last) | ||||
|             throw util::exception("incorrect memory block"); | ||||
| 
 | ||||
|         auto data_size = *reinterpret_cast<const DataSizeType *>(first); | ||||
|         first += sizeof(DataSizeType); | ||||
| 
 | ||||
|         // Get data blocks begin and end iterators
 | ||||
|         begin = reinterpret_cast<const ValueType *>(first); | ||||
|         first += sizeof(ValueType) * data_size; | ||||
| 
 | ||||
|         if (first > last) | ||||
|             throw util::exception("incorrect memory block"); | ||||
| 
 | ||||
|         end = reinterpret_cast<const ValueType *>(first); | ||||
|     } | ||||
| 
 | ||||
|     // Return value at the given index
 | ||||
|     ResultType at(std::uint32_t index) const | ||||
|     { | ||||
|         // Get block external ad internal indices
 | ||||
|         const BlocksNumberType block_idx = index / (BLOCK_SIZE + 1); | ||||
|         const std::uint32_t internal_idx = index % (BLOCK_SIZE + 1); | ||||
| 
 | ||||
|         if (block_idx >= blocks_number) | ||||
|             return ResultType(); | ||||
| 
 | ||||
|         // Get block first and last iterators
 | ||||
|         auto first = begin + block_references[block_idx].offset; | ||||
|         auto last = | ||||
|             block_idx + 1 == blocks_number ? end : begin + block_references[block_idx + 1].offset; | ||||
| 
 | ||||
|         const GroupBlock block; | ||||
|         block.ReadRefrencedBlock(block_references[block_idx], internal_idx, first, last); | ||||
| 
 | ||||
|         return adapt(first, last); | ||||
|     } | ||||
| 
 | ||||
|   private: | ||||
|     template <class T = ResultType> | ||||
|     typename std::enable_if<!std::is_same<T, StringView>::value, T>::type | ||||
|     adapt(const ValueType *first, const ValueType *last) const | ||||
|     { | ||||
|         return ResultType(first, last); | ||||
|     } | ||||
| 
 | ||||
|     template <class T = ResultType> | ||||
|     typename std::enable_if<std::is_same<T, StringView>::value, T>::type | ||||
|     adapt(const ValueType *first, const ValueType *last) const | ||||
|     { | ||||
|         return ResultType(first, std::distance(first, last)); | ||||
|     } | ||||
| 
 | ||||
|     BlocksNumberType blocks_number; | ||||
|     const BlockReference *block_references; | ||||
|     const ValueType *begin, *end; | ||||
| }; | ||||
| } | ||||
| } | ||||
| #endif // OSRM_INDEXED_DATA_HPP
 | ||||
| @ -1,7 +1,7 @@ | ||||
| #ifndef OSRM_UTIL_NAME_TABLE_HPP | ||||
| #define OSRM_UTIL_NAME_TABLE_HPP | ||||
| 
 | ||||
| #include "util/range_table.hpp" | ||||
| #include "util/indexed_data.hpp" | ||||
| #include "util/shared_memory_vector_wrapper.hpp" | ||||
| #include "util/string_view.hpp" | ||||
| #include "util/typedefs.hpp" | ||||
| @ -18,21 +18,33 @@ namespace util | ||||
| // processing based on name indices.
 | ||||
| class NameTable | ||||
| { | ||||
|   private: | ||||
|     // FIXME should this use shared memory
 | ||||
|     util::RangeTable<16, false> m_name_table; | ||||
|     ShM<char, false>::vector m_names_char_list; | ||||
| 
 | ||||
|   public: | ||||
|     using IndexedData = util::IndexedData<util::VariableGroupBlock<16, util::StringView>>; | ||||
|     using ResultType = IndexedData::ResultType; | ||||
|     using ValueType = IndexedData::ValueType; | ||||
| 
 | ||||
|     NameTable() {} | ||||
| 
 | ||||
|     // Read filename and store own data in m_buffer
 | ||||
|     NameTable(const std::string &filename); | ||||
| 
 | ||||
|     // Keep pointers only in m_name_table and don't own data in m_buffer
 | ||||
|     void reset(ValueType *begin, ValueType *end); | ||||
| 
 | ||||
|     // This class provides a limited view over all the string data we serialize out.
 | ||||
|     // The following functions are a subset of what is available.
 | ||||
|     // See the data facades, which provide full access to this serialized string data.
 | ||||
|     // (at time of writing this: get{Name,Ref,Pronunciation,Destinations}ForID(name_id);)
 | ||||
|     util::StringView GetNameForID(const NameID id) const; | ||||
|     util::StringView GetDestinationsForID(const NameID id) const; | ||||
|     util::StringView GetRefForID(const NameID id) const; | ||||
|     util::StringView GetPronunciationForID(const NameID id) const; | ||||
| 
 | ||||
|   private: | ||||
|     using BufferType = std::unique_ptr<ValueType, std::function<void(void *)>>; | ||||
| 
 | ||||
|     BufferType m_buffer; | ||||
|     IndexedData m_name_table; | ||||
| }; | ||||
| } // namespace util
 | ||||
| } // namespace osrm
 | ||||
|  | ||||
| @ -3,13 +3,13 @@ | ||||
| #include "extractor/extraction_way.hpp" | ||||
| 
 | ||||
| #include "util/coordinate_calculation.hpp" | ||||
| #include "util/range_table.hpp" | ||||
| 
 | ||||
| #include "util/exception.hpp" | ||||
| #include "util/exception_utils.hpp" | ||||
| #include "util/fingerprint.hpp" | ||||
| #include "util/io.hpp" | ||||
| #include "util/log.hpp" | ||||
| #include "util/name_table.hpp" | ||||
| #include "util/timing_util.hpp" | ||||
| 
 | ||||
| #include <boost/assert.hpp> | ||||
| @ -176,43 +176,10 @@ void ExtractionContainers::WriteCharData(const std::string &file_name) | ||||
|     util::UnbufferedLog log; | ||||
|     log << "writing street name index ... "; | ||||
|     TIMER_START(write_index); | ||||
|     boost::filesystem::ofstream file_stream(file_name, std::ios::binary); | ||||
|     boost::filesystem::ofstream file(file_name, std::ios::binary); | ||||
| 
 | ||||
|     // transforms in-place name offsets to name lengths
 | ||||
|     BOOST_ASSERT(!name_offsets.empty()); | ||||
|     for (auto curr = name_offsets.begin(), next = name_offsets.begin() + 1; | ||||
|          next != name_offsets.end(); | ||||
|          ++curr, ++next) | ||||
|     { | ||||
|         *curr = *next - *curr; | ||||
|     } | ||||
| 
 | ||||
|     // removes the total length sentinel
 | ||||
|     unsigned total_length = name_offsets.back(); | ||||
|     name_offsets.pop_back(); | ||||
| 
 | ||||
|     // builds and writes the index
 | ||||
|     util::RangeTable<> index_range(name_offsets); | ||||
|     file_stream << index_range; | ||||
| 
 | ||||
|     file_stream.write((char *)&total_length, sizeof(unsigned)); | ||||
| 
 | ||||
|     // write all chars consecutively
 | ||||
|     char write_buffer[WRITE_BLOCK_BUFFER_SIZE]; | ||||
|     unsigned buffer_len = 0; | ||||
| 
 | ||||
|     for (const auto c : name_char_data) | ||||
|     { | ||||
|         write_buffer[buffer_len++] = c; | ||||
| 
 | ||||
|         if (buffer_len >= WRITE_BLOCK_BUFFER_SIZE) | ||||
|         { | ||||
|             file_stream.write(write_buffer, WRITE_BLOCK_BUFFER_SIZE); | ||||
|             buffer_len = 0; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     file_stream.write(write_buffer, buffer_len); | ||||
|     const util::NameTable::IndexedData indexed_data; | ||||
|     indexed_data.write(file, name_offsets.begin(), name_offsets.end(), name_char_data.begin()); | ||||
| 
 | ||||
|     TIMER_STOP(write_index); | ||||
|     log << "ok, after " << TIMER_SEC(write_index) << "s"; | ||||
|  | ||||
| @ -269,40 +269,34 @@ void ExtractorCallbacks::ProcessWay(const osmium::Way &input_way, const Extracti | ||||
| 
 | ||||
|     const auto road_classification = parsed_way.road_classification; | ||||
| 
 | ||||
|     const constexpr std::size_t MAX_STRING_LENGTH = 255u; | ||||
|     // Get the unique identifier for the street name, destination, and ref
 | ||||
|     const auto name_iterator = string_map.find( | ||||
|         MapKey(parsed_way.name, parsed_way.destinations, parsed_way.ref, parsed_way.pronunciation)); | ||||
|     auto name_id = EMPTY_NAMEID; | ||||
|     NameID name_id = EMPTY_NAMEID; | ||||
|     if (string_map.end() == name_iterator) | ||||
|     { | ||||
|         const auto name_length = std::min(MAX_STRING_LENGTH, parsed_way.name.size()); | ||||
|         const auto destinations_length = | ||||
|             std::min(MAX_STRING_LENGTH, parsed_way.destinations.size()); | ||||
|         const auto pronunciation_length = | ||||
|             std::min(MAX_STRING_LENGTH, parsed_way.pronunciation.size()); | ||||
|         const auto ref_length = std::min(MAX_STRING_LENGTH, parsed_way.ref.size()); | ||||
| 
 | ||||
|         // name_offsets already has an offset of a new name, take the offset index as the name id
 | ||||
|         // name_offsets has a sentinel element with the total name data size
 | ||||
|         // take the sentinels index as the name id of the new name data pack
 | ||||
|         // (name [name_id], destination [+1], pronunciation [+2], ref [+3])
 | ||||
|         name_id = external_memory.name_offsets.size() - 1; | ||||
| 
 | ||||
|         std::copy(parsed_way.name.c_str(), | ||||
|                   parsed_way.name.c_str() + name_length, | ||||
|         std::copy(parsed_way.name.begin(), | ||||
|                   parsed_way.name.end(), | ||||
|                   std::back_inserter(external_memory.name_char_data)); | ||||
|         external_memory.name_offsets.push_back(external_memory.name_char_data.size()); | ||||
| 
 | ||||
|         std::copy(parsed_way.destinations.c_str(), | ||||
|                   parsed_way.destinations.c_str() + destinations_length, | ||||
|         std::copy(parsed_way.destinations.begin(), | ||||
|                   parsed_way.destinations.end(), | ||||
|                   std::back_inserter(external_memory.name_char_data)); | ||||
|         external_memory.name_offsets.push_back(external_memory.name_char_data.size()); | ||||
| 
 | ||||
|         std::copy(parsed_way.pronunciation.c_str(), | ||||
|                   parsed_way.pronunciation.c_str() + pronunciation_length, | ||||
|         std::copy(parsed_way.pronunciation.begin(), | ||||
|                   parsed_way.pronunciation.end(), | ||||
|                   std::back_inserter(external_memory.name_char_data)); | ||||
|         external_memory.name_offsets.push_back(external_memory.name_char_data.size()); | ||||
| 
 | ||||
|         std::copy(parsed_way.ref.c_str(), | ||||
|                   parsed_way.ref.c_str() + ref_length, | ||||
|         std::copy(parsed_way.ref.begin(), | ||||
|                   parsed_way.ref.end(), | ||||
|                   std::back_inserter(external_memory.name_char_data)); | ||||
|         external_memory.name_offsets.push_back(external_memory.name_char_data.size()); | ||||
| 
 | ||||
|  | ||||
| @ -574,7 +574,7 @@ TurnHandler::findForkCandidatesByGeometry(Intersection &intersection) const | ||||
|         //
 | ||||
|         //
 | ||||
|         //           left             left
 | ||||
|         //            /                 \ | ||||
|         //            /                 \ 
 | ||||
|         //           /____ right         \ ______ right
 | ||||
|         //          |                     |
 | ||||
|         //          |                     |
 | ||||
|  | ||||
| @ -194,19 +194,10 @@ void Storage::PopulateLayout(DataLayout &layout) | ||||
|     } | ||||
| 
 | ||||
|     { | ||||
|         // collect number of elements to store in shared memory object
 | ||||
|         util::Log() << "load names from: " << config.names_data_path; | ||||
|         // number of entries in name index
 | ||||
|         io::FileReader name_file(config.names_data_path, io::FileReader::HasNoFingerprint); | ||||
| 
 | ||||
|         const auto name_blocks = name_file.ReadElementCount32(); | ||||
|         layout.SetBlockSize<unsigned>(DataLayout::NAME_OFFSETS, name_blocks); | ||||
|         layout.SetBlockSize<typename util::RangeTable<16, true>::BlockT>(DataLayout::NAME_BLOCKS, | ||||
|                                                                          name_blocks); | ||||
|         BOOST_ASSERT_MSG(0 != name_blocks, "name file broken"); | ||||
| 
 | ||||
|         const auto number_of_chars = name_file.ReadElementCount32(); | ||||
|         layout.SetBlockSize<char>(DataLayout::NAME_CHAR_LIST, number_of_chars); | ||||
|         layout.SetBlockSize<char>(DataLayout::NAME_CHAR_DATA, name_file.GetSize()); | ||||
|     } | ||||
| 
 | ||||
|     { | ||||
| @ -451,35 +442,13 @@ void Storage::PopulateData(const DataLayout &layout, char *memory_ptr) | ||||
|     // Name data
 | ||||
|     { | ||||
|         io::FileReader name_file(config.names_data_path, io::FileReader::HasNoFingerprint); | ||||
|         const auto name_blocks_count = name_file.ReadElementCount32(); | ||||
|         name_file.Skip<std::uint32_t>(1); // name_char_list_count
 | ||||
| 
 | ||||
|         BOOST_ASSERT(name_blocks_count * sizeof(unsigned) == | ||||
|                      layout.GetBlockSize(DataLayout::NAME_OFFSETS)); | ||||
|         BOOST_ASSERT(name_blocks_count * sizeof(typename util::RangeTable<16, true>::BlockT) == | ||||
|                      layout.GetBlockSize(DataLayout::NAME_BLOCKS)); | ||||
| 
 | ||||
|         // Loading street names
 | ||||
|         const auto name_offsets_ptr = | ||||
|             layout.GetBlockPtr<unsigned, true>(memory_ptr, DataLayout::NAME_OFFSETS); | ||||
|         name_file.ReadInto(name_offsets_ptr, name_blocks_count); | ||||
| 
 | ||||
|         const auto name_blocks_ptr = | ||||
|             layout.GetBlockPtr<unsigned, true>(memory_ptr, DataLayout::NAME_BLOCKS); | ||||
|         name_file.ReadInto(reinterpret_cast<char *>(name_blocks_ptr), | ||||
|                            layout.GetBlockSize(DataLayout::NAME_BLOCKS)); | ||||
| 
 | ||||
|         // The file format contains the element count a second time.  Don't know why,
 | ||||
|         // but we need to read it here to progress the file pointer to the correct spot
 | ||||
|         const auto temp_count = name_file.ReadElementCount32(); | ||||
|         std::size_t name_file_size = name_file.GetSize(); | ||||
| 
 | ||||
|         BOOST_ASSERT(name_file_size == layout.GetBlockSize(DataLayout::NAME_CHAR_DATA)); | ||||
|         const auto name_char_ptr = | ||||
|             layout.GetBlockPtr<char, true>(memory_ptr, DataLayout::NAME_CHAR_LIST); | ||||
|             layout.GetBlockPtr<char, true>(memory_ptr, DataLayout::NAME_CHAR_DATA); | ||||
| 
 | ||||
|         BOOST_ASSERT_MSG(temp_count == layout.GetBlockSize(DataLayout::NAME_CHAR_LIST), | ||||
|                          "Name file corrupted!"); | ||||
| 
 | ||||
|         name_file.ReadInto(name_char_ptr, temp_count); | ||||
|         name_file.ReadInto<char>(name_char_ptr, name_file_size); | ||||
|     } | ||||
| 
 | ||||
|     // Turn lane data
 | ||||
|  | ||||
| @ -1,64 +1,58 @@ | ||||
| #include "util/name_table.hpp" | ||||
| #include "storage/io.hpp" | ||||
| #include "util/exception.hpp" | ||||
| #include "util/log.hpp" | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <iterator> | ||||
| #include <limits> | ||||
| 
 | ||||
| #include <boost/filesystem/fstream.hpp> | ||||
| 
 | ||||
| namespace osrm | ||||
| { | ||||
| namespace util | ||||
| { | ||||
| 
 | ||||
| NameTable::NameTable(const std::string &filename) | ||||
| NameTable::NameTable(const std::string &file_name) | ||||
| { | ||||
|     storage::io::FileReader name_stream_file_reader(filename, | ||||
|                                                     storage::io::FileReader::HasNoFingerprint); | ||||
|     using FileReader = storage::io::FileReader; | ||||
| 
 | ||||
|     m_name_table.ReadARangeTable(name_stream_file_reader); | ||||
|     FileReader name_stream_file_reader(file_name, FileReader::HasNoFingerprint); | ||||
|     const auto file_size = name_stream_file_reader.GetSize(); | ||||
| 
 | ||||
|     const auto number_of_chars = name_stream_file_reader.ReadElementCount32(); | ||||
|     m_buffer = BufferType(static_cast<ValueType *>(::operator new(file_size)), | ||||
|                           [](void *ptr) { ::operator delete(ptr); }); | ||||
|     name_stream_file_reader.ReadInto<char>(m_buffer.get(), file_size); | ||||
|     m_name_table.reset(m_buffer.get(), m_buffer.get() + file_size); | ||||
| 
 | ||||
|     m_names_char_list.resize(number_of_chars + 1); //+1 gives sentinel element
 | ||||
|     m_names_char_list.back() = 0; | ||||
|     if (number_of_chars > 0) | ||||
|     { | ||||
|         name_stream_file_reader.ReadInto(&m_names_char_list[0], number_of_chars); | ||||
|     } | ||||
|     else | ||||
|     if (m_name_table.empty()) | ||||
|     { | ||||
|         util::Log() << "list of street names is empty in construction of name table from: \"" | ||||
|                     << filename << "\""; | ||||
|                     << file_name << "\""; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void NameTable::reset(ValueType *begin, ValueType *end) | ||||
| { | ||||
|     m_buffer.reset(); | ||||
|     m_name_table.reset(begin, end); | ||||
| } | ||||
| 
 | ||||
| StringView NameTable::GetNameForID(const NameID id) const | ||||
| { | ||||
|     if (std::numeric_limits<NameID>::max() == id) | ||||
|     { | ||||
|     if (id == INVALID_NAMEID) | ||||
|         return {}; | ||||
| 
 | ||||
|     return m_name_table.at(id); | ||||
| } | ||||
| 
 | ||||
|     auto range = m_name_table.GetRange(id); | ||||
| 
 | ||||
|     if (range.begin() == range.end()) | ||||
| StringView NameTable::GetDestinationsForID(const NameID id) const | ||||
| { | ||||
|     if (id == INVALID_NAMEID) | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     auto first = begin(m_names_char_list) + range.front(); | ||||
|     auto last = begin(m_names_char_list) + range.back() + 1; | ||||
|     const std::size_t len = last - first; | ||||
| 
 | ||||
|     return StringView{&*first, len}; | ||||
|     return m_name_table.at(id + 1); | ||||
| } | ||||
| 
 | ||||
| StringView NameTable::GetRefForID(const NameID id) const | ||||
| { | ||||
|     if (id == INVALID_NAMEID) | ||||
|         return {}; | ||||
| 
 | ||||
|     // Way string data is stored in blocks based on `id` as follows:
 | ||||
|     //
 | ||||
|     // | name | destination | pronunciation | ref |
 | ||||
| @ -71,11 +65,14 @@ StringView NameTable::GetRefForID(const NameID id) const | ||||
|     // Offset 0 is name, 1 is destination, 2 is pronunciation, 3 is ref.
 | ||||
|     // See datafacades and extractor callbacks for details.
 | ||||
|     const constexpr auto OFFSET_REF = 3u; | ||||
|     return GetNameForID(id + OFFSET_REF); | ||||
|     return m_name_table.at(id + OFFSET_REF); | ||||
| } | ||||
| 
 | ||||
| StringView NameTable::GetPronunciationForID(const NameID id) const | ||||
| { | ||||
|     if (id == INVALID_NAMEID) | ||||
|         return {}; | ||||
| 
 | ||||
|     // Way string data is stored in blocks based on `id` as follows:
 | ||||
|     //
 | ||||
|     // | name | destination | pronunciation | ref |
 | ||||
| @ -88,7 +85,7 @@ StringView NameTable::GetPronunciationForID(const NameID id) const | ||||
|     // Offset 0 is name, 1 is destination, 2 is pronunciation, 3 is ref.
 | ||||
|     // See datafacades and extractor callbacks for details.
 | ||||
|     const constexpr auto OFFSET_PRONUNCIATION = 2u; | ||||
|     return GetNameForID(id + OFFSET_PRONUNCIATION); | ||||
|     return m_name_table.at(id + OFFSET_PRONUNCIATION); | ||||
| } | ||||
| 
 | ||||
| } // namespace util
 | ||||
|  | ||||
							
								
								
									
										193
									
								
								unit_tests/util/indexed_data.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								unit_tests/util/indexed_data.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,193 @@ | ||||
| #include "util/indexed_data.hpp" | ||||
| #include "util/exception.hpp" | ||||
| 
 | ||||
| #include <boost/test/test_case_template.hpp> | ||||
| #include <boost/test/unit_test.hpp> | ||||
| 
 | ||||
| #include <iomanip> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <typeinfo> | ||||
| #include <vector> | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE(indexed_data) | ||||
| 
 | ||||
| using namespace osrm; | ||||
| using namespace osrm::util; | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_variable_group_block_bitops) | ||||
| { | ||||
|     VariableGroupBlock<16> variable_group_block; | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0xe4), 6); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0x11111111), 8); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0x55555555), 16); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.sum2bits(0xffffffff), 48); | ||||
| 
 | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(0), 0); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(1), 1); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(255), 1); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(256), 2); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(1024), 2); | ||||
|     BOOST_CHECK_EQUAL(variable_group_block.log256(16777215), 3); | ||||
| } | ||||
| 
 | ||||
// Round-trip helper: serialises `data`, partitioned by `offsets`, with an
// IndexedData instance, re-attaches the instance to the serialised bytes and
// checks that every entry reads back unchanged.
//
// offsets  N + 1 positions into `data`; entry i is [offsets[i], offsets[i + 1]).
// data     contiguous container holding the concatenated entry bytes.
//
// Exceptions thrown by write(), reset() or at() propagate to the caller.
template <typename IndexedData, typename Offsets, typename Data>
void test_rw(const Offsets &offsets, const Data &data)
{
    std::stringstream sstr;
    IndexedData indexed_data;
    indexed_data.write(sstr, offsets.begin(), offsets.end(), data.begin());

    const std::string str = sstr.str();

#if 0
    std::cout << "\n" << typeid(IndexedData).name() << "\nsaved size = " << str.size() << "\n";
    for (auto c : str)
        std::cout << std::hex << std::setw(2) << std::setfill('0')
                  << (int)((unsigned char)c) << " ";
    std::cout << std::dec << "\n";
#endif

    indexed_data.reset(str.c_str(), str.c_str() + str.size());

    // Build the expected ranges with pointer arithmetic on data(): the last
    // offset equals data.size(), and `&data[data.size()]` would index the
    // container out of range.
    const auto *const data_begin = data.data();

    // `index + 1 < size()` instead of `index < size() - 1` so that an empty
    // offsets container does not wrap around.
    for (std::size_t index = 0; index + 1 < offsets.size(); ++index)
    {
        typename IndexedData::ResultType expected_result(data_begin + offsets[index],
                                                         data_begin + offsets[index + 1]);
        BOOST_CHECK_EQUAL(expected_result, indexed_data.at(index));
    }
}
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_group_blocks_with_different_sizes) | ||||
| { | ||||
| 
 | ||||
|     std::vector<std::string> str = { | ||||
|         "",     "A", "bb", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "",  "kkk", | ||||
|         "llll", "M", "nn", "ooo", "pppp", "q", "r",  "S",   "T",    "",  "u",  "V", "W", | ||||
|         "X",    "Y", "Z",  "",    "",     "",  "",   "",    "",     "",  "0",  ""}; | ||||
| 
 | ||||
|     std::vector<unsigned char> name_char_data; | ||||
|     std::vector<std::uint32_t> name_offsets; | ||||
|     for (auto s : str) | ||||
|     { | ||||
|         name_offsets.push_back(name_char_data.size()); | ||||
|         std::copy(s.begin(), s.end(), std::back_inserter(name_char_data)); | ||||
|     } | ||||
|     name_offsets.push_back(name_char_data.size()); | ||||
| 
 | ||||
|     test_rw<IndexedData<VariableGroupBlock<0, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<VariableGroupBlock<1, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<VariableGroupBlock<16, std::string>>>(name_offsets, name_char_data); | ||||
| 
 | ||||
|     test_rw<IndexedData<FixedGroupBlock<0, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<FixedGroupBlock<1, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<FixedGroupBlock<16, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<FixedGroupBlock<32, std::string>>>(name_offsets, name_char_data); | ||||
|     test_rw<IndexedData<FixedGroupBlock<128, std::string>>>(name_offsets, name_char_data); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_1001_pandas) | ||||
| { | ||||
|     std::vector<unsigned char> name_char_data; | ||||
|     std::vector<std::uint32_t> name_offsets; | ||||
| 
 | ||||
|     const std::string panda = "🐼"; | ||||
|     name_offsets.push_back(0); | ||||
|     for (std::size_t i = 0; i < 1000; ++i) | ||||
|         std::copy(panda.begin(), panda.end(), std::back_inserter(name_char_data)); | ||||
|     name_offsets.push_back(name_char_data.size()); | ||||
|     std::copy(panda.begin(), panda.end(), std::back_inserter(name_char_data)); | ||||
|     name_offsets.push_back(name_char_data.size()); | ||||
| 
 | ||||
|     test_rw<IndexedData<VariableGroupBlock<16, std::string>>>(name_offsets, name_char_data); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_different_sizes) | ||||
| { | ||||
|     for (std::size_t num_strings = 0; num_strings < 256; ++num_strings) | ||||
|     { | ||||
|         std::vector<unsigned char> name_char_data; | ||||
|         std::vector<std::uint32_t> name_offsets; | ||||
| 
 | ||||
|         const std::string canoe = "🛶"; | ||||
|         name_offsets.push_back(0); | ||||
|         for (std::size_t i = 0; i < num_strings; ++i) | ||||
|         { | ||||
|             std::copy(canoe.begin(), canoe.end(), std::back_inserter(name_char_data)); | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|         } | ||||
| 
 | ||||
|         test_rw<IndexedData<VariableGroupBlock<16, std::string>>>(name_offsets, name_char_data); | ||||
|         test_rw<IndexedData<FixedGroupBlock<16, std::string>>>(name_offsets, name_char_data); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_max_size) | ||||
| { | ||||
|     std::vector<unsigned char> name_data(0x1000000, '#'); | ||||
|     std::vector<std::uint32_t> name_offsets; | ||||
| 
 | ||||
|     auto test_variable = [&name_offsets, &name_data]() { | ||||
|         test_rw<IndexedData<VariableGroupBlock<16, std::string>>>(name_offsets, name_data); | ||||
|     }; | ||||
|     auto test_fixed = [&name_offsets, &name_data]() { | ||||
|         test_rw<IndexedData<FixedGroupBlock<16, std::string>>>(name_offsets, name_data); | ||||
|     }; | ||||
| 
 | ||||
|     name_offsets = {0, 0x1000000}; | ||||
|     BOOST_CHECK_THROW(test_variable(), osrm::util::exception); | ||||
|     name_offsets = {0, 0x1000000 - 1}; | ||||
|     test_variable(); | ||||
| 
 | ||||
|     name_offsets = {0, 256}; | ||||
|     BOOST_CHECK_THROW(test_fixed(), osrm::util::exception); | ||||
|     name_offsets = {0, 255}; | ||||
|     test_fixed(); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_corrupted_memory) | ||||
| { | ||||
|     std::vector<unsigned char> buf; | ||||
| 
 | ||||
|     auto test_variable = [&buf]() { | ||||
|         IndexedData<VariableGroupBlock<16, std::vector<unsigned char>>> indexed_data; | ||||
|         indexed_data.reset(&buf[0], &buf[buf.size()]); | ||||
|         const auto result = indexed_data.at(0); | ||||
|         return std::string(reinterpret_cast<const char *>(&result[0]), result.size()); | ||||
|     }; | ||||
| 
 | ||||
|     // Use LE internal representation
 | ||||
|     buf = {0, 42}; | ||||
|     BOOST_CHECK_THROW(test_variable(), osrm::util::exception); | ||||
| 
 | ||||
|     buf = {1, 0, 0, 0, 0}; | ||||
|     BOOST_CHECK_THROW(test_variable(), osrm::util::exception); | ||||
| 
 | ||||
|     buf = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42}; | ||||
|     BOOST_CHECK_THROW(test_variable(), osrm::util::exception); | ||||
| 
 | ||||
|     buf = {1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 42}; | ||||
|     BOOST_CHECK_THROW(test_variable(), osrm::util::exception); | ||||
| 
 | ||||
|     buf = {1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 4, 0xF0, 0x9F, 0x90, 0xBC}; | ||||
|     BOOST_CHECK_EQUAL(test_variable(), "🐼"); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_string_view) | ||||
| { | ||||
|     std::stringstream sstr; | ||||
|     std::string name_data = "hellostringview"; | ||||
|     std::vector<std::uint32_t> name_offsets = {0, 5, 11, 15}; | ||||
| 
 | ||||
|     IndexedData<VariableGroupBlock<16, StringView>> indexed_data; | ||||
|     indexed_data.write(sstr, name_offsets.begin(), name_offsets.end(), name_data.begin()); | ||||
| 
 | ||||
|     const std::string str = sstr.str(); | ||||
|     indexed_data.reset(str.c_str(), str.c_str() + str.size()); | ||||
| 
 | ||||
|     BOOST_CHECK_EQUAL(indexed_data.at(0), "hello"); | ||||
|     BOOST_CHECK_EQUAL(indexed_data.at(1), "string"); | ||||
|     BOOST_CHECK_EQUAL(indexed_data.at(2), "view"); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE_END() | ||||
							
								
								
									
										117
									
								
								unit_tests/util/name_table.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								unit_tests/util/name_table.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,117 @@ | ||||
| #include "util/name_table.hpp" | ||||
| #include "util/exception.hpp" | ||||
| 
 | ||||
| #include <boost/test/test_case_template.hpp> | ||||
| #include <boost/test/unit_test.hpp> | ||||
| 
 | ||||
| #include <iomanip> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <typeinfo> | ||||
| #include <vector> | ||||
| 
 | ||||
| //#include <valgrind/callgrind.h>
 | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE(name_table) | ||||
| 
 | ||||
| using namespace osrm; | ||||
| using namespace osrm::util; | ||||
| 
 | ||||
| std::string PrapareNameTableData(std::vector<std::string> &data, bool fill_all) | ||||
| { | ||||
|     std::stringstream sstr; | ||||
|     NameTable::IndexedData indexed_data; | ||||
|     std::vector<unsigned char> name_char_data; | ||||
|     std::vector<std::uint32_t> name_offsets; | ||||
| 
 | ||||
|     for (auto s : data) | ||||
|     { | ||||
|         name_offsets.push_back(name_char_data.size()); | ||||
|         std::copy(s.begin(), s.end(), std::back_inserter(name_char_data)); | ||||
| 
 | ||||
|         if (fill_all) | ||||
|         { | ||||
|             std::string tmp; | ||||
| 
 | ||||
|             tmp = s + "_des"; | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|             std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); | ||||
| 
 | ||||
|             tmp = s + "_pro"; | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|             std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); | ||||
| 
 | ||||
|             tmp = s + "_ref"; | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|             std::copy(tmp.begin(), tmp.end(), std::back_inserter(name_char_data)); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|             name_offsets.push_back(name_char_data.size()); | ||||
|         } | ||||
|     } | ||||
|     name_offsets.push_back(name_char_data.size()); | ||||
| 
 | ||||
|     indexed_data.write(sstr, name_offsets.begin(), name_offsets.end(), name_char_data.begin()); | ||||
| 
 | ||||
|     return sstr.str(); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_name_table_fill) | ||||
| { | ||||
|     std::vector<std::string> expected_names = { | ||||
|         "",     "A", "check_name", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "",  "kkk", | ||||
|         "llll", "M", "nn",         "ooo", "pppp", "q", "r",  "S",   "T",    "",  "u",  "V", "W", | ||||
|         "X",    "Y", "Z",          "",    "",     "",  "",   "",    "",     "",  "0",  ""}; | ||||
| 
 | ||||
|     auto data = PrapareNameTableData(expected_names, true); | ||||
| 
 | ||||
|     NameTable name_table; | ||||
|     name_table.reset(&data[0], &data[data.size()]); | ||||
| 
 | ||||
|     for (std::size_t index = 0; index < expected_names.size(); ++index) | ||||
|     { | ||||
|         const NameID id = 4 * index; | ||||
|         BOOST_CHECK_EQUAL(name_table.GetNameForID(id), expected_names[index]); | ||||
|         BOOST_CHECK_EQUAL(name_table.GetRefForID(id), expected_names[index] + "_ref"); | ||||
|         BOOST_CHECK_EQUAL(name_table.GetDestinationsForID(id), expected_names[index] + "_des"); | ||||
|         BOOST_CHECK_EQUAL(name_table.GetPronunciationForID(id), expected_names[index] + "_pro"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_name_table_nofill) | ||||
| { | ||||
|     std::vector<std::string> expected_names = { | ||||
|         "",     "A", "check_name", "ccc", "dDDd", "E", "ff", "ggg", "hhhh", "I", "jj", "",  "kkk", | ||||
|         "llll", "M", "nn",         "ooo", "pppp", "q", "r",  "S",   "T",    "",  "u",  "V", "W", | ||||
|         "X",    "Y", "Z",          "",    "",     "",  "",   "",    "",     "",  "0",  ""}; | ||||
| 
 | ||||
|     auto data = PrapareNameTableData(expected_names, false); | ||||
| 
 | ||||
|     NameTable name_table; | ||||
|     name_table.reset(&data[0], &data[data.size()]); | ||||
| 
 | ||||
|     // CALLGRIND_START_INSTRUMENTATION;
 | ||||
|     for (std::size_t index = 0; index < expected_names.size(); ++index) | ||||
|     { | ||||
|         const NameID id = 4 * index; | ||||
|         BOOST_CHECK_EQUAL(name_table.GetNameForID(id), expected_names[index]); | ||||
|         BOOST_CHECK(name_table.GetRefForID(id).empty()); | ||||
|         BOOST_CHECK(name_table.GetDestinationsForID(id).empty()); | ||||
|         BOOST_CHECK(name_table.GetPronunciationForID(id).empty()); | ||||
|     } | ||||
|     // CALLGRIND_STOP_INSTRUMENTATION;
 | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_CASE(check_invalid_ids) | ||||
| { | ||||
|     NameTable name_table; | ||||
|     BOOST_CHECK_EQUAL(name_table.GetNameForID(INVALID_NAMEID), ""); | ||||
|     BOOST_CHECK_EQUAL(name_table.GetRefForID(INVALID_NAMEID), ""); | ||||
|     BOOST_CHECK_EQUAL(name_table.GetDestinationsForID(INVALID_NAMEID), ""); | ||||
|     BOOST_CHECK_EQUAL(name_table.GetPronunciationForID(INVALID_NAMEID), ""); | ||||
| } | ||||
| 
 | ||||
| BOOST_AUTO_TEST_SUITE_END() | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user