Added indexed array data type with variable and fixed group blocks

This commit is contained in:
Michael Krasnyk
2017-01-19 15:14:30 +01:00
committed by Patrick Niklaus
parent cedeb15ade
commit 6e1c4bfecd
15 changed files with 781 additions and 193 deletions
@@ -18,6 +18,7 @@
#include "util/exception_utils.hpp"
#include "util/guidance/turn_bearing.hpp"
#include "util/log.hpp"
#include "util/name_table.hpp"
#include "util/packed_vector.hpp"
#include "util/range_table.hpp"
#include "util/rectangle.hpp"
@@ -79,7 +80,7 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade
util::ShM<extractor::TravelMode, true>::vector m_travel_mode_list;
util::ShM<util::guidance::TurnBearing, true>::vector m_pre_turn_bearing;
util::ShM<util::guidance::TurnBearing, true>::vector m_post_turn_bearing;
util::ShM<char, true>::vector m_names_char_list;
util::NameTable m_names_table;
util::ShM<unsigned, true>::vector m_name_begin_indices;
util::ShM<unsigned, true>::vector m_geometry_indices;
util::ShM<NodeID, true>::vector m_geometry_node_list;
@@ -103,7 +104,7 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade
std::unique_ptr<SharedGeospatialQuery> m_geospatial_query;
boost::filesystem::path file_index_path;
std::shared_ptr<util::RangeTable<16, true>> m_name_table;
util::NameTable m_name_table;
// bearing classes by node based node
util::ShM<BearingClassID, true>::vector m_bearing_class_id_table;
// entry class IDs
@@ -267,23 +268,10 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade
void InitializeNamePointers(storage::DataLayout &data_layout, char *memory_block)
{
auto offsets_ptr =
data_layout.GetBlockPtr<unsigned>(memory_block, storage::DataLayout::NAME_OFFSETS);
auto blocks_ptr =
data_layout.GetBlockPtr<IndexBlock>(memory_block, storage::DataLayout::NAME_BLOCKS);
util::ShM<unsigned, true>::vector name_offsets(
offsets_ptr, data_layout.num_entries[storage::DataLayout::NAME_OFFSETS]);
util::ShM<IndexBlock, true>::vector name_blocks(
blocks_ptr, data_layout.num_entries[storage::DataLayout::NAME_BLOCKS]);
auto names_list_ptr =
data_layout.GetBlockPtr<char>(memory_block, storage::DataLayout::NAME_CHAR_LIST);
util::ShM<char, true>::vector names_char_list(
names_list_ptr, data_layout.num_entries[storage::DataLayout::NAME_CHAR_LIST]);
m_name_table = std::make_unique<util::RangeTable<16, true>>(
name_offsets, name_blocks, static_cast<unsigned>(names_char_list.size()));
m_names_char_list = std::move(names_char_list);
auto name_data_ptr =
data_layout.GetBlockPtr<char>(memory_block, storage::DataLayout::NAME_CHAR_DATA);
const auto name_data_size = data_layout.num_entries[storage::DataLayout::NAME_CHAR_DATA];
m_name_table.reset(name_data_ptr, name_data_ptr + name_data_size);
}
void InitializeTurnLaneDescriptionsPointers(storage::DataLayout &data_layout,
@@ -823,52 +811,22 @@ class ContiguousInternalMemoryDataFacade : public BaseDataFacade
StringView GetNameForID(const NameID id) const override final
{
if (std::numeric_limits<NameID>::max() == id)
{
return "";
}
auto range = m_name_table->GetRange(id);
if (range.begin() == range.end())
{
return "";
}
auto first = m_names_char_list.begin() + range.front();
auto last = m_names_char_list.begin() + range.back() + 1u;
// These iterators are useless: they're InputIterators onto a contiguous block of memory.
// Deref to get to the first element, then Addressof to get the memory address of the it.
const std::size_t len = &*last - &*first;
return StringView{&*first, len};
return m_name_table.GetNameForID(id);
}
StringView GetRefForID(const NameID id) const override final
{
// We store the ref after the name, destination and pronunciation of a street.
// We do this to get around the street length limit of 255 which would hit
// if we concatenate these. Order (see extractor_callbacks):
// name (0), destination (1), pronunciation (2), ref (3)
return GetNameForID(id + 3);
return m_name_table.GetRefForID(id);
}
StringView GetPronunciationForID(const NameID id) const override final
{
// We store the pronunciation after the name and destination of a street.
// We do this to get around the street length limit of 255 which would hit
// if we concatenate these. Order (see extractor_callbacks):
// name (0), destination (1), pronunciation (2), ref (3)
return GetNameForID(id + 2);
return m_name_table.GetPronunciationForID(id);
}
StringView GetDestinationsForID(const NameID id) const override final
{
// We store the destination after the name of a street.
// We do this to get around the street length limit of 255 which would hit
// if we concatenate these. Order (see extractor_callbacks):
// name (0), destination (1), pronunciation (2), ref (3)
return GetNameForID(id + 1);
return m_name_table.GetDestinationsForID(id);
}
bool IsCoreNode(const NodeID id) const override final
+16
View File
@@ -61,6 +61,22 @@ class FileReader
}
}
std::size_t GetSize()
{
const boost::filesystem::ifstream::pos_type positon = input_stream.tellg();
input_stream.seekg(0, std::ios::end);
const boost::filesystem::ifstream::pos_type file_size = input_stream.tellg();
if (file_size == boost::filesystem::ifstream::pos_type(-1))
{
throw util::exception("File size for " + filepath.string() + " failed " + SOURCE_REF);
}
// restore the current position
input_stream.seekg(positon, std::ios::beg);
return file_size;
}
/* Read count objects of type T into pointer dest */
template <typename T> void ReadInto(T *dest, const std::size_t count)
{
+2 -6
View File
@@ -18,9 +18,7 @@ namespace storage
// Added at the start and end of each block as sanity check
const constexpr char CANARY[4] = {'O', 'S', 'R', 'M'};
const constexpr char *block_id_to_name[] = {"NAME_OFFSETS",
"NAME_BLOCKS",
"NAME_CHAR_LIST",
const constexpr char *block_id_to_name[] = {"NAME_CHAR_DATA",
"NAME_ID_LIST",
"VIA_NODE_LIST",
"GRAPH_NODE_LIST",
@@ -64,9 +62,7 @@ struct DataLayout
{
enum BlockID
{
NAME_OFFSETS = 0,
NAME_BLOCKS,
NAME_CHAR_LIST,
NAME_CHAR_DATA = 0,
NAME_ID_LIST,
VIA_NODE_LIST,
GRAPH_NODE_LIST,
+3
View File
@@ -32,6 +32,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include <utility>
#include <boost/format.hpp>
namespace osrm
{
namespace util
@@ -42,6 +44,7 @@ class exception final : public std::exception
public:
explicit exception(const char *message) : message(message) {}
explicit exception(std::string message) : message(std::move(message)) {}
explicit exception(boost::format message) : message(message.str()) {}
const char *what() const noexcept override { return message.c_str(); }
private:
+366
View File
@@ -0,0 +1,366 @@
#ifndef OSRM_INDEXED_DATA_HPP
#define OSRM_INDEXED_DATA_HPP
#include "util/exception.hpp"
#include "util/string_view.hpp"

#include <boost/assert.hpp>
#include <boost/format.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <ostream>
#include <string>
#include <type_traits>
namespace osrm
{
namespace util
{
/// Block codec with a variable-width length prefix.
///
/// A block is laid out as (prefix, data). The prefix holds the lengths of the first
/// BLOCK_SIZE items of the block, each encoded little-endian in 0 to 3 bytes. How many
/// bytes each length occupies is recorded as a 2-bit field in the block reference
/// descriptor; the field of item i sits in bits [2*i, 2*i + 1]. The length of the item
/// that follows the prefixed ones is not stored: the reader keeps the end of the block
/// range it was given instead.
template <int N, typename T = std::string> struct VariableGroupBlock
{
    static constexpr std::uint32_t BLOCK_SIZE = N;

    using ResultType = T;
    using ValueType = typename T::value_type;

    // Checked on N rather than BLOCK_SIZE: BLOCK_SIZE is unsigned, so a `0 <=` test on it
    // is always true. 16 is the number of 2-bit fields that fit the 32-bit descriptor.
    static_assert(0 <= N && N <= 16, "incorrect block size");
    static_assert(sizeof(ValueType) == 1, "data basic type must char");

    struct BlockReference
    {
        std::uint32_t offset;     // offset of the block from the start of the data section
        std::uint32_t descriptor; // 2-bit byte counts of the prefixed lengths
    };

    // User-provided on purpose: IndexedData default-initializes `const GroupBlock` objects.
    VariableGroupBlock() {}

    /// Returns ceiling(log_256(value + 1)), i.e. the number of bytes needed to store value.
    /// value must be below 0x1000000 (checked by assertion only).
    inline std::uint32_t log256(std::uint32_t value) const
    {
        BOOST_ASSERT(value < 0x1000000);
        return value == 0 ? 0 : value < 0x100 ? 1 : value < 0x10000 ? 2 : 3;
    }

    /// Advance data iterator by the value of byte_length bytes at length iterator.
    /// Advance length iterator by byte_length.
    /// The value is decoded least significant byte first; byte_length must be in [0, 3].
    template <typename DataIterator>
    inline void
    var_advance(DataIterator &data, DataIterator &length, std::uint32_t byte_length) const
    {
        BOOST_ASSERT(byte_length <= 3);

        std::uint32_t item_length = 0;
        for (std::uint32_t byte = 0; byte < byte_length; ++byte)
        {
            // Go through unsigned char so that a signed ValueType is not sign-extended
            const std::uint32_t part = static_cast<unsigned char>(*length++);
            item_length |= part << (8 * byte);
        }

        data += item_length;
    }

    /// Summation of 16 2-bit values using SWAR
    inline std::uint32_t sum2bits(std::uint32_t value) const
    {
        value = ((value >> 2) & 0x33333333) + (value & 0x33333333);
        value = ((value >> 4) & 0x0f0f0f0f) + (value & 0x0f0f0f0f);
        value = ((value >> 8) & 0x00ff00ff) + (value & 0x00ff00ff);
        return ((value >> 16) & 0x0000ffff) + (value & 0x0000ffff);
    }

    /// Write a block reference {offset, descriptor}, where offset
    /// is a global block offset and descriptor is a 32-bit value
    /// of prefix length. sum(descriptor) equals to the block
    /// prefix length.
    /// [first..last] is an inclusive range of item offsets; both ends are dereferenced.
    /// Throws util::exception if an item is 0x1000000 bytes long or longer.
    /// Returns the block prefix length.
    template <typename Offset, typename OffsetIterator>
    Offset WriteBlockReference(std::ostream &out,
                               Offset data_offset,
                               OffsetIterator first,
                               OffsetIterator last) const
    {
        BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max());

        Offset prefix_length = 0;
        BlockReference reference{static_cast<decltype(BlockReference::offset)>(data_offset), 0};

        // Walk backwards so that the first item ends up in the lowest two descriptor bits
        for (; first != last; --last)
        {
            const std::uint32_t data_length = *last - *std::prev(last);
            if (data_length >= 0x1000000)
                throw util::exception(boost::format("too large data length %1%") % data_length);

            const std::uint32_t byte_length = log256(data_length);
            reference.descriptor = (reference.descriptor << 2) | byte_length;
            prefix_length += byte_length;
        }

        out.write(reinterpret_cast<const char *>(&reference), sizeof(reference));

        return prefix_length;
    }

    /// Write a block prefix that is an array of variable encoded data lengths:
    ///   0 is omitted;
    ///   1..255 is 1 byte;
    ///   256..65535 is 2 bytes;
    ///   65536..16777215 is 3 bytes.
    /// [first..last] is an inclusive range of block data.
    /// The length of the last item in the block is not stored.
    template <typename OffsetIterator>
    void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const
    {
        for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next)
        {
            const std::uint32_t data_length = *next - *curr;
            const std::uint32_t byte_length = log256(data_length);
            if (byte_length == 0)
                continue;

            // Emit the bytes explicitly, least significant first, to match var_advance.
            // Copying the leading bytes of the integer's object representation would only
            // produce this order on little-endian hosts.
            const char encoded[3] = {static_cast<char>(data_length & 0xff),
                                     static_cast<char>((data_length >> 8) & 0xff),
                                     static_cast<char>((data_length >> 16) & 0xff)};
            out.write(encoded, byte_length);
        }
    }

    /// Advances the range to an item stored in the referenced block.
    /// Input [first..last) is a range of the complete block data with prefix.
    /// Output [first..last) is a range of the referenced data at local_index.
    template <typename DataIterator>
    void ReadRefrencedBlock(const BlockReference &reference,
                            std::uint32_t local_index,
                            DataIterator &first,
                            DataIterator &last) const
    {
        std::uint32_t descriptor = reference.descriptor;
        DataIterator var_lengths = first;          // iterator to the variable lengths part
        std::advance(first, sum2bits(descriptor)); // advance first to the block data part

        // Skip the items stored in front of the requested one
        for (std::uint32_t i = 0; i < local_index; ++i, descriptor >>= 2)
        {
            var_advance(first, var_lengths, descriptor & 0x3);
        }

        // Only the first BLOCK_SIZE items have a stored length. For local_index equal to
        // BLOCK_SIZE `last` is left untouched, so the item runs to the end of the input range.
        if (local_index < BLOCK_SIZE)
        {
            last = first;
            var_advance(last, var_lengths, descriptor & 0x3);
        }
    }
};
template <int N, typename T = std::string> struct FixedGroupBlock
{
static constexpr std::uint32_t BLOCK_SIZE = N;
using ResultType = T;
using ValueType = typename T::value_type;
static_assert(sizeof(ValueType) == 1, "data basic type must char");
struct BlockReference
{
std::uint32_t offset;
};
FixedGroupBlock() {}
/// Write a block reference {offset}, where offset is a global block offset
/// Returns the fixed block prefix length.
template <typename Offset, typename OffsetIterator>
Offset
WriteBlockReference(std::ostream &out, Offset data_offset, OffsetIterator, OffsetIterator) const
{
BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max());
BlockReference refernce{static_cast<decltype(BlockReference::offset)>(data_offset)};
out.write((const char *)&refernce, sizeof(refernce));
return BLOCK_SIZE;
}
/// Write a fixed length block prefix.
template <typename OffsetIterator>
void WriteBlockPrefix(std::ostream &out, OffsetIterator first, OffsetIterator last) const
{
std::uint32_t index = 0;
std::array<ValueType, BLOCK_SIZE> block_prefix;
for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next)
{
const std::uint32_t data_length = *next - *curr;
if (data_length >= 0x100)
throw util::exception(boost::format("too large data length %1%") % data_length);
block_prefix[index++] = static_cast<ValueType>(data_length);
}
out.write((const char *)block_prefix.data(), block_prefix.size());
}
/// Advances the range to an item stored in the referenced block.
/// Input [first..last) is a range of the complete block data with prefix.
/// Output [first..last) is a range of the referenced data at local_index.
template <typename DataIterator>
void ReadRefrencedBlock(const BlockReference &,
std::uint32_t local_index,
DataIterator &first,
DataIterator &last) const
{
DataIterator fixed_lengths = first; // iterator to the fixed lengths part
std::advance(first, BLOCK_SIZE); // advance first to the block data part
for (std::uint32_t i = 0; i < local_index; ++i)
{
first += static_cast<unsigned char>(*fixed_lengths++);
}
if (local_index < BLOCK_SIZE)
{
last = first + static_cast<unsigned char>(*fixed_lengths);
}
}
};
/// Read-only indexed array of byte strings that lives in one contiguous buffer.
///
/// Serialized layout, produced by write() and consumed by reset():
///   BlocksNumberType   number of blocks
///   BlockReference[]   one reference per block
///   DataSizeType       total size in bytes of all (prefix, data) blocks
///   ValueType[]        the blocks themselves
/// Every block groups up to BLOCK_SIZE + 1 consecutive items. GroupBlock decides how the
/// per-block length prefix is encoded and decoded.
template <typename GroupBlock> struct IndexedData
{
    static constexpr std::uint32_t BLOCK_SIZE = GroupBlock::BLOCK_SIZE;

    using BlocksNumberType = std::uint32_t;
    using DataSizeType = std::uint64_t;

    using BlockReference = typename GroupBlock::BlockReference;
    using ResultType = typename GroupBlock::ResultType;
    using ValueType = typename GroupBlock::ValueType;

    static_assert(sizeof(ValueType) == 1, "data basic type must char");

    IndexedData() : blocks_number{0}, block_references{nullptr}, begin{nullptr}, end{nullptr} {}

    /// True when no block is referenced, e.g. before the first successful reset().
    bool empty() const { return blocks_number == 0; }

    /// Serialize items into out.
    /// [first, last) is a non-empty range of item offsets into data: item i occupies
    /// [data + first[i], data + first[i + 1]), so the final offset is the end sentinel.
    /// Iterators may be class-type iterators or raw pointers.
    /// Exceptions thrown by GroupBlock for over-long items are propagated.
    template <typename OffsetIterator, typename DataIterator>
    void
    write(std::ostream &out, OffsetIterator first, OffsetIterator last, DataIterator data) const
    {
        // iterator_traits instead of nested typedefs, so that raw pointers are accepted too
        static_assert(sizeof(typename std::iterator_traits<DataIterator>::value_type) == 1,
                      "data basic type must char");
        using diff_type = typename std::iterator_traits<OffsetIterator>::difference_type;

        BOOST_ASSERT(first < last);
        const OffsetIterator sentinel = std::prev(last);

        // Write number of blocks
        const auto number_of_elements = std::distance(first, sentinel);
        const BlocksNumberType number_of_blocks =
            number_of_elements == 0 ? 0 : 1 + (number_of_elements - 1) / (BLOCK_SIZE + 1);
        out.write(reinterpret_cast<const char *>(&number_of_blocks), sizeof(number_of_blocks));

        // Write block references and compute the total data size that includes prefix and data
        const GroupBlock block;
        DataSizeType data_size = 0;
        for (OffsetIterator curr = first, next = first; next != sentinel; curr = next)
        {
            // [curr..next] covers the items whose lengths go into the block prefix
            std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel)));
            data_size += block.WriteBlockReference(out, data_size, curr, next);

            // One more item, stored without a length, closes the block if there is one left
            std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel)));
            data_size += *next - *curr;
        }

        // Write the total data size
        out.write(reinterpret_cast<const char *>(&data_size), sizeof(data_size));

        // Write data blocks that are (prefix, data)
        for (OffsetIterator curr = first, next = first; next != sentinel; curr = next)
        {
            std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel)));
            block.WriteBlockPrefix(out, curr, next);
            std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel)));
            std::copy(data + *curr, data + *next, std::ostream_iterator<unsigned char>(out));
        }
    }

    /// Set internal pointers from the buffer [first, last).
    /// Data buffer pointed by ptr must exists during IndexedData life-time.
    /// No ownership is transferred.
    /// Throws util::exception if the buffer is too short for the sizes its header declares;
    /// in that case the object keeps its previous state.
    /// NOTE(review): block references are still accessed in place, which assumes that
    /// first + sizeof(BlocksNumberType) is suitably aligned for BlockReference - confirm
    /// against the buffers callers pass in.
    void reset(const ValueType *first, const ValueType *last)
    {
        if (first > last)
            throw util::exception("incorrect memory block");

        // All bounds checks compare sizes. Forming `first + n` for an unvalidated n could
        // point outside of the buffer, or wrap, before it is compared with last.
        std::uint64_t available = static_cast<std::uint64_t>(last - first);

        // Read blocks number
        if (available < sizeof(BlocksNumberType))
            throw util::exception("incorrect memory block");
        BlocksNumberType number_of_blocks;
        std::memcpy(&number_of_blocks, first, sizeof(number_of_blocks));
        first += sizeof(BlocksNumberType);
        available -= sizeof(BlocksNumberType);

        // Get block references pointer
        const std::uint64_t references_size =
            static_cast<std::uint64_t>(sizeof(BlockReference)) * number_of_blocks;
        if (available < references_size)
            throw util::exception("incorrect memory block");
        const BlockReference *references = reinterpret_cast<const BlockReference *>(first);
        first += static_cast<std::ptrdiff_t>(references_size);
        available -= references_size;

        // Read total data size. It follows a 4-byte count and the references, so it is not
        // necessarily 8-byte aligned: copy it out instead of dereferencing a cast pointer.
        if (available < sizeof(DataSizeType))
            throw util::exception("incorrect memory block");
        DataSizeType data_size;
        std::memcpy(&data_size, first, sizeof(data_size));
        first += sizeof(DataSizeType);
        available -= sizeof(DataSizeType);

        // Get data blocks begin and end iterators
        if (available < data_size)
            throw util::exception("incorrect memory block");

        // Everything is validated: publish the new state
        blocks_number = number_of_blocks;
        block_references = references;
        begin = first;
        end = first + static_cast<std::ptrdiff_t>(data_size);
    }

    /// Return value at the given index.
    /// An index that falls into a block beyond the last one yields a default-constructed
    /// ResultType.
    ResultType at(std::uint32_t index) const
    {
        // Get block external and internal indices
        const BlocksNumberType block_idx = index / (BLOCK_SIZE + 1);
        const std::uint32_t internal_idx = index % (BLOCK_SIZE + 1);

        if (block_idx >= blocks_number)
            return ResultType();

        // Get block first and last iterators; the last block ends where the data ends
        auto first = begin + block_references[block_idx].offset;
        auto last =
            block_idx + 1 == blocks_number ? end : begin + block_references[block_idx + 1].offset;

        // Narrow [first, last) from the whole block down to the requested item
        const GroupBlock block;
        block.ReadRefrencedBlock(block_references[block_idx], internal_idx, first, last);

        return adapt(first, last);
    }

  private:
    /// Builds a ResultType that is constructible from an iterator pair.
    template <class T = ResultType>
    typename std::enable_if<!std::is_same<T, StringView>::value, T>::type
    adapt(const ValueType *first, const ValueType *last) const
    {
        return ResultType(first, last);
    }

    /// Builds a StringView, which takes a pointer and a length instead.
    template <class T = ResultType>
    typename std::enable_if<std::is_same<T, StringView>::value, T>::type
    adapt(const ValueType *first, const ValueType *last) const
    {
        return ResultType(first, std::distance(first, last));
    }

    BlocksNumberType blocks_number;         // number of entries in block_references
    const BlockReference *block_references; // non-owning, points into the reset() buffer
    const ValueType *begin, *end;           // non-owning range of the (prefix, data) blocks
};
}
}
#endif // OSRM_INDEXED_DATA_HPP
+18 -6
View File
@@ -1,7 +1,7 @@
#ifndef OSRM_UTIL_NAME_TABLE_HPP
#define OSRM_UTIL_NAME_TABLE_HPP
#include "util/range_table.hpp"
#include "util/indexed_data.hpp"
#include "util/shared_memory_vector_wrapper.hpp"
#include "util/string_view.hpp"
#include "util/typedefs.hpp"
@@ -18,21 +18,33 @@ namespace util
// processing based on name indices.
class NameTable
{
private:
// FIXME should this use shared memory
util::RangeTable<16, false> m_name_table;
ShM<char, false>::vector m_names_char_list;
public:
using IndexedData = util::IndexedData<util::VariableGroupBlock<16, util::StringView>>;
using ResultType = IndexedData::ResultType;
using ValueType = IndexedData::ValueType;
NameTable() {}
// Read filename and store own data in m_buffer
NameTable(const std::string &filename);
// Keep pointers only in m_name_table and don't own data in m_buffer
void reset(ValueType *begin, ValueType *end);
// This class provides a limited view over all the string data we serialize out.
// The following functions are a subset of what is available.
// See the data facades for they provide full access to this serialized string data.
// (at time of writing this: get{Name,Ref,Pronunciation,Destinations}ForID(name_id);)
util::StringView GetNameForID(const NameID id) const;
util::StringView GetDestinationsForID(const NameID id) const;
util::StringView GetRefForID(const NameID id) const;
util::StringView GetPronunciationForID(const NameID id) const;
private:
using BufferType = std::unique_ptr<ValueType, std::function<void(void *)>>;
BufferType m_buffer;
IndexedData m_name_table;
};
} // namespace util
} // namespace osrm