Port .names file to tar
This commit is contained in:
@@ -3,9 +3,10 @@
|
||||
|
||||
/* A set of tools required for guidance in both pre and post-processing */
|
||||
|
||||
#include "extractor/name_table.hpp"
|
||||
#include "extractor/suffix_table.hpp"
|
||||
|
||||
#include "util/attributes.hpp"
|
||||
#include "util/name_table.hpp"
|
||||
#include "util/typedefs.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
@@ -216,7 +217,7 @@ inline bool requiresNameAnnounced(const std::string &from_name,
|
||||
|
||||
inline bool requiresNameAnnounced(const NameID from_name_id,
|
||||
const NameID to_name_id,
|
||||
const util::NameTable &name_table,
|
||||
const extractor::NameTable &name_table,
|
||||
const extractor::SuffixTable &suffix_table)
|
||||
{
|
||||
if (from_name_id == to_name_id)
|
||||
|
||||
+103
-95
@@ -1,12 +1,14 @@
|
||||
#ifndef OSRM_INDEXED_DATA_HPP
|
||||
#define OSRM_INDEXED_DATA_HPP
|
||||
|
||||
#include "storage/io.hpp"
|
||||
#include "storage/tar_fwd.hpp"
|
||||
|
||||
#include "util/exception.hpp"
|
||||
#include "util/string_view.hpp"
|
||||
#include "util/vector_view.hpp"
|
||||
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/function_output_iterator.hpp>
|
||||
|
||||
#include <array>
|
||||
#include <iterator>
|
||||
@@ -18,6 +20,23 @@ namespace osrm
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template <typename GroupBlockPolicy, storage::Ownership Ownership> struct IndexedDataImpl;
|
||||
}
|
||||
|
||||
namespace serialization
|
||||
{
|
||||
template <typename BlockPolicy, storage::Ownership Ownership>
|
||||
inline void read(storage::tar::FileReader &reader,
|
||||
const std::string &name,
|
||||
detail::IndexedDataImpl<BlockPolicy, Ownership> &index_data);
|
||||
|
||||
template <typename BlockPolicy, storage::Ownership Ownership>
|
||||
inline void write(storage::tar::FileWriter &writer,
|
||||
const std::string &name,
|
||||
const detail::IndexedDataImpl<BlockPolicy, Ownership> &index_data);
|
||||
}
|
||||
|
||||
template <int N, typename T = std::string> struct VariableGroupBlock
|
||||
{
|
||||
@@ -85,11 +104,11 @@ template <int N, typename T = std::string> struct VariableGroupBlock
|
||||
/// of prefix length. sum(descriptor) equals the block
|
||||
/// prefix length.
|
||||
/// Returns the block prefix length.
|
||||
template <typename Offset, typename OffsetIterator>
|
||||
Offset WriteBlockReference(storage::io::FileWriter &out,
|
||||
Offset data_offset,
|
||||
OffsetIterator first,
|
||||
OffsetIterator last) const
|
||||
template <typename Offset, typename OffsetIterator, typename OutIter>
|
||||
OutIter WriteBlockReference(OffsetIterator first,
|
||||
OffsetIterator last,
|
||||
Offset &data_offset,
|
||||
OutIter out) const
|
||||
{
|
||||
BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max());
|
||||
|
||||
@@ -106,9 +125,9 @@ template <int N, typename T = std::string> struct VariableGroupBlock
|
||||
prefix_length += byte_length;
|
||||
}
|
||||
|
||||
out.WriteFrom(refernce);
|
||||
|
||||
return prefix_length;
|
||||
data_offset += prefix_length;
|
||||
*out++ = refernce;
|
||||
return out;
|
||||
}
|
||||
|
||||
/// Write a block prefix that is an array of variable encoded data lengths:
|
||||
@@ -118,9 +137,8 @@ template <int N, typename T = std::string> struct VariableGroupBlock
|
||||
/// 65536..16777215 is 3 bytes.
|
||||
/// [first..last] is an inclusive range of block data.
|
||||
/// The length of the last item in the block is not stored.
|
||||
template <typename OffsetIterator>
|
||||
void
|
||||
WriteBlockPrefix(storage::io::FileWriter &out, OffsetIterator first, OffsetIterator last) const
|
||||
template <typename OffsetIterator, typename OutByteIter>
|
||||
OutByteIter WriteBlockPrefix(OffsetIterator first, OffsetIterator last, OutByteIter out) const
|
||||
{
|
||||
for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next)
|
||||
{
|
||||
@@ -131,8 +149,9 @@ template <int N, typename T = std::string> struct VariableGroupBlock
|
||||
|
||||
// Here, we're only writing a few bytes from the 4-byte std::uint32_t,
|
||||
// so we need to cast to (char *)
|
||||
out.WriteFrom((const char *)&data_length, byte_length);
|
||||
out = std::copy_n((const char *)&data_length, byte_length, out);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/// Advances the range to an item stored in the referenced block.
|
||||
@@ -178,36 +197,39 @@ template <int N, typename T = std::string> struct FixedGroupBlock
|
||||
|
||||
/// Write a block reference {offset}, where offset is a global block offset
|
||||
/// Returns the fixed block prefix length.
|
||||
template <typename Offset, typename OffsetIterator>
|
||||
Offset WriteBlockReference(storage::io::FileWriter &out,
|
||||
Offset data_offset,
|
||||
OffsetIterator,
|
||||
OffsetIterator) const
|
||||
template <typename Offset, typename OffsetIterator, typename OutIterator>
|
||||
OutIterator
|
||||
WriteBlockReference(OffsetIterator, OffsetIterator, Offset &data_offset, OutIterator out) const
|
||||
{
|
||||
BOOST_ASSERT(data_offset <= std::numeric_limits<decltype(BlockReference::offset)>::max());
|
||||
|
||||
BlockReference refernce{static_cast<decltype(BlockReference::offset)>(data_offset)};
|
||||
out.WriteFrom(refernce);
|
||||
data_offset += BLOCK_SIZE;
|
||||
*out++ = refernce;
|
||||
|
||||
return BLOCK_SIZE;
|
||||
return out;
|
||||
}
|
||||
|
||||
/// Write a fixed length block prefix.
|
||||
template <typename OffsetIterator>
|
||||
void
|
||||
WriteBlockPrefix(storage::io::FileWriter &out, OffsetIterator first, OffsetIterator last) const
|
||||
template <typename OffsetIterator, typename OutByteIter>
|
||||
OutByteIter WriteBlockPrefix(OffsetIterator first, OffsetIterator last, OutByteIter out) const
|
||||
{
|
||||
std::uint32_t index = 0;
|
||||
std::array<ValueType, BLOCK_SIZE> block_prefix;
|
||||
constexpr std::size_t MAX_LENGTH = std::numeric_limits<std::make_unsigned_t<ValueType>>::max();
|
||||
|
||||
auto index = 0;
|
||||
std::array<ValueType, BLOCK_SIZE> prefix;
|
||||
|
||||
for (OffsetIterator curr = first, next = std::next(first); curr != last; ++curr, ++next)
|
||||
{
|
||||
const std::uint32_t data_length = *next - *curr;
|
||||
if (data_length >= 0x100)
|
||||
throw util::exception(boost::format("too large data length %1%") % data_length);
|
||||
if (data_length > MAX_LENGTH)
|
||||
throw util::exception(boost::format("too large data length %1% > %2%") % data_length % MAX_LENGTH);
|
||||
|
||||
block_prefix[index++] = static_cast<ValueType>(data_length);
|
||||
prefix[index++] = data_length;
|
||||
}
|
||||
out.WriteFrom(block_prefix.data(), block_prefix.size());
|
||||
|
||||
out = std::copy_n((const char *)prefix.data(), sizeof(ValueType)*BLOCK_SIZE, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
/// Advances the range to an item stored in the referenced block.
|
||||
@@ -233,28 +255,31 @@ template <int N, typename T = std::string> struct FixedGroupBlock
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GroupBlock> struct IndexedData
|
||||
namespace detail
|
||||
{
|
||||
static constexpr std::uint32_t BLOCK_SIZE = GroupBlock::BLOCK_SIZE;
|
||||
template <typename GroupBlockPolicy, storage::Ownership Ownership> struct IndexedDataImpl
|
||||
{
|
||||
static constexpr std::uint32_t BLOCK_SIZE = GroupBlockPolicy::BLOCK_SIZE;
|
||||
|
||||
using BlocksNumberType = std::uint32_t;
|
||||
using DataSizeType = std::uint64_t;
|
||||
|
||||
using BlockReference = typename GroupBlock::BlockReference;
|
||||
using ResultType = typename GroupBlock::ResultType;
|
||||
using ValueType = typename GroupBlock::ValueType;
|
||||
using BlockReference = typename GroupBlockPolicy::BlockReference;
|
||||
using ResultType = typename GroupBlockPolicy::ResultType;
|
||||
using ValueType = typename GroupBlockPolicy::ValueType;
|
||||
|
||||
static_assert(sizeof(ValueType) == 1, "data basic type must char");
|
||||
|
||||
IndexedData() : blocks_number{0}, block_references{nullptr}, begin{nullptr}, end{nullptr} {}
|
||||
IndexedDataImpl() = default;
|
||||
IndexedDataImpl(util::vector_view<BlockReference> blocks_, util::vector_view<ValueType> values_)
|
||||
: blocks(std::move(blocks_)), values(std::move(values_))
|
||||
{
|
||||
}
|
||||
|
||||
bool empty() const { return blocks_number == 0; }
|
||||
bool empty() const { return blocks.empty(); }
|
||||
|
||||
template <typename OffsetIterator, typename DataIterator>
|
||||
void write(storage::io::FileWriter &out,
|
||||
OffsetIterator first,
|
||||
OffsetIterator last,
|
||||
DataIterator data) const
|
||||
IndexedDataImpl(OffsetIterator first, OffsetIterator last, DataIterator data)
|
||||
{
|
||||
static_assert(sizeof(typename DataIterator::value_type) == 1, "data basic type must char");
|
||||
|
||||
@@ -268,69 +293,37 @@ template <typename GroupBlock> struct IndexedData
|
||||
const BlocksNumberType number_of_blocks =
|
||||
number_of_elements == 0 ? 0
|
||||
: 1 + (std::distance(first, sentinel) - 1) / (BLOCK_SIZE + 1);
|
||||
out.WriteFrom(number_of_blocks);
|
||||
blocks.resize(number_of_blocks);
|
||||
|
||||
// Write block references and compute the total data size that includes prefix and data
|
||||
const GroupBlock block;
|
||||
const GroupBlockPolicy block;
|
||||
|
||||
auto block_iter = blocks.begin();
|
||||
DataSizeType data_size = 0;
|
||||
for (OffsetIterator curr = first, next = first; next != sentinel; curr = next)
|
||||
{
|
||||
std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel)));
|
||||
data_size += block.WriteBlockReference(out, data_size, curr, next);
|
||||
block_iter = block.WriteBlockReference(curr, next, data_size, block_iter);
|
||||
std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel)));
|
||||
data_size += *next - *curr;
|
||||
}
|
||||
|
||||
// Write the total data size
|
||||
out.WriteFrom(data_size);
|
||||
|
||||
values.resize(data_size);
|
||||
auto values_byte_iter = reinterpret_cast<char *>(values.data());
|
||||
// Write data blocks that are (prefix, data)
|
||||
for (OffsetIterator curr = first, next = first; next != sentinel; curr = next)
|
||||
{
|
||||
std::advance(next, std::min<diff_type>(BLOCK_SIZE, std::distance(next, sentinel)));
|
||||
block.WriteBlockPrefix(out, curr, next);
|
||||
values_byte_iter = block.WriteBlockPrefix(curr, next, values_byte_iter);
|
||||
std::advance(next, std::min<diff_type>(1, std::distance(next, sentinel)));
|
||||
std::for_each(
|
||||
data + *curr, data + *next, [&out](const auto &element) { out.WriteFrom(element); });
|
||||
|
||||
auto to_bytes = [&](const auto &data) {
|
||||
values_byte_iter = std::copy_n(&data, sizeof(ValueType), values_byte_iter);
|
||||
};
|
||||
std::copy(data + *curr, data + *next, boost::make_function_output_iterator(to_bytes));
|
||||
}
|
||||
}
|
||||
|
||||
/// Set internal pointers from the buffer [first, last).
|
||||
/// Data buffer pointed to by ptr must exist during IndexedData life-time.
|
||||
/// No ownership is transferred.
|
||||
void reset(const ValueType *first, const ValueType *last)
|
||||
{
|
||||
// Read blocks number
|
||||
if (first + sizeof(BlocksNumberType) > last)
|
||||
throw util::exception("incorrect memory block");
|
||||
|
||||
blocks_number = *reinterpret_cast<const BlocksNumberType *>(first);
|
||||
first += sizeof(BlocksNumberType);
|
||||
|
||||
// Get block references pointer
|
||||
if (first + sizeof(BlockReference) * blocks_number > last)
|
||||
throw util::exception("incorrect memory block");
|
||||
|
||||
block_references = reinterpret_cast<const BlockReference *>(first);
|
||||
first += sizeof(BlockReference) * blocks_number;
|
||||
|
||||
// Read total data size
|
||||
if (first + sizeof(DataSizeType) > last)
|
||||
throw util::exception("incorrect memory block");
|
||||
|
||||
auto data_size = *reinterpret_cast<const DataSizeType *>(first);
|
||||
first += sizeof(DataSizeType);
|
||||
|
||||
// Get data blocks begin and end iterators
|
||||
begin = reinterpret_cast<const ValueType *>(first);
|
||||
first += sizeof(ValueType) * data_size;
|
||||
|
||||
if (first > last)
|
||||
throw util::exception("incorrect memory block");
|
||||
|
||||
end = reinterpret_cast<const ValueType *>(first);
|
||||
}
|
||||
|
||||
// Return value at the given index
|
||||
ResultType at(std::uint32_t index) const
|
||||
{
|
||||
@@ -338,20 +331,29 @@ template <typename GroupBlock> struct IndexedData
|
||||
const BlocksNumberType block_idx = index / (BLOCK_SIZE + 1);
|
||||
const std::uint32_t internal_idx = index % (BLOCK_SIZE + 1);
|
||||
|
||||
if (block_idx >= blocks_number)
|
||||
if (block_idx >= blocks.size())
|
||||
return ResultType();
|
||||
|
||||
// Get block first and last iterators
|
||||
auto first = begin + block_references[block_idx].offset;
|
||||
auto last =
|
||||
block_idx + 1 == blocks_number ? end : begin + block_references[block_idx + 1].offset;
|
||||
auto first = values.begin() + blocks[block_idx].offset;
|
||||
auto last = block_idx + 1 == blocks.size() ? values.end()
|
||||
: values.begin() + blocks[block_idx + 1].offset;
|
||||
|
||||
const GroupBlock block;
|
||||
block.ReadRefrencedBlock(block_references[block_idx], internal_idx, first, last);
|
||||
const GroupBlockPolicy block;
|
||||
block.ReadRefrencedBlock(blocks[block_idx], internal_idx, first, last);
|
||||
|
||||
return adapt(first, last);
|
||||
return adapt(&*first, &*last);
|
||||
}
|
||||
|
||||
friend void serialization::read<GroupBlockPolicy, Ownership>(storage::tar::FileReader &reader,
|
||||
const std::string &name,
|
||||
IndexedDataImpl &index_data);
|
||||
|
||||
friend void
|
||||
serialization::write<GroupBlockPolicy, Ownership>(storage::tar::FileWriter &writer,
|
||||
const std::string &name,
|
||||
const IndexedDataImpl &index_data);
|
||||
|
||||
private:
|
||||
template <class T = ResultType>
|
||||
typename std::enable_if<!std::is_same<T, StringView>::value, T>::type
|
||||
@@ -367,10 +369,16 @@ template <typename GroupBlock> struct IndexedData
|
||||
return ResultType(first, std::distance(first, last));
|
||||
}
|
||||
|
||||
BlocksNumberType blocks_number;
|
||||
const BlockReference *block_references;
|
||||
const ValueType *begin, *end;
|
||||
template <typename T> using Vector = util::ViewOrVector<T, Ownership>;
|
||||
Vector<BlockReference> blocks;
|
||||
Vector<ValueType> values;
|
||||
};
|
||||
}
|
||||
|
||||
template <typename GroupBlockPolicy>
|
||||
using IndexedData = detail::IndexedDataImpl<GroupBlockPolicy, storage::Ownership::Container>;
|
||||
template <typename GroupBlockPolicy>
|
||||
using IndexedDataView = detail::IndexedDataImpl<GroupBlockPolicy, storage::Ownership::View>;
|
||||
}
|
||||
}
|
||||
#endif // OSRM_INDEXED_DATA_HPP
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
#ifndef OSRM_UTIL_NAME_TABLE_HPP
|
||||
#define OSRM_UTIL_NAME_TABLE_HPP
|
||||
|
||||
#include "util/indexed_data.hpp"
|
||||
#include "util/string_view.hpp"
|
||||
#include "util/typedefs.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace osrm
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
// While this could, theoretically, hold any names in the fitting format,
|
||||
// the NameTable allows access to a part of the Datafacade to allow
|
||||
// processing based on name indices.
|
||||
// Lookup table for name-related strings addressed by NameID.
// Backed by util::IndexedData; can either own a buffer loaded from a file
// (filename constructor) or reference external memory (reset).
class NameTable
|
||||
{
|
||||
public:
|
||||
// Underlying storage: IndexedData over VariableGroupBlock<16, util::StringView>.
using IndexedData = util::IndexedData<util::VariableGroupBlock<16, util::StringView>>;
|
||||
using ResultType = IndexedData::ResultType;
|
||||
using ValueType = IndexedData::ValueType;
|
||||
|
||||
// Creates an empty table; no buffer is attached.
NameTable() {}
|
||||
|
||||
// Read filename and store own data in m_buffer
|
||||
NameTable(const std::string &filename);
|
||||
|
||||
// Keep pointers only in m_name_table and don't own data in m_buffer
|
||||
void reset(ValueType *begin, ValueType *end);
|
||||
|
||||
// This class provides a limited view over all the string data we serialize out.
|
||||
// The following functions are a subset of what is available.
|
||||
// See the data facades, which provide full access to this serialized string data.
|
||||
util::StringView GetNameForID(const NameID id) const;
|
||||
util::StringView GetDestinationsForID(const NameID id) const;
|
||||
util::StringView GetExitsForID(const NameID id) const;
|
||||
util::StringView GetRefForID(const NameID id) const;
|
||||
util::StringView GetPronunciationForID(const NameID id) const;
|
||||
|
||||
private:
|
||||
// Owning pointer to raw table bytes with a type-erased deleter.
using BufferType = std::unique_ptr<ValueType, std::function<void(void *)>>;
|
||||
|
||||
// Holds the data when the table was constructed from a file (see constructor comment).
BufferType m_buffer;
|
||||
// Indexed accessor over the name data; presumably what the Get*ForID lookups
// read from - their definitions are not visible here, TODO confirm.
IndexedData m_name_table;
|
||||
};
|
||||
} // namespace util
|
||||
} // namespace osrm
|
||||
|
||||
#endif // OSRM_UTIL_NAME_TABLE_HPP
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "util/packed_vector.hpp"
|
||||
#include "util/range_table.hpp"
|
||||
#include "util/static_graph.hpp"
|
||||
#include "util/indexed_data.hpp"
|
||||
|
||||
#include "storage/io.hpp"
|
||||
#include "storage/serialization.hpp"
|
||||
@@ -168,6 +169,22 @@ inline void write(storage::tar::FileWriter &writer,
|
||||
writer.WriteStreaming<typename std::remove_reference_t<decltype(graph)>::Edge>(
|
||||
name + "/edge_list", graph.edge_list.begin(), graph.number_of_edges);
|
||||
}
|
||||
|
||||
// Reads an IndexedDataImpl through the given tar FileReader.
// The `blocks` member is read from the entry `name + "/blocks"` and the
// `values` member from the entry `name + "/values"`, both via
// storage::serialization::read.
template <typename BlockPolicy, storage::Ownership Ownership>
|
||||
inline void
|
||||
read(storage::tar::FileReader &reader, const std::string &name, detail::IndexedDataImpl<BlockPolicy, Ownership> &index_data)
|
||||
{
|
||||
storage::serialization::read(reader, name + "/blocks", index_data.blocks);
|
||||
storage::serialization::read(reader, name + "/values", index_data.values);
|
||||
}
|
||||
|
||||
// Writes an IndexedDataImpl through the given tar FileWriter.
// The `blocks` member is written to the entry `name + "/blocks"` and the
// `values` member to the entry `name + "/values"`, both via
// storage::serialization::write. Entry names mirror those used by read().
template <typename BlockPolicy, storage::Ownership Ownership>
|
||||
inline void write(storage::tar::FileWriter &writer,
|
||||
const std::string &name, const detail::IndexedDataImpl<BlockPolicy, Ownership> &index_data)
|
||||
{
|
||||
storage::serialization::write(writer, name + "/blocks", index_data.blocks);
|
||||
storage::serialization::write(writer, name + "/values", index_data.values);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user