// osrm-backend/include/util/range_table.hpp

#ifndef RANGE_TABLE_HPP
#define RANGE_TABLE_HPP
#include "storage/io.hpp"
#include "storage/shared_memory_ownership.hpp"
2017-03-29 08:07:03 -04:00
#include "util/integer_range.hpp"
#include "util/vector_view.hpp"
2014-06-15 05:04:10 -04:00
2014-06-11 04:22:34 -04:00
#include <array>
2016-03-23 08:04:23 -04:00
#include <fstream>
#include <utility>
2016-01-05 10:51:13 -05:00
namespace osrm
{
namespace util
{
/*
 * Forward declaration of RangeTable. It carries the default template arguments
 * and must precede the serialization function templates declared below: they
 * name RangeTable in their signatures and are later declared friends of the
 * class, so both the class and the functions have to be known up front.
 */
template <unsigned BLOCK_SIZE = 16, storage::Ownership Ownership = storage::Ownership::Container>
class RangeTable;

namespace serialization
{
template <unsigned BlockSize, storage::Ownership Ownership>
void write(storage::io::FileWriter &writer, const util::RangeTable<BlockSize, Ownership> &table);
2017-06-19 09:27:46 -04:00
template <unsigned BlockSize, storage::Ownership Ownership>
void read(storage::io::FileReader &reader, util::RangeTable<BlockSize, Ownership> &table);
}
/**
* Stores adjacent ranges in a compressed format.
*
* Maximum supported length of a range is 255.
*
* Note: BLOCK_SIZE is the number of differential encodoed values.
* But each block consists of an absolute value and BLOCK_SIZE differential values.
* So the effective block size is sizeof(unsigned) + BLOCK_SIZE.
*/
template <unsigned BLOCK_SIZE, storage::Ownership Ownership> class RangeTable
{
2015-01-27 11:44:46 -05:00
public:
using BlockT = std::array<unsigned char, BLOCK_SIZE>;
using BlockContainerT = util::ViewOrVector<BlockT, Ownership>;
using OffsetContainerT = util::ViewOrVector<unsigned, Ownership>;
2016-01-05 10:51:13 -05:00
using RangeT = range<unsigned>;
2014-06-24 07:26:27 -04:00
RangeTable() : sum_lengths(0) {}
2014-06-02 18:07:34 -04:00
// for loading from shared memory
2015-01-27 11:44:46 -05:00
explicit RangeTable(OffsetContainerT &external_offsets,
BlockContainerT &external_blocks,
const unsigned sum_lengths)
: sum_lengths(sum_lengths)
2014-06-02 18:07:34 -04:00
{
using std::swap;
swap(block_offsets, external_offsets);
swap(diff_blocks, external_blocks);
2014-06-02 18:07:34 -04:00
}
// construct table from length vector
2016-01-05 06:04:04 -05:00
template <typename VectorT> explicit RangeTable(const VectorT &lengths)
{
2016-03-23 08:04:23 -04:00
const unsigned number_of_blocks = [&lengths]() {
2014-06-07 12:12:10 -04:00
unsigned num = (lengths.size() + 1) / (BLOCK_SIZE + 1);
2014-06-09 08:16:06 -04:00
if ((lengths.size() + 1) % (BLOCK_SIZE + 1) != 0)
{
2014-06-07 12:12:10 -04:00
num += 1;
2014-06-09 08:16:06 -04:00
}
2014-06-07 12:12:10 -04:00
return num;
}();
block_offsets.reserve(number_of_blocks);
diff_blocks.reserve(number_of_blocks);
unsigned last_length = 0;
unsigned lengths_prefix_sum = 0;
unsigned block_idx = 0;
unsigned block_counter = 0;
BlockT block;
unsigned block_sum = 0;
for (const unsigned l : lengths)
{
// first entry of a block: encode absolute offset
if (block_idx == 0)
{
block_offsets.push_back(lengths_prefix_sum);
block_sum = 0;
}
else
{
block[block_idx - 1] = last_length;
block_sum += last_length;
}
2015-01-27 11:44:46 -05:00
BOOST_ASSERT((block_idx == 0 && block_offsets[block_counter] == lengths_prefix_sum) ||
lengths_prefix_sum == (block_offsets[block_counter] + block_sum));
// block is full
2014-06-15 05:04:10 -04:00
if (BLOCK_SIZE == block_idx)
{
diff_blocks.push_back(block);
block_counter++;
}
// we can only store strings with length 255
BOOST_ASSERT(l <= 255);
lengths_prefix_sum += l;
last_length = l;
block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
}
// Last block can't be finished because we didn't add the sentinel
2015-01-27 11:44:46 -05:00
BOOST_ASSERT(block_counter == (number_of_blocks - 1));
// one block missing: starts with guard value
2014-06-15 05:04:10 -04:00
if (0 == block_idx)
{
// the last value is used as sentinel
block_offsets.push_back(lengths_prefix_sum);
2014-06-14 09:23:56 -04:00
block_idx = 1;
last_length = 0;
}
2014-06-15 05:04:10 -04:00
while (0 != block_idx)
{
block[block_idx - 1] = last_length;
2014-06-09 08:16:06 -04:00
last_length = 0;
block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
}
diff_blocks.push_back(block);
2015-01-27 11:44:46 -05:00
BOOST_ASSERT(diff_blocks.size() == number_of_blocks &&
block_offsets.size() == number_of_blocks);
sum_lengths = lengths_prefix_sum;
}
inline RangeT GetRange(const unsigned id) const
{
BOOST_ASSERT(id < block_offsets.size() + diff_blocks.size() * BLOCK_SIZE);
// internal_idx 0 is implicitly stored in block_offsets[block_idx]
2014-06-07 12:12:10 -04:00
const unsigned internal_idx = id % (BLOCK_SIZE + 1);
const unsigned block_idx = id / (BLOCK_SIZE + 1);
BOOST_ASSERT(block_idx < diff_blocks.size());
unsigned begin_idx = 0;
unsigned end_idx = 0;
begin_idx = block_offsets[block_idx];
2015-01-27 11:44:46 -05:00
const BlockT &block = diff_blocks[block_idx];
if (internal_idx > 0)
{
begin_idx += PrefixSumAtIndex(internal_idx - 1, block);
}
// next index inside current block
if (internal_idx < BLOCK_SIZE)
{
// note internal_idx - 1 is the *current* index for uint8_blocks
end_idx = begin_idx + block[internal_idx];
}
else
{
BOOST_ASSERT(block_idx < block_offsets.size() - 1);
end_idx = block_offsets[block_idx + 1];
}
2016-03-23 08:04:23 -04:00
BOOST_ASSERT(end_idx <= sum_lengths);
BOOST_ASSERT(begin_idx <= end_idx);
2016-01-05 10:51:13 -05:00
return irange(begin_idx, end_idx);
}
2014-06-02 18:07:34 -04:00
2017-06-19 09:27:46 -04:00
friend void serialization::write<BLOCK_SIZE, Ownership>(storage::io::FileWriter &writer,
const RangeTable &table);
friend void serialization::read<BLOCK_SIZE, Ownership>(storage::io::FileReader &reader,
RangeTable &table);
2015-01-27 11:44:46 -05:00
private:
inline unsigned PrefixSumAtIndex(int index, const BlockT &block) const;
2014-06-02 18:07:34 -04:00
// contains offset for each differential block
OffsetContainerT block_offsets;
// blocks of differential encoded offsets, should be aligned
BlockContainerT diff_blocks;
unsigned sum_lengths;
};
template <unsigned BLOCK_SIZE, storage::Ownership Ownership>
2017-04-03 04:28:46 -04:00
unsigned RangeTable<BLOCK_SIZE, Ownership>::PrefixSumAtIndex(int index, const BlockT &block) const
{
// this loop looks inefficent, but a modern compiler
// will emit nice SIMD here, at least for sensible block sizes. (I checked.)
unsigned sum = 0;
2014-06-15 05:04:10 -04:00
for (int i = 0; i <= index; ++i)
{
sum += block[i];
2014-06-15 05:04:10 -04:00
}
return sum;
}
2016-01-05 10:51:13 -05:00
}
}
2015-01-27 11:44:46 -05:00
#endif // RANGE_TABLE_HPP