Use differential encoding for name offsets
Each name is represented as an integer range in a vector of chars. Instead of storing the absolute offset inside this array, we can store only the offset to the previous entry (the string size). By doing this we reduce the number of bytes needed to store an offset from 4 to 1 byte (if we set a maximum string length of 255). This is however slower, since the absolute offset must be computed on each query by summing up all previous lengths. To limit the performance impact we only do this for blocks of a certain size (16).
This commit is contained in:
Normal file
Normal file
@ -0,0 +1,255 @@
#ifndef __RANGE_TABLE_H__
#define __RANGE_TABLE_H__
#include <vector>
#if defined(__GNUC__) && defined(__SSE2__)
#define OSRM_USE_SSE
#include <xmmintrin.h>
* These pre-declarations are needed because parsing C++ is hard
* and otherwise the compiler gets confused.
template<unsigned BLOCK_SIZE=16> class RangeTable;
template<unsigned BLOCK_SIZE>
std::ostream& operator<<(std::ostream &out, const RangeTable<BLOCK_SIZE> &table);
template<unsigned BLOCK_SIZE>
std::istream& operator>>(std::istream &in, RangeTable<BLOCK_SIZE> &table);
* Stores adjacent ranges in a compressed format.
* Maximum supported length of a range is 255.
* Note: BLOCK_SIZE is the number of differentially encoded values.
* But each block consists of an absolute value and BLOCK_SIZE differential values.
* So the effective block size is sizeof(unsigned) + BLOCK_SIZE.
template<unsigned BLOCK_SIZE>
class RangeTable
// One block of BLOCK_SIZE one-byte lengths. The union exposes the same bytes
// either as individual unsigned chars or as 128-bit SSE lanes.
union BlockT
unsigned char uint8_blocks[BLOCK_SIZE];
// NOTE(review): the assert message talks about SSE being enabled, so this
// assert and the __m128i member are presumably wrapped in an OSRM_USE_SSE
// guard that is not visible in this view -- confirm against the real file.
static_assert(BLOCK_SIZE % 16 == 0,
"If SSE instructions are enabled, only multiples of 16 are supported as BLOCK_SIZE");
__m128i uint128_blocks[BLOCK_SIZE/16];
// contains offset for each differential block
std::vector<unsigned> block_offsets;
// blocks of differential encoded offsets, should be aligned
std::vector<BlockT> diff_blocks;
// total of all lengths: set from the running prefix sum in the constructor
// and written/read verbatim by the stream operators
unsigned sum_lengths;
// sum of block.uint8_blocks[0..index] (inclusive)
inline unsigned PrefixSumAtIndex(int index, const BlockT& block) const;
friend std::ostream& operator<< <>(std::ostream &out, const RangeTable &table);
friend std::istream& operator>> <>(std::istream &in, RangeTable &table);
// Leaves both vectors empty and does not initialise sum_lengths.
// NOTE(review): presumably meant to be filled via operator>> -- confirm.
RangeTable() {}
// construct table from length vector
// Builds the table from a list of range lengths.
// Entries are grouped into blocks of BLOCK_SIZE + 1 slots: slot 0 of a block
// is represented by an absolute offset, the remaining slots by one-byte lengths.
// NOTE(review): several structural lines (braces, else arms and the statements
// that append to block_offsets / diff_blocks) are not visible in this view, so
// the comments below describe only what the visible statements do.
// NOTE(review): lengths is taken by value, which copies the caller's vector;
// the visible code only iterates over it.
RangeTable(std::vector<unsigned> lengths)
// NOTE(review): the division counts lengths.size() + 1 entries (presumably the
// extra one is the trailing sentinel) but the remainder test below uses
// lengths.size(). E.g. BLOCK_SIZE = 16 with 16 lengths gives 17 / 17 = 1 and
// then 16 % 17 != 0 bumps it to 2; with 17 lengths it gives 18 / 17 = 1 and
// 17 % 17 == 0 leaves it at 1. Confirm against the asserts that use this value.
unsigned number_of_blocks = (lengths.size() + 1) / (BLOCK_SIZE + 1);
if (lengths.size() % (BLOCK_SIZE + 1) != 0)
number_of_blocks += 1;
// length of the previous entry; stored one slot late (see block_idx - 1 below)
unsigned last_length = 0;
// running sum of all lengths seen so far
unsigned lengths_prefix_sum = 0;
// slot inside the current block, in 0..BLOCK_SIZE
unsigned block_idx = 0;
unsigned block_counter = 0;
BlockT block;
// sum of the lengths stored in the current block so far (used by the assert)
unsigned block_sum = 0;
for (const unsigned l : lengths)
// first entry of a block: encode absolute offset
if (block_idx == 0)
block_sum = 0;
// slot k (k >= 1) stores the length of the entry at slot k - 1
block.uint8_blocks[block_idx - 1] = last_length;
block_sum += last_length;
// invariant: absolute block offset plus in-block sum equals the global prefix sum
BOOST_ASSERT((block_idx == 0 && block_offsets[block_counter] == lengths_prefix_sum)
|| lengths_prefix_sum == (block_offsets[block_counter]+block_sum));
// block is full
if (block_idx == BLOCK_SIZE)
// each length is stored in a single byte, so it must be at most 255
BOOST_ASSERT(l <= 255);
lengths_prefix_sum += l;
last_length = l;
// advance to the next slot, wrapping after BLOCK_SIZE + 1 entries
block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
// Last block can't be finished because we didn't add the sentinel
BOOST_ASSERT (block_counter == (number_of_blocks - 1));
// one block missing: starts with guard value
if (block_idx == 0)
// the last value is used as sentinel
// NOTE(review): this wraps modulo BLOCK_SIZE while every other increment
// wraps modulo BLOCK_SIZE + 1. If this line sits inside the if above,
// block_idx is 0 here and the result is 1 either way for BLOCK_SIZE > 1.
block_idx = (block_idx + 1) % BLOCK_SIZE;
// zero-fill the remaining length slots of the last block
while (block_idx != 0)
block.uint8_blocks[block_idx - 1] = 0;
block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
BOOST_ASSERT(diff_blocks.size() == number_of_blocks && block_offsets.size() == number_of_blocks);
sum_lengths = lengths_prefix_sum;
// Looks up entry `id` and writes the start of its range to begin_idx and the
// end of its range to end_idx.
// Bounds are only checked through BOOST_ASSERT.
inline void GetRange(const unsigned id, unsigned& begin_idx, unsigned& end_idx) const
BOOST_ASSERT(id < block_offsets.size() + diff_blocks.size() * BLOCK_SIZE);
// internal_idx 0 is implicitly stored in block_offsets[block_idx]
unsigned internal_idx = id % (BLOCK_SIZE + 1);
unsigned block_idx = id / (BLOCK_SIZE + 1);
BOOST_ASSERT(block_idx < diff_blocks.size());
// start from the absolute offset of the block ...
begin_idx = block_offsets[block_idx];
const BlockT& block = diff_blocks[block_idx];
// ... and add the lengths stored in slots 0..internal_idx-1 of this block
if (internal_idx > 0)
begin_idx += PrefixSumAtIndex(internal_idx - 1, block);
// next index inside current block
if (internal_idx < BLOCK_SIZE)
// note internal_idx - 1 is the *current* index for uint8_blocks
end_idx = begin_idx + block.uint8_blocks[internal_idx];
// NOTE(review): the two assignments to end_idx look like the two arms of an
// if/else whose else line is not visible in this view. The lines below cover
// the last slot of a block, whose end is the next block's absolute offset.
BOOST_ASSERT(block_idx < block_offsets.size() - 1);
end_idx = block_offsets[block_idx + 1];
//std::cout << block_idx << " / " << internal_idx << " : " << begin_idx << " - " << end_idx << std::endl;
// NOTE(review): begin_idx < sum_lengths rejects an empty range located at the
// very end (begin_idx == end_idx == sum_lengths), e.g. a trailing zero-length
// entry -- confirm whether such entries can occur.
BOOST_ASSERT(begin_idx < sum_lengths && end_idx <= sum_lengths);
// For blocksize 16 we can use SSE instructions
// SSE2 version of the in-block prefix sum for BLOCK_SIZE == 16.
// Returns block.uint8_blocks[0] + ... + block.uint8_blocks[index] (inclusive),
// the same value the scalar loop in the generic PrefixSumAtIndex computes.
// index is an int and is not range-checked; the code only makes sense for 0..15.
// NOTE(review): the explicit-specialisation header and any OSRM_USE_SSE guard
// around this definition are not visible in this view.
unsigned RangeTable<16>::PrefixSumAtIndex(int index, const BlockT& block) const
// lets the eight 16-bit lanes of the SSE register be read back individually
union OffsetT
unsigned short u16[8];
__m128i u128;
OffsetT offsets;
// converts lower 8 bytes to 8 shorts
offsets.u128 = _mm_unpacklo_epi8(block.uint128_blocks[0], _mm_set1_epi8(0));
// shift by one lane (2 bytes) and add: lane i = x[i] + x[i-1],
// so lanes 0 and 1 already hold their final prefix sums
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 2));
if (index < 2)
return offsets.u16[index];
// shift by two lanes (4 bytes) and add: lanes 0..3 are now final
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 4));
if (index < 4)
return offsets.u16[index];
// shift by four lanes (8 bytes) and add: all eight lanes are now final
// (the largest possible value, 8 * 255, fits in 16 bits)
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 8));
if (index < 8)
return offsets.u16[index];
// sum of the lower eight bytes, carried over into the upper half
unsigned temp = offsets.u16[7];
index -= 8;
// converts upper 8 bytes to 8 shorts
offsets.u128 = _mm_unpackhi_epi8(block.uint128_blocks[0], _mm_set1_epi8(0));
// same three shift-and-add steps as above, applied to the upper eight values
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 2));
if (index < 2)
return (temp + offsets.u16[index]);
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 4));
if (index < 4)
return (temp + offsets.u16[index]);
offsets.u128 = _mm_add_epi16(offsets.u128, _mm_slli_si128(offsets.u128, 8));
// no further check: index is expected to be in 4..7 here (original index 12..15)
return (temp + offsets.u16[index]);
// Scalar in-block prefix sum.
// Returns block.uint8_blocks[0] + ... + block.uint8_blocks[index] (inclusive);
// a negative index yields 0 because the loop body never runs.
// index is not checked against BLOCK_SIZE.
template<unsigned BLOCK_SIZE>
unsigned RangeTable<BLOCK_SIZE>::PrefixSumAtIndex(int index, const BlockT& block) const
unsigned sum = 0;
for (int i = 0; i <= index; i++)
sum += block.uint8_blocks[i];
return sum;
// Serialises a RangeTable as raw in-memory bytes, in this order:
// number of blocks, sum_lengths, the block offsets, then the diff blocks.
// Returns the stream that was passed in; no stream-state checks are visible.
// NOTE(review): the first argument of the last two write calls is truncated in
// this view (a cast with no operand); they presumably pass pointers to the
// underlying vector storage -- confirm against the real file.
template<unsigned BLOCK_SIZE>
std::ostream& operator<<(std::ostream &out, const RangeTable<BLOCK_SIZE> &table)
// write number of blocks
unsigned number_of_blocks = table.diff_blocks.size();
out.write((char *) &number_of_blocks, sizeof(unsigned));
// write total length
out.write((char *) &table.sum_lengths, sizeof(unsigned));
// write block offsets
out.write((char *), sizeof(unsigned) * table.block_offsets.size());
// write blocks
// NOTE(review): the byte count assumes each BlockT occupies exactly BLOCK_SIZE bytes
out.write((char *), BLOCK_SIZE * table.diff_blocks.size());
return out;
// Deserialises a RangeTable in the same field order that operator<< writes:
// number of blocks, sum_lengths, the block offsets, then the diff blocks.
// Returns the stream that was passed in.
// NOTE(review): the four read calls are garbled in this view (their callee and
// part of their arguments are missing), and no statement that sizes
// table.block_offsets / table.diff_blocks before reading is visible -- confirm
// against the real file.
template<unsigned BLOCK_SIZE>
std::istream& operator>>(std::istream &in, RangeTable<BLOCK_SIZE> &table)
// read number of blocks
unsigned number_of_blocks;
|||| *) &number_of_blocks, sizeof(unsigned));
// read total length
|||| *) &table.sum_lengths, sizeof(unsigned));
// read block offsets
|||| *), sizeof(unsigned) * number_of_blocks);
// read blocks
|||| *), BLOCK_SIZE * number_of_blocks);
return in;
@ -395,32 +395,19 @@ void ExtractionContainers::PrepareData(const std::string &output_file_name,
std::string name_file_streamName = (output_file_name + ".names");
boost::filesystem::ofstream name_file_stream(name_file_streamName, std::ios::binary);
// write number of names
const unsigned number_of_names = name_list.size() + 1;
name_file_stream.write((char *)&(number_of_names), sizeof(unsigned));
std::vector<unsigned> name_lengths;
for (const std::string &temp_string : name_list)
// compute total number of chars
unsigned total_number_of_chars = 0;
for (const std::string &temp_string : name_list)
total_number_of_chars += temp_string.length();
// write total number of chars
name_file_stream.write((char *)&(total_number_of_chars), sizeof(unsigned));
// write prefix sums
unsigned name_lengths_prefix_sum = 0;
for (const std::string &temp_string : name_list)
name_file_stream.write((char *)&(name_lengths_prefix_sum), sizeof(unsigned));
name_lengths_prefix_sum += temp_string.length();
// duplicate on purpose!
name_file_stream.write((char *)&(name_lengths_prefix_sum), sizeof(unsigned));
RangeTable<> table(name_lengths);
name_file_stream << table;
// write all chars consecutively
for (const std::string &temp_string : name_list)
const unsigned string_length = temp_string.length();
const unsigned string_length = std::min(temp_string.length(), 255lu);
name_file_stream.write(temp_string.c_str(), string_length);
Reference in New Issue
Block a user