mmap tarfiles directly when mmapping is enabled, instead of copying data into a separate mmapped block

Co-authored-by: Kajari Ghosh <ghoshkaj@gmail.com>
This commit is contained in:
Daniel Patterson 2018-10-26 23:48:51 -07:00
parent d80318f8ea
commit 2f9cb44368
No known key found for this signature in database
GPG Key ID: 19C12BE1725A028B
14 changed files with 309 additions and 219 deletions

View File

@ -10,6 +10,7 @@
#include <boost/iostreams/device/mapped_file.hpp> #include <boost/iostreams/device/mapped_file.hpp>
#include <memory> #include <memory>
#include <string>
namespace osrm namespace osrm
{ {
@ -24,8 +25,7 @@ namespace datafacade
class MMapMemoryAllocator : public ContiguousBlockAllocator class MMapMemoryAllocator : public ContiguousBlockAllocator
{ {
public: public:
explicit MMapMemoryAllocator(const storage::StorageConfig &config, explicit MMapMemoryAllocator(const storage::StorageConfig &config);
const boost::filesystem::path &memory_file);
~MMapMemoryAllocator() override final; ~MMapMemoryAllocator() override final;
// interface to give access to the datafacades // interface to give access to the datafacades
@ -33,8 +33,8 @@ class MMapMemoryAllocator : public ContiguousBlockAllocator
private: private:
storage::SharedDataIndex index; storage::SharedDataIndex index;
util::vector_view<char> mapped_memory; std::vector<boost::iostreams::mapped_file> mapped_memory_files;
boost::iostreams::mapped_file mapped_memory_file; std::string rtree_filename;
}; };
} // namespace datafacade } // namespace datafacade

View File

@ -32,9 +32,8 @@ class ExternalProvider final : public DataFacadeProvider<AlgorithmT, FacadeT>
public: public:
using Facade = typename DataFacadeProvider<AlgorithmT, FacadeT>::Facade; using Facade = typename DataFacadeProvider<AlgorithmT, FacadeT>::Facade;
ExternalProvider(const storage::StorageConfig &config, ExternalProvider(const storage::StorageConfig &config)
const boost::filesystem::path &memory_file) : facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config))
: facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config, memory_file))
{ {
} }

View File

@ -16,10 +16,15 @@ struct Block
{ {
std::uint64_t num_entries; std::uint64_t num_entries;
std::uint64_t byte_size; std::uint64_t byte_size;
std::uint64_t offset;
Block() : num_entries(0), byte_size(0) {} Block() : num_entries(0), byte_size(0), offset(0) {}
Block(std::uint64_t num_entries, std::uint64_t byte_size, std::uint64_t offset)
: num_entries(num_entries), byte_size(byte_size), offset(offset)
{
}
Block(std::uint64_t num_entries, std::uint64_t byte_size) Block(std::uint64_t num_entries, std::uint64_t byte_size)
: num_entries(num_entries), byte_size(byte_size) : num_entries(num_entries), byte_size(byte_size), offset(0)
{ {
} }
}; };
@ -29,7 +34,7 @@ using NamedBlock = std::tuple<std::string, Block>;
template <typename T> Block make_block(uint64_t num_entries) template <typename T> Block make_block(uint64_t num_entries)
{ {
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer"); static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
return Block{num_entries, sizeof(T) * num_entries}; return Block{num_entries, sizeof(T) * num_entries, 0};
} }
} }
} }

View File

@ -284,9 +284,9 @@ template <typename K, typename V> void write(io::BufferWriter &writer, const std
} }
} }
inline void read(io::BufferReader &reader, DataLayout &layout) { read(reader, layout.blocks); } inline void read(io::BufferReader &reader, BaseDataLayout &layout) { read(reader, layout.blocks); }
inline void write(io::BufferWriter &writer, const DataLayout &layout) inline void write(io::BufferWriter &writer, const BaseDataLayout &layout)
{ {
write(writer, layout.blocks); write(writer, layout.blocks);
} }

View File

@ -5,6 +5,7 @@
#include <boost/function_output_iterator.hpp> #include <boost/function_output_iterator.hpp>
#include <type_traits>
#include <unordered_map> #include <unordered_map>
namespace osrm namespace osrm
@ -19,8 +20,8 @@ class SharedDataIndex
public: public:
struct AllocatedRegion struct AllocatedRegion
{ {
char *memory_ptr; void *memory_ptr;
DataLayout layout; std::unique_ptr<BaseDataLayout> layout;
}; };
SharedDataIndex() = default; SharedDataIndex() = default;
@ -29,7 +30,7 @@ class SharedDataIndex
// Build mapping from block name to region // Build mapping from block name to region
for (auto index : util::irange<std::uint32_t>(0, regions.size())) for (auto index : util::irange<std::uint32_t>(0, regions.size()))
{ {
regions[index].layout.List("", regions[index].layout->List("",
boost::make_function_output_iterator([&](const auto &name) { boost::make_function_output_iterator([&](const auto &name) {
block_to_region[name] = index; block_to_region[name] = index;
})); }));
@ -40,32 +41,44 @@ class SharedDataIndex
{ {
for (const auto &region : regions) for (const auto &region : regions)
{ {
region.layout.List(name_prefix, out); region.layout->List(name_prefix, out);
} }
} }
template <typename T> auto GetBlockPtr(const std::string &name) const template <typename T> auto GetBlockPtr(const std::string &name) const
{ {
#if !defined(__GNUC__) || (__GNUC__ > 4)
// is_trivially_copyable only exists in GCC >=5
static_assert(std::is_trivially_copyable<T>::value,
"Block-based data must be a trivially copyable type");
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
#endif
const auto &region = GetBlockRegion(name); const auto &region = GetBlockRegion(name);
return region.layout.GetBlockPtr<T>(region.memory_ptr, name); return reinterpret_cast<T *>(region.layout->GetBlockPtr(region.memory_ptr, name));
} }
template <typename T> auto GetBlockPtr(const std::string &name) template <typename T> auto GetBlockPtr(const std::string &name)
{ {
#if !defined(__GNUC__) || (__GNUC__ > 4)
// is_trivially_copyable only exists in GCC >=5
static_assert(std::is_trivially_copyable<T>::value,
"Block-based data must be a trivially copyable type");
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
#endif
const auto &region = GetBlockRegion(name); const auto &region = GetBlockRegion(name);
return region.layout.GetBlockPtr<T>(region.memory_ptr, name); return reinterpret_cast<T *>(region.layout->GetBlockPtr(region.memory_ptr, name));
} }
std::size_t GetBlockEntries(const std::string &name) const std::size_t GetBlockEntries(const std::string &name) const
{ {
const auto &region = GetBlockRegion(name); const auto &region = GetBlockRegion(name);
return region.layout.GetBlockEntries(name); return region.layout->GetBlockEntries(name);
} }
std::size_t GetBlockSize(const std::string &name) const std::size_t GetBlockSize(const std::string &name) const
{ {
const auto &region = GetBlockRegion(name); const auto &region = GetBlockRegion(name);
return region.layout.GetBlockSize(name); return region.layout->GetBlockSize(name);
} }
private: private:

View File

@ -20,12 +20,12 @@ namespace osrm
namespace storage namespace storage
{ {
class DataLayout; class BaseDataLayout;
namespace serialization namespace serialization
{ {
inline void read(io::BufferReader &reader, DataLayout &layout); inline void read(io::BufferReader &reader, BaseDataLayout &layout);
inline void write(io::BufferWriter &writer, const DataLayout &layout); inline void write(io::BufferWriter &writer, const BaseDataLayout &layout);
} // namespace serialization } // namespace serialization
namespace detail namespace detail
@ -54,44 +54,28 @@ inline std::string trimName(const std::string &name_prefix, const std::string &n
} }
} // namespace detail } // namespace detail
class DataLayout class BaseDataLayout
{ {
public: public:
DataLayout() : blocks{} {} virtual ~BaseDataLayout() = default;
inline void SetBlock(const std::string &name, Block block) { blocks[name] = std::move(block); } inline void SetBlock(const std::string &name, Block block) { blocks[name] = std::move(block); }
inline uint64_t GetBlockEntries(const std::string &name) const inline std::uint64_t GetBlockEntries(const std::string &name) const
{ {
return GetBlock(name).num_entries; return GetBlock(name).num_entries;
} }
inline uint64_t GetBlockSize(const std::string &name) const { return GetBlock(name).byte_size; } inline std::uint64_t GetBlockSize(const std::string &name) const
{
return GetBlock(name).byte_size;
}
inline bool HasBlock(const std::string &name) const inline bool HasBlock(const std::string &name) const
{ {
return blocks.find(name) != blocks.end(); return blocks.find(name) != blocks.end();
} }
inline uint64_t GetSizeOfLayout() const
{
uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT;
}
return result;
}
template <typename T> inline T *GetBlockPtr(char *shared_memory, const std::string &name) const
{
static_assert(BLOCK_ALIGNMENT % std::alignment_of<T>::value == 0,
"Datatype does not fit alignment constraints.");
char *ptr = (char *)GetAlignedBlockPtr(shared_memory, name);
return (T *)ptr;
}
// Depending on the name prefix this function either lists all blocks with the same prefix // Depending on the name prefix this function either lists all blocks with the same prefix
// or all entries in the sub-directory. // or all entries in the sub-directory.
// '/ch/edge' -> '/ch/edge_filter/0/blocks', '/ch/edge_filter/1/blocks' // '/ch/edge' -> '/ch/edge_filter/0/blocks', '/ch/edge_filter/1/blocks'
@ -115,10 +99,10 @@ class DataLayout
} }
} }
private: virtual inline void *GetBlockPtr(void *base_ptr, const std::string &name) const = 0;
friend void serialization::read(io::BufferReader &reader, DataLayout &layout); virtual inline std::uint64_t GetSizeOfLayout() const = 0;
friend void serialization::write(io::BufferWriter &writer, const DataLayout &layout);
protected:
const Block &GetBlock(const std::string &name) const const Block &GetBlock(const std::string &name) const
{ {
auto iter = blocks.find(name); auto iter = blocks.find(name);
@ -130,10 +114,42 @@ class DataLayout
return iter->second; return iter->second;
} }
friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout);
friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout);
std::map<std::string, Block> blocks;
};
class ContiguousDataLayout final : public BaseDataLayout
{
public:
inline std::uint64_t GetSizeOfLayout() const override final
{
std::uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT;
}
return result;
}
inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final
{
// TODO: re-enable this alignment checking somehow
// static_assert(BLOCK_ALIGNMENT % std::alignment_of<T>::value == 0,
// "Datatype does not fit alignment constraints.");
return GetAlignedBlockPtr(base_ptr, name);
}
private:
friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout);
friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout);
// Fit aligned storage in buffer to 64 bytes to conform with AVX 512 types // Fit aligned storage in buffer to 64 bytes to conform with AVX 512 types
inline void *align(void *&ptr) const noexcept inline void *align(void *&ptr) const noexcept
{ {
const auto intptr = reinterpret_cast<uintptr_t>(ptr); const auto intptr = reinterpret_cast<std::uintptr_t>(ptr);
const auto aligned = (intptr - 1u + BLOCK_ALIGNMENT) & -BLOCK_ALIGNMENT; const auto aligned = (intptr - 1u + BLOCK_ALIGNMENT) & -BLOCK_ALIGNMENT;
return ptr = reinterpret_cast<void *>(aligned); return ptr = reinterpret_cast<void *>(aligned);
} }
@ -157,7 +173,27 @@ class DataLayout
} }
static constexpr std::size_t BLOCK_ALIGNMENT = 64; static constexpr std::size_t BLOCK_ALIGNMENT = 64;
std::map<std::string, Block> blocks; };
// Layout whose blocks are located by the per-block `offset` stored in each
// Block, rather than by packing aligned blocks one after another.
// NOTE(review): the name suggests the offsets are positions inside a tar
// file that has been mapped into memory — confirm against the code that
// fills in Block::offset.
class TarDataLayout final : public BaseDataLayout
{
public:
// Returns the sum of the byte sizes of all registered blocks.
// No alignment padding is added per block.
inline std::uint64_t GetSizeOfLayout() const override final
{
std::uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first);
}
return result;
}
// Returns the address of block `name`, computed as `base_ptr` plus the
// block's stored offset (in bytes). The arithmetic is done on the integer
// value of the pointer because `base_ptr` is a `void *`.
// No alignment adjustment is applied to the resulting address.
inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final
{
auto offset = GetBlock(name).offset;
const auto offset_address = reinterpret_cast<std::uintptr_t>(base_ptr) + offset;
return reinterpret_cast<void *>(offset_address);
}
}; };
struct SharedRegion struct SharedRegion

View File

@ -35,22 +35,28 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <boost/filesystem/path.hpp> #include <boost/filesystem/path.hpp>
#include <string> #include <string>
#include <vector>
namespace osrm namespace osrm
{ {
namespace storage namespace storage
{ {
void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout);
class Storage class Storage
{ {
public: public:
Storage(StorageConfig config); Storage(StorageConfig config);
int Run(int max_wait, const std::string &name, bool only_metric); int Run(int max_wait, const std::string &name, bool only_metric);
void PopulateStaticLayout(DataLayout &layout);
void PopulateUpdatableLayout(DataLayout &layout);
void PopulateStaticData(const SharedDataIndex &index); void PopulateStaticData(const SharedDataIndex &index);
void PopulateUpdatableData(const SharedDataIndex &index); void PopulateUpdatableData(const SharedDataIndex &index);
void PopulateLayout(storage::BaseDataLayout &layout,
const std::vector<std::pair<bool, boost::filesystem::path>> &files);
std::string PopulateLayoutWithRTree(storage::BaseDataLayout &layout);
std::vector<std::pair<bool, boost::filesystem::path>> GetUpdatableFiles();
std::vector<std::pair<bool, boost::filesystem::path>> GetStaticFiles();
private: private:
StorageConfig config; StorageConfig config;

View File

@ -15,14 +15,14 @@ namespace util
namespace detail namespace detail
{ {
template <typename T, typename RegionT> template <typename T, typename MmapContainerT>
util::vector_view<T> mmapFile(const boost::filesystem::path &file, RegionT &region) util::vector_view<T> mmapFile(const boost::filesystem::path &file, MmapContainerT &mmap_container)
{ {
try try
{ {
region.open(file); mmap_container.open(file);
std::size_t num_objects = region.size() / sizeof(T); std::size_t num_objects = mmap_container.size() / sizeof(T);
auto data_ptr = region.data(); auto data_ptr = mmap_container.data();
BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0); BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0);
return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects); return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects);
} }
@ -34,9 +34,10 @@ util::vector_view<T> mmapFile(const boost::filesystem::path &file, RegionT &regi
} }
} }
template <typename T, typename RegionT> template <typename T, typename MmapContainerT>
util::vector_view<T> util::vector_view<T> mmapFile(const boost::filesystem::path &file,
mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t size) MmapContainerT &mmap_container,
const std::size_t size)
{ {
try try
{ {
@ -45,10 +46,10 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t
params.path = file.string(); params.path = file.string();
params.flags = boost::iostreams::mapped_file::readwrite; params.flags = boost::iostreams::mapped_file::readwrite;
params.new_file_size = size; params.new_file_size = size;
region.open(params); mmap_container.open(params);
std::size_t num_objects = size / sizeof(T); std::size_t num_objects = size / sizeof(T);
auto data_ptr = region.data(); auto data_ptr = mmap_container.data();
BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0); BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0);
return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects); return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects);
} }
@ -63,24 +64,24 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t
template <typename T> template <typename T>
util::vector_view<const T> mmapFile(const boost::filesystem::path &file, util::vector_view<const T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file_source &region) boost::iostreams::mapped_file_source &mmap_container)
{ {
return detail::mmapFile<const T>(file, region); return detail::mmapFile<const T>(file, mmap_container);
} }
template <typename T> template <typename T>
util::vector_view<T> mmapFile(const boost::filesystem::path &file, util::vector_view<T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file &region) boost::iostreams::mapped_file &mmap_container)
{ {
return detail::mmapFile<T>(file, region); return detail::mmapFile<T>(file, mmap_container);
} }
template <typename T> template <typename T>
util::vector_view<T> mmapFile(const boost::filesystem::path &file, util::vector_view<T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file &region, boost::iostreams::mapped_file &mmap_container,
std::size_t size) std::size_t size)
{ {
return detail::mmapFile<T>(file, region, size); return detail::mmapFile<T>(file, mmap_container, size);
} }
} }
} }

View File

@ -1,5 +1,6 @@
#include "engine/datafacade/mmap_memory_allocator.hpp" #include "engine/datafacade/mmap_memory_allocator.hpp"
#include "storage/block.hpp"
#include "storage/io.hpp" #include "storage/io.hpp"
#include "storage/serialization.hpp" #include "storage/serialization.hpp"
#include "storage/storage.hpp" #include "storage/storage.hpp"
@ -7,7 +8,7 @@
#include "util/log.hpp" #include "util/log.hpp"
#include "util/mmap_file.hpp" #include "util/mmap_file.hpp"
#include "boost/assert.hpp" #include <boost/assert.hpp>
namespace osrm namespace osrm
{ {
@ -16,46 +17,50 @@ namespace engine
namespace datafacade namespace datafacade
{ {
MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config, MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config)
const boost::filesystem::path &memory_file)
{ {
storage::Storage storage(config); storage::Storage storage(config);
std::vector<storage::SharedDataIndex::AllocatedRegion> allocated_regions;
if (!boost::filesystem::exists(memory_file))
{ {
storage::DataLayout initial_layout; std::unique_ptr<storage::BaseDataLayout> fake_layout =
storage.PopulateStaticLayout(initial_layout); std::make_unique<storage::TarDataLayout>();
storage.PopulateUpdatableLayout(initial_layout);
auto data_size = initial_layout.GetSizeOfLayout(); // Convert the boost::filesystem::path object into a plain string
// that's stored as a member of this allocator object
rtree_filename = storage.PopulateLayoutWithRTree(*fake_layout);
storage::io::BufferWriter writer; // Now, we add one more AllocatedRegion, with its start address as the start
storage::serialization::write(writer, initial_layout); // of the rtree_filename string we've saved. In the fake_layout, we've
auto encoded_layout = writer.GetBuffer(); // stated that the data is at offset 0, which is where the string starts
// at its own memory address.
auto total_size = data_size + encoded_layout.size(); // The syntax &(rtree_filename[0]) gets the memory address of the first char.
// We can't use the convenient `.data()` or `.c_str()` methods, because
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file, total_size); // prior to C++17 (which we're not using), those return a `const char *`,
// which isn't compatible with the `char *` that AllocatedRegion expects
std::copy(encoded_layout.begin(), encoded_layout.end(), mapped_memory.data()); // for its memory_ptr
allocated_regions.push_back({&(rtree_filename[0]), std::move(fake_layout)});
index = storage::SharedDataIndex(
{{mapped_memory.data() + encoded_layout.size(), std::move(initial_layout)}});
storage.PopulateStaticData(index);
storage.PopulateUpdatableData(index);
} }
else
auto files = storage.GetStaticFiles();
auto updatable_files = storage.GetUpdatableFiles();
files.insert(files.end(), updatable_files.begin(), updatable_files.end());
for (const auto &file : files)
{ {
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file); if (boost::filesystem::exists(file.second))
{
storage::DataLayout layout; std::unique_ptr<storage::BaseDataLayout> layout =
storage::io::BufferReader reader(mapped_memory.data(), mapped_memory.size()); std::make_unique<storage::TarDataLayout>();
storage::serialization::read(reader, layout); boost::iostreams::mapped_file mapped_memory_file;
auto layout_size = reader.GetPosition(); util::mmapFile<char>(file.second, mapped_memory_file);
mapped_memory_files.push_back(std::move(mapped_memory_file));
index = storage::SharedDataIndex({{mapped_memory.data() + layout_size, std::move(layout)}}); storage::populateLayoutFromFile(file.second, *layout);
allocated_regions.push_back({mapped_memory_file.data(), std::move(layout)});
} }
}
index = storage::SharedDataIndex{std::move(allocated_regions)};
} }
MMapMemoryAllocator::~MMapMemoryAllocator() {} MMapMemoryAllocator::~MMapMemoryAllocator() {}

View File

@ -15,14 +15,20 @@ ProcessMemoryAllocator::ProcessMemoryAllocator(const storage::StorageConfig &con
storage::Storage storage(config); storage::Storage storage(config);
// Calculate the layout/size of the memory block // Calculate the layout/size of the memory block
storage::DataLayout layout; auto static_files = storage.GetStaticFiles();
storage.PopulateStaticLayout(layout); auto updatable_files = storage.GetUpdatableFiles();
storage.PopulateUpdatableLayout(layout); std::unique_ptr<storage::BaseDataLayout> layout =
std::make_unique<storage::ContiguousDataLayout>();
storage.PopulateLayoutWithRTree(*layout);
storage.PopulateLayout(*layout, static_files);
storage.PopulateLayout(*layout, updatable_files);
// Allocate the memory block, then load data from files into it // Allocate the memory block, then load data from files into it
internal_memory = std::make_unique<char[]>(layout.GetSizeOfLayout()); internal_memory = std::make_unique<char[]>(layout->GetSizeOfLayout());
index = storage::SharedDataIndex({{internal_memory.get(), std::move(layout)}}); std::vector<storage::SharedDataIndex::AllocatedRegion> regions;
regions.push_back({internal_memory.get(), std::move(layout)});
index = {std::move(regions)};
storage.PopulateStaticData(index); storage.PopulateStaticData(index);
storage.PopulateUpdatableData(index); storage.PopulateUpdatableData(index);

View File

@ -25,8 +25,9 @@ SharedMemoryAllocator::SharedMemoryAllocator(
auto mem = storage::makeSharedMemory(shm_key); auto mem = storage::makeSharedMemory(shm_key);
storage::io::BufferReader reader(reinterpret_cast<char *>(mem->Ptr()), mem->Size()); storage::io::BufferReader reader(reinterpret_cast<char *>(mem->Ptr()), mem->Size());
storage::DataLayout layout; std::unique_ptr<storage::BaseDataLayout> layout =
storage::serialization::read(reader, layout); std::make_unique<storage::ContiguousDataLayout>();
storage::serialization::read(reader, *layout);
auto layout_size = reader.GetPosition(); auto layout_size = reader.GetPosition();
regions.push_back({reinterpret_cast<char *>(mem->Ptr()) + layout_size, std::move(layout)}); regions.push_back({reinterpret_cast<char *>(mem->Ptr()) + layout_size, std::move(layout)});

View File

@ -44,24 +44,6 @@ namespace
{ {
using Monitor = SharedMonitor<SharedRegionRegister>; using Monitor = SharedMonitor<SharedRegionRegister>;
void readBlocks(const boost::filesystem::path &path, DataLayout &layout)
{
tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);
std::vector<tar::FileReader::FileEntry> entries;
reader.List(std::back_inserter(entries));
for (const auto &entry : entries)
{
const auto name_end = entry.name.rfind(".meta");
if (name_end == std::string::npos)
{
auto number_of_elements = reader.ReadElementCount64(entry.name);
layout.SetBlock(entry.name, Block{number_of_elements, entry.size});
}
}
}
struct RegionHandle struct RegionHandle
{ {
std::unique_ptr<SharedMemory> memory; std::unique_ptr<SharedMemory> memory;
@ -69,7 +51,8 @@ struct RegionHandle
std::uint16_t shm_key; std::uint16_t shm_key;
}; };
auto setupRegion(SharedRegionRegister &shared_register, const DataLayout &layout) RegionHandle setupRegion(SharedRegionRegister &shared_register,
const storage::BaseDataLayout &layout)
{ {
// This is safe because we have an exclusive lock for all osrm-datastore processes. // This is safe because we have an exclusive lock for all osrm-datastore processes.
auto shm_key = shared_register.ReserveKey(); auto shm_key = shared_register.ReserveKey();
@ -184,6 +167,24 @@ bool swapData(Monitor &monitor,
} }
} }
// Registers one block in `layout` for every entry of the tar file at `path`
// whose name does not contain ".meta".
//
// path:   tar file to inspect; opened with the VerifyFingerprint flag.
// layout: receives a Block{element count, entry size, entry offset} per entry,
//         keyed by the entry name.
void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout)
{
tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);
std::vector<tar::FileReader::FileEntry> entries;
reader.List(std::back_inserter(entries));
for (const auto &entry : entries)
{
// Skip entries with ".meta" anywhere in their name (rfind matches any
// position, not only a suffix).
// NOTE(review): presumably these are the companion entries that hold the
// element counts read below — confirm against tar::FileReader.
const auto name_end = entry.name.rfind(".meta");
if (name_end == std::string::npos)
{
auto number_of_elements = reader.ReadElementCount64(entry.name);
// The entry's offset is recorded so the block can later be located
// relative to a base pointer instead of being copied elsewhere.
layout.SetBlock(entry.name, Block{number_of_elements, entry.size, entry.offset});
}
}
}
Storage::Storage(StorageConfig config_) : config(std::move(config_)) {} Storage::Storage(StorageConfig config_) : config(std::move(config_)) {}
int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric) int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric)
@ -243,29 +244,35 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
auto static_region = shared_register.GetRegion(region_id); auto static_region = shared_register.GetRegion(region_id);
auto static_memory = makeSharedMemory(static_region.shm_key); auto static_memory = makeSharedMemory(static_region.shm_key);
DataLayout static_layout; std::unique_ptr<storage::BaseDataLayout> static_layout =
std::make_unique<storage::ContiguousDataLayout>();
io::BufferReader reader(reinterpret_cast<char *>(static_memory->Ptr()), io::BufferReader reader(reinterpret_cast<char *>(static_memory->Ptr()),
static_memory->Size()); static_memory->Size());
serialization::read(reader, static_layout); serialization::read(reader, *static_layout);
auto layout_size = reader.GetPosition(); auto layout_size = reader.GetPosition();
auto *data_ptr = reinterpret_cast<char *>(static_memory->Ptr()) + layout_size; auto *data_ptr = reinterpret_cast<char *>(static_memory->Ptr()) + layout_size;
regions.push_back({data_ptr, static_layout}); regions.push_back({data_ptr, std::move(static_layout)});
readonly_handles.push_back({std::move(static_memory), data_ptr, static_region.shm_key}); readonly_handles.push_back({std::move(static_memory), data_ptr, static_region.shm_key});
} }
else else
{ {
DataLayout static_layout; std::unique_ptr<storage::BaseDataLayout> static_layout =
PopulateStaticLayout(static_layout); std::make_unique<storage::ContiguousDataLayout>();
auto static_handle = setupRegion(shared_register, static_layout); Storage::PopulateLayoutWithRTree(*static_layout);
regions.push_back({static_handle.data_ptr, static_layout}); std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetStaticFiles();
Storage::PopulateLayout(*static_layout, files);
auto static_handle = setupRegion(shared_register, *static_layout);
regions.push_back({static_handle.data_ptr, std::move(static_layout)});
handles[dataset_name + "/static"] = std::move(static_handle); handles[dataset_name + "/static"] = std::move(static_handle);
} }
DataLayout updatable_layout; std::unique_ptr<storage::BaseDataLayout> updatable_layout =
PopulateUpdatableLayout(updatable_layout); std::make_unique<storage::ContiguousDataLayout>();
auto updatable_handle = setupRegion(shared_register, updatable_layout); std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetUpdatableFiles();
regions.push_back({updatable_handle.data_ptr, updatable_layout}); Storage::PopulateLayout(*updatable_layout, files);
auto updatable_handle = setupRegion(shared_register, *updatable_layout);
regions.push_back({updatable_handle.data_ptr, std::move(updatable_layout)});
handles[dataset_name + "/updatable"] = std::move(updatable_handle); handles[dataset_name + "/updatable"] = std::move(updatable_handle);
SharedDataIndex index{std::move(regions)}; SharedDataIndex index{std::move(regions)};
@ -281,24 +288,12 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
/** std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetStaticFiles()
* This function examines all our data files and figures out how much
* memory needs to be allocated, and the position of each data structure
* in that big block. It updates the fields in the DataLayout parameter.
*/
void Storage::PopulateStaticLayout(DataLayout &static_layout)
{ {
{
auto absolute_file_index_path =
boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
static_layout.SetBlock("/common/rtree/file_index_path",
make_block<char>(absolute_file_index_path.string().length() + 1));
}
constexpr bool REQUIRED = true; constexpr bool REQUIRED = true;
constexpr bool OPTIONAL = false; constexpr bool OPTIONAL = false;
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
std::vector<std::pair<bool, boost::filesystem::path>> files = {
{OPTIONAL, config.GetPath(".osrm.cells")}, {OPTIONAL, config.GetPath(".osrm.cells")},
{OPTIONAL, config.GetPath(".osrm.partition")}, {OPTIONAL, config.GetPath(".osrm.partition")},
{REQUIRED, config.GetPath(".osrm.icd")}, {REQUIRED, config.GetPath(".osrm.icd")},
@ -310,53 +305,73 @@ void Storage::PopulateStaticLayout(DataLayout &static_layout)
{REQUIRED, config.GetPath(".osrm.maneuver_overrides")}, {REQUIRED, config.GetPath(".osrm.maneuver_overrides")},
{REQUIRED, config.GetPath(".osrm.edges")}, {REQUIRED, config.GetPath(".osrm.edges")},
{REQUIRED, config.GetPath(".osrm.names")}, {REQUIRED, config.GetPath(".osrm.names")},
{REQUIRED, config.GetPath(".osrm.ramIndex")}, {REQUIRED, config.GetPath(".osrm.ramIndex")}};
};
for (const auto &file : tar_files) for (const auto &file : files)
{ {
if (boost::filesystem::exists(file.second)) if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
{ {
readBlocks(file.second, static_layout); throw util::exception("Could not find required filed: " + std::get<1>(file).string());
}
else
{
if (file.first == REQUIRED)
{
throw util::exception("Could not find required filed: " +
std::get<1>(file).string());
}
} }
} }
return files;
} }
void Storage::PopulateUpdatableLayout(DataLayout &updatable_layout) std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetUpdatableFiles()
{ {
constexpr bool REQUIRED = true; constexpr bool REQUIRED = true;
constexpr bool OPTIONAL = false; constexpr bool OPTIONAL = false;
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
std::vector<std::pair<bool, boost::filesystem::path>> files = {
{OPTIONAL, config.GetPath(".osrm.mldgr")}, {OPTIONAL, config.GetPath(".osrm.mldgr")},
{OPTIONAL, config.GetPath(".osrm.cell_metrics")}, {OPTIONAL, config.GetPath(".osrm.cell_metrics")},
{OPTIONAL, config.GetPath(".osrm.hsgr")}, {OPTIONAL, config.GetPath(".osrm.hsgr")},
{REQUIRED, config.GetPath(".osrm.datasource_names")}, {REQUIRED, config.GetPath(".osrm.datasource_names")},
{REQUIRED, config.GetPath(".osrm.geometry")}, {REQUIRED, config.GetPath(".osrm.geometry")},
{REQUIRED, config.GetPath(".osrm.turn_weight_penalties")}, {REQUIRED, config.GetPath(".osrm.turn_weight_penalties")},
{REQUIRED, config.GetPath(".osrm.turn_duration_penalties")}, {REQUIRED, config.GetPath(".osrm.turn_duration_penalties")}};
};
for (const auto &file : tar_files) for (const auto &file : files)
{
if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
{
throw util::exception("Could not find required filed: " + std::get<1>(file).string());
}
}
return files;
}
// Registers the "/common/rtree/file_index_path" block in `layout` and returns
// the absolute path of the ".osrm.fileIndex" file as a string.
//
// The block is sized as a char array of the path length plus one.
// NOTE(review): the extra char is presumably room for a terminating NUL —
// confirm against the code that reads this block.
std::string Storage::PopulateLayoutWithRTree(storage::BaseDataLayout &layout)
{
// Figure out the path to the rtree file (it's not a tar file)
auto absolute_file_index_path = boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
// Convert the boost::filesystem::path object into a plain string
// that can then be stored as a member of an allocator object
auto rtree_filename = absolute_file_index_path.string();
// Here, we hardcode the special file_index_path block name.
// The important bit here is that the "offset" is set to zero
layout.SetBlock("/common/rtree/file_index_path", make_block<char>(rtree_filename.length() + 1));
return rtree_filename;
}
/**
* This function examines all our data files and figures out how much
* memory needs to be allocated, and the position of each data structure
* in that big block. It updates the fields in the layout parameter.
*/
void Storage::PopulateLayout(storage::BaseDataLayout &layout,
const std::vector<std::pair<bool, boost::filesystem::path>> &files)
{
for (const auto &file : files)
{ {
if (boost::filesystem::exists(file.second)) if (boost::filesystem::exists(file.second))
{ {
readBlocks(file.second, updatable_layout); populateLayoutFromFile(file.second, layout);
}
else
{
if (file.first == REQUIRED)
{
throw util::exception("Could not find required filed: " +
std::get<1>(file).string());
}
} }
} }
} }

View File

@ -52,14 +52,14 @@ void listRegions(bool show_blocks)
auto memory = makeSharedMemory(region.shm_key); auto memory = makeSharedMemory(region.shm_key);
io::BufferReader reader(reinterpret_cast<char *>(memory->Ptr()), memory->Size()); io::BufferReader reader(reinterpret_cast<char *>(memory->Ptr()), memory->Size());
DataLayout layout; std::unique_ptr<BaseDataLayout> layout = std::make_unique<ContiguousDataLayout>();
serialization::read(reader, layout); serialization::read(reader, *layout);
std::vector<std::string> block_names; std::vector<std::string> block_names;
layout.List("", std::back_inserter(block_names)); layout->List("", std::back_inserter(block_names));
for (auto &name : block_names) for (auto &name : block_names)
{ {
osrm::util::Log() << " " << name << " " << layout.GetBlockSize(name); osrm::util::Log() << " " << name << " " << layout->GetBlockSize(name);
} }
} }
} }

View File

@ -15,86 +15,89 @@ using namespace osrm::storage;
BOOST_AUTO_TEST_CASE(layout_write_test) BOOST_AUTO_TEST_CASE(layout_write_test)
{ {
DataLayout layout; std::unique_ptr<BaseDataLayout> layout = std::make_unique<ContiguousDataLayout>();
Block block_1{20, 8 * 20}; Block block_1{20, 8 * 20};
Block block_2{1, 4 * 1}; Block block_2{1, 4 * 1};
Block block_3{100, static_cast<std::uint64_t>(std::ceil(100 / 64.))}; Block block_3{100, static_cast<std::uint64_t>(std::ceil(100 / 64.))};
layout.SetBlock("block1", block_1); layout->SetBlock("block1", block_1);
layout.SetBlock("block2", block_2); layout->SetBlock("block2", block_2);
layout.SetBlock("block3", block_3); layout->SetBlock("block3", block_3);
// Canary and alignment change layout size // Canary and alignment change layout size
BOOST_CHECK_GT(layout.GetSizeOfLayout(), BOOST_CHECK_GT(layout->GetSizeOfLayout(),
block_1.byte_size + block_2.byte_size + block_3.byte_size); block_1.byte_size + block_2.byte_size + block_3.byte_size);
BOOST_CHECK_EQUAL(layout.GetBlockSize("block1"), block_1.byte_size); BOOST_CHECK_EQUAL(layout->GetBlockSize("block1"), block_1.byte_size);
BOOST_CHECK_EQUAL(layout.GetBlockSize("block2"), block_2.byte_size); BOOST_CHECK_EQUAL(layout->GetBlockSize("block2"), block_2.byte_size);
BOOST_CHECK_EQUAL(layout.GetBlockSize("block3"), block_3.byte_size); BOOST_CHECK_EQUAL(layout->GetBlockSize("block3"), block_3.byte_size);
std::vector<char> buffer(layout.GetSizeOfLayout()); std::vector<char> buffer(layout->GetSizeOfLayout());
auto smallest_addr = buffer.data(); auto smallest_addr = buffer.data();
auto biggest_addr = buffer.data() + buffer.size(); auto biggest_addr = buffer.data() + buffer.size();
{ {
auto block_1_ptr = layout.GetBlockPtr<std::uint64_t>(buffer.data(), "block1"); auto block_1_ptr =
auto block_2_ptr = layout.GetBlockPtr<std::uint32_t>(buffer.data(), "block2"); reinterpret_cast<std::uint64_t *>(layout->GetBlockPtr(buffer.data(), "block1"));
auto block_3_ptr = layout.GetBlockPtr<std::uint64_t>(buffer.data(), "block3"); auto block_2_ptr =
reinterpret_cast<std::uint32_t *>(layout->GetBlockPtr(buffer.data(), "block2"));
auto block_3_ptr =
reinterpret_cast<std::uint64_t *>(layout->GetBlockPtr(buffer.data(), "block3"));
BOOST_CHECK_LT(reinterpret_cast<std::size_t>(smallest_addr), BOOST_CHECK_LE(reinterpret_cast<std::size_t>(smallest_addr),
reinterpret_cast<std::size_t>(block_1_ptr)); reinterpret_cast<std::size_t>(block_1_ptr));
BOOST_CHECK_GT( BOOST_CHECK_GT(
reinterpret_cast<std::size_t>(biggest_addr), reinterpret_cast<std::size_t>(biggest_addr),
reinterpret_cast<std::size_t>(block_1_ptr + layout.GetBlockEntries("block1"))); reinterpret_cast<std::size_t>(block_1_ptr + layout->GetBlockEntries("block1")));
BOOST_CHECK_LT(reinterpret_cast<std::size_t>(smallest_addr), BOOST_CHECK_LT(reinterpret_cast<std::size_t>(smallest_addr),
reinterpret_cast<std::size_t>(block_2_ptr)); reinterpret_cast<std::size_t>(block_2_ptr));
BOOST_CHECK_GT( BOOST_CHECK_GT(
reinterpret_cast<std::size_t>(biggest_addr), reinterpret_cast<std::size_t>(biggest_addr),
reinterpret_cast<std::size_t>(block_2_ptr + layout.GetBlockEntries("block2"))); reinterpret_cast<std::size_t>(block_2_ptr + layout->GetBlockEntries("block2")));
BOOST_CHECK_LT(reinterpret_cast<std::size_t>(smallest_addr), BOOST_CHECK_LT(reinterpret_cast<std::size_t>(smallest_addr),
reinterpret_cast<std::size_t>(block_3_ptr)); reinterpret_cast<std::size_t>(block_3_ptr));
BOOST_CHECK_GT(reinterpret_cast<std::size_t>(biggest_addr), BOOST_CHECK_GT(reinterpret_cast<std::size_t>(biggest_addr),
reinterpret_cast<std::size_t>( reinterpret_cast<std::size_t>(
block_3_ptr + static_cast<std::size_t>( block_3_ptr + static_cast<std::size_t>(
std::ceil(layout.GetBlockEntries("block3") / 64)))); std::ceil(layout->GetBlockEntries("block3") / 64))));
} }
} }
BOOST_AUTO_TEST_CASE(layout_list_test) BOOST_AUTO_TEST_CASE(layout_list_test)
{ {
DataLayout layout; std::unique_ptr<BaseDataLayout> layout = std::make_unique<ContiguousDataLayout>();
Block block_1{20, 8 * 20}; Block block_1{20, 8 * 20};
Block block_2{1, 4 * 1}; Block block_2{1, 4 * 1};
Block block_3{100, static_cast<std::uint64_t>(std::ceil(100 / 64.))}; Block block_3{100, static_cast<std::uint64_t>(std::ceil(100 / 64.))};
layout.SetBlock("/ch/edge_filter/block1", block_1); layout->SetBlock("/ch/edge_filter/block1", block_1);
layout.SetBlock("/ch/edge_filter/block2", block_2); layout->SetBlock("/ch/edge_filter/block2", block_2);
layout.SetBlock("/ch/edge_filter/block3", block_3); layout->SetBlock("/ch/edge_filter/block3", block_3);
layout.SetBlock("/mld/metrics/0/durations", block_2); layout->SetBlock("/mld/metrics/0/durations", block_2);
layout.SetBlock("/mld/metrics/0/weights", block_3); layout->SetBlock("/mld/metrics/0/weights", block_3);
layout.SetBlock("/mld/metrics/1/durations", block_2); layout->SetBlock("/mld/metrics/1/durations", block_2);
layout.SetBlock("/mld/metrics/1/weights", block_3); layout->SetBlock("/mld/metrics/1/weights", block_3);
std::vector<std::string> results_1; std::vector<std::string> results_1;
std::vector<std::string> results_2; std::vector<std::string> results_2;
std::vector<std::string> results_3; std::vector<std::string> results_3;
layout.List("/ch/edge_filter", std::back_inserter(results_1)); layout->List("/ch/edge_filter", std::back_inserter(results_1));
layout.List("/ch/edge_filter/", std::back_inserter(results_2)); layout->List("/ch/edge_filter/", std::back_inserter(results_2));
layout.List("/ch/", std::back_inserter(results_3)); layout->List("/ch/", std::back_inserter(results_3));
std::vector<std::string> results_4; std::vector<std::string> results_4;
std::vector<std::string> results_5; std::vector<std::string> results_5;
std::vector<std::string> results_6; std::vector<std::string> results_6;
layout.List("/mld/metrics", std::back_inserter(results_4)); layout->List("/mld/metrics", std::back_inserter(results_4));
layout.List("/mld/metrics/", std::back_inserter(results_5)); layout->List("/mld/metrics/", std::back_inserter(results_5));
layout.List("/mld/", std::back_inserter(results_6)); layout->List("/mld/", std::back_inserter(results_6));
std::vector<std::string> results_7; std::vector<std::string> results_7;
layout.List("", std::back_inserter(results_7)); layout->List("", std::back_inserter(results_7));
BOOST_CHECK_EQUAL(results_7.size(), 7); BOOST_CHECK_EQUAL(results_7.size(), 7);
CHECK_EQUAL_RANGE( CHECK_EQUAL_RANGE(