mmap tarfiles directly when mmapping is enabled, instead of copying data into separate mmapped block
Co-authored-by: Kajari Ghosh <ghoshkaj@gmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include "engine/datafacade/mmap_memory_allocator.hpp"
|
||||
|
||||
#include "storage/block.hpp"
|
||||
#include "storage/io.hpp"
|
||||
#include "storage/serialization.hpp"
|
||||
#include "storage/storage.hpp"
|
||||
@@ -7,7 +8,7 @@
|
||||
#include "util/log.hpp"
|
||||
#include "util/mmap_file.hpp"
|
||||
|
||||
#include "boost/assert.hpp"
|
||||
#include <boost/assert.hpp>
|
||||
|
||||
namespace osrm
|
||||
{
|
||||
@@ -16,46 +17,50 @@ namespace engine
|
||||
namespace datafacade
|
||||
{
|
||||
|
||||
MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config,
|
||||
const boost::filesystem::path &memory_file)
|
||||
MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config)
|
||||
{
|
||||
storage::Storage storage(config);
|
||||
std::vector<storage::SharedDataIndex::AllocatedRegion> allocated_regions;
|
||||
|
||||
if (!boost::filesystem::exists(memory_file))
|
||||
{
|
||||
storage::DataLayout initial_layout;
|
||||
storage.PopulateStaticLayout(initial_layout);
|
||||
storage.PopulateUpdatableLayout(initial_layout);
|
||||
std::unique_ptr<storage::BaseDataLayout> fake_layout =
|
||||
std::make_unique<storage::TarDataLayout>();
|
||||
|
||||
auto data_size = initial_layout.GetSizeOfLayout();
|
||||
// Convert the boost::filesystem::path object into a plain string
|
||||
// that's stored as a member of this allocator object
|
||||
rtree_filename = storage.PopulateLayoutWithRTree(*fake_layout);
|
||||
|
||||
storage::io::BufferWriter writer;
|
||||
storage::serialization::write(writer, initial_layout);
|
||||
auto encoded_layout = writer.GetBuffer();
|
||||
|
||||
auto total_size = data_size + encoded_layout.size();
|
||||
|
||||
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file, total_size);
|
||||
|
||||
std::copy(encoded_layout.begin(), encoded_layout.end(), mapped_memory.data());
|
||||
|
||||
index = storage::SharedDataIndex(
|
||||
{{mapped_memory.data() + encoded_layout.size(), std::move(initial_layout)}});
|
||||
|
||||
storage.PopulateStaticData(index);
|
||||
storage.PopulateUpdatableData(index);
|
||||
// Now, we add one more AllocatedRegion, with its start address as the start
|
||||
// of the rtree_filename string we've saved. In the fake_layout, we've
|
||||
// stated that the data is at offset 0, which is where the string starts
|
||||
// at its own memory address.
|
||||
// The syntax &(rtree_filename[0]) gets the memory address of the first char.
|
||||
// We can't use the convenient `.data()` or `.c_str()` methods, because
|
||||
// prior to C++17 (which we're not using), those return a `const char *`,
|
||||
// which isn't compatible with the `char *` that AllocatedRegion expects
|
||||
// for its memory_ptr
|
||||
allocated_regions.push_back({&(rtree_filename[0]), std::move(fake_layout)});
|
||||
}
|
||||
else
|
||||
|
||||
auto files = storage.GetStaticFiles();
|
||||
auto updatable_files = storage.GetUpdatableFiles();
|
||||
files.insert(files.end(), updatable_files.begin(), updatable_files.end());
|
||||
|
||||
for (const auto &file : files)
|
||||
{
|
||||
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file);
|
||||
|
||||
storage::DataLayout layout;
|
||||
storage::io::BufferReader reader(mapped_memory.data(), mapped_memory.size());
|
||||
storage::serialization::read(reader, layout);
|
||||
auto layout_size = reader.GetPosition();
|
||||
|
||||
index = storage::SharedDataIndex({{mapped_memory.data() + layout_size, std::move(layout)}});
|
||||
if (boost::filesystem::exists(file.second))
|
||||
{
|
||||
std::unique_ptr<storage::BaseDataLayout> layout =
|
||||
std::make_unique<storage::TarDataLayout>();
|
||||
boost::iostreams::mapped_file mapped_memory_file;
|
||||
util::mmapFile<char>(file.second, mapped_memory_file);
|
||||
mapped_memory_files.push_back(std::move(mapped_memory_file));
|
||||
storage::populateLayoutFromFile(file.second, *layout);
|
||||
allocated_regions.push_back({mapped_memory_file.data(), std::move(layout)});
|
||||
}
|
||||
}
|
||||
|
||||
index = storage::SharedDataIndex{std::move(allocated_regions)};
|
||||
}
|
||||
|
||||
// Nothing to release by hand here: the destructor body is empty, so each member
// is torn down by its own destructor.
MMapMemoryAllocator::~MMapMemoryAllocator() = default;
|
||||
|
||||
@@ -15,14 +15,20 @@ ProcessMemoryAllocator::ProcessMemoryAllocator(const storage::StorageConfig &con
|
||||
storage::Storage storage(config);
|
||||
|
||||
// Calculate the layout/size of the memory block
|
||||
storage::DataLayout layout;
|
||||
storage.PopulateStaticLayout(layout);
|
||||
storage.PopulateUpdatableLayout(layout);
|
||||
auto static_files = storage.GetStaticFiles();
|
||||
auto updatable_files = storage.GetUpdatableFiles();
|
||||
std::unique_ptr<storage::BaseDataLayout> layout =
|
||||
std::make_unique<storage::ContiguousDataLayout>();
|
||||
storage.PopulateLayoutWithRTree(*layout);
|
||||
storage.PopulateLayout(*layout, static_files);
|
||||
storage.PopulateLayout(*layout, updatable_files);
|
||||
|
||||
// Allocate the memory block, then load data from files into it
|
||||
internal_memory = std::make_unique<char[]>(layout.GetSizeOfLayout());
|
||||
internal_memory = std::make_unique<char[]>(layout->GetSizeOfLayout());
|
||||
|
||||
index = storage::SharedDataIndex({{internal_memory.get(), std::move(layout)}});
|
||||
std::vector<storage::SharedDataIndex::AllocatedRegion> regions;
|
||||
regions.push_back({internal_memory.get(), std::move(layout)});
|
||||
index = {std::move(regions)};
|
||||
|
||||
storage.PopulateStaticData(index);
|
||||
storage.PopulateUpdatableData(index);
|
||||
|
||||
@@ -25,8 +25,9 @@ SharedMemoryAllocator::SharedMemoryAllocator(
|
||||
auto mem = storage::makeSharedMemory(shm_key);
|
||||
|
||||
storage::io::BufferReader reader(reinterpret_cast<char *>(mem->Ptr()), mem->Size());
|
||||
storage::DataLayout layout;
|
||||
storage::serialization::read(reader, layout);
|
||||
std::unique_ptr<storage::BaseDataLayout> layout =
|
||||
std::make_unique<storage::ContiguousDataLayout>();
|
||||
storage::serialization::read(reader, *layout);
|
||||
auto layout_size = reader.GetPosition();
|
||||
|
||||
regions.push_back({reinterpret_cast<char *>(mem->Ptr()) + layout_size, std::move(layout)});
|
||||
|
||||
+87
-72
@@ -44,24 +44,6 @@ namespace
|
||||
{
|
||||
using Monitor = SharedMonitor<SharedRegionRegister>;
|
||||
|
||||
// Walks every entry of the tar file at `path` and registers one block in `layout`
// for each entry whose name does not contain ".meta". The block records the
// element count read from the file and the entry's size.
void readBlocks(const boost::filesystem::path &path, DataLayout &layout)
{
    tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);

    // Collect the complete entry listing first, then process it.
    std::vector<tar::FileReader::FileEntry> tar_entries;
    reader.List(std::back_inserter(tar_entries));

    for (const auto &tar_entry : tar_entries)
    {
        // Entries carrying ".meta" in their name do not get a block of their own.
        if (tar_entry.name.rfind(".meta") != std::string::npos)
        {
            continue;
        }

        auto element_count = reader.ReadElementCount64(tar_entry.name);
        layout.SetBlock(tar_entry.name, Block{element_count, tar_entry.size});
    }
}
|
||||
|
||||
struct RegionHandle
|
||||
{
|
||||
std::unique_ptr<SharedMemory> memory;
|
||||
@@ -69,7 +51,8 @@ struct RegionHandle
|
||||
std::uint16_t shm_key;
|
||||
};
|
||||
|
||||
auto setupRegion(SharedRegionRegister &shared_register, const DataLayout &layout)
|
||||
RegionHandle setupRegion(SharedRegionRegister &shared_register,
|
||||
const storage::BaseDataLayout &layout)
|
||||
{
|
||||
// This is safe because we have an exclusive lock for all osrm-datastore processes.
|
||||
auto shm_key = shared_register.ReserveKey();
|
||||
@@ -184,6 +167,24 @@ bool swapData(Monitor &monitor,
|
||||
}
|
||||
}
|
||||
|
||||
// Fills `layout` with one block per entry of the tar file at `path`, skipping
// entries whose name contains ".meta". Each block stores the element count read
// from the file together with the entry's size and its offset.
void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout)
{
    tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);

    // Gather the full listing before registering any block.
    std::vector<tar::FileReader::FileEntry> tar_entries;
    reader.List(std::back_inserter(tar_entries));

    const auto has_meta_marker = [](const std::string &entry_name) {
        return entry_name.rfind(".meta") != std::string::npos;
    };

    for (const auto &tar_entry : tar_entries)
    {
        if (has_meta_marker(tar_entry.name))
        {
            continue;
        }

        auto element_count = reader.ReadElementCount64(tar_entry.name);
        layout.SetBlock(tar_entry.name,
                        Block{element_count, tar_entry.size, tar_entry.offset});
    }
}
|
||||
|
||||
// Takes the configuration by value and moves it into the `config` member.
Storage::Storage(StorageConfig config_) : config(std::move(config_)) {}
|
||||
|
||||
int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric)
|
||||
@@ -243,29 +244,35 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
|
||||
auto static_region = shared_register.GetRegion(region_id);
|
||||
auto static_memory = makeSharedMemory(static_region.shm_key);
|
||||
|
||||
DataLayout static_layout;
|
||||
std::unique_ptr<storage::BaseDataLayout> static_layout =
|
||||
std::make_unique<storage::ContiguousDataLayout>();
|
||||
io::BufferReader reader(reinterpret_cast<char *>(static_memory->Ptr()),
|
||||
static_memory->Size());
|
||||
serialization::read(reader, static_layout);
|
||||
serialization::read(reader, *static_layout);
|
||||
auto layout_size = reader.GetPosition();
|
||||
auto *data_ptr = reinterpret_cast<char *>(static_memory->Ptr()) + layout_size;
|
||||
|
||||
regions.push_back({data_ptr, static_layout});
|
||||
regions.push_back({data_ptr, std::move(static_layout)});
|
||||
readonly_handles.push_back({std::move(static_memory), data_ptr, static_region.shm_key});
|
||||
}
|
||||
else
|
||||
{
|
||||
DataLayout static_layout;
|
||||
PopulateStaticLayout(static_layout);
|
||||
auto static_handle = setupRegion(shared_register, static_layout);
|
||||
regions.push_back({static_handle.data_ptr, static_layout});
|
||||
std::unique_ptr<storage::BaseDataLayout> static_layout =
|
||||
std::make_unique<storage::ContiguousDataLayout>();
|
||||
Storage::PopulateLayoutWithRTree(*static_layout);
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetStaticFiles();
|
||||
Storage::PopulateLayout(*static_layout, files);
|
||||
auto static_handle = setupRegion(shared_register, *static_layout);
|
||||
regions.push_back({static_handle.data_ptr, std::move(static_layout)});
|
||||
handles[dataset_name + "/static"] = std::move(static_handle);
|
||||
}
|
||||
|
||||
DataLayout updatable_layout;
|
||||
PopulateUpdatableLayout(updatable_layout);
|
||||
auto updatable_handle = setupRegion(shared_register, updatable_layout);
|
||||
regions.push_back({updatable_handle.data_ptr, updatable_layout});
|
||||
std::unique_ptr<storage::BaseDataLayout> updatable_layout =
|
||||
std::make_unique<storage::ContiguousDataLayout>();
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetUpdatableFiles();
|
||||
Storage::PopulateLayout(*updatable_layout, files);
|
||||
auto updatable_handle = setupRegion(shared_register, *updatable_layout);
|
||||
regions.push_back({updatable_handle.data_ptr, std::move(updatable_layout)});
|
||||
handles[dataset_name + "/updatable"] = std::move(updatable_handle);
|
||||
|
||||
SharedDataIndex index{std::move(regions)};
|
||||
@@ -281,24 +288,12 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function examines all our data files and figures out how much
|
||||
* memory needs to be allocated, and the position of each data structure
|
||||
* in that big block. It updates the fields in the DataLayout parameter.
|
||||
*/
|
||||
void Storage::PopulateStaticLayout(DataLayout &static_layout)
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetStaticFiles()
|
||||
{
|
||||
{
|
||||
auto absolute_file_index_path =
|
||||
boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
|
||||
|
||||
static_layout.SetBlock("/common/rtree/file_index_path",
|
||||
make_block<char>(absolute_file_index_path.string().length() + 1));
|
||||
}
|
||||
|
||||
constexpr bool REQUIRED = true;
|
||||
constexpr bool OPTIONAL = false;
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
|
||||
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> files = {
|
||||
{OPTIONAL, config.GetPath(".osrm.cells")},
|
||||
{OPTIONAL, config.GetPath(".osrm.partition")},
|
||||
{REQUIRED, config.GetPath(".osrm.icd")},
|
||||
@@ -310,53 +305,73 @@ void Storage::PopulateStaticLayout(DataLayout &static_layout)
|
||||
{REQUIRED, config.GetPath(".osrm.maneuver_overrides")},
|
||||
{REQUIRED, config.GetPath(".osrm.edges")},
|
||||
{REQUIRED, config.GetPath(".osrm.names")},
|
||||
{REQUIRED, config.GetPath(".osrm.ramIndex")},
|
||||
};
|
||||
{REQUIRED, config.GetPath(".osrm.ramIndex")}};
|
||||
|
||||
for (const auto &file : tar_files)
|
||||
for (const auto &file : files)
|
||||
{
|
||||
if (boost::filesystem::exists(file.second))
|
||||
if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
|
||||
{
|
||||
readBlocks(file.second, static_layout);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (file.first == REQUIRED)
|
||||
{
|
||||
throw util::exception("Could not find required filed: " +
|
||||
std::get<1>(file).string());
|
||||
}
|
||||
throw util::exception("Could not find required filed: " + std::get<1>(file).string());
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
void Storage::PopulateUpdatableLayout(DataLayout &updatable_layout)
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetUpdatableFiles()
|
||||
{
|
||||
constexpr bool REQUIRED = true;
|
||||
constexpr bool OPTIONAL = false;
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
|
||||
|
||||
std::vector<std::pair<bool, boost::filesystem::path>> files = {
|
||||
{OPTIONAL, config.GetPath(".osrm.mldgr")},
|
||||
{OPTIONAL, config.GetPath(".osrm.cell_metrics")},
|
||||
{OPTIONAL, config.GetPath(".osrm.hsgr")},
|
||||
{REQUIRED, config.GetPath(".osrm.datasource_names")},
|
||||
{REQUIRED, config.GetPath(".osrm.geometry")},
|
||||
{REQUIRED, config.GetPath(".osrm.turn_weight_penalties")},
|
||||
{REQUIRED, config.GetPath(".osrm.turn_duration_penalties")},
|
||||
};
|
||||
{REQUIRED, config.GetPath(".osrm.turn_duration_penalties")}};
|
||||
|
||||
for (const auto &file : tar_files)
|
||||
for (const auto &file : files)
|
||||
{
|
||||
if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
|
||||
{
|
||||
throw util::exception("Could not find required filed: " + std::get<1>(file).string());
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
std::string Storage::PopulateLayoutWithRTree(storage::BaseDataLayout &layout)
|
||||
{
|
||||
// Figure out the path to the rtree file (it's not a tar file)
|
||||
auto absolute_file_index_path = boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
|
||||
|
||||
// Convert the boost::filesystem::path object into a plain string
|
||||
// that can then be stored as a member of an allocator object
|
||||
auto rtree_filename = absolute_file_index_path.string();
|
||||
|
||||
// Here, we hardcode the special file_index_path block name.
|
||||
// The important bit here is that the "offset" is set to zero
|
||||
layout.SetBlock("/common/rtree/file_index_path", make_block<char>(rtree_filename.length() + 1));
|
||||
|
||||
return rtree_filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function examines all our data files and figures out how much
|
||||
* memory needs to be allocated, and the position of each data structure
|
||||
* in that big block. It updates the fields in the layout parameter.
|
||||
*/
|
||||
void Storage::PopulateLayout(storage::BaseDataLayout &layout,
|
||||
const std::vector<std::pair<bool, boost::filesystem::path>> &files)
|
||||
{
|
||||
for (const auto &file : files)
|
||||
{
|
||||
if (boost::filesystem::exists(file.second))
|
||||
{
|
||||
readBlocks(file.second, updatable_layout);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (file.first == REQUIRED)
|
||||
{
|
||||
throw util::exception("Could not find required filed: " +
|
||||
std::get<1>(file).string());
|
||||
}
|
||||
populateLayoutFromFile(file.second, layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+4
-4
@@ -52,14 +52,14 @@ void listRegions(bool show_blocks)
|
||||
auto memory = makeSharedMemory(region.shm_key);
|
||||
io::BufferReader reader(reinterpret_cast<char *>(memory->Ptr()), memory->Size());
|
||||
|
||||
DataLayout layout;
|
||||
serialization::read(reader, layout);
|
||||
std::unique_ptr<BaseDataLayout> layout = std::make_unique<ContiguousDataLayout>();
|
||||
serialization::read(reader, *layout);
|
||||
|
||||
std::vector<std::string> block_names;
|
||||
layout.List("", std::back_inserter(block_names));
|
||||
layout->List("", std::back_inserter(block_names));
|
||||
for (auto &name : block_names)
|
||||
{
|
||||
osrm::util::Log() << " " << name << " " << layout.GetBlockSize(name);
|
||||
osrm::util::Log() << " " << name << " " << layout->GetBlockSize(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user