Mmap tar files directly when mmapping is enabled, instead of copying the data into a separate mmapped block

Co-authored-by: Kajari Ghosh <ghoshkaj@gmail.com>
This commit is contained in:
Daniel Patterson
2018-10-26 23:48:51 -07:00
parent d80318f8ea
commit 2f9cb44368
14 changed files with 309 additions and 219 deletions
+37 -32
View File
@@ -1,5 +1,6 @@
#include "engine/datafacade/mmap_memory_allocator.hpp"
#include "storage/block.hpp"
#include "storage/io.hpp"
#include "storage/serialization.hpp"
#include "storage/storage.hpp"
@@ -7,7 +8,7 @@
#include "util/log.hpp"
#include "util/mmap_file.hpp"
#include "boost/assert.hpp"
#include <boost/assert.hpp>
namespace osrm
{
@@ -16,46 +17,50 @@ namespace engine
namespace datafacade
{
// NOTE(review): this span is rendered from a commit diff without +/- markers, so
// lines from both revisions of the constructor appear together (for example, both
// signatures are listed below). Read it as a diff, not as compilable code.
//
// Constructs the allocator's `index` from a list of AllocatedRegion entries: one
// region whose memory is the `rtree_filename` string, and one region per data
// file that exists on disk, each pointing at a memory-mapped view of that file.
MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config,
const boost::filesystem::path &memory_file)
MMapMemoryAllocator::MMapMemoryAllocator(const storage::StorageConfig &config)
{
storage::Storage storage(config);
// Regions collected here are handed to SharedDataIndex at the end of the constructor
std::vector<storage::SharedDataIndex::AllocatedRegion> allocated_regions;
if (!boost::filesystem::exists(memory_file))
{
storage::DataLayout initial_layout;
storage.PopulateStaticLayout(initial_layout);
storage.PopulateUpdatableLayout(initial_layout);
// Layout that describes only the rtree file path block (filled in below)
std::unique_ptr<storage::BaseDataLayout> fake_layout =
std::make_unique<storage::TarDataLayout>();
auto data_size = initial_layout.GetSizeOfLayout();
// Convert the boost::filesystem::path object into a plain string
// that's stored as a member of this allocator object
rtree_filename = storage.PopulateLayoutWithRTree(*fake_layout);
storage::io::BufferWriter writer;
storage::serialization::write(writer, initial_layout);
auto encoded_layout = writer.GetBuffer();
auto total_size = data_size + encoded_layout.size();
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file, total_size);
std::copy(encoded_layout.begin(), encoded_layout.end(), mapped_memory.data());
index = storage::SharedDataIndex(
{{mapped_memory.data() + encoded_layout.size(), std::move(initial_layout)}});
storage.PopulateStaticData(index);
storage.PopulateUpdatableData(index);
// Now, we add one more AllocatedRegion, with its start address as the start
// of the rtree_filename string we've saved. In the fake_layout, we've
// stated that the data is at offset 0, which is where the string starts
// at its own memory address.
// The syntax &(rtree_filename[0]) gets the memory address of the first char.
// We can't use the convenient `.c_str()` method (it always returns a
// `const char *`) or `.data()` (which also returns a `const char *` prior to
// C++17, which we're not using); neither is compatible with the `char *`
// that AllocatedRegion expects for its memory_ptr
allocated_regions.push_back({&(rtree_filename[0]), std::move(fake_layout)});
}
else
// Gather the static files first, then append the updatable ones to the same list
auto files = storage.GetStaticFiles();
auto updatable_files = storage.GetUpdatableFiles();
files.insert(files.end(), updatable_files.begin(), updatable_files.end());
for (const auto &file : files)
{
mapped_memory = util::mmapFile<char>(memory_file, mapped_memory_file);
storage::DataLayout layout;
storage::io::BufferReader reader(mapped_memory.data(), mapped_memory.size());
storage::serialization::read(reader, layout);
auto layout_size = reader.GetPosition();
index = storage::SharedDataIndex({{mapped_memory.data() + layout_size, std::move(layout)}});
// Files that are not present on disk are skipped: no mapping and no region
if (boost::filesystem::exists(file.second))
{
std::unique_ptr<storage::BaseDataLayout> layout =
std::make_unique<storage::TarDataLayout>();
boost::iostreams::mapped_file mapped_memory_file;
util::mmapFile<char>(file.second, mapped_memory_file);
// NOTE(review): mapped_memory_file goes through std::move() into
// mapped_memory_files here, yet `.data()` is read from the same local two
// statements later. Whether that read is still valid depends on how
// boost::iostreams::mapped_file behaves when moved from - TODO confirm, or
// take the pointer from mapped_memory_files.back() instead.
mapped_memory_files.push_back(std::move(mapped_memory_file));
// Record the blocks found in this file in the per-file layout
storage::populateLayoutFromFile(file.second, *layout);
allocated_regions.push_back({mapped_memory_file.data(), std::move(layout)});
}
}
// Publish every collected region through the allocator's index
index = storage::SharedDataIndex{std::move(allocated_regions)};
}
// Nothing to release by hand: the destructor body is empty, so members are torn
// down by their own destructors.
MMapMemoryAllocator::~MMapMemoryAllocator() = default;
@@ -15,14 +15,20 @@ ProcessMemoryAllocator::ProcessMemoryAllocator(const storage::StorageConfig &con
storage::Storage storage(config);
// Calculate the layout/size of the memory block
storage::DataLayout layout;
storage.PopulateStaticLayout(layout);
storage.PopulateUpdatableLayout(layout);
auto static_files = storage.GetStaticFiles();
auto updatable_files = storage.GetUpdatableFiles();
std::unique_ptr<storage::BaseDataLayout> layout =
std::make_unique<storage::ContiguousDataLayout>();
storage.PopulateLayoutWithRTree(*layout);
storage.PopulateLayout(*layout, static_files);
storage.PopulateLayout(*layout, updatable_files);
// Allocate the memory block, then load data from files into it
internal_memory = std::make_unique<char[]>(layout.GetSizeOfLayout());
internal_memory = std::make_unique<char[]>(layout->GetSizeOfLayout());
index = storage::SharedDataIndex({{internal_memory.get(), std::move(layout)}});
std::vector<storage::SharedDataIndex::AllocatedRegion> regions;
regions.push_back({internal_memory.get(), std::move(layout)});
index = {std::move(regions)};
storage.PopulateStaticData(index);
storage.PopulateUpdatableData(index);
@@ -25,8 +25,9 @@ SharedMemoryAllocator::SharedMemoryAllocator(
auto mem = storage::makeSharedMemory(shm_key);
storage::io::BufferReader reader(reinterpret_cast<char *>(mem->Ptr()), mem->Size());
storage::DataLayout layout;
storage::serialization::read(reader, layout);
std::unique_ptr<storage::BaseDataLayout> layout =
std::make_unique<storage::ContiguousDataLayout>();
storage::serialization::read(reader, *layout);
auto layout_size = reader.GetPosition();
regions.push_back({reinterpret_cast<char *>(mem->Ptr()) + layout_size, std::move(layout)});
+87 -72
View File
@@ -44,24 +44,6 @@ namespace
{
using Monitor = SharedMonitor<SharedRegionRegister>;
/**
 * Opens the tar file at `path` (passing the VerifyFingerprint flag to the reader),
 * lists its entries and registers one Block in `layout` per entry whose name does
 * not contain ".meta". Each block stores the entry's element count and size.
 */
void readBlocks(const boost::filesystem::path &path, DataLayout &layout)
{
    tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);

    std::vector<tar::FileReader::FileEntry> tar_entries;
    reader.List(std::back_inserter(tar_entries));

    for (const auto &tar_entry : tar_entries)
    {
        // Entries with ".meta" anywhere in their name are not registered as blocks
        if (tar_entry.name.rfind(".meta") != std::string::npos)
        {
            continue;
        }

        auto element_count = reader.ReadElementCount64(tar_entry.name);
        layout.SetBlock(tar_entry.name, Block{element_count, tar_entry.size});
    }
}
struct RegionHandle
{
std::unique_ptr<SharedMemory> memory;
@@ -69,7 +51,8 @@ struct RegionHandle
std::uint16_t shm_key;
};
auto setupRegion(SharedRegionRegister &shared_register, const DataLayout &layout)
RegionHandle setupRegion(SharedRegionRegister &shared_register,
const storage::BaseDataLayout &layout)
{
// This is safe because we have an exclusive lock for all osrm-datastore processes.
auto shm_key = shared_register.ReserveKey();
@@ -184,6 +167,24 @@ bool swapData(Monitor &monitor,
}
}
/**
 * Opens the tar file at `path` (passing the VerifyFingerprint flag to the reader),
 * lists its entries and registers one Block in `layout` per entry whose name does
 * not contain ".meta". Each block stores the entry's element count, its size and
 * its offset as reported by the tar listing.
 */
void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout)
{
    tar::FileReader reader(path, tar::FileReader::VerifyFingerprint);

    std::vector<tar::FileReader::FileEntry> tar_entries;
    reader.List(std::back_inserter(tar_entries));

    for (const auto &tar_entry : tar_entries)
    {
        // Entries with ".meta" anywhere in their name are not registered as blocks
        const bool is_meta_entry = tar_entry.name.rfind(".meta") != std::string::npos;
        if (is_meta_entry)
        {
            continue;
        }

        auto element_count = reader.ReadElementCount64(tar_entry.name);
        layout.SetBlock(tar_entry.name, Block{element_count, tar_entry.size, tar_entry.offset});
    }
}
Storage::Storage(StorageConfig config_) : config(std::move(config_)) {}
int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric)
@@ -243,29 +244,35 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
auto static_region = shared_register.GetRegion(region_id);
auto static_memory = makeSharedMemory(static_region.shm_key);
DataLayout static_layout;
std::unique_ptr<storage::BaseDataLayout> static_layout =
std::make_unique<storage::ContiguousDataLayout>();
io::BufferReader reader(reinterpret_cast<char *>(static_memory->Ptr()),
static_memory->Size());
serialization::read(reader, static_layout);
serialization::read(reader, *static_layout);
auto layout_size = reader.GetPosition();
auto *data_ptr = reinterpret_cast<char *>(static_memory->Ptr()) + layout_size;
regions.push_back({data_ptr, static_layout});
regions.push_back({data_ptr, std::move(static_layout)});
readonly_handles.push_back({std::move(static_memory), data_ptr, static_region.shm_key});
}
else
{
DataLayout static_layout;
PopulateStaticLayout(static_layout);
auto static_handle = setupRegion(shared_register, static_layout);
regions.push_back({static_handle.data_ptr, static_layout});
std::unique_ptr<storage::BaseDataLayout> static_layout =
std::make_unique<storage::ContiguousDataLayout>();
Storage::PopulateLayoutWithRTree(*static_layout);
std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetStaticFiles();
Storage::PopulateLayout(*static_layout, files);
auto static_handle = setupRegion(shared_register, *static_layout);
regions.push_back({static_handle.data_ptr, std::move(static_layout)});
handles[dataset_name + "/static"] = std::move(static_handle);
}
DataLayout updatable_layout;
PopulateUpdatableLayout(updatable_layout);
auto updatable_handle = setupRegion(shared_register, updatable_layout);
regions.push_back({updatable_handle.data_ptr, updatable_layout});
std::unique_ptr<storage::BaseDataLayout> updatable_layout =
std::make_unique<storage::ContiguousDataLayout>();
std::vector<std::pair<bool, boost::filesystem::path>> files = Storage::GetUpdatableFiles();
Storage::PopulateLayout(*updatable_layout, files);
auto updatable_handle = setupRegion(shared_register, *updatable_layout);
regions.push_back({updatable_handle.data_ptr, std::move(updatable_layout)});
handles[dataset_name + "/updatable"] = std::move(updatable_handle);
SharedDataIndex index{std::move(regions)};
@@ -281,24 +288,12 @@ int Storage::Run(int max_wait, const std::string &dataset_name, bool only_metric
return EXIT_SUCCESS;
}
/**
* This function examines all our data files and figures out how much
* memory needs to be allocated, and the position of each data structure
* in that big block. It updates the fields in the DataLayout parameter.
*/
void Storage::PopulateStaticLayout(DataLayout &static_layout)
std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetStaticFiles()
{
{
auto absolute_file_index_path =
boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
static_layout.SetBlock("/common/rtree/file_index_path",
make_block<char>(absolute_file_index_path.string().length() + 1));
}
constexpr bool REQUIRED = true;
constexpr bool OPTIONAL = false;
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
std::vector<std::pair<bool, boost::filesystem::path>> files = {
{OPTIONAL, config.GetPath(".osrm.cells")},
{OPTIONAL, config.GetPath(".osrm.partition")},
{REQUIRED, config.GetPath(".osrm.icd")},
@@ -310,53 +305,73 @@ void Storage::PopulateStaticLayout(DataLayout &static_layout)
{REQUIRED, config.GetPath(".osrm.maneuver_overrides")},
{REQUIRED, config.GetPath(".osrm.edges")},
{REQUIRED, config.GetPath(".osrm.names")},
{REQUIRED, config.GetPath(".osrm.ramIndex")},
};
{REQUIRED, config.GetPath(".osrm.ramIndex")}};
for (const auto &file : tar_files)
for (const auto &file : files)
{
if (boost::filesystem::exists(file.second))
if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
{
readBlocks(file.second, static_layout);
}
else
{
if (file.first == REQUIRED)
{
throw util::exception("Could not find required filed: " +
std::get<1>(file).string());
}
throw util::exception("Could not find required filed: " + std::get<1>(file).string());
}
}
return files;
}
// NOTE(review): this span is rendered from a commit diff without +/- markers, so
// lines from both revisions appear together (two signatures, two vector
// declarations, two list terminators and two `for` headers).
//
// GetUpdatableFiles() builds a list of (flag, path) pairs for the updatable data
// files, where the flag is REQUIRED (true) or OPTIONAL (false), and returns it.
// Throws util::exception if a file flagged REQUIRED does not exist on disk.
void Storage::PopulateUpdatableLayout(DataLayout &updatable_layout)
std::vector<std::pair<bool, boost::filesystem::path>> Storage::GetUpdatableFiles()
{
// Named values for the bool stored as the first element of each pair
constexpr bool REQUIRED = true;
constexpr bool OPTIONAL = false;
std::vector<std::pair<bool, boost::filesystem::path>> tar_files = {
std::vector<std::pair<bool, boost::filesystem::path>> files = {
{OPTIONAL, config.GetPath(".osrm.mldgr")},
{OPTIONAL, config.GetPath(".osrm.cell_metrics")},
{OPTIONAL, config.GetPath(".osrm.hsgr")},
{REQUIRED, config.GetPath(".osrm.datasource_names")},
{REQUIRED, config.GetPath(".osrm.geometry")},
{REQUIRED, config.GetPath(".osrm.turn_weight_penalties")},
{REQUIRED, config.GetPath(".osrm.turn_duration_penalties")},
};
{REQUIRED, config.GetPath(".osrm.turn_duration_penalties")}};
for (const auto &file : tar_files)
for (const auto &file : files)
{
// Only REQUIRED entries are checked here; a missing OPTIONAL file is not an error
if (file.first == REQUIRED && !boost::filesystem::exists(file.second))
{
// NOTE(review): "filed" in the message below looks like a typo for "file"
throw util::exception("Could not find required filed: " + std::get<1>(file).string());
}
}
return files;
}
/**
 * Registers the "/common/rtree/file_index_path" block in `layout` and returns the
 * absolute path of the ".osrm.fileIndex" file as a plain string, so that the
 * caller (e.g. an allocator object) can keep it as a member.
 */
std::string Storage::PopulateLayoutWithRTree(storage::BaseDataLayout &layout)
{
    // The rtree file is not a tar file, so its path is resolved directly
    // from the config and made absolute.
    const auto file_index_path = boost::filesystem::absolute(config.GetPath(".osrm.fileIndex"));
    std::string rtree_path = file_index_path.string();

    // The special file_index_path block name is hardcoded here. The block is
    // sized as the path length plus one extra char.
    // NOTE(review): the original author stresses that this block's "offset" is
    // zero; that comes from make_block, which is not visible here - confirm.
    layout.SetBlock("/common/rtree/file_index_path", make_block<char>(rtree_path.length() + 1));

    return rtree_path;
}
/**
* Walks `files` and, for every path that exists on disk, records that file's
* blocks in the `layout` parameter via populateLayoutFromFile.
*
* `files` holds (flag, path) pairs; `file.second` is the path that is checked
* and read.
*
* NOTE(review): this span is rendered from a commit diff without +/- markers, so
* the readBlocks call and the else/REQUIRED branch below appear alongside the
* populateLayoutFromFile call even though they reference names
* (`updatable_layout`, `REQUIRED`) that this function does not declare.
*/
void Storage::PopulateLayout(storage::BaseDataLayout &layout,
const std::vector<std::pair<bool, boost::filesystem::path>> &files)
{
for (const auto &file : files)
{
// Paths that do not exist on disk contribute nothing to the layout
if (boost::filesystem::exists(file.second))
{
readBlocks(file.second, updatable_layout);
}
else
{
if (file.first == REQUIRED)
{
// NOTE(review): "filed" in the message below looks like a typo for "file"
throw util::exception("Could not find required filed: " +
std::get<1>(file).string());
}
populateLayoutFromFile(file.second, layout);
}
}
}
+4 -4
View File
@@ -52,14 +52,14 @@ void listRegions(bool show_blocks)
auto memory = makeSharedMemory(region.shm_key);
io::BufferReader reader(reinterpret_cast<char *>(memory->Ptr()), memory->Size());
DataLayout layout;
serialization::read(reader, layout);
std::unique_ptr<BaseDataLayout> layout = std::make_unique<ContiguousDataLayout>();
serialization::read(reader, *layout);
std::vector<std::string> block_names;
layout.List("", std::back_inserter(block_names));
layout->List("", std::back_inserter(block_names));
for (auto &name : block_names)
{
osrm::util::Log() << " " << name << " " << layout.GetBlockSize(name);
osrm::util::Log() << " " << name << " " << layout->GetBlockSize(name);
}
}
}