mmap tarfiles directly when mmapping is enabled, instead of copying data into separate mmapped block

Co-authored-by: Kajari Ghosh <ghoshkaj@gmail.com>
This commit is contained in:
Daniel Patterson
2018-10-26 23:48:51 -07:00
parent d80318f8ea
commit 2f9cb44368
14 changed files with 309 additions and 219 deletions
@@ -10,6 +10,7 @@
#include <boost/iostreams/device/mapped_file.hpp>
#include <memory>
#include <string>
namespace osrm
{
@@ -24,8 +25,7 @@ namespace datafacade
class MMapMemoryAllocator : public ContiguousBlockAllocator
{
public:
explicit MMapMemoryAllocator(const storage::StorageConfig &config,
const boost::filesystem::path &memory_file);
explicit MMapMemoryAllocator(const storage::StorageConfig &config);
~MMapMemoryAllocator() override final;
// interface to give access to the datafacades
@@ -33,8 +33,8 @@ class MMapMemoryAllocator : public ContiguousBlockAllocator
private:
storage::SharedDataIndex index;
util::vector_view<char> mapped_memory;
boost::iostreams::mapped_file mapped_memory_file;
std::vector<boost::iostreams::mapped_file> mapped_memory_files;
std::string rtree_filename;
};
} // namespace datafacade
+2 -3
View File
@@ -32,9 +32,8 @@ class ExternalProvider final : public DataFacadeProvider<AlgorithmT, FacadeT>
public:
using Facade = typename DataFacadeProvider<AlgorithmT, FacadeT>::Facade;
ExternalProvider(const storage::StorageConfig &config,
const boost::filesystem::path &memory_file)
: facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config, memory_file))
ExternalProvider(const storage::StorageConfig &config)
: facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config))
{
}
+8 -3
View File
@@ -16,10 +16,15 @@ struct Block
{
std::uint64_t num_entries;
std::uint64_t byte_size;
std::uint64_t offset;
Block() : num_entries(0), byte_size(0) {}
Block() : num_entries(0), byte_size(0), offset(0) {}
Block(std::uint64_t num_entries, std::uint64_t byte_size, std::uint64_t offset)
: num_entries(num_entries), byte_size(byte_size), offset(offset)
{
}
Block(std::uint64_t num_entries, std::uint64_t byte_size)
: num_entries(num_entries), byte_size(byte_size)
: num_entries(num_entries), byte_size(byte_size), offset(0)
{
}
};
@@ -29,7 +34,7 @@ using NamedBlock = std::tuple<std::string, Block>;
template <typename T> Block make_block(uint64_t num_entries)
{
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
return Block{num_entries, sizeof(T) * num_entries};
return Block{num_entries, sizeof(T) * num_entries, 0};
}
}
}
+2 -2
View File
@@ -284,9 +284,9 @@ template <typename K, typename V> void write(io::BufferWriter &writer, const std
}
}
inline void read(io::BufferReader &reader, DataLayout &layout) { read(reader, layout.blocks); }
inline void read(io::BufferReader &reader, BaseDataLayout &layout) { read(reader, layout.blocks); }
inline void write(io::BufferWriter &writer, const DataLayout &layout)
inline void write(io::BufferWriter &writer, const BaseDataLayout &layout)
{
write(writer, layout.blocks);
}
+24 -11
View File
@@ -5,6 +5,7 @@
#include <boost/function_output_iterator.hpp>
#include <type_traits>
#include <unordered_map>
namespace osrm
@@ -19,8 +20,8 @@ class SharedDataIndex
public:
struct AllocatedRegion
{
char *memory_ptr;
DataLayout layout;
void *memory_ptr;
std::unique_ptr<BaseDataLayout> layout;
};
SharedDataIndex() = default;
@@ -29,10 +30,10 @@ class SharedDataIndex
// Build mapping from block name to region
for (auto index : util::irange<std::uint32_t>(0, regions.size()))
{
regions[index].layout.List("",
boost::make_function_output_iterator([&](const auto &name) {
block_to_region[name] = index;
}));
regions[index].layout->List("",
boost::make_function_output_iterator([&](const auto &name) {
block_to_region[name] = index;
}));
}
}
@@ -40,32 +41,44 @@ class SharedDataIndex
{
for (const auto &region : regions)
{
region.layout.List(name_prefix, out);
region.layout->List(name_prefix, out);
}
}
template <typename T> auto GetBlockPtr(const std::string &name) const
{
#if !defined(__GNUC__) || (__GNUC__ > 4)
// is_tivially_copyable only exists in GCC >=5
static_assert(std::is_trivially_copyable<T>::value,
"Block-based data must be a trivially copyable type");
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
#endif
const auto &region = GetBlockRegion(name);
return region.layout.GetBlockPtr<T>(region.memory_ptr, name);
return reinterpret_cast<T *>(region.layout->GetBlockPtr(region.memory_ptr, name));
}
template <typename T> auto GetBlockPtr(const std::string &name)
{
#if !defined(__GNUC__) || (__GNUC__ > 4)
// is_tivially_copyable only exists in GCC >=5
static_assert(std::is_trivially_copyable<T>::value,
"Block-based data must be a trivially copyable type");
static_assert(sizeof(T) % alignof(T) == 0, "aligned T* can't be used as an array pointer");
#endif
const auto &region = GetBlockRegion(name);
return region.layout.GetBlockPtr<T>(region.memory_ptr, name);
return reinterpret_cast<T *>(region.layout->GetBlockPtr(region.memory_ptr, name));
}
std::size_t GetBlockEntries(const std::string &name) const
{
const auto &region = GetBlockRegion(name);
return region.layout.GetBlockEntries(name);
return region.layout->GetBlockEntries(name);
}
std::size_t GetBlockSize(const std::string &name) const
{
const auto &region = GetBlockRegion(name);
return region.layout.GetBlockSize(name);
return region.layout->GetBlockSize(name);
}
private:
+67 -31
View File
@@ -20,12 +20,12 @@ namespace osrm
namespace storage
{
class DataLayout;
class BaseDataLayout;
namespace serialization
{
inline void read(io::BufferReader &reader, DataLayout &layout);
inline void read(io::BufferReader &reader, BaseDataLayout &layout);
inline void write(io::BufferWriter &writer, const DataLayout &layout);
inline void write(io::BufferWriter &writer, const BaseDataLayout &layout);
} // namespace serialization
namespace detail
@@ -54,44 +54,28 @@ inline std::string trimName(const std::string &name_prefix, const std::string &n
}
} // namespace detail
class DataLayout
class BaseDataLayout
{
public:
DataLayout() : blocks{} {}
virtual ~BaseDataLayout() = default;
inline void SetBlock(const std::string &name, Block block) { blocks[name] = std::move(block); }
inline uint64_t GetBlockEntries(const std::string &name) const
inline std::uint64_t GetBlockEntries(const std::string &name) const
{
return GetBlock(name).num_entries;
}
inline uint64_t GetBlockSize(const std::string &name) const { return GetBlock(name).byte_size; }
inline std::uint64_t GetBlockSize(const std::string &name) const
{
return GetBlock(name).byte_size;
}
inline bool HasBlock(const std::string &name) const
{
return blocks.find(name) != blocks.end();
}
inline uint64_t GetSizeOfLayout() const
{
uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT;
}
return result;
}
template <typename T> inline T *GetBlockPtr(char *shared_memory, const std::string &name) const
{
static_assert(BLOCK_ALIGNMENT % std::alignment_of<T>::value == 0,
"Datatype does not fit alignment constraints.");
char *ptr = (char *)GetAlignedBlockPtr(shared_memory, name);
return (T *)ptr;
}
// Depending on the name prefix this function either lists all blocks with the same prefix
// or all entries in the sub-directory.
// '/ch/edge' -> '/ch/edge_filter/0/blocks', '/ch/edge_filter/1/blocks'
@@ -115,10 +99,10 @@ class DataLayout
}
}
private:
friend void serialization::read(io::BufferReader &reader, DataLayout &layout);
friend void serialization::write(io::BufferWriter &writer, const DataLayout &layout);
virtual inline void *GetBlockPtr(void *base_ptr, const std::string &name) const = 0;
virtual inline std::uint64_t GetSizeOfLayout() const = 0;
protected:
const Block &GetBlock(const std::string &name) const
{
auto iter = blocks.find(name);
@@ -130,10 +114,42 @@ class DataLayout
return iter->second;
}
friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout);
friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout);
std::map<std::string, Block> blocks;
};
class ContiguousDataLayout final : public BaseDataLayout
{
public:
inline std::uint64_t GetSizeOfLayout() const override final
{
std::uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first) + BLOCK_ALIGNMENT;
}
return result;
}
inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final
{
// TODO: re-enable this alignment checking somehow
// static_assert(BLOCK_ALIGNMENT % std::alignment_of<T>::value == 0,
// "Datatype does not fit alignment constraints.");
return GetAlignedBlockPtr(base_ptr, name);
}
private:
friend void serialization::read(io::BufferReader &reader, BaseDataLayout &layout);
friend void serialization::write(io::BufferWriter &writer, const BaseDataLayout &layout);
// Fit aligned storage in buffer to 64 bytes to conform with AVX 512 types
inline void *align(void *&ptr) const noexcept
{
const auto intptr = reinterpret_cast<uintptr_t>(ptr);
const auto intptr = reinterpret_cast<std::uintptr_t>(ptr);
const auto aligned = (intptr - 1u + BLOCK_ALIGNMENT) & -BLOCK_ALIGNMENT;
return ptr = reinterpret_cast<void *>(aligned);
}
@@ -157,7 +173,27 @@ class DataLayout
}
static constexpr std::size_t BLOCK_ALIGNMENT = 64;
std::map<std::string, Block> blocks;
};
class TarDataLayout final : public BaseDataLayout
{
public:
inline std::uint64_t GetSizeOfLayout() const override final
{
std::uint64_t result = 0;
for (const auto &name_and_block : blocks)
{
result += GetBlockSize(name_and_block.first);
}
return result;
}
inline void *GetBlockPtr(void *base_ptr, const std::string &name) const override final
{
auto offset = GetBlock(name).offset;
const auto offset_address = reinterpret_cast<std::uintptr_t>(base_ptr) + offset;
return reinterpret_cast<void *>(offset_address);
}
};
struct SharedRegion
+9 -3
View File
@@ -35,22 +35,28 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <boost/filesystem/path.hpp>
#include <string>
#include <vector>
namespace osrm
{
namespace storage
{
void populateLayoutFromFile(const boost::filesystem::path &path, storage::BaseDataLayout &layout);
class Storage
{
public:
Storage(StorageConfig config);
int Run(int max_wait, const std::string &name, bool only_metric);
void PopulateStaticLayout(DataLayout &layout);
void PopulateUpdatableLayout(DataLayout &layout);
void PopulateStaticData(const SharedDataIndex &index);
void PopulateUpdatableData(const SharedDataIndex &index);
void PopulateLayout(storage::BaseDataLayout &layout,
const std::vector<std::pair<bool, boost::filesystem::path>> &files);
std::string PopulateLayoutWithRTree(storage::BaseDataLayout &layout);
std::vector<std::pair<bool, boost::filesystem::path>> GetUpdatableFiles();
std::vector<std::pair<bool, boost::filesystem::path>> GetStaticFiles();
private:
StorageConfig config;
+17 -16
View File
@@ -15,14 +15,14 @@ namespace util
namespace detail
{
template <typename T, typename RegionT>
util::vector_view<T> mmapFile(const boost::filesystem::path &file, RegionT &region)
template <typename T, typename MmapContainerT>
util::vector_view<T> mmapFile(const boost::filesystem::path &file, MmapContainerT &mmap_container)
{
try
{
region.open(file);
std::size_t num_objects = region.size() / sizeof(T);
auto data_ptr = region.data();
mmap_container.open(file);
std::size_t num_objects = mmap_container.size() / sizeof(T);
auto data_ptr = mmap_container.data();
BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0);
return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects);
}
@@ -34,9 +34,10 @@ util::vector_view<T> mmapFile(const boost::filesystem::path &file, RegionT &regi
}
}
template <typename T, typename RegionT>
util::vector_view<T>
mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t size)
template <typename T, typename MmapContainerT>
util::vector_view<T> mmapFile(const boost::filesystem::path &file,
MmapContainerT &mmap_container,
const std::size_t size)
{
try
{
@@ -45,10 +46,10 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t
params.path = file.string();
params.flags = boost::iostreams::mapped_file::readwrite;
params.new_file_size = size;
region.open(params);
mmap_container.open(params);
std::size_t num_objects = size / sizeof(T);
auto data_ptr = region.data();
auto data_ptr = mmap_container.data();
BOOST_ASSERT(reinterpret_cast<uintptr_t>(data_ptr) % alignof(T) == 0);
return util::vector_view<T>(reinterpret_cast<T *>(data_ptr), num_objects);
}
@@ -63,24 +64,24 @@ mmapFile(const boost::filesystem::path &file, RegionT &region, const std::size_t
template <typename T>
util::vector_view<const T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file_source &region)
boost::iostreams::mapped_file_source &mmap_container)
{
return detail::mmapFile<const T>(file, region);
return detail::mmapFile<const T>(file, mmap_container);
}
template <typename T>
util::vector_view<T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file &region)
boost::iostreams::mapped_file &mmap_container)
{
return detail::mmapFile<T>(file, region);
return detail::mmapFile<T>(file, mmap_container);
}
template <typename T>
util::vector_view<T> mmapFile(const boost::filesystem::path &file,
boost::iostreams::mapped_file &region,
boost::iostreams::mapped_file &mmap_container,
std::size_t size)
{
return detail::mmapFile<T>(file, region, size);
return detail::mmapFile<T>(file, mmap_container, size);
}
}
}