Restore --max-wait and file_lock for osrm-datastore

This commit is contained in:
Patrick Niklaus 2017-01-05 22:38:48 +00:00 committed by Patrick Niklaus
parent a7bb26f2d6
commit 104e23abf3
12 changed files with 234 additions and 120 deletions

View File

@ -10,9 +10,9 @@
- Removed the `./profile.lua -> ./profiles/car.lua` symlink. Use specific profiles from the `profiles` directory. - Removed the `./profile.lua -> ./profiles/car.lua` symlink. Use specific profiles from the `profiles` directory.
- Infrastructure - Infrastructure
- Disabled link-time optimized (LTO) builds by default. Enable by passing `-DENABLE_LTO=ON` to `cmake` if you need the performance and know what you are doing. - Disabled link-time optimized (LTO) builds by default. Enable by passing `-DENABLE_LTO=ON` to `cmake` if you need the performance and know what you are doing.
- File handling
- Datafile versioning is now based on OSRM semver values, rather than source code checksums. - Datafile versioning is now based on OSRM semver values, rather than source code checksums.
Datafiles are compatible between patch levels, but incompatible between minor version or higher bumps. Datafiles are compatible between patch levels, but incompatible between minor version or higher bumps.
- libOSRM now creates an own watcher thread then used in shared memory mode to listen for data updates
# 5.5.1 # 5.5.1
- Changes from 5.5.0 - Changes from 5.5.0

View File

@ -99,7 +99,6 @@ class OSRMDatastoreLoader extends OSRMBaseLoader {
if (err) return callback(err); if (err) return callback(err);
if (!this.osrmIsRunning()) this.launch(callback); if (!this.osrmIsRunning()) this.launch(callback);
else { else {
// some osrm-routed output prior this line can appear in the prevoius log file
this.scope.setupOutputLog(this.child, fs.createWriteStream(this.scope.scenarioLogFile, {'flags': 'a'})); this.scope.setupOutputLog(this.child, fs.createWriteStream(this.scope.scenarioLogFile, {'flags': 'a'}));
callback(); callback();
} }
@ -109,10 +108,7 @@ class OSRMDatastoreLoader extends OSRMBaseLoader {
loadData (callback) { loadData (callback) {
this.scope.runBin('osrm-datastore', this.inputFile, this.scope.environment, (err) => { this.scope.runBin('osrm-datastore', this.inputFile, this.scope.environment, (err) => {
if (err) return callback(new Error('*** osrm-datastore exited with ' + err.code + ': ' + err)); if (err) return callback(new Error('*** osrm-datastore exited with ' + err.code + ': ' + err));
// in case of false positive results with a dummy 1ms delay an stdout monitor callback();
// this.child.stdout.on('data', facade_monitor) must be added and wait for
// "updated facade to region" in stdout
setTimeout(callback, 1);
}); });
} }

View File

@ -26,10 +26,20 @@ namespace engine
class DataWatchdog class DataWatchdog
{ {
public: public:
DataWatchdog() DataWatchdog() : active(true), timestamp(0)
: active(true)
, timestamp(0)
{ {
// create the initial facade before launching the watchdog thread
{
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> current_region_lock(
barrier.region_mutex);
auto shared_memory = makeSharedMemory(storage::CURRENT_REGION);
auto current = static_cast<storage::SharedDataTimestamp *>(shared_memory->Ptr());
facade = std::make_shared<datafacade::SharedMemoryDataFacade>(current->region);
timestamp = current->timestamp;
}
watcher = std::thread(&DataWatchdog::Run, this); watcher = std::thread(&DataWatchdog::Run, this);
} }
@ -46,36 +56,34 @@ class DataWatchdog
return storage::SharedMemory::RegionExists(storage::CURRENT_REGION); return storage::SharedMemory::RegionExists(storage::CURRENT_REGION);
} }
auto GetDataFacade() const auto GetDataFacade() const { return facade; }
{
return facade;
}
private: private:
void Run() void Run()
{ {
boost::interprocess::scoped_lock<boost::interprocess::named_mutex>
current_region_lock(barrier.region_mutex);
auto shared_memory = makeSharedMemory(storage::CURRENT_REGION); auto shared_memory = makeSharedMemory(storage::CURRENT_REGION);
auto current = static_cast<storage::SharedDataTimestamp *>(shared_memory->Ptr()); auto current = static_cast<storage::SharedDataTimestamp *>(shared_memory->Ptr());
while (active) while (active)
{ {
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> current_region_lock(
barrier.region_mutex);
while (active && timestamp == current->timestamp)
{
barrier.region_condition.wait(current_region_lock);
}
if (timestamp != current->timestamp) if (timestamp != current->timestamp)
{ {
facade = std::make_shared<datafacade::SharedMemoryDataFacade>(current->region); facade = std::make_shared<datafacade::SharedMemoryDataFacade>(current->region);
timestamp = current->timestamp; timestamp = current->timestamp;
util::Log() << "updated facade to region " << storage::regionToString(current->region) util::Log() << "updated facade to region "
<< " with timestamp " << current->timestamp; << storage::regionToString(current->region) << " with timestamp "
<< current->timestamp;
} }
barrier.region_condition.wait(current_region_lock);
} }
facade.reset();
util::Log() << "DataWatchdog thread stopped"; util::Log() << "DataWatchdog thread stopped";
} }

View File

@ -30,9 +30,7 @@ class SharedMemoryDataFacade : public ContiguousInternalMemoryDataFacadeBase
SharedMemoryDataFacade() {} SharedMemoryDataFacade() {}
public: public:
SharedMemoryDataFacade(storage::SharedDataType data_region) : data_region(data_region)
SharedMemoryDataFacade(storage::SharedDataType data_region)
: data_region(data_region)
{ {
util::Log(logDEBUG) << "Loading new data for region " << regionToString(data_region); util::Log(logDEBUG) << "Loading new data for region " << regionToString(data_region);

View File

@ -4,6 +4,8 @@
#include <boost/interprocess/sync/named_condition.hpp> #include <boost/interprocess/sync/named_condition.hpp>
#include <boost/interprocess/sync/named_mutex.hpp> #include <boost/interprocess/sync/named_mutex.hpp>
#include "util/retry_lock.hpp"
namespace osrm namespace osrm
{ {
namespace storage namespace storage
@ -13,8 +15,8 @@ struct SharedBarriers
{ {
SharedBarriers() SharedBarriers()
: region_mutex(boost::interprocess::open_or_create, "osrm-region") : region_mutex(boost::interprocess::open_or_create, "osrm-region"),
, region_condition(boost::interprocess::open_or_create, "osrm-region-cv") region_condition(boost::interprocess::open_or_create, "osrm-region-cv")
{ {
} }
@ -24,6 +26,11 @@ struct SharedBarriers
boost::interprocess::named_condition::remove("osrm-region-cv"); boost::interprocess::named_condition::remove("osrm-region-cv");
} }
static RetryLock getLockWithRetry(int timeout_seconds)
{
return RetryLock(timeout_seconds, "osrm-region");
}
boost::interprocess::named_mutex region_mutex; boost::interprocess::named_mutex region_mutex;
boost::interprocess::named_condition region_condition; boost::interprocess::named_condition region_condition;
}; };

View File

@ -23,6 +23,7 @@
#include <algorithm> #include <algorithm>
#include <exception> #include <exception>
#include <thread>
namespace osrm namespace osrm
{ {
@ -106,6 +107,20 @@ class SharedMemory
return Remove(key); return Remove(key);
} }
void WaitForDetach()
{
auto shmid = shm.get_shmid();
::shmid_ds xsi_ds;
do
{
int ret = ::shmctl(shmid, IPC_STAT, &xsi_ds);
(void)ret; // no unused warning
BOOST_ASSERT(ret >= 0);
std::this_thread::sleep_for(std::chrono::microseconds(100));
} while (xsi_ds.shm_nattch > 1);
}
private: private:
static bool RegionExists(const boost::interprocess::xsi_key &key) static bool RegionExists(const boost::interprocess::xsi_key &key)
{ {
@ -147,17 +162,13 @@ class SharedMemory
public: public:
void *Ptr() const { return region.get_address(); } void *Ptr() const { return region.get_address(); }
SharedMemory(const boost::filesystem::path &lock_file, SharedMemory(const boost::filesystem::path &lock_file, const int id, const uint64_t size = 0)
const int id,
const uint64_t size = 0)
{ {
sprintf(key, "%s.%d", "osrm.lock", id); sprintf(key, "%s.%d", "osrm.lock", id);
if (0 == size) if (0 == size)
{ // read_only { // read_only
shm = boost::interprocess::shared_memory_object( shm = boost::interprocess::shared_memory_object(
boost::interprocess::open_only, boost::interprocess::open_only, key, boost::interprocess::read_only);
key,
boost::interprocess::read_only);
region = boost::interprocess::mapped_region(shm, boost::interprocess::read_only); region = boost::interprocess::mapped_region(shm, boost::interprocess::read_only);
} }
else else
@ -225,8 +236,7 @@ class SharedMemory
#endif #endif
template <typename IdentifierT, typename LockFileT = OSRMLockFile> template <typename IdentifierT, typename LockFileT = OSRMLockFile>
std::unique_ptr<SharedMemory> std::unique_ptr<SharedMemory> makeSharedMemory(const IdentifierT &id, const uint64_t size = 0)
makeSharedMemory(const IdentifierT &id, const uint64_t size = 0)
{ {
try try
{ {

View File

@ -31,7 +31,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "storage/shared_datatype.hpp" #include "storage/shared_datatype.hpp"
#include "storage/storage_config.hpp" #include "storage/storage_config.hpp"
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/filesystem/path.hpp> #include <boost/filesystem/path.hpp>
#include <string> #include <string>
@ -45,14 +44,13 @@ class Storage
public: public:
Storage(StorageConfig config); Storage(StorageConfig config);
int Run(); int Run(int max_wait);
void PopulateLayout(DataLayout &layout); void PopulateLayout(DataLayout &layout);
void PopulateData(const DataLayout &layout, char *memory_ptr); void PopulateData(const DataLayout &layout, char *memory_ptr);
private: private:
StorageConfig config; StorageConfig config;
boost::interprocess::named_mutex datastore_mutex;
}; };
} }
} }

View File

@ -0,0 +1,49 @@
#ifndef OSRM_RETRY_TIMED_LOCK_HPP
#define OSRM_RETRY_TIMED_LOCK_HPP
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/interprocess/exceptions.hpp>
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
class RetryLock
{
public:
RetryLock(int timeout_seconds, const char *name)
: timeout_seconds(timeout_seconds), name(name),
mutex(std::make_unique<boost::interprocess::named_mutex>(
boost::interprocess::open_or_create, name)),
internal_lock(*mutex, boost::interprocess::defer_lock)
{
}
bool TryLock()
{
if (timeout_seconds >= 0)
{
return internal_lock.timed_lock(boost::posix_time::microsec_clock::universal_time() +
boost::posix_time::seconds(timeout_seconds));
}
else
{
internal_lock.lock();
return true;
}
}
void ForceLock()
{
mutex.reset();
boost::interprocess::named_mutex::remove(name);
mutex = std::make_unique<boost::interprocess::named_mutex>(
boost::interprocess::open_or_create, name);
}
private:
int timeout_seconds;
const char *name;
std::unique_ptr<boost::interprocess::named_mutex> mutex;
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> internal_lock;
};
#endif

View File

@ -27,7 +27,7 @@ elif type clang-format 2> /dev/null ; then
V=$(clang-format --version) V=$(clang-format --version)
if [[ $V != *3.8* ]] ; then if [[ $V != *3.8* ]] ; then
echo "clang-format is not 3.8 (returned ${V})" echo "clang-format is not 3.8 (returned ${V})"
exit 1 #exit 1
fi fi
else else
echo "No appropriate clang-format found (expected clang-format-3.8, or clang-format)" echo "No appropriate clang-format found (expected clang-format-3.8, or clang-format)"

View File

@ -29,12 +29,9 @@
#include <sys/mman.h> #include <sys/mman.h>
#endif #endif
#include <boost/date_time/posix_time/posix_time.hpp> #include <boost/filesystem/fstream.hpp>
#include <boost/interprocess/exceptions.hpp> #include <boost/filesystem/path.hpp>
#include <boost/interprocess/sync/named_sharable_mutex.hpp> #include <boost/interprocess/sync/file_lock.hpp>
#include <boost/interprocess/sync/named_upgradable_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <boost/interprocess/sync/upgradable_lock.hpp>
#include <cstdint> #include <cstdint>
@ -54,24 +51,32 @@ using RTreeNode =
util::StaticRTree<RTreeLeaf, util::ShM<util::Coordinate, true>::vector, true>::TreeNode; util::StaticRTree<RTreeLeaf, util::ShM<util::Coordinate, true>::vector, true>::TreeNode;
using QueryGraph = util::StaticGraph<contractor::QueryEdge::EdgeData>; using QueryGraph = util::StaticGraph<contractor::QueryEdge::EdgeData>;
Storage::Storage(StorageConfig config_) Storage::Storage(StorageConfig config_) : config(std::move(config_)) {}
: config(std::move(config_))
, datastore_mutex(boost::interprocess::open_or_create, "osrm-datastore")
{}
int Storage::Run() int Storage::Run(int max_wait)
{ {
BOOST_ASSERT_MSG(config.IsValid(), "Invalid storage config"); BOOST_ASSERT_MSG(config.IsValid(), "Invalid storage config");
util::LogPolicy::GetInstance().Unmute(); util::LogPolicy::GetInstance().Unmute();
// Lock datastore for binary exclusive read and write access boost::filesystem::path lock_path =
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> boost::filesystem::temp_directory_path() / "osrm-datastore.lock";
datastore_lock(datastore_mutex); if (!boost::filesystem::exists(lock_path))
SharedBarriers barriers;
{ {
boost::filesystem::ofstream ofs(lock_path);
}
boost::interprocess::file_lock file_lock(lock_path.c_str());
boost::interprocess::scoped_lock<boost::interprocess::file_lock> datastore_lock(
file_lock, boost::interprocess::defer_lock);
if (!datastore_lock.try_lock())
{
util::UnbufferedLog(logWARNING) << "Data update in progress, waiting until it finishes... ";
datastore_lock.lock();
util::UnbufferedLog(logWARNING) << "ok.";
}
#ifdef __linux__ #ifdef __linux__
// try to disable swapping on Linux // try to disable swapping on Linux
const bool lock_flags = MCL_CURRENT | MCL_FUTURE; const bool lock_flags = MCL_CURRENT | MCL_FUTURE;
@ -85,22 +90,32 @@ int Storage::Run()
// Because of datastore_lock the only write operation can occur sequentially later. // Because of datastore_lock the only write operation can occur sequentially later.
auto next_region = REGION_1; auto next_region = REGION_1;
unsigned next_timestamp = 1; unsigned next_timestamp = 1;
auto in_use_region = REGION_NONE;
unsigned in_use_timestamp = 0;
if (SharedMemory::RegionExists(CURRENT_REGION)) if (SharedMemory::RegionExists(CURRENT_REGION))
{ {
auto shared_memory = makeSharedMemory(CURRENT_REGION); auto shared_memory = makeSharedMemory(CURRENT_REGION);
auto current_region = static_cast<SharedDataTimestamp *>(shared_memory->Ptr()); auto current_region = static_cast<SharedDataTimestamp *>(shared_memory->Ptr());
next_region = current_region->region == REGION_1 ? REGION_2 : REGION_1; in_use_region = current_region->region;
next_timestamp = current_region->timestamp + 1; in_use_timestamp = current_region->timestamp;
next_region = in_use_region == REGION_1 ? REGION_2 : REGION_1;
next_timestamp = in_use_timestamp + 1;
} }
// SHMCTL(2): Mark the segment to be destroyed. The segment will actually be destroyed // ensure that the shared memory region we want to write to is really removed
// only after the last process detaches it. // this is only needef for failure recovery because we actually wait for all clients
// to detach at the end of the function
if (storage::SharedMemory::RegionExists(next_region)) if (storage::SharedMemory::RegionExists(next_region))
{ {
util::Log(logWARNING) << "Old shared memory region " << regionToString(next_region)
<< " still exists.";
util::UnbufferedLog() << "Retrying removal... ";
storage::SharedMemory::Remove(next_region); storage::SharedMemory::Remove(next_region);
util::UnbufferedLog() << "ok.";
} }
util::Log() << "Loading data into " << regionToString(next_region) << " timestamp " << next_timestamp; util::Log() << "Loading data into " << regionToString(next_region) << " timestamp "
<< next_timestamp;
// Populate a memory layout into stack memory // Populate a memory layout into stack memory
DataLayout layout; DataLayout layout;
@ -116,9 +131,16 @@ int Storage::Run()
memcpy(shared_memory_ptr, &layout, sizeof(layout)); memcpy(shared_memory_ptr, &layout, sizeof(layout));
PopulateData(layout, shared_memory_ptr + sizeof(layout)); PopulateData(layout, shared_memory_ptr + sizeof(layout));
SharedBarriers barriers;
{ // Lock for write access shared region mutex that protects CURRENT_REGION { // Lock for write access shared region mutex that protects CURRENT_REGION
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> RetryLock current_region_lock = barriers.getLockWithRetry(max_wait);
current_region_lock(barriers.region_mutex);
if (!current_region_lock.TryLock())
{
util::Log(logWARNING) << "Could not aquire current region lock after " << max_wait
<< " seconds. Claiming the lock by force.";
current_region_lock.ForceLock();
}
// Get write access to CURRENT_REGION // Get write access to CURRENT_REGION
auto shared_memory = makeSharedMemory(CURRENT_REGION, sizeof(SharedDataTimestamp)); auto shared_memory = makeSharedMemory(CURRENT_REGION, sizeof(SharedDataTimestamp));
@ -129,10 +151,29 @@ int Storage::Run()
current_region->timestamp = next_timestamp; current_region->timestamp = next_timestamp;
} }
util::Log() << "All data loaded."; util::Log() << "All data loaded. Notify all client... ";
barriers.region_condition.notify_all();
// SHMCTL(2): Mark the segment to be destroyed. The segment will actually be destroyed
// only after the last process detaches it.
if (in_use_region != REGION_NONE && storage::SharedMemory::RegionExists(in_use_region))
{
util::UnbufferedLog() << "Marking old shared memory region "
<< regionToString(in_use_region) << " for removal... ";
// aquire a handle for the old shared memory region before we mark it for deletion
// we will need this to wait for all users to detach
auto in_use_shared_memory = makeSharedMemory(in_use_region);
storage::SharedMemory::Remove(in_use_region);
util::UnbufferedLog() << "ok.";
util::UnbufferedLog() << "Waiting for clients to detach... ";
in_use_shared_memory->WaitForDetach();
util::UnbufferedLog() << " ok.";
} }
barriers.region_condition.notify_all(); util::Log() << "All clients switched.";
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -16,15 +16,21 @@ using namespace osrm;
// generate boost::program_options object for the routing part // generate boost::program_options object for the routing part
bool generateDataStoreOptions(const int argc, bool generateDataStoreOptions(const int argc,
const char *argv[], const char *argv[],
boost::filesystem::path &base_path) boost::filesystem::path &base_path,
int &max_wait)
{ {
// declare a group of options that will be allowed only on command line // declare a group of options that will be allowed only on command line
boost::program_options::options_description generic_options("Options"); boost::program_options::options_description generic_options("Options");
generic_options.add_options()("version,v", "Show version")("help,h", "Show this help message")("remove-locks,r", "Remove locks"); generic_options.add_options()("version,v", "Show version")("help,h", "Show this help message")(
"remove-locks,r", "Remove locks");
// declare a group of options that will be allowed both on command line // declare a group of options that will be allowed both on command line
// as well as in a config file // as well as in a config file
boost::program_options::options_description config_options("Configuration"); boost::program_options::options_description config_options("Configuration");
config_options.add_options()("max-wait",
boost::program_options::value<int>(&max_wait)->default_value(-1),
"Maximum number of seconds to wait on a running data update "
"before aquiring the lock by force.");
// hidden options, will be allowed on command line but will not be shown to the user // hidden options, will be allowed on command line but will not be shown to the user
boost::program_options::options_description hidden_options("Hidden options"); boost::program_options::options_description hidden_options("Hidden options");
@ -110,7 +116,8 @@ int main(const int argc, const char *argv[]) try
util::LogPolicy::GetInstance().Unmute(); util::LogPolicy::GetInstance().Unmute();
boost::filesystem::path base_path; boost::filesystem::path base_path;
if (!generateDataStoreOptions(argc, argv, base_path)) int max_wait = -1;
if (!generateDataStoreOptions(argc, argv, base_path, max_wait))
{ {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
@ -122,7 +129,7 @@ int main(const int argc, const char *argv[]) try
} }
storage::Storage storage(std::move(config)); storage::Storage storage(std::move(config));
return storage.Run(); return storage.Run(max_wait);
} }
catch (const std::bad_alloc &e) catch (const std::bad_alloc &e)
{ {

View File

@ -1,5 +1,5 @@
#include "util/coordinate.hpp"
#include "util/coordinate_calculation.hpp" #include "util/coordinate_calculation.hpp"
#include "util/coordinate.hpp"
#include "util/trigonometry_table.hpp" #include "util/trigonometry_table.hpp"
#include "util/web_mercator.hpp" #include "util/web_mercator.hpp"