Recover from stale mutexes with force unlocking and retry

This commit is contained in:
Patrick Niklaus
2016-10-12 21:26:59 +02:00
committed by Patrick Niklaus
parent caa7c994a0
commit 81c5cba0e5
6 changed files with 145 additions and 28 deletions
+30 -3
View File
@@ -12,7 +12,8 @@ using namespace osrm;
// generate boost::program_options object for the routing part
bool generateDataStoreOptions(const int argc,
const char *argv[],
boost::filesystem::path &base_path)
boost::filesystem::path &base_path,
int &max_wait)
{
// declare a group of options that will be allowed only on command line
boost::program_options::options_description generic_options("Options");
@@ -21,6 +22,9 @@ bool generateDataStoreOptions(const int argc,
// declare a group of options that will be allowed both on command line
// as well as in a config file
boost::program_options::options_description config_options("Configuration");
config_options.add_options()("max-wait",
boost::program_options::value<int>(&max_wait)->default_value(-1),
"Maximum number of seconds to wait on requests that use the old dataset.");
// hidden options, will be allowed on command line but will not be shown to the user
boost::program_options::options_description hidden_options("Hidden options");
@@ -87,7 +91,8 @@ int main(const int argc, const char *argv[]) try
util::LogPolicy::GetInstance().Unmute();
boost::filesystem::path base_path;
if (!generateDataStoreOptions(argc, argv, base_path))
int max_wait = -1;
if (!generateDataStoreOptions(argc, argv, base_path, max_wait))
{
return EXIT_SUCCESS;
}
@@ -98,7 +103,29 @@ int main(const int argc, const char *argv[]) try
return EXIT_FAILURE;
}
storage::Storage storage(std::move(config));
return storage.Run();
// We attempt to load this dataset into memory several times if we encounter
// an error we can recover from. Retrying is necessary when we have to clear
// mutexes that have been left dangling by other processes.
const constexpr unsigned MAX_RETRIES = 3;
unsigned retry_counter = 0;
storage::Storage::ReturnCode code = storage::Storage::ReturnCode::Retry;
while(code == storage::Storage::ReturnCode::Retry && retry_counter < MAX_RETRIES)
{
if (retry_counter > 0)
{
util::SimpleLogger().Write(logWARNING) << "Try number " << (retry_counter+1) << " to load the dataset.";
}
code = storage.Run(max_wait);
retry_counter++;
}
if (code == storage::Storage::ReturnCode::Ok)
{
return EXIT_SUCCESS;
}
return EXIT_FAILURE;
}
catch (const std::bad_alloc &e)
{
+5 -4
View File
@@ -7,9 +7,10 @@ int main()
{
osrm::util::LogPolicy::GetInstance().Unmute();
osrm::util::SimpleLogger().Write() << "Releasing all locks";
osrm::storage::SharedBarriers barriers;
boost::interprocess::named_upgradable_mutex::remove("current_regions");
boost::interprocess::named_sharable_mutex::remove("regions_1");
boost::interprocess::named_sharable_mutex::remove("regions_2");
osrm::storage::SharedBarriers::resetCurrentRegions();
osrm::storage::SharedBarriers::resetRegions1();
osrm::storage::SharedBarriers::resetRegions2();
return 0;
}