Split PBF Parser into a single Read and a single Parse thread to increase parsing speed.
This commit is contained in:
parent
a2389d741e
commit
2a6275cf38
@ -32,6 +32,84 @@ or see http://www.gnu.org/licenses/agpl.txt.
|
|||||||
#include "ExtractorStructs.h"
|
#include "ExtractorStructs.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Concurrent Queue written by Anthony Williams:
|
||||||
|
http://www.justsoftwaresolutions.co.uk/threading/implementing-a-thread-safe-queue-using-condition-variables.html
|
||||||
|
*/
|
||||||
|
template<typename Data>
|
||||||
|
class concurrent_queue
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
std::queue<Data> internal_queue;
|
||||||
|
mutable boost::mutex queue_mutex;
|
||||||
|
mutable boost::mutex queue_full_mutex;
|
||||||
|
boost::condition_variable queue_cv;
|
||||||
|
boost::condition_variable queue_full_cv;
|
||||||
|
const size_t max_queue_size;
|
||||||
|
|
||||||
|
bool size_exceeded() const {
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
return internal_queue.size() >= max_queue_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
concurrent_queue(const size_t max_size)
|
||||||
|
: max_queue_size(max_size) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void push(Data const& data)
|
||||||
|
{
|
||||||
|
if (size_exceeded()) {
|
||||||
|
boost::mutex::scoped_lock qf_lock(queue_full_mutex);
|
||||||
|
queue_full_cv.wait(qf_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
internal_queue.push(data);
|
||||||
|
lock.unlock();
|
||||||
|
queue_cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const
|
||||||
|
{
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
return internal_queue.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool try_pop(Data& popped_value)
|
||||||
|
{
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
if(internal_queue.empty())
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
popped_value=internal_queue.front();
|
||||||
|
internal_queue.pop();
|
||||||
|
queue_full_cv.notify_one();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void wait_and_pop(Data& popped_value)
|
||||||
|
{
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
while(internal_queue.empty())
|
||||||
|
{
|
||||||
|
queue_cv.wait(lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
popped_value=internal_queue.front();
|
||||||
|
internal_queue.pop();
|
||||||
|
queue_full_cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
int size() const {
|
||||||
|
boost::mutex::scoped_lock lock(queue_mutex);
|
||||||
|
return static_cast<int>(internal_queue.size());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class PBFParser : public BaseParser<_Node, _RawRestrictionContainer, _Way> {
|
class PBFParser : public BaseParser<_Node, _RawRestrictionContainer, _Way> {
|
||||||
|
|
||||||
enum EntityType {
|
enum EntityType {
|
||||||
@ -61,9 +139,9 @@ class PBFParser : public BaseParser<_Node, _RawRestrictionContainer, _Way> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PBFParser(const char * fileName) {
|
PBFParser(const char * fileName)
|
||||||
|
: threadDataQueue( new concurrent_queue<_ThreadData*>(25) ) { /* Max 25 items in queue */
|
||||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||||
omp_set_num_threads(1);
|
|
||||||
input.open(fileName, std::ios::in | std::ios::binary);
|
input.open(fileName, std::ios::in | std::ios::binary);
|
||||||
|
|
||||||
if (!input) {
|
if (!input) {
|
||||||
@ -86,10 +164,12 @@ public:
|
|||||||
if(input.is_open())
|
if(input.is_open())
|
||||||
input.close();
|
input.close();
|
||||||
|
|
||||||
unsigned maxThreads = omp_get_max_threads();
|
// Clean up any leftover ThreadData objects in the queue
|
||||||
for ( unsigned threadNum = 0; threadNum < maxThreads; ++threadNum ) {
|
_ThreadData* td;
|
||||||
delete threadDataVector[threadNum];
|
while (threadDataQueue->try_pop(td)) {
|
||||||
|
delete td;
|
||||||
}
|
}
|
||||||
|
delete threadDataQueue;
|
||||||
|
|
||||||
google::protobuf::ShutdownProtobufLibrary();
|
google::protobuf::ShutdownProtobufLibrary();
|
||||||
|
|
||||||
@ -100,12 +180,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Init() {
|
bool Init() {
|
||||||
/** Init Vector with ThreadData Objects */
|
|
||||||
unsigned maxThreads = omp_get_max_threads();
|
|
||||||
for ( unsigned threadNum = 0; threadNum < maxThreads; ++threadNum ) {
|
|
||||||
threadDataVector.push_back( new _ThreadData( ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
_ThreadData initData;
|
_ThreadData initData;
|
||||||
/** read Header */
|
/** read Header */
|
||||||
if(!readPBFBlobHeader(input, &initData)) {
|
if(!readPBFBlobHeader(input, &initData)) {
|
||||||
@ -137,19 +211,29 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Parse() {
|
void ReadData() {
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
_ThreadData * threadData = threadDataVector[omp_get_thread_num()];
|
|
||||||
//parse through all Blocks
|
|
||||||
bool keepRunning = true;
|
bool keepRunning = true;
|
||||||
// while(readNextBlock(input)) {
|
|
||||||
do {
|
do {
|
||||||
#pragma omp critical
|
_ThreadData *threadData = new _ThreadData();
|
||||||
{
|
|
||||||
keepRunning = readNextBlock(input, threadData);
|
keepRunning = readNextBlock(input, threadData);
|
||||||
|
|
||||||
|
if (keepRunning)
|
||||||
|
threadDataQueue->push(threadData);
|
||||||
|
else
|
||||||
|
threadDataQueue->push(NULL); // No more data to read, parse stops when NULL encountered
|
||||||
|
} while(keepRunning);
|
||||||
}
|
}
|
||||||
if(keepRunning) {
|
|
||||||
|
void ParseData() {
|
||||||
|
while (1) {
|
||||||
|
_ThreadData *threadData;
|
||||||
|
threadDataQueue->wait_and_pop(threadData);
|
||||||
|
if (threadData == NULL) {
|
||||||
|
cout << "Parse Data Thread Finished" << endl;
|
||||||
|
threadDataQueue->push(NULL); // Signal end of data for other threads
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
loadBlock(threadData);
|
loadBlock(threadData);
|
||||||
for(int i = 0; i < threadData->PBFprimitiveBlock.primitivegroup_size(); i++) {
|
for(int i = 0; i < threadData->PBFprimitiveBlock.primitivegroup_size(); i++) {
|
||||||
threadData->currentGroupID = i;
|
threadData->currentGroupID = i;
|
||||||
@ -164,9 +248,21 @@ public:
|
|||||||
if(threadData->entityTypeIndicator == TypeDenseNode)
|
if(threadData->entityTypeIndicator == TypeDenseNode)
|
||||||
parseDenseNode(threadData);
|
parseDenseNode(threadData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete threadData;
|
||||||
|
threadData = NULL;
|
||||||
}
|
}
|
||||||
}while(keepRunning);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Parse() {
|
||||||
|
// Start the read and parse threads
|
||||||
|
boost::thread readThread(boost::bind(&PBFParser::ReadData, this));
|
||||||
|
boost::thread parseThread(boost::bind(&PBFParser::ParseData, this));
|
||||||
|
|
||||||
|
// Wait for the threads to finish
|
||||||
|
readThread.join();
|
||||||
|
parseThread.join();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,28 +296,19 @@ private:
|
|||||||
keyVals.Add(key, value);
|
keyVals.Add(key, value);
|
||||||
denseTagIndex += 2;
|
denseTagIndex += 2;
|
||||||
}
|
}
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
if(!(*addressCallback)(n, keyVals))
|
if(!(*addressCallback)(n, keyVals))
|
||||||
std::cerr << "[PBFParser] adress not parsed" << std::endl;
|
std::cerr << "[PBFParser] adress not parsed" << std::endl;
|
||||||
}
|
|
||||||
|
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
if(!(*nodeCallback)(n))
|
if(!(*nodeCallback)(n))
|
||||||
std::cerr << "[PBFParser] dense node not parsed" << std::endl;
|
std::cerr << "[PBFParser] dense node not parsed" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void parseNode(_ThreadData * threadData) {
|
void parseNode(_ThreadData * threadData) {
|
||||||
_Node n;
|
_Node n;
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
if(!(*nodeCallback)(n))
|
if(!(*nodeCallback)(n))
|
||||||
std::cerr << "[PBFParser] simple node not parsed" << std::endl;
|
std::cerr << "[PBFParser] simple node not parsed" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void parseRelation(_ThreadData * threadData) {
|
void parseRelation(_ThreadData * threadData) {
|
||||||
const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID );
|
const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID );
|
||||||
@ -291,14 +378,11 @@ private:
|
|||||||
// cout << "node " << currentRestriction.viaNode;
|
// cout << "node " << currentRestriction.viaNode;
|
||||||
// cout << " to " << currentRestriction.to << endl;
|
// cout << " to " << currentRestriction.to << endl;
|
||||||
// }
|
// }
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
if(!(*restrictionCallback)(currentRestrictionContainer))
|
if(!(*restrictionCallback)(currentRestrictionContainer))
|
||||||
std::cerr << "[PBFParser] relation not parsed" << std::endl;
|
std::cerr << "[PBFParser] relation not parsed" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void parseWay(_ThreadData * threadData) {
|
void parseWay(_ThreadData * threadData) {
|
||||||
if( threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size() > 0) {
|
if( threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size() > 0) {
|
||||||
@ -317,18 +401,15 @@ private:
|
|||||||
const std::string val = threadData->PBFprimitiveBlock.stringtable().s(inputWay.vals(i));
|
const std::string val = threadData->PBFprimitiveBlock.stringtable().s(inputWay.vals(i));
|
||||||
w.keyVals.Add(key, val);
|
w.keyVals.Add(key, val);
|
||||||
}
|
}
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
if(!(*wayCallback)(w)) {
|
if(!(*wayCallback)(w)) {
|
||||||
std::cerr << "[PBFParser] way not parsed" << std::endl;
|
std::cerr << "[PBFParser] way not parsed" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void loadGroup(_ThreadData * threadData) {
|
void loadGroup(_ThreadData * threadData) {
|
||||||
#pragma omp atomic
|
|
||||||
groupCount++;
|
groupCount++;
|
||||||
|
|
||||||
const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID );
|
const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID );
|
||||||
@ -350,7 +431,6 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void loadBlock(_ThreadData * threadData) {
|
void loadBlock(_ThreadData * threadData) {
|
||||||
#pragma omp critical
|
|
||||||
blockCount++;
|
blockCount++;
|
||||||
threadData->currentGroupID = 0;
|
threadData->currentGroupID = 0;
|
||||||
threadData->currentEntityID = 0;
|
threadData->currentEntityID = 0;
|
||||||
@ -522,9 +602,8 @@ private:
|
|||||||
/* the input stream to parse */
|
/* the input stream to parse */
|
||||||
std::fstream input;
|
std::fstream input;
|
||||||
|
|
||||||
/* ThreadData Array */
|
/* ThreadData Queue */
|
||||||
std::vector < _ThreadData* > threadDataVector;
|
concurrent_queue < _ThreadData* >* threadDataQueue;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* PBFPARSER_H_ */
|
#endif /* PBFPARSER_H_ */
|
||||||
|
Loading…
Reference in New Issue
Block a user