Some optimizations to speed up PBF parsing

This commit is contained in:
Dennis Luxen 2013-06-26 13:39:45 -04:00
parent 163cfda282
commit 9d6bd91279
2 changed files with 40 additions and 47 deletions

View File

@ -76,7 +76,7 @@ inline bool PBFParser::ReadHeader() {
else if ( "DenseNodes" == feature ) { else if ( "DenseNodes" == feature ) {
supported = true; supported = true;
} }
if ( !supported ) { if ( !supported ) {
std::cerr << "[error] required feature not supported: " << feature.data() << std::endl; std::cerr << "[error] required feature not supported: " << feature.data() << std::endl;
return false; return false;
@ -159,18 +159,15 @@ inline void PBFParser::parseDenseNode(_ThreadData * threadData) {
int64_t m_lastDenseLatitude = 0; int64_t m_lastDenseLatitude = 0;
int64_t m_lastDenseLongitude = 0; int64_t m_lastDenseLongitude = 0;
ImportNode n;
std::vector<ImportNode> extracted_nodes_vector;
const int number_of_nodes = dense.id_size(); const int number_of_nodes = dense.id_size();
extracted_nodes_vector.reserve(number_of_nodes); std::vector<ImportNode> extracted_nodes_vector(number_of_nodes);
for(int i = 0; i < number_of_nodes; ++i) { for(int i = 0; i < number_of_nodes; ++i) {
n.Clear();
m_lastDenseID += dense.id( i ); m_lastDenseID += dense.id( i );
m_lastDenseLatitude += dense.lat( i ); m_lastDenseLatitude += dense.lat( i );
m_lastDenseLongitude += dense.lon( i ); m_lastDenseLongitude += dense.lon( i );
n.id = m_lastDenseID; extracted_nodes_vector[i].id = m_lastDenseID;
n.lat = 100000*( ( double ) m_lastDenseLatitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lat_offset() ) / NANO; extracted_nodes_vector[i].lat = 100000*( ( double ) m_lastDenseLatitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lat_offset() ) / NANO;
n.lon = 100000*( ( double ) m_lastDenseLongitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lon_offset() ) / NANO; extracted_nodes_vector[i].lon = 100000*( ( double ) m_lastDenseLongitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lon_offset() ) / NANO;
while (denseTagIndex < dense.keys_vals_size()) { while (denseTagIndex < dense.keys_vals_size()) {
const int tagValue = dense.keys_vals( denseTagIndex ); const int tagValue = dense.keys_vals( denseTagIndex );
if( 0==tagValue ) { if( 0==tagValue ) {
@ -180,10 +177,9 @@ inline void PBFParser::parseDenseNode(_ThreadData * threadData) {
const int keyValue = dense.keys_vals ( denseTagIndex+1 ); const int keyValue = dense.keys_vals ( denseTagIndex+1 );
const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(tagValue).data(); const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(tagValue).data();
const std::string & value = threadData->PBFprimitiveBlock.stringtable().s(keyValue).data(); const std::string & value = threadData->PBFprimitiveBlock.stringtable().s(keyValue).data();
n.keyVals.Add(key, value); extracted_nodes_vector[i].keyVals.Add(key, value);
denseTagIndex += 2; denseTagIndex += 2;
} }
extracted_nodes_vector.push_back(n);
} }
#pragma omp parallel for schedule ( guided ) #pragma omp parallel for schedule ( guided )
@ -292,37 +288,33 @@ inline void PBFParser::parseRelation(_ThreadData * threadData) {
} }
inline void PBFParser::parseWay(_ThreadData * threadData) { inline void PBFParser::parseWay(_ThreadData * threadData) {
ExtractionWay w;
std::vector<ExtractionWay> waysToParse;
const int number_of_ways = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size(); const int number_of_ways = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size();
waysToParse.reserve(number_of_ways); std::vector<ExtractionWay> parsed_way_vector(number_of_ways);
for(int i = 0; i < number_of_ways; ++i) { for(int i = 0; i < number_of_ways; ++i) {
w.Clear();
const OSMPBF::Way& inputWay = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways( i ); const OSMPBF::Way& inputWay = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways( i );
w.id = inputWay.id(); parsed_way_vector[i].id = inputWay.id();
unsigned pathNode(0); unsigned pathNode(0);
const int number_of_referenced_nodes = inputWay.refs_size(); const int number_of_referenced_nodes = inputWay.refs_size();
for(int i = 0; i < number_of_referenced_nodes; ++i) { for(int j = 0; j < number_of_referenced_nodes; ++j) {
pathNode += inputWay.refs(i); pathNode += inputWay.refs(j);
w.path.push_back(pathNode); parsed_way_vector[i].path.push_back(pathNode);
} }
assert(inputWay.keys_size() == inputWay.vals_size()); assert(inputWay.keys_size() == inputWay.vals_size());
const int number_of_keys = inputWay.keys_size(); const int number_of_keys = inputWay.keys_size();
for(int i = 0; i < number_of_keys; ++i) { for(int j = 0; j < number_of_keys; ++j) {
const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(inputWay.keys(i)); const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(inputWay.keys(j));
const std::string & val = threadData->PBFprimitiveBlock.stringtable().s(inputWay.vals(i)); const std::string & val = threadData->PBFprimitiveBlock.stringtable().s(inputWay.vals(j));
w.keyVals.Add(key, val); parsed_way_vector[i].keyVals.Add(key, val);
} }
waysToParse.push_back(w);
} }
#pragma omp parallel for schedule ( guided ) #pragma omp parallel for schedule ( guided )
for(int i = 0; i < number_of_ways; ++i) { for(int i = 0; i < number_of_ways; ++i) {
ExtractionWay & w = waysToParse[i]; ExtractionWay & w = parsed_way_vector[i];
ParseWayInLua( w, scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()) ); ParseWayInLua( w, scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()) );
} }
BOOST_FOREACH(ExtractionWay & w, waysToParse) { BOOST_FOREACH(ExtractionWay & w, parsed_way_vector) {
extractor_callbacks->wayFunction(w); extractor_callbacks->wayFunction(w);
} }
} }
@ -423,7 +415,7 @@ inline bool PBFParser::readBlob(std::fstream& stream, _ThreadData * threadData)
if(stream.eof()) { if(stream.eof()) {
return false; return false;
} }
const int size = threadData->PBFBlobHeader.datasize(); const int size = threadData->PBFBlobHeader.datasize();
if ( size < 0 || size > MAX_BLOB_SIZE ) { if ( size < 0 || size > MAX_BLOB_SIZE ) {
std::cerr << "[error] invalid Blob size:" << size << std::endl; std::cerr << "[error] invalid Blob size:" << size << std::endl;

View File

@ -1,17 +1,17 @@
/* /*
open source routing machine open source routing machine
Copyright (C) Dennis Luxen, others 2010 Copyright (C) Dennis Luxen, others 2010
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU AFFERO General Public License as published by it under the terms of the GNU AFFERO General Public License as published by
the Free Software Foundation; either version 3 of the License, or the Free Software Foundation; either version 3 of the License, or
any later version. any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU Affero General Public License You should have received a copy of the GNU Affero General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -21,6 +21,13 @@
#ifndef PBFPARSER_H_ #ifndef PBFPARSER_H_
#define PBFPARSER_H_ #define PBFPARSER_H_
#include "../DataStructures/HashTable.h"
#include "../DataStructures/ConcurrentQueue.h"
#include "../Util/MachineInfo.h"
#include "../Util/OpenMPWrapper.h"
#include "../typedefs.h"
#include "BaseParser.h"
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp> #include <boost/make_shared.hpp>
#include <boost/ref.hpp> #include <boost/ref.hpp>
@ -30,44 +37,38 @@
#include <zlib.h> #include <zlib.h>
#include "../typedefs.h"
#include "../DataStructures/HashTable.h"
#include "../DataStructures/ConcurrentQueue.h"
#include "../Util/MachineInfo.h"
#include "../Util/OpenMPWrapper.h"
#include "BaseParser.h"
class PBFParser : public BaseParser { class PBFParser : public BaseParser {
enum EntityType { enum EntityType {
TypeNode = 1, TypeNode = 1,
TypeWay = 2, TypeWay = 2,
TypeRelation = 4, TypeRelation = 4,
TypeDenseNode = 8 TypeDenseNode = 8
} ; } ;
struct _ThreadData { struct _ThreadData {
int currentGroupID; int currentGroupID;
int currentEntityID; int currentEntityID;
short entityTypeIndicator; short entityTypeIndicator;
OSMPBF::BlobHeader PBFBlobHeader; OSMPBF::BlobHeader PBFBlobHeader;
OSMPBF::Blob PBFBlob; OSMPBF::Blob PBFBlob;
OSMPBF::HeaderBlock PBFHeaderBlock; OSMPBF::HeaderBlock PBFHeaderBlock;
OSMPBF::PrimitiveBlock PBFprimitiveBlock; OSMPBF::PrimitiveBlock PBFprimitiveBlock;
std::vector<char> charBuffer; std::vector<char> charBuffer;
}; };
public: public:
PBFParser(const char * fileName, ExtractorCallbacks* ec, ScriptingEnvironment& se); PBFParser(const char * fileName, ExtractorCallbacks* ec, ScriptingEnvironment& se);
virtual ~PBFParser(); virtual ~PBFParser();
inline bool ReadHeader(); inline bool ReadHeader();
inline bool Parse(); inline bool Parse();
private: private:
inline void ReadData(); inline void ReadData();
inline void ParseData(); inline void ParseData();
@ -75,7 +76,7 @@ private:
inline void parseNode(_ThreadData * ); inline void parseNode(_ThreadData * );
inline void parseRelation(_ThreadData * threadData); inline void parseRelation(_ThreadData * threadData);
inline void parseWay(_ThreadData * threadData); inline void parseWay(_ThreadData * threadData);
inline void loadGroup(_ThreadData * threadData); inline void loadGroup(_ThreadData * threadData);
inline void loadBlock(_ThreadData * threadData); inline void loadBlock(_ThreadData * threadData);
inline bool readPBFBlobHeader(std::fstream& stream, _ThreadData * threadData); inline bool readPBFBlobHeader(std::fstream& stream, _ThreadData * threadData);
@ -83,17 +84,17 @@ private:
inline bool unpackLZMA(std::fstream &, _ThreadData * ); inline bool unpackLZMA(std::fstream &, _ThreadData * );
inline bool readBlob(std::fstream& stream, _ThreadData * threadData) ; inline bool readBlob(std::fstream& stream, _ThreadData * threadData) ;
inline bool readNextBlock(std::fstream& stream, _ThreadData * threadData); inline bool readNextBlock(std::fstream& stream, _ThreadData * threadData);
static const int NANO = 1000 * 1000 * 1000; static const int NANO = 1000 * 1000 * 1000;
static const int MAX_BLOB_HEADER_SIZE = 64 * 1024; static const int MAX_BLOB_HEADER_SIZE = 64 * 1024;
static const int MAX_BLOB_SIZE = 32 * 1024 * 1024; static const int MAX_BLOB_SIZE = 32 * 1024 * 1024;
#ifndef NDEBUG #ifndef NDEBUG
/* counting the number of read blocks and groups */ /* counting the number of read blocks and groups */
unsigned groupCount; unsigned groupCount;
unsigned blockCount; unsigned blockCount;
#endif #endif
std::fstream input; // the input stream to parse std::fstream input; // the input stream to parse
boost::shared_ptr<ConcurrentQueue < _ThreadData* > > threadDataQueue; boost::shared_ptr<ConcurrentQueue < _ThreadData* > > threadDataQueue;
}; };