Some optimizations to speed up PBF parsing

This commit is contained in:
Dennis Luxen 2013-06-26 13:39:45 -04:00
parent 163cfda282
commit 9d6bd91279
2 changed files with 40 additions and 47 deletions

View File

@ -76,7 +76,7 @@ inline bool PBFParser::ReadHeader() {
else if ( "DenseNodes" == feature ) {
supported = true;
}
if ( !supported ) {
std::cerr << "[error] required feature not supported: " << feature.data() << std::endl;
return false;
@ -159,18 +159,15 @@ inline void PBFParser::parseDenseNode(_ThreadData * threadData) {
int64_t m_lastDenseLatitude = 0;
int64_t m_lastDenseLongitude = 0;
ImportNode n;
std::vector<ImportNode> extracted_nodes_vector;
const int number_of_nodes = dense.id_size();
extracted_nodes_vector.reserve(number_of_nodes);
std::vector<ImportNode> extracted_nodes_vector(number_of_nodes);
for(int i = 0; i < number_of_nodes; ++i) {
n.Clear();
m_lastDenseID += dense.id( i );
m_lastDenseLatitude += dense.lat( i );
m_lastDenseLongitude += dense.lon( i );
n.id = m_lastDenseID;
n.lat = 100000*( ( double ) m_lastDenseLatitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lat_offset() ) / NANO;
n.lon = 100000*( ( double ) m_lastDenseLongitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lon_offset() ) / NANO;
extracted_nodes_vector[i].id = m_lastDenseID;
extracted_nodes_vector[i].lat = 100000*( ( double ) m_lastDenseLatitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lat_offset() ) / NANO;
extracted_nodes_vector[i].lon = 100000*( ( double ) m_lastDenseLongitude * threadData->PBFprimitiveBlock.granularity() + threadData->PBFprimitiveBlock.lon_offset() ) / NANO;
while (denseTagIndex < dense.keys_vals_size()) {
const int tagValue = dense.keys_vals( denseTagIndex );
if( 0==tagValue ) {
@ -180,10 +177,9 @@ inline void PBFParser::parseDenseNode(_ThreadData * threadData) {
const int keyValue = dense.keys_vals ( denseTagIndex+1 );
const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(tagValue).data();
const std::string & value = threadData->PBFprimitiveBlock.stringtable().s(keyValue).data();
n.keyVals.Add(key, value);
extracted_nodes_vector[i].keyVals.Add(key, value);
denseTagIndex += 2;
}
extracted_nodes_vector.push_back(n);
}
#pragma omp parallel for schedule ( guided )
@ -292,37 +288,33 @@ inline void PBFParser::parseRelation(_ThreadData * threadData) {
}
// Parses every OSMPBF::Way in the current primitive group of `threadData`:
// decodes each way's id, delta-encoded node references, and key/value tags
// into `parsed_way_vector`, runs the Lua way filter over the vector in
// parallel, then hands each filtered way to the extractor callback.
//
// NOTE(review): this span was diff residue — removed lines (`ExtractionWay w;`,
// the `waysToParse` declaration/reserve, `w.Clear()`, the old i-indexed inner
// loops and `waysToParse.push_back(w)`) were interleaved with the replacement
// lines. Reconstructed here as the post-commit version.
inline void PBFParser::parseWay(_ThreadData * threadData) {
	const int number_of_ways = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size();
	// Construct all ways up front so each slot can be filled in place
	// (avoids the copy that push_back of a scratch object would incur).
	std::vector<ExtractionWay> parsed_way_vector(number_of_ways);
	for(int i = 0; i < number_of_ways; ++i) {
		const OSMPBF::Way& inputWay = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways( i );
		ExtractionWay & w = parsed_way_vector[i]; // hoist: avoid re-indexing in the inner loops
		w.id = inputWay.id();
		// Node refs are delta-encoded in PBF; accumulate to recover absolute ids.
		unsigned pathNode(0);
		const int number_of_referenced_nodes = inputWay.refs_size();
		w.path.reserve(number_of_referenced_nodes); // one allocation instead of repeated growth
		for(int j = 0; j < number_of_referenced_nodes; ++j) {
			pathNode += inputWay.refs(j);
			w.path.push_back(pathNode);
		}
		// keys and vals are parallel index arrays into the block's string table.
		assert(inputWay.keys_size() == inputWay.vals_size());
		const int number_of_keys = inputWay.keys_size();
		for(int j = 0; j < number_of_keys; ++j) {
			const std::string & key = threadData->PBFprimitiveBlock.stringtable().s(inputWay.keys(j));
			const std::string & val = threadData->PBFprimitiveBlock.stringtable().s(inputWay.vals(j));
			w.keyVals.Add(key, val);
		}
	}
	// Lua filtering is independent per way, so it parallelizes safely;
	// each OpenMP thread uses its own Lua state.
#pragma omp parallel for schedule ( guided )
	for(int i = 0; i < number_of_ways; ++i) {
		ExtractionWay & w = parsed_way_vector[i];
		ParseWayInLua( w, scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()) );
	}
	// Callback delivery stays sequential to preserve ordering guarantees.
	BOOST_FOREACH(ExtractionWay & w, parsed_way_vector) {
		extractor_callbacks->wayFunction(w);
	}
}
@ -423,7 +415,7 @@ inline bool PBFParser::readBlob(std::fstream& stream, _ThreadData * threadData)
if(stream.eof()) {
return false;
}
const int size = threadData->PBFBlobHeader.datasize();
if ( size < 0 || size > MAX_BLOB_SIZE ) {
std::cerr << "[error] invalid Blob size:" << size << std::endl;

View File

@ -1,17 +1,17 @@
/*
open source routing machine
Copyright (C) Dennis Luxen, others 2010
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU AFFERO General Public License as published by
the Free Software Foundation; either version 3 of the License, or
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -21,6 +21,13 @@
#ifndef PBFPARSER_H_
#define PBFPARSER_H_
#include "../DataStructures/HashTable.h"
#include "../DataStructures/ConcurrentQueue.h"
#include "../Util/MachineInfo.h"
#include "../Util/OpenMPWrapper.h"
#include "../typedefs.h"
#include "BaseParser.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include <boost/ref.hpp>
@ -30,44 +37,38 @@
#include <zlib.h>
#include "../typedefs.h"
#include "../DataStructures/HashTable.h"
#include "../DataStructures/ConcurrentQueue.h"
#include "../Util/MachineInfo.h"
#include "../Util/OpenMPWrapper.h"
#include "BaseParser.h"
class PBFParser : public BaseParser {
enum EntityType {
TypeNode = 1,
TypeWay = 2,
TypeRelation = 4,
TypeDenseNode = 8
} ;
struct _ThreadData {
int currentGroupID;
int currentEntityID;
short entityTypeIndicator;
OSMPBF::BlobHeader PBFBlobHeader;
OSMPBF::Blob PBFBlob;
OSMPBF::HeaderBlock PBFHeaderBlock;
OSMPBF::PrimitiveBlock PBFprimitiveBlock;
std::vector<char> charBuffer;
};
public:
PBFParser(const char * fileName, ExtractorCallbacks* ec, ScriptingEnvironment& se);
virtual ~PBFParser();
inline bool ReadHeader();
inline bool Parse();
private:
inline void ReadData();
inline void ParseData();
@ -75,7 +76,7 @@ private:
inline void parseNode(_ThreadData * );
inline void parseRelation(_ThreadData * threadData);
inline void parseWay(_ThreadData * threadData);
inline void loadGroup(_ThreadData * threadData);
inline void loadBlock(_ThreadData * threadData);
inline bool readPBFBlobHeader(std::fstream& stream, _ThreadData * threadData);
@ -83,17 +84,17 @@ private:
inline bool unpackLZMA(std::fstream &, _ThreadData * );
inline bool readBlob(std::fstream& stream, _ThreadData * threadData) ;
inline bool readNextBlock(std::fstream& stream, _ThreadData * threadData);
static const int NANO = 1000 * 1000 * 1000;
static const int MAX_BLOB_HEADER_SIZE = 64 * 1024;
static const int MAX_BLOB_SIZE = 32 * 1024 * 1024;
#ifndef NDEBUG
/* counting the number of read blocks and groups */
unsigned groupCount;
unsigned blockCount;
#endif
std::fstream input; // the input stream to parse
boost::shared_ptr<ConcurrentQueue < _ThreadData* > > threadDataQueue;
};