From 1ecad20a0d09988ee40e53919ac2b5cedf5a956a Mon Sep 17 00:00:00 2001 From: Emil Tin Date: Sun, 10 Feb 2013 10:59:54 +0100 Subject: [PATCH] support xml, move duplicated xml/pbf code to base --- Extractor/BaseParser.cpp | 113 +++++++++++++++++++++++++++++++++++++++ Extractor/BaseParser.h | 27 ++++++---- Extractor/PBFParser.cpp | 101 ++++------------------------------ Extractor/PBFParser.h | 23 ++------ Extractor/XMLParser.cpp | 101 +++++----------------------------- Extractor/XMLParser.h | 18 ++----- extractor.cpp | 15 +++--- features/support/data.rb | 5 +- 8 files changed, 173 insertions(+), 230 deletions(-) create mode 100644 Extractor/BaseParser.cpp diff --git a/Extractor/BaseParser.cpp b/Extractor/BaseParser.cpp new file mode 100644 index 000000000..d84ebe034 --- /dev/null +++ b/Extractor/BaseParser.cpp @@ -0,0 +1,113 @@ +/* +open source routing machine +Copyright (C) Dennis Luxen, others 2010 + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU AFFERO General Public License as published by +the Free Software Foundation; either version 3 of the License, or +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +or see http://www.gnu.org/licenses/agpl.txt. 
+*/ + +#include "BaseParser.h" + +BaseParser::BaseParser(ExtractorCallbacks* em, ScriptingEnvironment& se) : +externalMemory(em), scriptingEnvironment(se), luaState(NULL), use_turn_restrictions(true) { + luaState = se.getLuaStateForThreadID(0); + ReadUseRestrictionsSetting(); + ReadRestrictionExceptions(); +} + +void BaseParser::ReadUseRestrictionsSetting() { + if(0 != luaL_dostring( luaState, "return use_turn_restrictions\n")) { + ERR(lua_tostring( luaState,-1)<< " occured in scripting block"); + } + if( lua_isboolean( luaState, -1) ) { + use_turn_restrictions = lua_toboolean(luaState, -1); + } + if( use_turn_restrictions ) { + INFO("Using turn restrictions" ); + } else { + INFO("Ignoring turn restrictions" ); + } +} + +void BaseParser::ReadRestrictionExceptions() { + if(lua_function_exists(luaState, "get_exceptions" )) { + //get list of turn restriction exceptions + try { + luabind::call_function( + luaState, + "get_exceptions", + boost::ref(restriction_exceptions) + ); + INFO("Found " << restriction_exceptions.size() << " exceptions to turn restriction"); + BOOST_FOREACH(std::string & str, restriction_exceptions) { + INFO(" " << str); + } + } catch (const luabind::error &er) { + lua_State* Ler=er.state(); + report_errors(Ler, -1); + ERR(er.what()); + } + } else { + INFO("Found no exceptions to turn restrictions"); + } +} + +void BaseParser::report_errors(lua_State *L, int status) { + if ( status!=0 ) { + std::cerr << "-- " << lua_tostring(L, -1) << std::endl; + lua_pop(L, 1); // remove error message + } +} + +inline void BaseParser::ParseNodeInLua(ImportNode& n, lua_State* localLuaState) { + try { + luabind::call_function( localLuaState, "node_function", boost::ref(n) ); + } catch (const luabind::error &er) { + lua_State* Ler=er.state(); + report_errors(Ler, -1); + ERR(er.what()); + } +} + +inline void BaseParser::ParseWayInLua(ExtractionWay& w, lua_State* localLuaState) { + try { + luabind::call_function( localLuaState, "way_function", boost::ref(w), 
+ w.path.size() ); + } catch (const luabind::error &er) { + lua_State* Ler=er.state(); + report_errors(Ler, -1); + ERR(er.what()); + } +} + +inline bool BaseParser::ShouldIgnoreRestriction(std::string& exception_string) { + //should this restriction be ignored? yes if there's an overlap between: + //a) the list of modes in the except tag of the restriction (exception_string), ex: except=bus;bicycle + //b) the lua profile defines a hierarchy of modes, ex: [access, vehicle, bicycle] + + if( "" == exception_string ) + return false; + + //Be warned, this is quadratic work here, but we assume that + //only a few exceptions are actually defined. + std::vector exceptions; + boost::algorithm::split_regex(exceptions, exception_string, boost::regex("[;][ ]*")); + BOOST_FOREACH(std::string& str, exceptions) { + if( restriction_exceptions.end() != std::find(restriction_exceptions.begin(), restriction_exceptions.end(), str) ) { + return true; + break; //BOOST_FOREACH + } + } + return false; +} \ No newline at end of file diff --git a/Extractor/BaseParser.h b/Extractor/BaseParser.h index adf417a73..4c23bc78d 100644 --- a/Extractor/BaseParser.h +++ b/Extractor/BaseParser.h @@ -29,23 +29,30 @@ extern "C" { #include +#include "ExtractorCallbacks.h" #include "ScriptingEnvironment.h" -template class BaseParser : boost::noncopyable { public: + BaseParser(ExtractorCallbacks* em, ScriptingEnvironment& se); virtual ~BaseParser() {} - virtual bool Init() = 0; - virtual void RegisterCallbacks(ExternalMemoryT * externalMemory) = 0; - virtual void RegisterScriptingEnvironment(ScriptingEnvironment & _se) = 0; + virtual bool ReadHeader() = 0; virtual bool Parse() = 0; - void report_errors(lua_State *L, int status) { - if ( status!=0 ) { - std::cerr << "-- " << lua_tostring(L, -1) << std::endl; - lua_pop(L, 1); // remove error message - } - } + inline virtual void ParseNodeInLua(ImportNode& n, lua_State* luaStateForThread); + inline virtual void ParseWayInLua(ExtractionWay& n, lua_State* 
luaStateForThread); + virtual void report_errors(lua_State *L, int status); + +protected: + virtual void ReadUseRestrictionsSetting(); + virtual void ReadRestrictionExceptions(); + inline virtual bool ShouldIgnoreRestriction(std::string& exception_string); + + ExtractorCallbacks* externalMemory; + ScriptingEnvironment& scriptingEnvironment; + lua_State* luaState; + std::vector restriction_exceptions; + bool use_turn_restrictions; }; diff --git a/Extractor/PBFParser.cpp b/Extractor/PBFParser.cpp index 3280e7e81..de8acad14 100644 --- a/Extractor/PBFParser.cpp +++ b/Extractor/PBFParser.cpp @@ -20,7 +20,7 @@ #include "PBFParser.h" -PBFParser::PBFParser(const char * fileName) : externalMemory(NULL), use_turn_restrictions(true) { +PBFParser::PBFParser(ExtractorCallbacks* em, ScriptingEnvironment& se, const char * fileName) : BaseParser( em, se ) { GOOGLE_PROTOBUF_VERIFY_VERSION; //TODO: What is the bottleneck here? Filling the queue or reading the stuff from disk? //NOTE: With Lua scripting, it is parsing the stuff. I/O is virtually for free. @@ -37,43 +37,6 @@ PBFParser::PBFParser(const char * fileName) : externalMemory(NULL), use_turn_res #endif } -void PBFParser::RegisterCallbacks(ExtractorCallbacks * em) { - externalMemory = em; -} - -void PBFParser::RegisterScriptingEnvironment(ScriptingEnvironment & _se) { - scriptingEnvironment = _se; - - if(0 != luaL_dostring( scriptingEnvironment.getLuaStateForThreadID(0), "return use_turn_restrictions\n")) { - ERR(lua_tostring(scriptingEnvironment.getLuaStateForThreadID(0),-1)<< " occured in scripting block"); - } - if( lua_isboolean(scriptingEnvironment.getLuaStateForThreadID(0), -1) ) { - use_turn_restrictions = lua_toboolean(scriptingEnvironment.getLuaStateForThreadID(0), -1); - } - INFO("Use turn restrictions: " << (use_turn_restrictions ? 
"yes" : "no")); - - if(lua_function_exists(scriptingEnvironment.getLuaStateForThreadID(0), "get_exceptions" )) { - //get list of turn restriction exceptions - try { - luabind::call_function( - scriptingEnvironment.getLuaStateForThreadID(0), - "get_exceptions", - boost::ref(restriction_exceptions_vector) - ); - INFO("Found " << restriction_exceptions_vector.size() << " exceptions to turn restriction"); - BOOST_FOREACH(std::string & str, restriction_exceptions_vector) { - INFO("ignoring: " << str); - } - } catch (const luabind::error &er) { - lua_State* Ler=er.state(); - report_errors(Ler, -1); - ERR(er.what()); - } - } else { - INFO("Found no exceptions to turn restrictions"); - } -} - PBFParser::~PBFParser() { if(input.is_open()) input.close(); @@ -90,7 +53,7 @@ PBFParser::~PBFParser() { #endif } -inline bool PBFParser::Init() { +inline bool PBFParser::ReadHeader() { _ThreadData initData; /** read Header */ if(!readPBFBlobHeader(input, &initData)) { @@ -218,24 +181,9 @@ inline void PBFParser::parseDenseNode(_ThreadData * threadData) { #pragma omp parallel for schedule ( guided ) for(unsigned i = 0; i < endi_nodes; ++i) { ImportNode &n = nodesToParse[i]; - /** Pass the unpacked node to the LUA call back **/ - try { - luabind::call_function( - scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()), - "node_function", - boost::ref(n) - ); - } catch (const luabind::error &er) { - lua_State* Ler=er.state(); - report_errors(Ler, -1); - ERR(er.what()); - } - // catch (...) 
{ - // ERR("Unknown error occurred during PBF dense node parsing!"); - // } + ParseNodeInLua( n, scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()) ); } - BOOST_FOREACH(ImportNode &n, nodesToParse) { if(!externalMemory->nodeFunction(n)) std::cerr << "[PBFParser] dense node not parsed" << std::endl; @@ -249,12 +197,12 @@ inline void PBFParser::parseNode(_ThreadData * ) { inline void PBFParser::parseRelation(_ThreadData * threadData) { //TODO: leave early, if relation is not a restriction //TODO: reuse rawRestriction container - if( use_turn_restrictions==false ) + if( !use_turn_restrictions ) return; const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ); for(int i = 0; i < group.relations_size(); ++i ) { - std::string exception_of_restriction_tag; + std::string restriction_exceptions; const OSMPBF::Relation& inputRelation = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).relations(i); bool isRestriction = false; bool isOnlyRestriction = false; @@ -272,24 +220,12 @@ inline void PBFParser::parseRelation(_ThreadData * threadData) { isOnlyRestriction = true; } if ("except" == key) { - exception_of_restriction_tag = val; - } - } - - //Check if restriction shall be ignored - if(isRestriction && ("" != exception_of_restriction_tag) ) { - //Be warned, this is quadratic work here, but we assume that - //only a few exceptions are actually defined. 
- std::vector tokenized_exception_tags_of_restriction; - boost::algorithm::split_regex(tokenized_exception_tags_of_restriction, exception_of_restriction_tag, boost::regex("[;][ ]*")); - BOOST_FOREACH(std::string & str, tokenized_exception_tags_of_restriction) { - if(restriction_exceptions_vector.end() != std::find(restriction_exceptions_vector.begin(), restriction_exceptions_vector.end(), str)) { - isRestriction = false; - break; //BOOST_FOREACH - } + restriction_exceptions = val; } } + if( isRestriction && ShouldIgnoreRestriction(restriction_exceptions) ) + isRestriction = false; if(isRestriction) { int64_t lastRef = 0; @@ -368,29 +304,12 @@ inline void PBFParser::parseWay(_ThreadData * threadData) { #pragma omp parallel for schedule ( guided ) for(unsigned i = 0; i < endi_ways; ++i) { ExtractionWay & w = waysToParse[i]; - /** Pass the unpacked way to the LUA call back **/ - try { - luabind::call_function( - scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()), - "way_function", - boost::ref(w), - w.path.size() - ); - - } catch (const luabind::error &er) { - lua_State* Ler=er.state(); - report_errors(Ler, -1); - ERR(er.what()); - } - // catch (...) 
{ - // ERR("Unknown error!"); - // } + ParseWayInLua( w, scriptingEnvironment.getLuaStateForThreadID(omp_get_thread_num()) ); } BOOST_FOREACH(ExtractionWay & w, waysToParse) { - if(!externalMemory->wayFunction(w)) { + if(!externalMemory->wayFunction(w)) std::cerr << "[PBFParser] way not parsed" << std::endl; - } } } diff --git a/Extractor/PBFParser.h b/Extractor/PBFParser.h index e00d64fbc..a483baf19 100644 --- a/Extractor/PBFParser.h +++ b/Extractor/PBFParser.h @@ -37,11 +37,8 @@ #include "../Util/OpenMPWrapper.h" #include "BaseParser.h" -#include "ExtractorCallbacks.h" -#include "ExtractorStructs.h" -#include "ScriptingEnvironment.h" -class PBFParser : public BaseParser { +class PBFParser : public BaseParser { enum EntityType { TypeNode = 1, @@ -65,13 +62,10 @@ class PBFParser : public BaseParser > threadDataQueue; - ScriptingEnvironment scriptingEnvironment; - - bool use_turn_restrictions; - std::vector restriction_exceptions_vector; + std::fstream input; // the input stream to parse + boost::shared_ptr > threadDataQueue; // ThreadData Queue }; #endif /* PBFPARSER_H_ */ diff --git a/Extractor/XMLParser.cpp b/Extractor/XMLParser.cpp index 64ef2e25b..b1225cc5c 100644 --- a/Extractor/XMLParser.cpp +++ b/Extractor/XMLParser.cpp @@ -27,42 +27,12 @@ #include "../DataStructures/InputReaderFactory.h" -XMLParser::XMLParser(const char * filename) : externalMemory(NULL), myLuaState(NULL){ +XMLParser::XMLParser(ExtractorCallbacks* em, ScriptingEnvironment& se, const char * filename) : BaseParser( em, se ) { WARN("Parsing plain .osm/.osm.bz2 is deprecated. 
Switch to .pbf"); inputReader = inputReaderFactory(filename); } -XMLParser::~XMLParser() {} - -void XMLParser::RegisterCallbacks(ExtractorCallbacks * em) { - externalMemory = em; -} - -void XMLParser::RegisterScriptingEnvironment(ScriptingEnvironment & _se) { - myLuaState = _se.getLuaStateForThreadID(0); - if(lua_function_exists(myLuaState, "get_exceptions" )) { - //get list of turn restriction exceptions - try { - luabind::call_function( - myLuaState, - "get_exceptions", - boost::ref(restriction_exceptions_vector) - ); - INFO("Found " << restriction_exceptions_vector.size() << " exceptions to turn restriction"); - BOOST_FOREACH(std::string & str, restriction_exceptions_vector) { - INFO(" " << str); - } - } catch (const luabind::error &er) { - lua_State* Ler=er.state(); - report_errors(Ler, -1); - ERR(er.what()); - } - } else { - INFO("Found no exceptions to turn restrictions"); - } -} - -bool XMLParser::Init() { +bool XMLParser::ReadHeader() { return (xmlTextReaderRead( inputReader ) == 1); } bool XMLParser::Parse() { @@ -78,50 +48,18 @@ bool XMLParser::Parse() { continue; if ( xmlStrEqual( currentName, ( const xmlChar* ) "node" ) == 1 ) { - ImportNode n = _ReadXMLNode( ); - /** Pass the unpacked node to the LUA call back **/ - try { - luabind::call_function( - myLuaState, - "node_function", - boost::ref(n) - ); - if(!externalMemory->nodeFunction(n)) - std::cerr << "[XMLParser] dense node not parsed" << std::endl; - } catch (const luabind::error &er) { - std::cerr << er.what() << std::endl; - lua_State* Ler=er.state(); - report_errors(Ler, -1); - } catch (std::exception & e) { - ERR(e.what()); - } catch (...) 
{ - ERR("Unknown error occurred during XML node parsing!"); - } + ImportNode n = _ReadXMLNode(); + ParseNodeInLua( n, luaState ); + + if(!externalMemory->nodeFunction(n)) + std::cerr << "[XMLParser] dense node not parsed" << std::endl; } if ( xmlStrEqual( currentName, ( const xmlChar* ) "way" ) == 1 ) { ExtractionWay way = _ReadXMLWay( ); - - /** Pass the unpacked way to the LUA call back **/ - try { - luabind::call_function( - myLuaState, - "way_function", - boost::ref(way), - way.path.size() - ); - if(!externalMemory->wayFunction(way)) { - std::cerr << "[PBFParser] way not parsed" << std::endl; - } - } catch (const luabind::error &er) { - std::cerr << er.what() << std::endl; - lua_State* Ler=er.state(); - report_errors(Ler, -1); - } catch (std::exception & e) { - ERR(e.what()); - } catch (...) { - ERR("Unknown error occurred during XML way parsing!"); - } + ParseWayInLua( way, luaState ); + if(!externalMemory->wayFunction(way)) + std::cerr << "[PBFParser] way not parsed" << std::endl; } if ( xmlStrEqual( currentName, ( const xmlChar* ) "relation" ) == 1 ) { _RawRestrictionContainer r = _ReadXMLRestriction(); @@ -138,7 +76,7 @@ bool XMLParser::Parse() { _RawRestrictionContainer XMLParser::_ReadXMLRestriction() { _RawRestrictionContainer restriction; - std::string exception_of_restriction_tag; + std::string exception_string; if ( xmlTextReaderIsEmptyElement( inputReader ) != 1 ) { const int depth = xmlTextReaderDepth( inputReader );while ( xmlTextReaderRead( inputReader ) == 1 ) { @@ -168,7 +106,7 @@ _RawRestrictionContainer XMLParser::_ReadXMLRestriction() { restriction.restriction.flags.isOnly = true; } if ( xmlStrEqual(k, (const xmlChar *) "except") ) { - exception_of_restriction_tag = (const char*) value; + exception_string = (const char*) value; } } @@ -204,19 +142,8 @@ _RawRestrictionContainer XMLParser::_ReadXMLRestriction() { } } - //Check if restriction shall be ignored - if( "" != exception_of_restriction_tag ) { - //Be warned, this is quadratic work 
here, but we assume that - //only a few exceptions are actually defined. - std::vector tokenized_exception_tags_of_restriction; - boost::algorithm::split_regex(tokenized_exception_tags_of_restriction, exception_of_restriction_tag, boost::regex("[;][ ]*")); - BOOST_FOREACH(std::string & str, tokenized_exception_tags_of_restriction) { - if(restriction_exceptions_vector.end() != std::find(restriction_exceptions_vector.begin(), restriction_exceptions_vector.end(), str)) { - restriction.fromWay = UINT_MAX; //workaround to ignore the restriction - break; //BOOST_FOREACH - } - } - } + if( ShouldIgnoreRestriction(exception_string) ) + restriction.fromWay = UINT_MAX; //workaround to ignore the restriction return restriction; } diff --git a/Extractor/XMLParser.h b/Extractor/XMLParser.h index 3bc54825d..cc96e65e1 100644 --- a/Extractor/XMLParser.h +++ b/Extractor/XMLParser.h @@ -25,28 +25,18 @@ #include "../typedefs.h" #include "BaseParser.h" -#include "ExtractorCallbacks.h" -#include "ScriptingEnvironment.h" -class XMLParser : public BaseParser { +class XMLParser : public BaseParser { public: - XMLParser(const char * filename); - virtual ~XMLParser(); - void RegisterCallbacks(ExtractorCallbacks * em); - void RegisterScriptingEnvironment(ScriptingEnvironment & _se); - bool Init(); + XMLParser(ExtractorCallbacks* em, ScriptingEnvironment& se, const char* filename); + bool ReadHeader(); bool Parse(); private: _RawRestrictionContainer _ReadXMLRestriction(); ExtractionWay _ReadXMLWay(); - ImportNode _ReadXMLNode( ); - /* Input Reader */ + ImportNode _ReadXMLNode(); xmlTextReaderPtr inputReader; - ExtractorCallbacks * externalMemory; - lua_State *myLuaState; - - std::vector restriction_exceptions_vector; }; #endif /* XMLPARSER_H_ */ diff --git a/extractor.cpp b/extractor.cpp index 1aa72f83b..fb52961e6 100644 --- a/extractor.cpp +++ b/extractor.cpp @@ -94,20 +94,19 @@ int main (int argc, char *argv[]) { stringMap[""] = 0; extractCallBacks = new ExtractorCallbacks(&externalMemory, 
&stringMap); - BaseParser * parser; + BaseParser* parser; if(isPBF) { - parser = new PBFParser(argv[1]); + parser = new PBFParser(extractCallBacks, scriptingEnvironment, argv[1]); } else { - parser = new XMLParser(argv[1]); + parser = new XMLParser(extractCallBacks, scriptingEnvironment, argv[1]); } - parser->RegisterCallbacks(extractCallBacks); - parser->RegisterScriptingEnvironment(scriptingEnvironment); - - if(!parser->Init()) + + if(!parser->ReadHeader()) ERR("Parser not initialized!"); + INFO("Parsing in progress.."); double time = get_timestamp(); parser->Parse(); - INFO("parsing finished after " << get_timestamp() - time << " seconds"); + INFO("Parsing finished after " << get_timestamp() - time << " seconds"); externalMemory.PrepareData(outputFileName, restrictionsFileName, amountOfRAM); diff --git a/features/support/data.rb b/features/support/data.rb index 227a962fc..ce6b995c1 100644 --- a/features/support/data.rb +++ b/features/support/data.rb @@ -205,14 +205,15 @@ def write_timestamp end def reprocess + use_pbf = true Dir.chdir TEST_FOLDER do write_osm write_timestamp - convert_osm_to_pbf + convert_osm_to_pbf if use_pbf unless extracted? log_preprocess_info log "== Extracting #{@osm_file}.osm...", :preprocess - unless system "../osrm-extract #{@osm_file}.osm.pbf 1>>#{PREPROCESS_LOG_FILE} 2>>#{PREPROCESS_LOG_FILE} #{PROFILES_PATH}/#{@profile}.lua" + unless system "../osrm-extract #{@osm_file}.osm#{'.pbf' if use_pbf} 1>>#{PREPROCESS_LOG_FILE} 2>>#{PREPROCESS_LOG_FILE} #{PROFILES_PATH}/#{@profile}.lua" log "*** Exited with code #{$?.exitstatus}.", :preprocess raise ExtractError.new $?.exitstatus, "osrm-extract exited with code #{$?.exitstatus}." end