From df4ead91b6b9d424bc3039eece0f594232726e90 Mon Sep 17 00:00:00 2001 From: Dennis Luxen Date: Fri, 18 Mar 2011 10:55:18 +0000 Subject: [PATCH] Introducing AdressCallback in Parser, reverting node renumbering --- Contractor/Contractor.h | 15 +-- DataStructures/BaseParser.h | 2 +- DataStructures/ExtractorCallBacks.h | 25 +++- DataStructures/ExtractorStructs.h | 29 ++++- DataStructures/GridEdge.h | 31 +++-- DataStructures/NNGrid.h | 191 ++++++++++++++-------------- DataStructures/PBFParser.h | 15 ++- DataStructures/XMLParser.h | 2 +- createHierarchy.cpp | 117 +++++++---------- extractLargeNetwork.cpp | 41 ++++-- 10 files changed, 260 insertions(+), 208 deletions(-) diff --git a/Contractor/Contractor.h b/Contractor/Contractor.h index 078b972fe..1f714478f 100644 --- a/Contractor/Contractor.h +++ b/Contractor/Contractor.h @@ -132,7 +132,7 @@ public: edge.data.backwardTurn = i->isBackwardTurn(); edges.push_back( edge ); } - std::vector< InputEdge >().swap( inputEdges ); //free memory +// std::vector< InputEdge >().swap( inputEdges ); //free memory #ifdef _GLIBCXX_PARALLEL __gnu_parallel::sort( edges.begin(), edges.end() ); #else @@ -388,24 +388,15 @@ private: _Heap& heap = data->heap; - int nodes = 0; - unsigned targetsFound = 0; + unsigned nodes = 0; while ( heap.Size() > 0 ) { const NodeID node = heap.DeleteMin(); const int distance = heap.GetKey( node ); - //const int hops = heap.GetData( node ).hops; - if ( nodes++ > 1000 ) + if ( nodes++ > numTargets ) return; - //if ( hops >= 5 ) - // return; //Destination settled? if ( distance > maxDistance ) return; - if( heap.GetData( node ).target ) { - targetsFound++; - if ( targetsFound >= numTargets ) - return; - } //iterate over all edges of node for ( _DynamicGraph::EdgeIterator edge = _graph->BeginEdges( node ), endEdges = _graph->EndEdges( node ); edge != endEdges; ++edge ) { diff --git a/DataStructures/BaseParser.h b/DataStructures/BaseParser.h index 7d0bc9011..76b92bac3 100644 --- a/DataStructures/BaseParser.h +++ b/DataStructures/BaseParser.h @@ -26,7 +26,7 @@ class BaseParser { public: virtual ~BaseParser() {} virtual bool Init() = 0; - virtual bool RegisterCallbacks(bool (*nodeCallbackPointer)(NodeT), bool (*relationCallbackPointer)(RelationT), bool (*wayCallbackPointer)(WayT)) = 0; + virtual bool RegisterCallbacks(bool (*nodeCallbackPointer)(NodeT), bool (*relationCallbackPointer)(RelationT), bool (*wayCallbackPointer)(WayT), bool (*addressCallbackPointer)(NodeT, HashTable)) = 0; virtual bool Parse() = 0; private: }; diff --git a/DataStructures/ExtractorCallBacks.h b/DataStructures/ExtractorCallBacks.h index 3ece8e022..1a939dfcd 100644 --- a/DataStructures/ExtractorCallBacks.h +++ b/DataStructures/ExtractorCallBacks.h @@ -27,23 +27,27 @@ or see http://www.gnu.org/licenses/agpl.txt. typedef stxxl::vector STXXLNodeIDVector; typedef stxxl::vector<_Node> STXXLNodeVector; typedef stxxl::vector<_Edge> STXXLEdgeVector; +typedef stxxl::vector<_Address> STXXLAddressVector; typedef stxxl::vector STXXLStringVector; + class ExtractorCallbacks{ private: STXXLNodeVector * allNodes; STXXLNodeIDVector * usedNodes; STXXLEdgeVector * allEdges; STXXLStringVector * nameVector; + STXXLAddressVector * addressVector; Settings settings; StringMap * stringMap; public: - ExtractorCallbacks(STXXLNodeVector * aNodes, STXXLNodeIDVector * uNodes, STXXLEdgeVector * aEdges, STXXLStringVector * nVector, Settings s, StringMap * strMap){ + ExtractorCallbacks(STXXLNodeVector * aNodes, STXXLNodeIDVector * uNodes, STXXLEdgeVector * aEdges, STXXLStringVector * nVector, STXXLAddressVector * adrVector, Settings s, StringMap * strMap){ allNodes = aNodes; usedNodes = uNodes; allEdges = aEdges; nameVector = nVector; + addressVector = adrVector; settings = s; stringMap = strMap; } @@ -56,14 +60,33 @@ public: delete stringMap; } + bool adressFunction(_Node n, HashTable &keyVals) { + std::string housenumber(keyVals.Find("addr:housenumber")); + std::string housename(keyVals.Find("addr:housename")); + std::string street(keyVals.Find("addr:street")); + std::string state(keyVals.Find("addr:state")); + std::string country(keyVals.Find("addr:country")); + std::string postcode(keyVals.Find("addr:postcode")); + std::string city(keyVals.Find("addr:city")); + + if(housenumber != "" || housename != "" || street != "") { + if(housenumber == "") + housenumber = housename; + addressVector->push_back(_Address(n, housenumber, street, state, country, postcode, city)); + } + return true; + } + bool nodeFunction(_Node &n) { allNodes->push_back(n); return true; } + bool relationFunction(_Relation &r) { //do nothing; return true; } + bool wayFunction(_Way &w) { std::string highway( w.keyVals.Find("highway") ); std::string name( w.keyVals.Find("name") ); diff --git a/DataStructures/ExtractorStructs.h b/DataStructures/ExtractorStructs.h index 6584c98fe..ba9bf08ce 100644 --- a/DataStructures/ExtractorStructs.h +++ b/DataStructures/ExtractorStructs.h @@ -53,16 +53,13 @@ struct _Node : NodeInfo{ _Node(int _lat, int _lon, unsigned int _id) : NodeInfo(_lat, _lon, _id) {} _Node() {} - static _Node min_value() - { + static _Node min_value() { return _Node(0,0,0); } - static _Node max_value() - { + static _Node max_value() { return _Node(numeric_limits::max(), numeric_limits::max(), numeric_limits::max()); } - NodeID key() const - { + NodeID key() const { return id; } }; @@ -97,6 +94,26 @@ struct _Way { HashTable keyVals; }; +struct _Address { + _Address() {} + _Address(_Node n, std::string h, std::string str, std::string sta, std::string p, std::string ci, std::string co) { + node = n; + housenumber = h; + street = str; + state = sta; + postcode = p; + city = ci; + country = co; + } + _Node node; + std::string housenumber; + std::string street; + std::string state; + std::string postcode; + std::string city; + std::string country; +}; + struct _Relation { enum { unknown = 0, ferry diff --git a/DataStructures/GridEdge.h b/DataStructures/GridEdge.h index 6b5ad2b97..55159a243 100644 --- a/DataStructures/GridEdge.h +++ b/DataStructures/GridEdge.h @@ -21,27 +21,36 @@ or see http://www.gnu.org/licenses/agpl.txt. #ifndef GRIDEDGE_H_ #define GRIDEDGE_H_ -struct GridEdgeData { - GridEdgeData(_Edge e, unsigned f, unsigned r) : edge(e), fileIndex(f), ramIndex(r) {} - GridEdgeData() {} - _Edge edge; +struct _GridEdge { + _GridEdge(NodeID s, NodeID t, _Coordinate sc, _Coordinate tc) : start(s), target(t), startCoord(sc), targetCoord(tc) {} + _GridEdge() {} + NodeID start; + NodeID target; + _Coordinate startCoord; + _Coordinate targetCoord; +}; + +struct GridEntry { + GridEntry() {} + GridEntry(_GridEdge e, unsigned f, unsigned r) : edge(e), fileIndex(f), ramIndex(r) {} + _GridEdge edge; unsigned fileIndex; unsigned ramIndex; - bool operator< ( const GridEdgeData& right ) const { + bool operator< ( const GridEntry& right ) const { if(right.edge.start != edge.start) return right.edge.start < edge.start; if(right.edge.target != edge.target) return right.edge.target < edge.target; return false; } - bool operator==( const GridEdgeData& right ) const { + bool operator==( const GridEntry& right ) const { return right.edge.start == edge.start && right.edge.target == edge.target; } }; struct CompareGridEdgeDataByFileIndex { - bool operator () (const GridEdgeData & a, const GridEdgeData & b) const + bool operator () (const GridEntry & a, const GridEntry & b) const { return a.fileIndex < b.fileIndex; } @@ -49,21 +58,21 @@ struct CompareGridEdgeDataByFileIndex struct CompareGridEdgeDataByRamIndex { - typedef GridEdgeData value_type; + typedef GridEntry value_type; - bool operator () (const GridEdgeData & a, const GridEdgeData & b) const + bool operator () (const GridEntry & a, const GridEntry & b) const { return a.ramIndex < b.ramIndex; } value_type max_value() { - GridEdgeData e; + GridEntry e; e.ramIndex = (1024*1024) - 1; return e; } value_type min_value() { - GridEdgeData e; + GridEntry e; e.ramIndex = 0; return e; } diff --git a/DataStructures/NNGrid.h b/DataStructures/NNGrid.h index ba6897eb4..efb1cb2a5 100644 --- a/DataStructures/NNGrid.h +++ b/DataStructures/NNGrid.h @@ -21,11 +21,11 @@ or see http://www.gnu.org/licenses/agpl.txt. #ifndef NNGRID_H_ #define NNGRID_H_ -#include - +#include #include -#include #include +#include +#include #include #include @@ -37,12 +37,10 @@ or see http://www.gnu.org/licenses/agpl.txt. #include "Percent.h" #include "PhantomNodes.h" #include "Util.h" - #include "StaticGraph.h" namespace NNGrid{ -static unsigned getFileIndexForLatLon(const int lt, const int ln) -{ +static unsigned GetFileIndexForLatLon(const int lt, const int ln) { double lat = lt/100000.; double lon = ln/100000.; @@ -60,7 +58,7 @@ static unsigned getFileIndexForLatLon(const int lt, const int ln) return fileIndex; } -static unsigned getRAMIndexFromFileIndex(const int fileIndex) { +static unsigned GetRAMIndexFromFileIndex(const int fileIndex) { unsigned fileLine = fileIndex / 32768; fileLine = fileLine / 32; fileLine = fileLine * 1024; @@ -75,8 +73,7 @@ static inline int signum(int x){ return (x > 0) ? 1 : (x < 0) ? -1 : 0; } -static void bresenham(int xstart,int ystart,int xend,int yend, std::vector > &indexList) -{ +static void GetIndicesByBresenhamsAlgorithm(int xstart,int ystart,int xend,int yend, std::vector > &indexList) { int x, y, t, dx, dy, incx, incy, pdx, pdy, ddx, ddy, es, el, err; dx = xend - xstart; @@ -87,13 +84,11 @@ static void bresenham(int xstart,int ystart,int xend,int yend, std::vectordy) - { + if (dx>dy) { pdx=incx; pdy=0; ddx=incx; ddy=incy; es =dy; el =dx; - } else - { + } else { pdx=0; pdy=incy; ddx=incx; ddy=incy; es =dx; el =dy; @@ -103,33 +98,29 @@ static void bresenham(int xstart,int ystart,int xend,int yend, std::vector > &indexList) -{ +static void GetListOfIndexesForEdgeAndGridSize(_Coordinate& start, _Coordinate& target, std::vector > &indexList) { double lat1 = start.lat/100000.; double lon1 = start.lon/100000.; @@ -142,7 +133,7 @@ static void getListOfIndexesForEdgeAndGridSize(_Coordinate& start, _Coordinate& double x2 = ( lon2 + 180.0 ) / 360.0; double y2 = ( lat2 + 90.0 ) / 180.0; - bresenham(x1*32768, y1*32768, x2*32768, y2*32768, indexList); + GetIndicesByBresenhamsAlgorithm(x1*32768, y1*32768, x2*32768, y2*32768, indexList); } template @@ -161,7 +152,7 @@ class NNGrid { public: ThreadLookupTable threadLookup; - NNGrid() { ramIndexTable.resize((1024*1024), UINT_MAX); if( WriteAccess) { entries = new stxxl::vector(); }} + NNGrid() { ramIndexTable.resize((1024*1024), UINT_MAX); if( WriteAccess) { entries = new stxxl::vector(); }} NNGrid(const char* rif, const char* iif, unsigned numberOfThreads = omp_get_num_procs()) { ramIndexTable.resize((1024*1024), UINT_MAX); @@ -175,7 +166,6 @@ public: ~NNGrid() { if(ramInFile.is_open()) ramInFile.close(); -// if(indexInFile.is_open()) indexInFile.close(); if (WriteAccess) { delete entries; @@ -185,14 +175,13 @@ public: delete indexFileStreams[i]; } threadLookup.EraseAll(); + ramIndexTable.clear(); } void OpenIndexFiles() { assert(ramInFile.is_open()); -// assert(indexInFile.is_open()); - for(int i = 0; i < 1024*1024; i++) - { + for(int i = 0; i < 1024*1024; i++) { unsigned temp; ramInFile.read((char*)&temp, sizeof(unsigned)); ramIndexTable[i] = temp; @@ -200,21 +189,26 @@ public: ramInFile.close(); } - void AddEdge(_Edge edge, _Coordinate start, _Coordinate target) - { - edge.startCoord = start; - edge.targetCoord = target; + template + void ConstructGrid(std::vector & edgeList, vector * int2ExtNodeMap, char * ramIndexOut, char * fileIndexOut) { + Percent p(edgeList.size()); + for(NodeID i = 0; i < edgeList.size(); i++) { + p.printIncrement(); + if( edgeList[i].isLocatable() == false ) + continue; + EdgeT edge = edgeList[i]; - std::vector > indexList; - getListOfIndexesForEdgeAndGridSize(start, target, indexList); - for(unsigned i = 0; i < indexList.size(); i++) - { - entries->push_back(GridEdgeData(edge, indexList[i].first, indexList[i].second)); - } - } - - void ConstructGrid(char * ramIndexOut, char * fileIndexOut) - { + int slat = int2ExtNodeMap->at(edge.source()).lat; + int slon = int2ExtNodeMap->at(edge.source()).lon; + int tlat = int2ExtNodeMap->at(edge.target()).lat; + int tlon = int2ExtNodeMap->at(edge.target()).lon; + AddEdge( _GridEdge( + edgeList[i].source(), + edgeList[i].target(), + _Coordinate(slat, slon), + _Coordinate(tlat, tlon) ) + ); + } double timestamp = get_timestamp(); //create index file on disk, old one is over written indexOutFile.open(fileIndexOut, std::ios::out | std::ios::binary | std::ios::trunc); @@ -222,18 +216,16 @@ public: //sort entries stxxl::sort(entries->begin(), entries->end(), CompareGridEdgeDataByRamIndex(), 1024*1024*1024); cout << "ok in " << (get_timestamp() - timestamp) << "s" << endl; - std::vector entriesInFileWithRAMSameIndex; + std::vector entriesInFileWithRAMSameIndex; unsigned indexInRamTable = entries->begin()->ramIndex; unsigned lastPositionInIndexFile = 0; unsigned numberOfUsedCells = 0; unsigned maxNumberOfRAMCellElements = 0; cout << "writing data ..." << flush; - Percent p(entries->size()); - for(stxxl::vector::iterator vt = entries->begin(); vt != entries->end(); vt++) - { + p.reinit(entries->size()); + for(stxxl::vector::iterator vt = entries->begin(); vt != entries->end(); vt++) { p.printIncrement(); - if(vt->ramIndex != indexInRamTable) - { + if(vt->ramIndex != indexInRamTable) { unsigned numberOfBytesInCell = FillCell(entriesInFileWithRAMSameIndex, lastPositionInIndexFile); if(entriesInFileWithRAMSameIndex.size() > maxNumberOfRAMCellElements) maxNumberOfRAMCellElements = entriesInFileWithRAMSameIndex.size(); @@ -253,9 +245,8 @@ public: assert(entriesInFileWithRAMSameIndex.size() == 0); - for(int i = 0; i < 1024*1024; i++) - { - if(ramIndexTable[i] != UINT_MAX){ + for(int i = 0; i < 1024*1024; i++) { + if(ramIndexTable[i] != UINT_MAX) { numberOfUsedCells--; } } @@ -272,25 +263,29 @@ public: ramFile.close(); } - bool FindRoutingStarts(const _Coordinate startCoord, const _Coordinate targetCoord, PhantomNodes * routingStarts) { - unsigned fileIndex = getFileIndexForLatLon(startCoord.lat, startCoord.lon); + bool FindRoutingStarts(const _Coordinate& startCoord, const _Coordinate& targetCoord, PhantomNodes * routingStarts) { + + /** search for point on edge close to source */ + unsigned fileIndex = GetFileIndexForLatLon(startCoord.lat, startCoord.lon); std::vector<_Edge> candidates; double timestamp = get_timestamp(); - for(int j = -32768; j < (32768+1); j+=32768){ - for(int i = -1; i < 2; i++){ + + for(int j = -32768; j < (32768+1); j+=32768) { + for(int i = -1; i < 2; i++){ GetContentsOfFileBucket(fileIndex+i+j, candidates); } } + + std::cout << "[debug] " << candidates.size() << " start candidates" << std::endl; _Coordinate tmp; double dist = numeric_limits::max(); timestamp = get_timestamp(); - for(std::vector<_Edge>::iterator it = candidates.begin(); it != candidates.end(); it++) - { + for(std::vector<_Edge>::iterator it = candidates.begin(); it != candidates.end(); it++) { double r = 0.; double tmpDist = ComputeDistance(startCoord, it->startCoord, it->targetCoord, tmp, &r); - if(tmpDist < dist) - { - routingStarts->startNode1 = it->start; + if(tmpDist < dist) { + std::cout << "[debug] start distance " << (it - candidates.begin()) << " " << tmpDist << std::endl; + routingStarts->startNode1 = it->start; routingStarts->startNode2 = it->target; routingStarts->startRatio = r; dist = tmpDist; @@ -298,14 +293,17 @@ public: routingStarts->startCoord.lon = tmp.lon; } } - fileIndex = getFileIndexForLatLon(targetCoord.lat, targetCoord.lon); - candidates.clear(); + candidates.clear(); + + /** search for point on edge close to target */ + fileIndex = GetFileIndexForLatLon(targetCoord.lat, targetCoord.lon); timestamp = get_timestamp(); - for(int j = -32768; j < (32768+1); j+=32768){ + for(int j = -32768; j < (32768+1); j+=32768) { for(int i = -1; i < 2; i++){ GetContentsOfFileBucket(fileIndex+i+j, candidates); } } + std::cout << "[debug] " << candidates.size() << " target candidates" << std::endl; dist = numeric_limits::max(); timestamp = get_timestamp(); for(std::vector<_Edge>::iterator it = candidates.begin(); it != candidates.end(); it++) @@ -314,7 +312,8 @@ public: double tmpDist = ComputeDistance(targetCoord, it->startCoord, it->targetCoord, tmp, &r); if(tmpDist < dist) { - routingStarts->targetNode1 = it->start; + std::cout << "[debug] target distance " << (it - candidates.begin()) << " " << tmpDist << std::endl; + routingStarts->targetNode1 = it->start; routingStarts->targetNode2 = it->target; routingStarts->targetRatio = r; dist = tmpDist; @@ -325,25 +324,22 @@ public: return true; } - _Coordinate FindNearestPointOnEdge(const _Coordinate& inputCoordinate) - { - unsigned fileIndex = getFileIndexForLatLon(inputCoordinate.lat, inputCoordinate.lon); + _Coordinate FindNearestPointOnEdge(const _Coordinate& inputCoordinate) { + unsigned fileIndex = GetFileIndexForLatLon(inputCoordinate.lat, inputCoordinate.lon); std::vector<_Edge> candidates; double timestamp = get_timestamp(); - for(int j = -32768; j < (32768+1); j+=32768){ - for(int i = -1; i < 2; i++){ + for(int j = -32768; j < (32768+1); j+=32768) { + for(int i = -1; i < 2; i++) { GetContentsOfFileBucket(fileIndex+i+j, candidates); } } _Coordinate nearest(numeric_limits::max(), numeric_limits::max()), tmp; double dist = numeric_limits::max(); timestamp = get_timestamp(); - for(std::vector<_Edge>::iterator it = candidates.begin(); it != candidates.end(); it++) - { + for(std::vector<_Edge>::iterator it = candidates.begin(); it != candidates.end(); it++) { double r = 0.; double tmpDist = ComputeDistance(inputCoordinate, it->startCoord, it->targetCoord, tmp, &r); - if(tmpDist < dist) - { + if(tmpDist < dist) { dist = tmpDist; nearest = tmp; } @@ -352,7 +348,7 @@ public: } private: - unsigned FillCell(std::vector& entriesWithSameRAMIndex, unsigned fileOffset ) + unsigned FillCell(std::vector& entriesWithSameRAMIndex, unsigned fileOffset ) { vector * tmpBuffer = new vector(); tmpBuffer->resize(32*32*4096,0); @@ -388,13 +384,13 @@ private: //sort & unique std::sort(entriesWithSameRAMIndex.begin(), entriesWithSameRAMIndex.end(), CompareGridEdgeDataByFileIndex()); - std::vector::iterator uniqueEnd = std::unique(entriesWithSameRAMIndex.begin(), entriesWithSameRAMIndex.end()); + std::vector::iterator uniqueEnd = std::unique(entriesWithSameRAMIndex.begin(), entriesWithSameRAMIndex.end()); //traverse each file bucket and write its contents to disk - std::vector entriesWithSameFileIndex; + std::vector entriesWithSameFileIndex; unsigned fileIndex = entriesWithSameRAMIndex.begin()->fileIndex; - for(std::vector::iterator it = entriesWithSameRAMIndex.begin(); it != uniqueEnd; it++) + for(std::vector::iterator it = entriesWithSameRAMIndex.begin(); it != uniqueEnd; it++) { assert(cellMap->find(it->fileIndex) != cellMap->end() ); //asserting that file index belongs to cell index if(it->fileIndex != fileIndex) @@ -402,21 +398,21 @@ private: // start in cellIndex vermerken int localFileIndex = entriesWithSameFileIndex.begin()->fileIndex; int localCellIndex = cellMap->find(localFileIndex)->second; - /*int localRamIndex = */getRAMIndexFromFileIndex(localFileIndex); + /*int localRamIndex = */GetRAMIndexFromFileIndex(localFileIndex); assert(cellMap->find(entriesWithSameFileIndex.begin()->fileIndex) != cellMap->end()); cellIndex[localCellIndex] = indexIntoTmpBuffer + fileOffset; indexIntoTmpBuffer += FlushEntriesWithSameFileIndexToBuffer(entriesWithSameFileIndex, tmpBuffer, indexIntoTmpBuffer); entriesWithSameFileIndex.clear(); //todo: in flushEntries erledigen. } - GridEdgeData data = *it; + GridEntry data = *it; entriesWithSameFileIndex.push_back(data); fileIndex = it->fileIndex; } assert(cellMap->find(entriesWithSameFileIndex.begin()->fileIndex) != cellMap->end()); int localFileIndex = entriesWithSameFileIndex.begin()->fileIndex; int localCellIndex = cellMap->find(localFileIndex)->second; - /*int localRamIndex = */getRAMIndexFromFileIndex(localFileIndex); + /*int localRamIndex = */GetRAMIndexFromFileIndex(localFileIndex); cellIndex[localCellIndex] = indexIntoTmpBuffer + fileOffset; indexIntoTmpBuffer += FlushEntriesWithSameFileIndexToBuffer(entriesWithSameFileIndex, tmpBuffer, indexIntoTmpBuffer); @@ -442,7 +438,7 @@ private: return numberOfWrittenBytes; } - unsigned FlushEntriesWithSameFileIndexToBuffer(const std::vector &vectorWithSameFileIndex, vector * tmpBuffer, const unsigned index) + unsigned FlushEntriesWithSameFileIndexToBuffer( std::vector &vectorWithSameFileIndex, vector * tmpBuffer, const unsigned index) { tmpBuffer->resize(tmpBuffer->size()+(sizeof(NodeID)+sizeof(NodeID)+4*sizeof(int)+sizeof(unsigned))*vectorWithSameFileIndex.size() ); unsigned counter = 0; @@ -454,7 +450,9 @@ private: assert( vectorWithSameFileIndex[i].ramIndex == vectorWithSameFileIndex[i+1].ramIndex ); } - for(std::vector::const_iterator et = vectorWithSameFileIndex.begin(); et != vectorWithSameFileIndex.end(); et++) + sort( vectorWithSameFileIndex.begin(), vectorWithSameFileIndex.end() ); + std::vector::const_iterator newEnd = unique(vectorWithSameFileIndex.begin(), vectorWithSameFileIndex.end()); + for(std::vector::const_iterator et = vectorWithSameFileIndex.begin(); et != newEnd; et++) { char * start = (char *)&et->edge.start; for(unsigned i = 0; i < sizeof(NodeID); i++) @@ -502,11 +500,9 @@ private: return counter; } - void GetContentsOfFileBucket(const unsigned fileIndex, std::vector<_Edge>& result) - { -// cout << "thread: " << boost::this_thread::get_id() << ", hash: " << boost_thread_id_hash(boost::this_thread::get_id()) << ", id: " << threadLookup.table.Find(boost_thread_id_hash(boost::this_thread::get_id())) << endl; + void GetContentsOfFileBucket(const unsigned fileIndex, std::vector<_Edge>& result) { unsigned threadID = threadLookup.Find(boost_thread_id_hash(boost::this_thread::get_id())); - unsigned ramIndex = getRAMIndexFromFileIndex(fileIndex); + unsigned ramIndex = GetRAMIndexFromFileIndex(fileIndex); unsigned startIndexInFile = ramIndexTable[ramIndex]; // ifstream indexInFile( indexFileStreams[threadID]->stream ); if(startIndexInFile == UINT_MAX){ @@ -576,9 +572,16 @@ private: delete cellMap; } + void AddEdge(_GridEdge edge) { + std::vector > indexList; + GetListOfIndexesForEdgeAndGridSize(edge.startCoord, edge.targetCoord, indexList); + for(unsigned i = 0; i < indexList.size(); i++) { + entries->push_back(GridEntry(edge, indexList[i].first, indexList[i].second)); + } + } + /* More or less from monav project, thanks */ - double ComputeDistance(const _Coordinate& inputPoint, const _Coordinate& source, const _Coordinate& target, _Coordinate& nearest, double *r) - { + double ComputeDistance(const _Coordinate& inputPoint, const _Coordinate& source, const _Coordinate& target, _Coordinate& nearest, double *r) { const double vY = (double)target.lon - (double)source.lon; const double vX = (double)target.lat - (double)source.lat; @@ -587,18 +590,17 @@ private: const double lengthSquared = vX * vX + vY * vY; - if(lengthSquared != 0) - { + if(lengthSquared != 0) { *r = (vX * wX + vY * wY) / lengthSquared; } double percentage = *r; - if(*r <=0 ){ + if(*r <=0 ) { nearest.lat = source.lat; nearest.lon = source.lon; percentage = 0; return wY * wY + wX * wX; } - if( *r>= 1){ + if( *r>= 1) { nearest.lat = target.lat; nearest.lon = target.lon; percentage = 1; @@ -615,10 +617,9 @@ private: } ofstream indexOutFile; -// ifstream indexInFile; ifstream ramInFile; std::vector < _ThreadData* > indexFileStreams; - stxxl::vector * entries; + stxxl::vector * entries; std::vector ramIndexTable; //4 MB for first level index in RAM }; } diff --git a/DataStructures/PBFParser.h b/DataStructures/PBFParser.h index d62df8f05..2d48635a8 100644 --- a/DataStructures/PBFParser.h +++ b/DataStructures/PBFParser.h @@ -62,10 +62,11 @@ public: groupCount = 0; } - bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way) ) { + bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way),bool (*addressCallbackPointer)(_Node, HashTable) ) { nodeCallback = *nodeCallbackPointer; wayCallback = *wayCallbackPointer; relationCallback = *relationCallbackPointer; + addressCallback = *addressCallbackPointer; return true; } @@ -145,6 +146,7 @@ private: int m_lastDenseLongitude = 0; for(int i = 0; i < dense.id_size(); i++) { + HashTable keyVals; m_lastDenseID += dense.id( i ); m_lastDenseLatitude += dense.lat( i ); m_lastDenseLongitude += dense.lon( i ); @@ -158,13 +160,17 @@ private: denseTagIndex++; break; } - //int keyValue = dense.keys_vals ( denseTagIndex+1 ); + int keyValue = dense.keys_vals ( denseTagIndex+1 ); /* Key/Value Pairs are known from here on */ // std::cout << "[debug] node: " << n.id << std::endl; // std::cout << "[debug] key = " << PBFprimitiveBlock.stringtable().s(tagValue).data() << ", value: " << PBFprimitiveBlock.stringtable().s(keyValue).data() << std::endl; - + std::string key = PBFprimitiveBlock.stringtable().s(tagValue).data(); + std::string value = PBFprimitiveBlock.stringtable().s(keyValue).data(); + keyVals.Add(key, value); denseTagIndex += 2; } + if(!(*addressCallback)(n, keyVals)) + std::cerr << "[PBFParser] adress not parsed" << std::endl; if(!(*nodeCallback)(n)) std::cerr << "[PBFParser] dense node not parsed" << std::endl; @@ -287,6 +293,7 @@ private: ret = inflate( &compressedDataStream, Z_FINISH ); if ( ret != Z_STREAM_END ) { std::cerr << "[error] failed to inflate zlib stream" << std::endl; + std::cerr << "[error] Error type: " << ret << std::endl; return false; } @@ -413,7 +420,7 @@ private: bool (*nodeCallback)(_Node); bool (*wayCallback)(_Way); bool (*relationCallback)(_Relation); - + bool (*addressCallback)(_Node, HashTable); /* the input stream to parse */ std::fstream input; }; diff --git a/DataStructures/XMLParser.h b/DataStructures/XMLParser.h index 3c4889c22..156fe322e 100644 --- a/DataStructures/XMLParser.h +++ b/DataStructures/XMLParser.h @@ -36,7 +36,7 @@ public: } ~XMLParser() {} - bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way) ) { + bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way), bool (*addressCallbackPointer)(_Node, HashTable) ) { nodeCallback = *nodeCallbackPointer; wayCallback = *wayCallbackPointer; relationCallback = *relationCallbackPointer; diff --git a/createHierarchy.cpp b/createHierarchy.cpp index f3cf806ac..f05c94fb4 100644 --- a/createHierarchy.cpp +++ b/createHierarchy.cpp @@ -67,14 +67,13 @@ int main (int argc, char *argv[]) { exit(-1); } - //todo: check if contractor exists - unsigned numberOfThreads = omp_get_num_procs(); - if(testDataFile("contractor.ini")) { - ContractorConfiguration contractorConfig("contractor.ini"); - if(atoi(contractorConfig.GetParameter("Threads").c_str()) != 0 && (unsigned)atoi(contractorConfig.GetParameter("Threads").c_str()) <= numberOfThreads) - numberOfThreads = (unsigned)atoi( contractorConfig.GetParameter("Threads").c_str() ); - } - omp_set_num_threads(numberOfThreads); + unsigned numberOfThreads = omp_get_num_procs(); + if(testDataFile("contractor.ini")) { + ContractorConfiguration contractorConfig("contractor.ini"); + if(atoi(contractorConfig.GetParameter("Threads").c_str()) != 0 && (unsigned)atoi(contractorConfig.GetParameter("Threads").c_str()) <= numberOfThreads) + numberOfThreads = (unsigned)atoi( contractorConfig.GetParameter("Threads").c_str() ); + } + omp_set_num_threads(numberOfThreads); cout << "preprocessing data from input file " << argv[1]; #ifdef _GLIBCXX_PARALLEL @@ -90,13 +89,14 @@ int main (int argc, char *argv[]) { } vector edgeList; const NodeID n = readOSRMGraphFromStream(in, edgeList, int2ExtNodeMap); + unsigned numberOfNodes = int2ExtNodeMap->size(); in.close(); - cout << "computing turn vector info ..." << flush; - TurnInfoFactory * infoFactory = new TurnInfoFactory(n, edgeList); - infoFactory->Run(); - delete infoFactory; - cout << "ok" << endl; + // cout << "computing turn vector info ..." << flush; + // TurnInfoFactory * infoFactory = new TurnInfoFactory(n, edgeList); + // infoFactory->Run(); + // delete infoFactory; + // cout << "ok" << endl; char nodeOut[1024]; char edgeOut[1024]; @@ -114,65 +114,31 @@ int main (int argc, char *argv[]) { strcat(ramIndexOut, ".ramIndex"); strcat(fileIndexOut, ".fileIndex"); strcat(levelInfoOut, ".levels"); - ofstream mapOutFile(nodeOut, ios::binary); - - WritableGrid * g = new WritableGrid(); - cout << "building grid ..." << flush; - Percent p(edgeList.size()); - for(NodeID i = 0; i < edgeList.size(); i++) { - p.printIncrement(); - if(!edgeList[i].isLocatable()) - continue; - int slat = int2ExtNodeMap->at(edgeList[i].source()).lat; - int slon = int2ExtNodeMap->at(edgeList[i].source()).lon; - int tlat = int2ExtNodeMap->at(edgeList[i].target()).lat; - int tlon = int2ExtNodeMap->at(edgeList[i].target()).lon; - g->AddEdge( - _Edge( - edgeList[i].source(), - edgeList[i].target(), - 0, - ((edgeList[i].isBackward() && edgeList[i].isForward()) ? 0 : 1), - edgeList[i].weight() - ), - - _Coordinate(slat, slon), - _Coordinate(tlat, tlon) - ); - } - g->ConstructGrid(ramIndexOut, fileIndexOut); - delete g; - - unsigned numberOfNodes = int2ExtNodeMap->size(); - //Serializing the node map. - for(NodeID i = 0; i < int2ExtNodeMap->size(); i++) { - mapOutFile.write((char *)&(int2ExtNodeMap->at(i)), sizeof(NodeInfo)); - } - mapOutFile.close(); - int2ExtNodeMap->clear(); - delete int2ExtNodeMap; cout << "initializing contractor ..." << flush; Contractor* contractor = new Contractor( n, edgeList ); - contractor->Run(); - cout << "checking data sanity ..." << flush; - contractor->CheckForAllOrigEdges(edgeList); - cout << "ok" << endl; LevelInformation * levelInfo = contractor->GetLevelInformation(); + std::cout << "sorting level info" << std::endl; + for(unsigned currentLevel = levelInfo->GetNumberOfLevels(); currentLevel>0; currentLevel--) { + std::vector & level = levelInfo->GetLevel(currentLevel-1); + std::sort(level.begin(), level.end()); + } + + std::cout << "writing level info" << std::endl; ofstream levelOutFile(levelInfoOut, ios::binary); unsigned numberOfLevels = levelInfo->GetNumberOfLevels(); levelOutFile.write((char *)&numberOfLevels, sizeof(unsigned)); for(unsigned currentLevel = 0; currentLevel < levelInfo->GetNumberOfLevels(); currentLevel++ ) { - std::vector & level = levelInfo->GetLevel(currentLevel); - unsigned sizeOfLevel = level.size(); - levelOutFile.write((char *)&sizeOfLevel, sizeof(unsigned)); - for(unsigned currentLevelEntry = 0; currentLevelEntry < sizeOfLevel; currentLevelEntry++) { - unsigned node = level[currentLevelEntry]; - assert(node < numberOfNodes); - levelOutFile.write((char *)&node, sizeof(unsigned)); - } + std::vector & level = levelInfo->GetLevel(currentLevel); + unsigned sizeOfLevel = level.size(); + levelOutFile.write((char *)&sizeOfLevel, sizeof(unsigned)); + for(unsigned currentLevelEntry = 0; currentLevelEntry < sizeOfLevel; currentLevelEntry++) { + unsigned node = level[currentLevelEntry]; + levelOutFile.write((char *)&node, sizeof(unsigned)); + assert(node < numberOfNodes); + } } levelOutFile.close(); std::vector< ContractionCleanup::Edge > contractedEdges; @@ -187,13 +153,10 @@ int main (int argc, char *argv[]) { cleanup->GetData(cleanedEdgeList); delete cleanup; - ofstream edgeOutFile(edgeOut, ios::binary); - - //Serializing the edge list. cout << "Serializing edges " << flush; - p.reinit(cleanedEdgeList.size()); - for(std::vector< InputEdge>::iterator it = cleanedEdgeList.begin(); it != cleanedEdgeList.end(); it++) - { + ofstream edgeOutFile(edgeOut, ios::binary); + Percent p(cleanedEdgeList.size()); + for(std::vector< InputEdge>::iterator it = cleanedEdgeList.begin(); it != cleanedEdgeList.end(); it++) { p.printIncrement(); int distance= it->data.distance; assert(distance > 0); @@ -228,5 +191,23 @@ int main (int argc, char *argv[]) { edgeOutFile.close(); cleanedEdgeList.clear(); + std::cout << "writing node map ..." << std::flush; + ofstream mapOutFile(nodeOut, ios::binary); + + for(NodeID i = 0; i < int2ExtNodeMap->size(); i++) { + + mapOutFile.write((char *)&(int2ExtNodeMap->at(i)), sizeof(NodeInfo)); + } + mapOutFile.close(); + std::cout << "ok" << std::endl; + + WritableGrid * writeableGrid = new WritableGrid(); + cout << "building grid ..." << flush; + writeableGrid->ConstructGrid(edgeList, int2ExtNodeMap, ramIndexOut, fileIndexOut); + delete writeableGrid; + + int2ExtNodeMap->clear(); + delete int2ExtNodeMap; + cout << "finished" << endl; } diff --git a/extractLargeNetwork.cpp b/extractLargeNetwork.cpp index f9aeb50f6..bffdb919e 100644 --- a/extractLargeNetwork.cpp +++ b/extractLargeNetwork.cpp @@ -47,6 +47,7 @@ unsigned globalRelationCounter = 0; ExtractorCallbacks * extractCallBacks; bool nodeFunction(_Node n); +bool adressFunction(_Node n, HashTable keyVals); bool relationFunction(_Relation r); bool wayFunction(_Way w); @@ -76,12 +77,14 @@ int main (int argc, char *argv[]) { outputFileName.append(".osrm"); } } + std::string adressFileName(outputFileName); STXXLNodeIDVector * usedNodes = new STXXLNodeIDVector(); STXXLNodeVector * allNodes = new STXXLNodeVector(); STXXLNodeVector * confirmedNodes = new STXXLNodeVector(); STXXLEdgeVector * allEdges = new STXXLEdgeVector(); STXXLEdgeVector * confirmedEdges = new STXXLEdgeVector(); + STXXLAddressVector* adressVector = new STXXLAddressVector(); STXXLStringVector * nameVector = new STXXLStringVector(); NodeMap * nodeMap = new NodeMap(); @@ -95,14 +98,15 @@ int main (int argc, char *argv[]) { nodeMap->set_empty_key(UINT_MAX); stringMap->set_empty_key(GetRandomString()); stringMap->insert(std::make_pair("", 0)); - extractCallBacks = new ExtractorCallbacks(allNodes, usedNodes, allEdges, nameVector, settings, stringMap); + extractCallBacks = new ExtractorCallbacks(allNodes, usedNodes, allEdges, nameVector, adressVector, settings, stringMap); BaseParser<_Node, _Relation, _Way> * parser; - if(isPBF) + if(isPBF) { parser = new PBFParser(argv[1]); - else + } else { parser = new XMLParser(argv[1]); - parser->RegisterCallbacks(&nodeFunction, &relationFunction, &wayFunction); + } + parser->RegisterCallbacks(&nodeFunction, &relationFunction, &wayFunction, &adressFunction); if(parser->Init()) { parser->Parse(); } else { @@ -116,6 +120,7 @@ int main (int argc, char *argv[]) { std::cout << "[info] no. of used nodes: " << usedNodes->size() << std::endl; std::cout << "[info] raw no. of edges: " << allEdges->size() << std::endl; std::cout << "[info] raw no. of relations: " << globalRelationCounter << std::endl; + std::cout << "[info] raw no. of addresses: " << adressVector->size() << std::endl; std::cout << "[info] parsing through input file took " << get_timestamp() - time << "seconds" << std::endl; time = get_timestamp(); @@ -124,6 +129,8 @@ int main (int argc, char *argv[]) { std::cout << "[extractor] Sorting used nodes ... " << std::flush; stxxl::sort(usedNodes->begin(), usedNodes->end(), Cmp(), memory_to_use); std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; + std::cout << "[debug] highest node id: " << usedNodes->back() << std::endl; + time = get_timestamp(); std::cout << "[extractor] Erasing duplicate entries ... " << std::flush; stxxl::vector::iterator NewEnd = unique ( usedNodes->begin(),usedNodes->end() ) ; @@ -159,6 +166,7 @@ int main (int argc, char *argv[]) { } } cout << "ok, after " << get_timestamp() - time << "s" << endl; + std::cout << "[debug] no of entries in nodemap" << nodeMap->size() << std::endl; time = get_timestamp(); cout << "[extractor] Writing used nodes ... " << flush; @@ -175,14 +183,12 @@ int main (int argc, char *argv[]) { assert(eit->type > -1 || eit->speed != -1); NodeMap::iterator startit = nodeMap->find(eit->start); - if(startit == nodeMap->end()) - { + if(startit == nodeMap->end()) { continue; } NodeMap::iterator targetit = nodeMap->find(eit->target); - if(targetit == nodeMap->end()) - { + if(targetit == nodeMap->end()) { continue; } confirmedEdges->push_back(*eit); @@ -242,7 +248,6 @@ int main (int argc, char *argv[]) { unsigned currentNameIndex = 0; unsigned elementCounter(0); for(STXXLStringVector::iterator it = nameVector->begin(); it != nameVector->end(); it++) { -// for(unsigned i = 0; i < nameVector->size(); i++) { nameIndex->at(elementCounter) = currentNameIndex; currentNameIndex += it->length(); elementCounter++; @@ -263,6 +268,17 @@ int main (int argc, char *argv[]) { delete nameIndex; std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; + time = get_timestamp(); + std::cout << "[extractor] writing address list ... " << std::flush; + + adressFileName.append(".address"); + std::ofstream addressOutFile(adressFileName.c_str()); + for(STXXLAddressVector::iterator it = adressVector->begin(); it != adressVector->end(); it++) { + addressOutFile << it->node.id << "|" << it->node.lat << "|" << it->node.lon << "|" << it->city << "|" << it->street << "|" << it->housenumber << "|" << it->state << "|" << it->country << "\n"; + } + addressOutFile.close(); + std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; + } catch ( const std::exception& e ) { std::cerr << "Caught Execption:" << e.what() << std::endl; return false; @@ -277,6 +293,7 @@ int main (int argc, char *argv[]) { delete nodeMap; delete confirmedNodes; delete confirmedEdges; + delete adressVector; delete parser; cout << "[extractor] finished." << endl; return 0; @@ -286,6 +303,12 @@ bool nodeFunction(_Node n) { extractCallBacks->nodeFunction(n); return true; } + +bool adressFunction(_Node n, HashTable keyVals){ + extractCallBacks->adressFunction(n, keyVals); + return true; +} + bool relationFunction(_Relation r) { globalRelationCounter++; return true;