From 846bb11cc0032d0fdad35333fbb6ec3a3726e6d1 Mon Sep 17 00:00:00 2001 From: Dennis Luxen Date: Wed, 23 Mar 2011 17:15:13 +0000 Subject: [PATCH] BREAKING CHANGE! Intermediate file format is not ASCII encoded anymore. Saves roughly 40 % space. Loading and saving is now much faster. Also, 50% I/Os less by writing the intermediate data non-linear. Please recompute all your data files. --- DataStructures/ExtractorCallBacks.h | 2 + DataStructures/ExtractorStructs.h | 15 ++-- DataStructures/PBFParser.h | 2 + Util/GraphLoader.h | 69 +++++++++++++++ createHierarchy.cpp | 6 +- extractLargeNetwork.cpp | 130 ++++++++++++++-------------- 6 files changed, 148 insertions(+), 76 deletions(-) diff --git a/DataStructures/ExtractorCallBacks.h b/DataStructures/ExtractorCallBacks.h index 79c168f0f..fb3c3285b 100644 --- a/DataStructures/ExtractorCallBacks.h +++ b/DataStructures/ExtractorCallBacks.h @@ -60,6 +60,7 @@ public: /** warning: caller needs to take care of synchronization! */ bool adressFunction(_Node n, HashTable &keyVals) { + /* std::string housenumber(keyVals.Find("addr:housenumber")); std::string housename(keyVals.Find("addr:housename")); std::string street(keyVals.Find("addr:street")); @@ -73,6 +74,7 @@ public: housenumber = housename; addressVector->push_back(_Address(n, housenumber, street, state, country, postcode, city)); } + */ return true; } diff --git a/DataStructures/ExtractorStructs.h b/DataStructures/ExtractorStructs.h index 1743045f1..fa8d5e8c9 100644 --- a/DataStructures/ExtractorStructs.h +++ b/DataStructures/ExtractorStructs.h @@ -50,8 +50,8 @@ std::string names[14] = { "motorway", "motorway_link", "trunk", "trunk_link", "p double speeds[14] = { 110, 90, 90, 70, 70, 60, 60, 50, 55, 25, 40 , 10, 30, 5}; struct _Node : NodeInfo{ - _Node(int _lat, int _lon, unsigned int _id) : NodeInfo(_lat, _lon, _id), used(false) {} - _Node() : used(false) {} + _Node(int _lat, int _lon, unsigned int _id) : NodeInfo(_lat, _lon, _id) {} + _Node() {} static _Node min_value() { return _Node(0,0,0); @@ -62,8 +62,6 @@ struct _Node : NodeInfo{ NodeID key() const { return id; } - - bool used; }; struct _Coordinate { @@ -123,16 +121,15 @@ struct _Relation { }; struct _Edge { - _Edge() : used(false) {}; - _Edge(NodeID s, NodeID t) : start(s), target(t), used(false) { } - _Edge(NodeID s, NodeID t, short tp, short d, double sp): start(s), target(t), type(tp), direction(d), speed(sp), used(false) { } + _Edge() {}; + _Edge(NodeID s, NodeID t) : start(s), target(t) { } + _Edge(NodeID s, NodeID t, short tp, short d, double sp): start(s), target(t), type(tp), direction(d), speed(sp) { } NodeID start; NodeID target; - short type:15; + short type; short direction; double speed; unsigned nameID; - bool used:1; _Coordinate startCoord; _Coordinate targetCoord; diff --git a/DataStructures/PBFParser.h b/DataStructures/PBFParser.h index 56a415802..5f9be6ca4 100644 --- a/DataStructures/PBFParser.h +++ b/DataStructures/PBFParser.h @@ -93,8 +93,10 @@ public: google::protobuf::ShutdownProtobufLibrary(); +#ifdef DEBUG std::cout << "[info] blocks: " << blockCount << std::endl; std::cout << "[info] groups: " << groupCount << std::endl; +#endif } bool Init() { diff --git a/Util/GraphLoader.h b/Util/GraphLoader.h index 24a5450ad..4b368ab83 100644 --- a/Util/GraphLoader.h +++ b/Util/GraphLoader.h @@ -100,7 +100,76 @@ NodeID readOSRMGraphFromStream(istream &in, vector& edgeList, vector +NodeID readBinaryOSRMGraphFromStream(istream &in, vector& edgeList, vector * int2ExtNodeMap) { + NodeID n, source, target, id; + EdgeID m; + short dir; + int xcoord, ycoord;// direction (0 = open, 1 = forward, 2+ = open) + ExternalNodeMap ext2IntNodeMap; + ext2IntNodeMap.set_empty_key(UINT_MAX); +// in >> n; + in.read((char*)&n, sizeof(NodeID)); + VERBOSE(cout << "Importing n = " << n << " nodes ..." << flush;) + for (NodeID i=0; i> id >> ycoord >> xcoord; + in.read((char*)&id, sizeof(unsigned)); + in.read((char*)&ycoord, sizeof(int)); + in.read((char*)&xcoord, sizeof(int)); + int2ExtNodeMap->push_back(NodeInfo(xcoord, ycoord, id)); + ext2IntNodeMap.insert(make_pair(id, i)); + } +// in >> m; + in.read((char*)&m, sizeof(unsigned)); + VERBOSE(cout << " and " << m << " edges ..." << flush;) + edgeList.reserve(m); + for (EdgeID i=0; i> source >> target >> length >> dir >> weight >> type >> nameID; + in.read((char*)&source, sizeof(unsigned)); + in.read((char*)&target, sizeof(unsigned)); + in.read((char*)&length, sizeof(int)); + in.read((char*)&dir, sizeof(short)); + in.read((char*)&weight, sizeof(int)); + in.read((char*)&type, sizeof(short)); + in.read((char*)&nameID ,sizeof(unsigned)); + assert(length > 0); + assert(weight > 0); + assert(0<=dir && dir<=2); + + bool forward = true; + bool backward = true; + if (1 == dir) { backward = false; } + if (2 == dir) { forward = false; } + + if(length == 0) { cerr << "loaded null length edge" << endl; exit(1); } + + // translate the external NodeIDs to internal IDs + ExternalNodeMap::iterator intNodeID = ext2IntNodeMap.find(source); + if( ext2IntNodeMap.find(source) == ext2IntNodeMap.end()) { + cerr << "after " << edgeList.size() << " edges" << endl; + cerr << "->" << source << "," << target << "," << length << "," << dir << "," << weight << endl; + cerr << "unresolved source NodeID: " << source << endl; exit(0); + } + source = intNodeID->second; + intNodeID = ext2IntNodeMap.find(target); + if(ext2IntNodeMap.find(target) == ext2IntNodeMap.end()) { cerr << "unresolved target NodeID : " << target << endl; exit(0); } + target = intNodeID->second; + + if(source == UINT_MAX || target == UINT_MAX) { cerr << "nonexisting source or target" << endl; exit(0); } + + EdgeT inputEdge(source, target, nameID, weight, forward, backward, type ); + edgeList.push_back(inputEdge); + } + ext2IntNodeMap.clear(); + vector(edgeList.begin(), edgeList.end()).swap(edgeList); //remove excess candidates. + cout << "ok" << endl; + return n; +} template NodeID readDTMPGraphFromStream(istream &in, vector& edgeList, vector * int2ExtNodeMap) { NodeID n, source, target, id; diff --git a/createHierarchy.cpp b/createHierarchy.cpp index f05c94fb4..9fdadf608 100644 --- a/createHierarchy.cpp +++ b/createHierarchy.cpp @@ -88,8 +88,7 @@ int main (int argc, char *argv[]) { cerr << "Cannot open " << argv[1] << endl; exit(-1); } vector edgeList; - const NodeID n = readOSRMGraphFromStream(in, edgeList, int2ExtNodeMap); - unsigned numberOfNodes = int2ExtNodeMap->size(); + const NodeID n = readBinaryOSRMGraphFromStream(in, edgeList, int2ExtNodeMap); in.close(); // cout << "computing turn vector info ..." << flush; @@ -137,7 +136,7 @@ int main (int argc, char *argv[]) { for(unsigned currentLevelEntry = 0; currentLevelEntry < sizeOfLevel; currentLevelEntry++) { unsigned node = level[currentLevelEntry]; levelOutFile.write((char *)&node, sizeof(unsigned)); - assert(node < numberOfNodes); + assert(node < n); } } levelOutFile.close(); @@ -195,7 +194,6 @@ int main (int argc, char *argv[]) { ofstream mapOutFile(nodeOut, ios::binary); for(NodeID i = 0; i < int2ExtNodeMap->size(); i++) { - mapOutFile.write((char *)&(int2ExtNodeMap->at(i)), sizeof(NodeInfo)); } mapOutFile.close(); diff --git a/extractLargeNetwork.cpp b/extractLargeNetwork.cpp index e36385478..13193a3a8 100644 --- a/extractLargeNetwork.cpp +++ b/extractLargeNetwork.cpp @@ -115,7 +115,6 @@ int main (int argc, char *argv[]) { std::cerr << "[error] parser not initialized!" << std::endl; exit(-1); } - /* flush needs to be called to flush the remaining local vector elements */ delete parser; try { @@ -146,10 +145,11 @@ int main (int argc, char *argv[]) { std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; time = get_timestamp(); - ofstream fout; - fout.open(outputFileName.c_str()); + std::ofstream fout; + fout.open(outputFileName.c_str(), std::ios::binary); + fout.write((char*)&usedNodeCounter, sizeof(unsigned)); - cout << "[extractor] Confirming used nodes ... " << flush; + std::cout << "[extractor] Confirming used nodes ... " << std::flush; STXXLNodeVector::iterator nodesIT = allNodes.begin(); STXXLNodeIDVector::iterator usedNodeIDsIT = usedNodeIDs.begin(); while(usedNodeIDsIT != usedNodeIDs.end() && nodesIT != allNodes.end()) { @@ -162,29 +162,25 @@ int main (int argc, char *argv[]) { continue; } if(*usedNodeIDsIT == nodesIT->id) { - nodesIT->used = true; + fout.write((char*)&(nodesIT->id), sizeof(unsigned)); + fout.write((char*)&(nodesIT->lon), sizeof(int)); + fout.write((char*)&(nodesIT->lat), sizeof(int)); +// std::cout << "serializing: " << nodesIT->id << ", lat: " << nodesIT->lat << ", lon: " << nodesIT->lon << std::endl; usedNodeCounter++; usedNodeIDsIT++; nodesIT++; } } - cout << "ok, after " << get_timestamp() - time << "s" << std::endl; + std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; time = get_timestamp(); -// std::cout << "[extractor] Erasing unused nodes ... " << std::flush; -// allNodes.resize(std::remove_if(allNodes.begin(), allNodes.end(), removeIfUnused<_Node>)-allNodes.begin()); -// -// cout << "ok, after " << get_timestamp() - time << "s" << std::endl; -// time = get_timestamp(); + std::cout << "[extractor] setting number of nodes ... " << std::flush; + std::ios::pos_type positionInFile = fout.tellp(); + fout.seekp(std::ios::beg); + fout.write((char*)&usedNodeCounter, sizeof(unsigned)); + fout.seekp(positionInFile); - std::cout << "[extractor] Writing used nodes ... " << std::flush; - fout << usedNodeCounter << endl; - for(STXXLNodeVector::iterator ut = allNodes.begin(); ut != allNodes.end(); ut++) { - if(ut->used) - fout << ut->id<< " " << ut->lon << " " << ut->lat << "\n"; - } - - cout << "ok, after " << get_timestamp() - time << "s" << endl; + std::cout << "ok" << std::endl; time = get_timestamp(); // Sort edges by start. @@ -194,6 +190,7 @@ int main (int argc, char *argv[]) { time = get_timestamp(); std::cout << "[extractor] Setting start coords ... " << std::flush; + fout.write((char*)&usedEdgeCounter, sizeof(unsigned)); // Traverse list of edges and nodes in parallel and set start coord nodesIT = allNodes.begin(); STXXLEdgeVector::iterator edgeIT = allEdges.begin(); @@ -237,55 +234,61 @@ int main (int argc, char *argv[]) { if(edgeIT->startCoord.lat != INT_MIN && edgeIT->target == nodesIT->id) { edgeIT->targetCoord.lat = nodesIT->lat; edgeIT->targetCoord.lon = nodesIT->lon; - edgeIT->used = true; + + double distance = ApproximateDistance(edgeIT->startCoord.lat, edgeIT->startCoord.lon, nodesIT->lat, nodesIT->lon); + if(edgeIT->speed == -1) + edgeIT->speed = settings.speedProfile.speed[edgeIT->type]; + double weight = ( distance * 10. ) / (edgeIT->speed / 3.6); + int intWeight = max(1, (int) weight); + int intDist = max(1, (int)distance); + int ferryIndex = settings.indexInAccessListOf("ferry"); + assert(ferryIndex != -1); + short zero = 0; + short one = 1; + + fout.write((char*)&edgeIT->start, sizeof(unsigned)); + fout.write((char*)&edgeIT->target, sizeof(unsigned)); + fout.write((char*)&intDist, sizeof(int)); + switch(edgeIT->direction) { + case _Way::notSure: + fout.write((char*)&zero, sizeof(short)); + break; + case _Way::oneway: + fout.write((char*)&one, sizeof(short)); + break; + case _Way::bidirectional: + fout.write((char*)&zero, sizeof(short)); + + break; + case _Way::opposite: + fout.write((char*)&one, sizeof(short)); + break; + default: + std::cerr << "[error] edge with no direction: " << edgeIT->direction << std::endl; + assert(false); + break; + } + fout.write((char*)&intWeight, sizeof(int)); + short edgeType = edgeIT->type; + fout.write((char*)&edgeType, sizeof(short)); + fout.write((char*)&edgeIT->nameID, sizeof(unsigned)); + usedEdgeCounter++; edgeIT++; } } - - fout << usedEdgeCounter << "\n"; - cout << "ok, after " << get_timestamp() - time << "s" << endl; - time = get_timestamp(); - - cout << "[extractor] writing confirmed edges ... " << flush; - for(STXXLEdgeVector::iterator eit = allEdges.begin(); eit != allEdges.end(); eit++) { - if(eit->used == false) - continue; - double distance = ApproximateDistance(eit->startCoord.lat, eit->startCoord.lon, eit->targetCoord.lat, eit->targetCoord.lon); - if(eit->speed == -1) - eit->speed = settings.speedProfile.speed[eit->type]; - double weight = ( distance * 10. ) / (eit->speed / 3.6); - int intWeight = max(1, (int) weight); - int intDist = max(1, (int)distance); - int ferryIndex = settings.indexInAccessListOf("ferry"); - assert(ferryIndex != -1); - - switch(eit->direction) { - case _Way::notSure: - fout << eit->start << " " << eit->target << " " << intDist << " " << 0 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n"; - break; - case _Way::oneway: - fout << eit->start << " " << eit->target << " " << intDist << " " << 1 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n"; - break; - case _Way::bidirectional: - fout << eit->start << " " << eit->target << " " << intDist << " " << 0 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n"; - break; - case _Way::opposite: - fout << eit->start << " " << eit->target << " " << intDist << " " << 1 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n"; - break; - default: - std::cerr << "[error] edge with no direction: " << eit->direction << std::endl; - assert(false); - break; - } - } - fout.close(); - - outputFileName.append(".names"); std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl; time = get_timestamp(); - std::cout << "[extractor] writing street name index ... " << std::flush; + std::cout << "[extractor] setting number of edges ... " << std::flush; + fout.seekp(positionInFile); + fout.write((char*)&usedEdgeCounter, sizeof(unsigned)); + fout.close(); + std::cout << "ok" << std::endl; + time = get_timestamp(); + + + std::cout << "[extractor] writing street name index ... " << std::flush; std::vector * nameIndex = new std::vector(nameVector.size()+1, 0); unsigned currentNameIndex = 0; unsigned elementCounter(0); @@ -295,7 +298,8 @@ int main (int argc, char *argv[]) { elementCounter++; } nameIndex->at(nameVector.size()) = currentNameIndex; - ofstream nameOutFile(outputFileName.c_str(), ios::binary); + outputFileName.append(".names"); + std::ofstream nameOutFile(outputFileName.c_str(), std::ios::binary); unsigned sizeOfNameIndex = nameIndex->size(); nameOutFile.write((char *)&(sizeOfNameIndex), sizeof(unsigned)); @@ -327,7 +331,7 @@ int main (int argc, char *argv[]) { } delete extractCallBacks; - cout << "[extractor] finished." << endl; + std::cout << "[extractor] finished." << std::endl; return 0; }