BREAKING CHANGE! The intermediate file format is no longer ASCII encoded. This saves roughly 40% space, and loading and saving are now much faster. Writing the intermediate data non-linearly also cuts the number of I/Os by 50%. Please recompute all your data files.

This commit is contained in:
Dennis Luxen 2011-03-23 17:15:13 +00:00
parent 29977c4b88
commit 846bb11cc0
6 changed files with 148 additions and 76 deletions

View File

@ -60,6 +60,7 @@ public:
/** warning: caller needs to take care of synchronization! */
bool adressFunction(_Node n, HashTable<std::string, std::string> &keyVals) {
/*
std::string housenumber(keyVals.Find("addr:housenumber"));
std::string housename(keyVals.Find("addr:housename"));
std::string street(keyVals.Find("addr:street"));
@ -73,6 +74,7 @@ public:
housenumber = housename;
addressVector->push_back(_Address(n, housenumber, street, state, country, postcode, city));
}
*/
return true;
}

View File

@ -50,8 +50,8 @@ std::string names[14] = { "motorway", "motorway_link", "trunk", "trunk_link", "p
double speeds[14] = { 110, 90, 90, 70, 70, 60, 60, 50, 55, 25, 40 , 10, 30, 5};
struct _Node : NodeInfo{
_Node(int _lat, int _lon, unsigned int _id) : NodeInfo(_lat, _lon, _id), used(false) {}
_Node() : used(false) {}
_Node(int _lat, int _lon, unsigned int _id) : NodeInfo(_lat, _lon, _id) {}
_Node() {}
static _Node min_value() {
return _Node(0,0,0);
@ -62,8 +62,6 @@ struct _Node : NodeInfo{
NodeID key() const {
return id;
}
bool used;
};
struct _Coordinate {
@ -123,16 +121,15 @@ struct _Relation {
};
struct _Edge {
_Edge() : used(false) {};
_Edge(NodeID s, NodeID t) : start(s), target(t), used(false) { }
_Edge(NodeID s, NodeID t, short tp, short d, double sp): start(s), target(t), type(tp), direction(d), speed(sp), used(false) { }
_Edge() {};
_Edge(NodeID s, NodeID t) : start(s), target(t) { }
_Edge(NodeID s, NodeID t, short tp, short d, double sp): start(s), target(t), type(tp), direction(d), speed(sp) { }
NodeID start;
NodeID target;
short type:15;
short type;
short direction;
double speed;
unsigned nameID;
bool used:1;
_Coordinate startCoord;
_Coordinate targetCoord;

View File

@ -93,8 +93,10 @@ public:
google::protobuf::ShutdownProtobufLibrary();
#ifdef DEBUG
std::cout << "[info] blocks: " << blockCount << std::endl;
std::cout << "[info] groups: " << groupCount << std::endl;
#endif
}
bool Init() {

View File

@ -100,7 +100,76 @@ NodeID readOSRMGraphFromStream(istream &in, vector<EdgeT>& edgeList, vector<Node
cout << "ok" << endl;
return n;
}
/**
 * Reads a graph from a stream in the binary intermediate format.
 *
 * All values are read as raw, native-endian memory images in this order:
 *   - node count                      (sizeof(NodeID) bytes)
 *   - per node:  id (unsigned), y coordinate (int), x coordinate (int)
 *   - edge count                      (unsigned)
 *   - per edge:  source (unsigned), target (unsigned), length (int),
 *                direction (short), weight (int), type (short),
 *                nameID (unsigned)
 *
 * Direction encoding: 0 = usable in both directions, 1 = forward only,
 * 2 = backward only.
 *
 * Node ids found in the edge records are external ids; they are translated
 * to internal ids, i.e. the 0-based position of the node in the node section.
 *
 * @param in              stream positioned at the node count.
 * @param edgeList        receives one EdgeT per edge record, constructed as
 *                        EdgeT(source, target, nameID, weight, forward,
 *                        backward, type) with translated source/target.
 * @param int2ExtNodeMap  receives one NodeInfo(x, y, id) per node record, in
 *                        file order. Must not be null.
 * @return the node count read from the stream.
 *
 * Malformed input (truncated stream, zero-length edge, edge referring to an
 * unknown node) is fatal: a message is printed to cerr and the process exits
 * with a non-zero status.
 */
template<typename EdgeT>
NodeID readBinaryOSRMGraphFromStream(istream &in, vector<EdgeT>& edgeList, vector<NodeInfo> * int2ExtNodeMap) {
    NodeID n = 0;
    EdgeID m = 0;
    ExternalNodeMap ext2IntNodeMap;
    ext2IntNodeMap.set_empty_key(UINT_MAX);

    in.read((char*)&n, sizeof(NodeID));
    // Without this check a short read would leave n undefined and the loop
    // below would run on garbage.
    if(!in) { cerr << "could not read number of nodes" << endl; exit(1); }
    VERBOSE(cout << "Importing n = " << n << " nodes ..." << flush;)
    for (NodeID i=0; i<n; i++) {
        NodeID id;
        int xcoord, ycoord;
        in.read((char*)&id, sizeof(unsigned));
        in.read((char*)&ycoord, sizeof(int));
        in.read((char*)&xcoord, sizeof(int));
        if(!in) { cerr << "unexpected end of input after " << i << " nodes" << endl; exit(1); }
        int2ExtNodeMap->push_back(NodeInfo(xcoord, ycoord, id));
        ext2IntNodeMap.insert(make_pair(id, i));
    }

    in.read((char*)&m, sizeof(unsigned));
    if(!in) { cerr << "could not read number of edges" << endl; exit(1); }
    VERBOSE(cout << " and " << m << " edges ..." << flush;)

    edgeList.reserve(m);
    for (EdgeID i=0; i<m; i++) {
        NodeID source, target;
        int length;
        short dir; // 0 = both directions, 1 = forward only, 2 = backward only
        EdgeWeight weight;
        short type;
        NodeID nameID;
        in.read((char*)&source, sizeof(unsigned));
        in.read((char*)&target, sizeof(unsigned));
        in.read((char*)&length, sizeof(int));
        in.read((char*)&dir, sizeof(short));
        in.read((char*)&weight, sizeof(int));
        in.read((char*)&type, sizeof(short));
        in.read((char*)&nameID ,sizeof(unsigned));
        if(!in) { cerr << "unexpected end of input after " << edgeList.size() << " edges" << endl; exit(1); }

        assert(length > 0);
        assert(weight > 0);
        assert(0<=dir && dir<=2);

        const bool forward = (2 != dir);
        const bool backward = (1 != dir);

        // Still checked when asserts are compiled out.
        if(length == 0) { cerr << "loaded null length edge" << endl; exit(1); }

        // Translate the external NodeIDs to internal IDs; one lookup each.
        ExternalNodeMap::iterator intNodeID = ext2IntNodeMap.find(source);
        if(intNodeID == ext2IntNodeMap.end()) {
            cerr << "after " << edgeList.size() << " edges" << endl;
            cerr << "->" << source << "," << target << "," << length << "," << dir << "," << weight << endl;
            cerr << "unresolved source NodeID: " << source << endl; exit(1);
        }
        source = intNodeID->second;

        intNodeID = ext2IntNodeMap.find(target);
        if(intNodeID == ext2IntNodeMap.end()) { cerr << "unresolved target NodeID : " << target << endl; exit(1); }
        target = intNodeID->second;

        if(source == UINT_MAX || target == UINT_MAX) { cerr << "nonexisting source or target" << endl; exit(1); }

        EdgeT inputEdge(source, target, nameID, weight, forward, backward, type );
        edgeList.push_back(inputEdge);
    }
    ext2IntNodeMap.clear();
    // Copy-and-swap to release excess capacity; uses EdgeT so the template
    // works for any edge type, not only ImportEdge.
    vector<EdgeT>(edgeList.begin(), edgeList.end()).swap(edgeList);
    cout << "ok" << endl;
    return n;
}
template<typename EdgeT>
NodeID readDTMPGraphFromStream(istream &in, vector<EdgeT>& edgeList, vector<NodeInfo> * int2ExtNodeMap) {
NodeID n, source, target, id;

View File

@ -88,8 +88,7 @@ int main (int argc, char *argv[]) {
cerr << "Cannot open " << argv[1] << endl; exit(-1);
}
vector<ImportEdge> edgeList;
const NodeID n = readOSRMGraphFromStream(in, edgeList, int2ExtNodeMap);
unsigned numberOfNodes = int2ExtNodeMap->size();
const NodeID n = readBinaryOSRMGraphFromStream(in, edgeList, int2ExtNodeMap);
in.close();
// cout << "computing turn vector info ..." << flush;
@ -137,7 +136,7 @@ int main (int argc, char *argv[]) {
for(unsigned currentLevelEntry = 0; currentLevelEntry < sizeOfLevel; currentLevelEntry++) {
unsigned node = level[currentLevelEntry];
levelOutFile.write((char *)&node, sizeof(unsigned));
assert(node < numberOfNodes);
assert(node < n);
}
}
levelOutFile.close();
@ -195,7 +194,6 @@ int main (int argc, char *argv[]) {
ofstream mapOutFile(nodeOut, ios::binary);
for(NodeID i = 0; i < int2ExtNodeMap->size(); i++) {
mapOutFile.write((char *)&(int2ExtNodeMap->at(i)), sizeof(NodeInfo));
}
mapOutFile.close();

View File

@ -115,7 +115,6 @@ int main (int argc, char *argv[]) {
std::cerr << "[error] parser not initialized!" << std::endl;
exit(-1);
}
/* flush needs to be called to flush the remaining local vector elements */
delete parser;
try {
@ -146,10 +145,11 @@ int main (int argc, char *argv[]) {
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
ofstream fout;
fout.open(outputFileName.c_str());
std::ofstream fout;
fout.open(outputFileName.c_str(), std::ios::binary);
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
cout << "[extractor] Confirming used nodes ... " << flush;
std::cout << "[extractor] Confirming used nodes ... " << std::flush;
STXXLNodeVector::iterator nodesIT = allNodes.begin();
STXXLNodeIDVector::iterator usedNodeIDsIT = usedNodeIDs.begin();
while(usedNodeIDsIT != usedNodeIDs.end() && nodesIT != allNodes.end()) {
@ -162,29 +162,25 @@ int main (int argc, char *argv[]) {
continue;
}
if(*usedNodeIDsIT == nodesIT->id) {
nodesIT->used = true;
fout.write((char*)&(nodesIT->id), sizeof(unsigned));
fout.write((char*)&(nodesIT->lon), sizeof(int));
fout.write((char*)&(nodesIT->lat), sizeof(int));
// std::cout << "serializing: " << nodesIT->id << ", lat: " << nodesIT->lat << ", lon: " << nodesIT->lon << std::endl;
usedNodeCounter++;
usedNodeIDsIT++;
nodesIT++;
}
}
cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
// std::cout << "[extractor] Erasing unused nodes ... " << std::flush;
// allNodes.resize(std::remove_if(allNodes.begin(), allNodes.end(), removeIfUnused<_Node>)-allNodes.begin());
//
// cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
// time = get_timestamp();
std::cout << "[extractor] setting number of nodes ... " << std::flush;
std::ios::pos_type positionInFile = fout.tellp();
fout.seekp(std::ios::beg);
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
fout.seekp(positionInFile);
std::cout << "[extractor] Writing used nodes ... " << std::flush;
fout << usedNodeCounter << endl;
for(STXXLNodeVector::iterator ut = allNodes.begin(); ut != allNodes.end(); ut++) {
if(ut->used)
fout << ut->id<< " " << ut->lon << " " << ut->lat << "\n";
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok" << std::endl;
time = get_timestamp();
// Sort edges by start.
@ -194,6 +190,7 @@ int main (int argc, char *argv[]) {
time = get_timestamp();
std::cout << "[extractor] Setting start coords ... " << std::flush;
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
// Traverse list of edges and nodes in parallel and set start coord
nodesIT = allNodes.begin();
STXXLEdgeVector::iterator edgeIT = allEdges.begin();
@ -237,55 +234,61 @@ int main (int argc, char *argv[]) {
if(edgeIT->startCoord.lat != INT_MIN && edgeIT->target == nodesIT->id) {
edgeIT->targetCoord.lat = nodesIT->lat;
edgeIT->targetCoord.lon = nodesIT->lon;
edgeIT->used = true;
double distance = ApproximateDistance(edgeIT->startCoord.lat, edgeIT->startCoord.lon, nodesIT->lat, nodesIT->lon);
if(edgeIT->speed == -1)
edgeIT->speed = settings.speedProfile.speed[edgeIT->type];
double weight = ( distance * 10. ) / (edgeIT->speed / 3.6);
int intWeight = max(1, (int) weight);
int intDist = max(1, (int)distance);
int ferryIndex = settings.indexInAccessListOf("ferry");
assert(ferryIndex != -1);
short zero = 0;
short one = 1;
fout.write((char*)&edgeIT->start, sizeof(unsigned));
fout.write((char*)&edgeIT->target, sizeof(unsigned));
fout.write((char*)&intDist, sizeof(int));
switch(edgeIT->direction) {
case _Way::notSure:
fout.write((char*)&zero, sizeof(short));
break;
case _Way::oneway:
fout.write((char*)&one, sizeof(short));
break;
case _Way::bidirectional:
fout.write((char*)&zero, sizeof(short));
break;
case _Way::opposite:
fout.write((char*)&one, sizeof(short));
break;
default:
std::cerr << "[error] edge with no direction: " << edgeIT->direction << std::endl;
assert(false);
break;
}
fout.write((char*)&intWeight, sizeof(int));
short edgeType = edgeIT->type;
fout.write((char*)&edgeType, sizeof(short));
fout.write((char*)&edgeIT->nameID, sizeof(unsigned));
usedEdgeCounter++;
edgeIT++;
}
}
fout << usedEdgeCounter << "\n";
cout << "ok, after " << get_timestamp() - time << "s" << endl;
time = get_timestamp();
cout << "[extractor] writing confirmed edges ... " << flush;
for(STXXLEdgeVector::iterator eit = allEdges.begin(); eit != allEdges.end(); eit++) {
if(eit->used == false)
continue;
double distance = ApproximateDistance(eit->startCoord.lat, eit->startCoord.lon, eit->targetCoord.lat, eit->targetCoord.lon);
if(eit->speed == -1)
eit->speed = settings.speedProfile.speed[eit->type];
double weight = ( distance * 10. ) / (eit->speed / 3.6);
int intWeight = max(1, (int) weight);
int intDist = max(1, (int)distance);
int ferryIndex = settings.indexInAccessListOf("ferry");
assert(ferryIndex != -1);
switch(eit->direction) {
case _Way::notSure:
fout << eit->start << " " << eit->target << " " << intDist << " " << 0 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n";
break;
case _Way::oneway:
fout << eit->start << " " << eit->target << " " << intDist << " " << 1 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n";
break;
case _Way::bidirectional:
fout << eit->start << " " << eit->target << " " << intDist << " " << 0 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n";
break;
case _Way::opposite:
fout << eit->start << " " << eit->target << " " << intDist << " " << 1 << " " << intWeight << " " << eit->type << " " << eit->nameID << "\n";
break;
default:
std::cerr << "[error] edge with no direction: " << eit->direction << std::endl;
assert(false);
break;
}
}
fout.close();
outputFileName.append(".names");
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
std::cout << "[extractor] writing street name index ... " << std::flush;
std::cout << "[extractor] setting number of edges ... " << std::flush;
fout.seekp(positionInFile);
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
fout.close();
std::cout << "ok" << std::endl;
time = get_timestamp();
std::cout << "[extractor] writing street name index ... " << std::flush;
std::vector<unsigned> * nameIndex = new std::vector<unsigned>(nameVector.size()+1, 0);
unsigned currentNameIndex = 0;
unsigned elementCounter(0);
@ -295,7 +298,8 @@ int main (int argc, char *argv[]) {
elementCounter++;
}
nameIndex->at(nameVector.size()) = currentNameIndex;
ofstream nameOutFile(outputFileName.c_str(), ios::binary);
outputFileName.append(".names");
std::ofstream nameOutFile(outputFileName.c_str(), std::ios::binary);
unsigned sizeOfNameIndex = nameIndex->size();
nameOutFile.write((char *)&(sizeOfNameIndex), sizeof(unsigned));
@ -327,7 +331,7 @@ int main (int argc, char *argv[]) {
}
delete extractCallBacks;
cout << "[extractor] finished." << endl;
std::cout << "[extractor] finished." << std::endl;
return 0;
}