Extracting additional features from graph
This commit is contained in:
parent
4883c6c197
commit
a37528362d
@ -21,12 +21,12 @@ or see http://www.gnu.org/licenses/agpl.txt.
|
||||
#ifndef BASEPARSER_H_
|
||||
#define BASEPARSER_H_
|
||||
|
||||
template<typename NodeT, typename RelationT, typename WayT>
|
||||
template<typename NodeT, typename RestrictionT, typename WayT>
|
||||
class BaseParser {
|
||||
public:
|
||||
virtual ~BaseParser() {}
|
||||
virtual bool Init() = 0;
|
||||
virtual bool RegisterCallbacks(bool (*nodeCallbackPointer)(NodeT), bool (*relationCallbackPointer)(RelationT), bool (*wayCallbackPointer)(WayT), bool (*addressCallbackPointer)(NodeT, HashTable<std::string, std::string>)) = 0;
|
||||
virtual bool RegisterCallbacks(bool (*nodeCallbackPointer)(NodeT), bool (*restrictionCallbackPointer)(RestrictionT), bool (*wayCallbackPointer)(WayT), bool (*addressCallbackPointer)(NodeT, HashTable<std::string, std::string>)) = 0;
|
||||
virtual bool Parse() = 0;
|
||||
private:
|
||||
};
|
||||
|
@ -30,28 +30,31 @@ typedef stxxl::vector<_Node> STXXLNodeVector;
|
||||
typedef stxxl::vector<_Edge> STXXLEdgeVector;
|
||||
typedef stxxl::vector<_Address> STXXLAddressVector;
|
||||
typedef stxxl::vector<string> STXXLStringVector;
|
||||
typedef stxxl::vector<_RawRestrictionContainer> STXXLRestrictionsVector;
|
||||
typedef stxxl::vector<_WayIDStartAndEndEdge> STXXLWayIDStartEndVector;
|
||||
|
||||
struct STXXLContainers {
|
||||
STXXLNodeIDVector usedNodeIDs;
|
||||
STXXLNodeVector allNodes;
|
||||
STXXLEdgeVector allEdges;
|
||||
STXXLAddressVector adressVector;
|
||||
STXXLStringVector nameVector;
|
||||
STXXLRestrictionsVector restrictionsVector;
|
||||
STXXLWayIDStartEndVector wayStartEndVector;
|
||||
};
|
||||
|
||||
class ExtractorCallbacks{
|
||||
private:
|
||||
static const unsigned MAX_LOCAL_VECTOR_SIZE = 100;
|
||||
|
||||
STXXLNodeVector * allNodes;
|
||||
STXXLNodeIDVector * usedNodeIDs;
|
||||
STXXLEdgeVector * allEdges;
|
||||
STXXLStringVector * nameVector;
|
||||
STXXLAddressVector * addressVector;
|
||||
Settings settings;
|
||||
StringMap * stringMap;
|
||||
STXXLContainers * externalMemory;
|
||||
|
||||
public:
|
||||
ExtractorCallbacks(STXXLNodeVector * aNodes, STXXLNodeIDVector * uNodes, STXXLEdgeVector * aEdges, STXXLStringVector * nVector, STXXLAddressVector * adrVector, Settings s, StringMap * strMap){
|
||||
allNodes = aNodes;
|
||||
usedNodeIDs = uNodes;
|
||||
allEdges = aEdges;
|
||||
nameVector = nVector;
|
||||
addressVector = adrVector;
|
||||
settings = s;
|
||||
ExtractorCallbacks(STXXLContainers * ext, Settings set, StringMap * strMap){
|
||||
externalMemory = ext;
|
||||
settings = set;
|
||||
stringMap = strMap;
|
||||
}
|
||||
|
||||
@ -80,12 +83,12 @@ public:
|
||||
|
||||
/** warning: caller needs to take care of synchronization! */
|
||||
bool nodeFunction(_Node &n) {
|
||||
allNodes->push_back(n);
|
||||
externalMemory->allNodes.push_back(n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool relationFunction(_Relation &r) {
|
||||
//do nothing;
|
||||
bool restrictionFunction(_RawRestrictionContainer &r) {
|
||||
externalMemory->restrictionsVector.push_back(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -178,18 +181,17 @@ public:
|
||||
w.access = false;
|
||||
}
|
||||
|
||||
if ( w.useful && w.access && w.path.size() ) {
|
||||
if ( w.useful && w.access && w.path.size() > 1 ) {
|
||||
StringMap::iterator strit = stringMap->find(w.name);
|
||||
if(strit == stringMap->end())
|
||||
{
|
||||
w.nameID = nameVector->size();
|
||||
nameVector->push_back(w.name);
|
||||
if(strit == stringMap->end()) {
|
||||
w.nameID = externalMemory->nameVector.size();
|
||||
externalMemory->nameVector.push_back(w.name);
|
||||
stringMap->insert(std::make_pair(w.name, w.nameID) );
|
||||
} else {
|
||||
w.nameID = strit->second;
|
||||
}
|
||||
for ( unsigned i = 0; i < w.path.size(); ++i ) {
|
||||
usedNodeIDs->push_back(w.path[i]);
|
||||
externalMemory->usedNodeIDs.push_back(w.path[i]);
|
||||
}
|
||||
|
||||
if ( w.direction == _Way::opposite ){
|
||||
@ -197,7 +199,7 @@ public:
|
||||
}
|
||||
vector< NodeID > & path = w.path;
|
||||
assert(w.type > -1 || w.maximumSpeed != -1);
|
||||
assert(path.size()>0);
|
||||
assert(path.size()>1);
|
||||
|
||||
if(w.maximumSpeed == -1)
|
||||
w.maximumSpeed = settings.speedProfile.speed[w.type];
|
||||
@ -209,8 +211,10 @@ public:
|
||||
e.direction = w.direction;
|
||||
e.speed = w.maximumSpeed;
|
||||
e.nameID = w.nameID;
|
||||
allEdges->push_back(e);
|
||||
externalMemory->allEdges.push_back(e);
|
||||
}
|
||||
assert(w.id != UINT_MAX);
|
||||
externalMemory->wayStartEndVector.push_back(_WayIDStartAndEndEdge(w.id, path[0], path[1], path[path.size()-2], path[path.size()-1]));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -82,7 +82,8 @@ ostream & operator<<(ostream & out, const _Coordinate & c){
|
||||
}
|
||||
|
||||
struct _Way {
|
||||
_Way() {
|
||||
_Way() : id(UINT_MAX) {
|
||||
|
||||
direction = _Way::notSure;
|
||||
maximumSpeed = -1;
|
||||
type = -1;
|
||||
@ -125,9 +126,11 @@ struct _Address {
|
||||
};
|
||||
|
||||
struct _Relation {
|
||||
_Relation() : type(unknown){}
|
||||
enum {
|
||||
unknown = 0, ferry
|
||||
unknown = 0, ferry, turnRestriction
|
||||
} type;
|
||||
HashTable<std::string, std::string> keyVals;
|
||||
};
|
||||
|
||||
struct _Edge {
|
||||
@ -153,13 +156,107 @@ struct _Edge {
|
||||
|
||||
};
|
||||
|
||||
struct _Restriction {
|
||||
NodeID viaNode;
|
||||
NodeID fromNode;
|
||||
NodeID toNode;
|
||||
struct bits { //mostly unused
|
||||
char isOnly:1;
|
||||
char unused1:1;
|
||||
char unused2:1;
|
||||
char unused3:1;
|
||||
char unused4:1;
|
||||
char unused5:1;
|
||||
char unused6:1;
|
||||
char unused7:1;
|
||||
} flags;
|
||||
|
||||
|
||||
_Restriction(NodeID vn) : viaNode(vn), fromNode(UINT_MAX), toNode(UINT_MAX) { }
|
||||
_Restriction(bool isOnly = false) : viaNode(UINT_MAX), fromNode(UINT_MAX), toNode(UINT_MAX) {
|
||||
flags.isOnly = isOnly;
|
||||
}
|
||||
};
|
||||
|
||||
struct _RawRestrictionContainer {
|
||||
_Restriction restriction;
|
||||
EdgeID fromWay;
|
||||
EdgeID toWay;
|
||||
unsigned viaWay;
|
||||
|
||||
_RawRestrictionContainer(EdgeID f, EdgeID t, NodeID vn, unsigned vw) : fromWay(f), toWay(t), viaWay(vw) { restriction.viaNode = vn;}
|
||||
_RawRestrictionContainer(bool isOnly = false) : fromWay(UINT_MAX), toWay(UINT_MAX), viaWay(UINT_MAX) { restriction.flags.isOnly = isOnly;}
|
||||
|
||||
static _RawRestrictionContainer min_value() {
|
||||
return _RawRestrictionContainer(numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min());
|
||||
}
|
||||
static _RawRestrictionContainer max_value() {
|
||||
return _RawRestrictionContainer(numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct CmpRestrictionByFrom: public std::binary_function<_RawRestrictionContainer, _RawRestrictionContainer, bool> {
|
||||
typedef _RawRestrictionContainer value_type;
|
||||
bool operator () (const _RawRestrictionContainer & a, const _RawRestrictionContainer & b) const {
|
||||
return a.fromWay < b.fromWay;
|
||||
}
|
||||
value_type max_value() {
|
||||
return _RawRestrictionContainer::max_value();
|
||||
}
|
||||
value_type min_value() {
|
||||
return _RawRestrictionContainer::min_value();
|
||||
}
|
||||
};
|
||||
|
||||
struct CmpRestrictionByTo: public std::binary_function<_RawRestrictionContainer, _RawRestrictionContainer, bool> {
|
||||
typedef _RawRestrictionContainer value_type;
|
||||
bool operator () (const _RawRestrictionContainer & a, const _RawRestrictionContainer & b) const {
|
||||
return a.toWay < b.toWay;
|
||||
}
|
||||
value_type max_value() {
|
||||
return _RawRestrictionContainer::max_value();
|
||||
}
|
||||
value_type min_value() {
|
||||
return _RawRestrictionContainer::min_value();
|
||||
}
|
||||
};
|
||||
|
||||
struct _WayIDStartAndEndEdge {
|
||||
unsigned wayID;
|
||||
NodeID firstStart;
|
||||
NodeID firstTarget;
|
||||
NodeID lastStart;
|
||||
NodeID lastTarget;
|
||||
_WayIDStartAndEndEdge() : wayID(UINT_MAX), firstStart(UINT_MAX), firstTarget(UINT_MAX), lastStart(UINT_MAX), lastTarget(UINT_MAX) {}
|
||||
_WayIDStartAndEndEdge(unsigned w, NodeID fs, NodeID ft, NodeID ls, NodeID lt) : wayID(w), firstStart(fs), firstTarget(ft), lastStart(ls), lastTarget(lt) {}
|
||||
|
||||
static _WayIDStartAndEndEdge min_value() {
|
||||
return _WayIDStartAndEndEdge(numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min(), numeric_limits<unsigned>::min());
|
||||
}
|
||||
static _WayIDStartAndEndEdge max_value() {
|
||||
return _WayIDStartAndEndEdge(numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max(), numeric_limits<unsigned>::max());
|
||||
}
|
||||
};
|
||||
|
||||
struct CmpWayStartAndEnd : public std::binary_function<_WayIDStartAndEndEdge, _WayIDStartAndEndEdge, bool> {
|
||||
typedef _WayIDStartAndEndEdge value_type;
|
||||
bool operator () (const _WayIDStartAndEndEdge & a, const _WayIDStartAndEndEdge & b) const {
|
||||
return a.wayID < b.wayID;
|
||||
}
|
||||
value_type max_value() {
|
||||
return _WayIDStartAndEndEdge::max_value();
|
||||
}
|
||||
value_type min_value() {
|
||||
return _WayIDStartAndEndEdge::min_value();
|
||||
}
|
||||
};
|
||||
|
||||
struct Settings {
|
||||
struct SpeedProfile {
|
||||
vector< double > speed;
|
||||
vector< string > names;
|
||||
} speedProfile;
|
||||
// vector<string> accessList;
|
||||
// int trafficLightPenalty;
|
||||
int indexInAccessListOf( const string & key) {
|
||||
for(unsigned i = 0; i< speedProfile.names.size(); i++) {
|
||||
if(speedProfile.names[i] == key)
|
||||
|
@ -32,7 +32,7 @@ or see http://www.gnu.org/licenses/agpl.txt.
|
||||
#include "ExtractorStructs.h"
|
||||
|
||||
|
||||
class PBFParser : public BaseParser<_Node, _Relation, _Way> {
|
||||
class PBFParser : public BaseParser<_Node, _RawRestrictionContainer, _Way> {
|
||||
|
||||
enum EntityType {
|
||||
TypeNode = 1,
|
||||
@ -74,10 +74,10 @@ public:
|
||||
groupCount = 0;
|
||||
}
|
||||
|
||||
bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way),bool (*addressCallbackPointer)(_Node, HashTable<std::string, std::string>) ) {
|
||||
bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*restrictionCallbackPointer)(_RawRestrictionContainer), bool (*wayCallbackPointer)(_Way),bool (*addressCallbackPointer)(_Node, HashTable<std::string, std::string>) ) {
|
||||
nodeCallback = *nodeCallbackPointer;
|
||||
wayCallback = *wayCallbackPointer;
|
||||
relationCallback = *relationCallbackPointer;
|
||||
restrictionCallback = *restrictionCallbackPointer;
|
||||
addressCallback = *addressCallbackPointer;
|
||||
return true;
|
||||
}
|
||||
@ -226,15 +226,79 @@ private:
|
||||
void parseRelation(_ThreadData * threadData) {
|
||||
const OSMPBF::PrimitiveGroup& group = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID );
|
||||
for(int i = 0; i < group.relations_size(); i++ ) {
|
||||
_Relation r;
|
||||
r.type = _Relation::unknown;
|
||||
const OSMPBF::Relation& inputRelation = threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).relations(i);
|
||||
bool isRestriction = false;
|
||||
bool isOnlyRestriction = false;
|
||||
for(int k = 0; k < inputRelation.keys_size(); k++) {
|
||||
const std::string key = threadData->PBFprimitiveBlock.stringtable().s(inputRelation.keys(k));
|
||||
const std::string val = threadData->PBFprimitiveBlock.stringtable().s(inputRelation.vals(k));
|
||||
if ("type" == key && "restriction" == val) {
|
||||
isRestriction = true;
|
||||
}
|
||||
if ("restriction" == key) {
|
||||
if(val.find("only_") == 0)
|
||||
isOnlyRestriction = true;
|
||||
}
|
||||
|
||||
}
|
||||
if(isRestriction) {
|
||||
long long lastRef = 0;
|
||||
_RawRestrictionContainer currentRestrictionContainer(isOnlyRestriction);
|
||||
for(int rolesIndex = 0; rolesIndex < inputRelation.roles_sid_size(); rolesIndex++) {
|
||||
string role(threadData->PBFprimitiveBlock.stringtable().s( inputRelation.roles_sid( rolesIndex ) ).data());
|
||||
lastRef += inputRelation.memids(rolesIndex);
|
||||
|
||||
if(false == ("from" == role || "to" == role || "via" == role)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(inputRelation.types(rolesIndex)) {
|
||||
case 0: //node
|
||||
if("from" == role || "to" == role) //Only via should be a node
|
||||
continue;
|
||||
assert("via" == role);
|
||||
if(UINT_MAX != currentRestrictionContainer.viaWay)
|
||||
currentRestrictionContainer.viaWay = UINT_MAX;
|
||||
assert(UINT_MAX == currentRestrictionContainer.viaWay);
|
||||
currentRestrictionContainer.restriction.viaNode = lastRef;
|
||||
break;
|
||||
case 1: //way
|
||||
assert("from" == role || "to" == role || "via" == role);
|
||||
if("from" == role) {
|
||||
currentRestrictionContainer.fromWay = lastRef;
|
||||
}
|
||||
if ("to" == role) {
|
||||
currentRestrictionContainer.toWay = lastRef;
|
||||
}
|
||||
if ("via" == role) {
|
||||
assert(currentRestrictionContainer.restriction.toNode == UINT_MAX);
|
||||
currentRestrictionContainer.viaWay = lastRef;
|
||||
}
|
||||
break;
|
||||
case 2: //relation, not used. relations relating to relations are evil.
|
||||
continue;
|
||||
assert(false);
|
||||
break;
|
||||
|
||||
default: //should not happen
|
||||
cout << "unknown";
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if(UINT_MAX != currentRestriction.viaNode) {
|
||||
// cout << "restr from " << currentRestriction.from << " via ";
|
||||
// cout << "node " << currentRestriction.viaNode;
|
||||
// cout << " to " << currentRestriction.to << endl;
|
||||
// }
|
||||
#pragma omp critical
|
||||
{
|
||||
if(!(*relationCallback)(r))
|
||||
if(!(*restrictionCallback)(currentRestrictionContainer))
|
||||
std::cerr << "[PBFParser] relation not parsed" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void parseWay(_ThreadData * threadData) {
|
||||
if( threadData->PBFprimitiveBlock.primitivegroup( threadData->currentGroupID ).ways_size() > 0) {
|
||||
@ -442,7 +506,7 @@ private:
|
||||
/* Function pointer for nodes */
|
||||
bool (*nodeCallback)(_Node);
|
||||
bool (*wayCallback)(_Way);
|
||||
bool (*relationCallback)(_Relation);
|
||||
bool (*restrictionCallback)(_RawRestrictionContainer);
|
||||
bool (*addressCallback)(_Node, HashTable<std::string, std::string>);
|
||||
/* the input stream to parse */
|
||||
std::fstream input;
|
||||
|
@ -29,17 +29,18 @@ or see http://www.gnu.org/licenses/agpl.txt.
|
||||
#include "ExtractorStructs.h"
|
||||
#include "InputReaderFactory.h"
|
||||
|
||||
class XMLParser : public BaseParser<_Node, _Relation, _Way> {
|
||||
class XMLParser : public BaseParser<_Node, _RawRestrictionContainer, _Way> {
|
||||
public:
|
||||
XMLParser(const char * filename) {
|
||||
WARN("Parsing plain .osm/.osm.bz2 is deprecated. Switch to .pbf");
|
||||
inputReader = inputReaderFactory(filename);
|
||||
}
|
||||
~XMLParser() {}
|
||||
|
||||
bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*relationCallbackPointer)(_Relation), bool (*wayCallbackPointer)(_Way), bool (*addressCallbackPointer)(_Node, HashTable<std::string, std::string>) ) {
|
||||
bool RegisterCallbacks(bool (*nodeCallbackPointer)(_Node), bool (*restrictionCallbackPointer)(_RawRestrictionContainer), bool (*wayCallbackPointer)(_Way), bool (*addressCallbackPointer)(_Node, HashTable<std::string, std::string>) ) {
|
||||
nodeCallback = *nodeCallbackPointer;
|
||||
wayCallback = *wayCallbackPointer;
|
||||
relationCallback = *relationCallbackPointer;
|
||||
restrictionCallback = *restrictionCallbackPointer;
|
||||
return true;
|
||||
}
|
||||
bool Init() {
|
||||
@ -73,8 +74,7 @@ public:
|
||||
if ( xmlStrEqual( currentName, ( const xmlChar* ) "relation" ) == 1 ) {
|
||||
_Relation r;
|
||||
r.type = _Relation::unknown;
|
||||
if(!(*relationCallback)(r))
|
||||
std::cerr << "[XMLParser] relation not parsed" << std::endl;
|
||||
//todo: parse relation
|
||||
}
|
||||
xmlFree( currentName );
|
||||
}
|
||||
@ -210,7 +210,7 @@ private:
|
||||
/* Function pointer for nodes */
|
||||
bool (*nodeCallback)(_Node);
|
||||
bool (*wayCallback)(_Way);
|
||||
bool (*relationCallback)(_Relation);
|
||||
bool (*restrictionCallback)(_RawRestrictionContainer);
|
||||
|
||||
};
|
||||
|
||||
|
255
extractor.cpp
255
extractor.cpp
@ -50,12 +50,12 @@ or see http://www.gnu.org/licenses/agpl.txt.
|
||||
|
||||
typedef BaseConfiguration ExtractorConfiguration;
|
||||
|
||||
unsigned globalRelationCounter = 0;
|
||||
unsigned globalRestrictionCounter = 0;
|
||||
ExtractorCallbacks * extractCallBacks;
|
||||
|
||||
bool nodeFunction(_Node n);
|
||||
bool adressFunction(_Node n, HashTable<std::string, std::string> keyVals);
|
||||
bool relationFunction(_Relation r);
|
||||
bool adressFunction(_Node n, HashTable<string, string> keyVals);
|
||||
bool restrictionFunction(_RawRestrictionContainer r);
|
||||
bool wayFunction(_Way w);
|
||||
|
||||
template<class ClassT>
|
||||
@ -63,14 +63,15 @@ bool removeIfUnused(ClassT n) { return (false == n.used); }
|
||||
|
||||
int main (int argc, char *argv[]) {
|
||||
if(argc <= 1) {
|
||||
std::cerr << "usage: " << endl << argv[0] << " <file.osm/.osm.bz2/.osm.pbf>" << std::endl;
|
||||
cerr << "usage: " << endl << argv[0] << " <file.osm/.osm.bz2/.osm.pbf>" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
std::cout << "[extractor] extracting data from input file " << argv[1] << std::endl;
|
||||
cout << "[extractor] extracting data from input file " << argv[1] << endl;
|
||||
bool isPBF = false;
|
||||
std::string outputFileName(argv[1]);
|
||||
std::string::size_type pos = outputFileName.find(".osm.bz2");
|
||||
string outputFileName(argv[1]);
|
||||
string restrictionsFileName(argv[1]);
|
||||
string::size_type pos = outputFileName.find(".osm.bz2");
|
||||
if(pos==string::npos) {
|
||||
pos = outputFileName.find(".osm.pbf");
|
||||
if(pos!=string::npos) {
|
||||
@ -79,36 +80,34 @@ int main (int argc, char *argv[]) {
|
||||
}
|
||||
if(pos!=string::npos) {
|
||||
outputFileName.replace(pos, 8, ".osrm");
|
||||
restrictionsFileName.replace(pos, 8, ".osrm.restrictions");
|
||||
} else {
|
||||
pos=outputFileName.find(".osm");
|
||||
if(pos!=string::npos) {
|
||||
outputFileName.replace(pos, 5, ".osrm");
|
||||
restrictionsFileName.replace(pos, 5, ".osrm.restrictions");
|
||||
} else {
|
||||
outputFileName.append(".osrm");
|
||||
restrictionsFileName.append(".osrm.restrictions");
|
||||
}
|
||||
}
|
||||
std::string adressFileName(outputFileName);
|
||||
|
||||
|
||||
string adressFileName(outputFileName);
|
||||
|
||||
unsigned amountOfRAM = 1;
|
||||
unsigned installedRAM = GetPhysicalmemory();
|
||||
if(installedRAM < 2048264) {
|
||||
std::cout << "[Warning] Machine has less than 2GB RAM." << std::endl;
|
||||
cout << "[Warning] Machine has less than 2GB RAM." << endl;
|
||||
}
|
||||
if(testDataFile("extractor.ini")) {
|
||||
ExtractorConfiguration extractorConfig("extractor.ini");
|
||||
unsigned memoryAmountFromFile = atoi(extractorConfig.GetParameter("Memory").c_str());
|
||||
if( memoryAmountFromFile != 0 && memoryAmountFromFile <= installedRAM/(1024*1024*1024))
|
||||
amountOfRAM = memoryAmountFromFile;
|
||||
std::cout << "[extractor] using " << amountOfRAM << " GB of RAM for buffers" << std::endl;
|
||||
cout << "[extractor] using " << amountOfRAM << " GB of RAM for buffers" << endl;
|
||||
}
|
||||
|
||||
STXXLNodeIDVector usedNodeIDs;
|
||||
STXXLNodeVector allNodes;
|
||||
STXXLEdgeVector allEdges;
|
||||
STXXLAddressVector adressVector;
|
||||
STXXLStringVector nameVector;
|
||||
STXXLContainers externalMemory;
|
||||
|
||||
unsigned usedNodeCounter = 0;
|
||||
unsigned usedEdgeCounter = 0;
|
||||
|
||||
@ -120,60 +119,148 @@ int main (int argc, char *argv[]) {
|
||||
double time = get_timestamp();
|
||||
|
||||
stringMap->set_empty_key(GetRandomString());
|
||||
stringMap->insert(std::make_pair("", 0));
|
||||
extractCallBacks = new ExtractorCallbacks(&allNodes, &usedNodeIDs, &allEdges, &nameVector, &adressVector, settings, stringMap);
|
||||
stringMap->insert(make_pair("", 0));
|
||||
extractCallBacks = new ExtractorCallbacks(&externalMemory, settings, stringMap);
|
||||
|
||||
BaseParser<_Node, _Relation, _Way> * parser;
|
||||
BaseParser<_Node, _RawRestrictionContainer, _Way> * parser;
|
||||
if(isPBF) {
|
||||
parser = new PBFParser(argv[1]);
|
||||
} else {
|
||||
parser = new XMLParser(argv[1]);
|
||||
}
|
||||
parser->RegisterCallbacks(&nodeFunction, &relationFunction, &wayFunction, &adressFunction);
|
||||
parser->RegisterCallbacks(&nodeFunction, &restrictionFunction, &wayFunction, &adressFunction);
|
||||
if(parser->Init()) {
|
||||
parser->Parse();
|
||||
} else {
|
||||
std::cerr << "[error] parser not initialized!" << std::endl;
|
||||
cerr << "[error] parser not initialized!" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
delete parser;
|
||||
|
||||
try {
|
||||
// std::cout << "[info] raw no. of names: " << nameVector.size() << std::endl;
|
||||
// std::cout << "[info] raw no. of nodes: " << allNodes.size() << std::endl;
|
||||
// std::cout << "[info] no. of used nodes: " << usedNodeIDs.size() << std::endl;
|
||||
// std::cout << "[info] raw no. of edges: " << allEdges.size() << std::endl;
|
||||
// std::cout << "[info] raw no. of relations: " << globalRelationCounter << std::endl;
|
||||
// std::cout << "[info] raw no. of addresses: " << adressVector.size() << std::endl;
|
||||
// INFO("raw no. of names: " << externalMemory.nameVector.size());
|
||||
// INFO("raw no. of nodes: " << externalMemory.allNodes.size());
|
||||
// INFO("no. of used nodes: " << externalMemory.usedNodeIDs.size());
|
||||
// INFO("raw no. of edges: " << externalMemory.allEdges.size());
|
||||
// INFO("raw no. of ways: " << externalMemory.wayStartEndVector.size());
|
||||
// INFO("raw no. of addresses: " << externalMemory.adressVector.size());
|
||||
// INFO("raw no. of restrictions: " << externalMemory.restrictionsVector.size());
|
||||
|
||||
std::cout << "[extractor] parsing finished after " << get_timestamp() - time << "seconds" << std::endl;
|
||||
cout << "[extractor] parsing finished after " << get_timestamp() - time << "seconds" << endl;
|
||||
time = get_timestamp();
|
||||
unsigned memory_to_use = amountOfRAM * 1024 * 1024 * 1024;
|
||||
|
||||
std::cout << "[extractor] Sorting used nodes ... " << std::flush;
|
||||
stxxl::sort(usedNodeIDs.begin(), usedNodeIDs.end(), Cmp(), memory_to_use);
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "[extractor] Sorting used nodes ... " << flush;
|
||||
stxxl::sort(externalMemory.usedNodeIDs.begin(), externalMemory.usedNodeIDs.end(), Cmp(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
time = get_timestamp();
|
||||
std::cout << "[extractor] Erasing duplicate nodes ... " << std::flush;
|
||||
stxxl::vector<NodeID>::iterator NewEnd = unique ( usedNodeIDs.begin(),usedNodeIDs.end() ) ;
|
||||
usedNodeIDs.resize ( NewEnd - usedNodeIDs.begin() );
|
||||
cout << "[extractor] Erasing duplicate nodes ... " << flush;
|
||||
stxxl::vector<NodeID>::iterator NewEnd = unique ( externalMemory.usedNodeIDs.begin(),externalMemory.usedNodeIDs.end() ) ;
|
||||
externalMemory.usedNodeIDs.resize ( NewEnd - externalMemory.usedNodeIDs.begin() );
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::cout << "[extractor] Sorting all nodes ... " << std::flush;
|
||||
stxxl::sort(allNodes.begin(), allNodes.end(), CmpNodeByID(), memory_to_use);
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "[extractor] Sorting all nodes ... " << flush;
|
||||
stxxl::sort(externalMemory.allNodes.begin(), externalMemory.allNodes.end(), CmpNodeByID(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::ofstream fout;
|
||||
fout.open(outputFileName.c_str(), std::ios::binary);
|
||||
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
|
||||
cout << "[extractor] Sorting used ways ... " << flush;
|
||||
stxxl::sort(externalMemory.wayStartEndVector.begin(), externalMemory.wayStartEndVector.end(), CmpWayStartAndEnd(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
std::cout << "[extractor] Confirming used nodes ... " << std::flush;
|
||||
STXXLNodeVector::iterator nodesIT = allNodes.begin();
|
||||
STXXLNodeIDVector::iterator usedNodeIDsIT = usedNodeIDs.begin();
|
||||
while(usedNodeIDsIT != usedNodeIDs.end() && nodesIT != allNodes.end()) {
|
||||
cout << "[extractor] Sorting restrctns. by from... " << flush;
|
||||
stxxl::sort(externalMemory.restrictionsVector.begin(), externalMemory.restrictionsVector.end(), CmpRestrictionByFrom(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
cout << "[extractor] Fixing restriction starts ... " << flush;
|
||||
STXXLRestrictionsVector::iterator restrictionsIT = externalMemory.restrictionsVector.begin();
|
||||
STXXLWayIDStartEndVector::iterator wayStartAndEndEdgeIT = externalMemory.wayStartEndVector.begin();
|
||||
|
||||
while(wayStartAndEndEdgeIT != externalMemory.wayStartEndVector.end() && restrictionsIT != externalMemory.restrictionsVector.end()) {
|
||||
if(wayStartAndEndEdgeIT->wayID < restrictionsIT->fromWay){
|
||||
wayStartAndEndEdgeIT++;
|
||||
continue;
|
||||
}
|
||||
if(wayStartAndEndEdgeIT->wayID > restrictionsIT->fromWay) {
|
||||
restrictionsIT++;
|
||||
continue;
|
||||
}
|
||||
assert(wayStartAndEndEdgeIT->wayID == restrictionsIT->fromWay);
|
||||
NodeID viaNode = restrictionsIT->restriction.viaNode;
|
||||
|
||||
if(wayStartAndEndEdgeIT->firstStart == viaNode) {
|
||||
restrictionsIT->restriction.fromNode = wayStartAndEndEdgeIT->firstTarget;
|
||||
} else if(wayStartAndEndEdgeIT->firstTarget == viaNode) {
|
||||
restrictionsIT->restriction.fromNode = wayStartAndEndEdgeIT->firstStart;
|
||||
} else if(wayStartAndEndEdgeIT->lastStart == viaNode) {
|
||||
restrictionsIT->restriction.fromNode = wayStartAndEndEdgeIT->lastTarget;
|
||||
} else if(wayStartAndEndEdgeIT->lastTarget == viaNode) {
|
||||
restrictionsIT->restriction.fromNode = wayStartAndEndEdgeIT->lastStart;
|
||||
}
|
||||
restrictionsIT++;
|
||||
}
|
||||
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
cout << "[extractor] Sorting restrctns. by to ... " << flush;
|
||||
stxxl::sort(externalMemory.restrictionsVector.begin(), externalMemory.restrictionsVector.end(), CmpRestrictionByTo(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
time = get_timestamp();
|
||||
unsigned usableRestrictionsCounter(0);
|
||||
cout << "[extractor] Fixing restriction ends ... " << flush;
|
||||
restrictionsIT = externalMemory.restrictionsVector.begin();
|
||||
wayStartAndEndEdgeIT = externalMemory.wayStartEndVector.begin();
|
||||
while(wayStartAndEndEdgeIT != externalMemory.wayStartEndVector.end() &&
|
||||
restrictionsIT != externalMemory.restrictionsVector.end()) {
|
||||
if(wayStartAndEndEdgeIT->wayID < restrictionsIT->toWay){
|
||||
wayStartAndEndEdgeIT++;
|
||||
continue;
|
||||
}
|
||||
if(wayStartAndEndEdgeIT->wayID > restrictionsIT->toWay) {
|
||||
restrictionsIT++;
|
||||
continue;
|
||||
}
|
||||
NodeID viaNode = restrictionsIT->restriction.viaNode;
|
||||
if(wayStartAndEndEdgeIT->lastStart == viaNode) {
|
||||
restrictionsIT->restriction.toNode = wayStartAndEndEdgeIT->lastTarget;
|
||||
} else if(wayStartAndEndEdgeIT->lastTarget == viaNode) {
|
||||
restrictionsIT->restriction.toNode = wayStartAndEndEdgeIT->lastStart;
|
||||
} else if(wayStartAndEndEdgeIT->firstStart == viaNode) {
|
||||
restrictionsIT->restriction.toNode = wayStartAndEndEdgeIT->firstTarget;
|
||||
} else if(wayStartAndEndEdgeIT->firstTarget == viaNode) {
|
||||
restrictionsIT->restriction.toNode = wayStartAndEndEdgeIT->firstStart;
|
||||
}
|
||||
|
||||
if(UINT_MAX != restrictionsIT->restriction.fromNode && UINT_MAX != restrictionsIT->restriction.toNode) {
|
||||
usableRestrictionsCounter++;
|
||||
}
|
||||
restrictionsIT++;
|
||||
}
|
||||
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
//todo: serialize restrictions
|
||||
ofstream restrictionsOutstream;
|
||||
restrictionsOutstream.open(restrictionsFileName.c_str(), ios::binary);
|
||||
restrictionsOutstream.write((char*)&usableRestrictionsCounter, sizeof(unsigned));
|
||||
for(restrictionsIT = externalMemory.restrictionsVector.begin(); restrictionsIT != externalMemory.restrictionsVector.end(); restrictionsIT++) {
|
||||
if(UINT_MAX != restrictionsIT->restriction.fromNode && UINT_MAX != restrictionsIT->restriction.toNode) {
|
||||
restrictionsOutstream.write((char *)&(restrictionsIT->restriction), sizeof(_Restriction));
|
||||
}
|
||||
}
|
||||
restrictionsOutstream.close();
|
||||
|
||||
ofstream fout;
|
||||
fout.open(outputFileName.c_str(), ios::binary);
|
||||
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
|
||||
time = get_timestamp();
|
||||
cout << "[extractor] Confirming used nodes ... " << flush;
|
||||
STXXLNodeVector::iterator nodesIT = externalMemory.allNodes.begin();
|
||||
STXXLNodeIDVector::iterator usedNodeIDsIT = externalMemory.usedNodeIDs.begin();
|
||||
while(usedNodeIDsIT != externalMemory.usedNodeIDs.end() && nodesIT != externalMemory.allNodes.end()) {
|
||||
if(*usedNodeIDsIT < nodesIT->id){
|
||||
usedNodeIDsIT++;
|
||||
continue;
|
||||
@ -191,30 +278,31 @@ int main (int argc, char *argv[]) {
|
||||
nodesIT++;
|
||||
}
|
||||
}
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::cout << "[extractor] setting number of nodes ... " << std::flush;
|
||||
std::ios::pos_type positionInFile = fout.tellp();
|
||||
fout.seekp(std::ios::beg);
|
||||
cout << "[extractor] setting number of nodes ... " << flush;
|
||||
ios::pos_type positionInFile = fout.tellp();
|
||||
fout.seekp(ios::beg);
|
||||
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
|
||||
fout.seekp(positionInFile);
|
||||
|
||||
std::cout << "ok" << std::endl;
|
||||
cout << "ok" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
// Sort edges by start.
|
||||
std::cout << "[extractor] Sorting edges by start ... " << std::flush;
|
||||
stxxl::sort(allEdges.begin(), allEdges.end(), CmpEdgeByStartID(), memory_to_use);
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "[extractor] Sorting edges by start ... " << flush;
|
||||
stxxl::sort(externalMemory.allEdges.begin(), externalMemory.allEdges.end(), CmpEdgeByStartID(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::cout << "[extractor] Setting start coords ... " << std::flush;
|
||||
cout << "[extractor] Setting start coords ... " << flush;
|
||||
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
|
||||
// Traverse list of edges and nodes in parallel and set start coord
|
||||
nodesIT = allNodes.begin();
|
||||
STXXLEdgeVector::iterator edgeIT = allEdges.begin();
|
||||
while(edgeIT != allEdges.end() && nodesIT != allNodes.end()) {
|
||||
nodesIT = externalMemory.allNodes.begin();
|
||||
STXXLEdgeVector::iterator edgeIT = externalMemory.allEdges.begin();
|
||||
while(edgeIT != externalMemory.allEdges.end() && nodesIT != externalMemory.allNodes.end()) {
|
||||
if(edgeIT->start < nodesIT->id){
|
||||
edgeIT++;
|
||||
continue;
|
||||
@ -229,20 +317,20 @@ int main (int argc, char *argv[]) {
|
||||
edgeIT++;
|
||||
}
|
||||
}
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
// Sort Edges by target
|
||||
std::cout << "[extractor] Sorting edges by target ... " << std::flush;
|
||||
stxxl::sort(allEdges.begin(), allEdges.end(), CmpEdgeByTargetID(), memory_to_use);
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "[extractor] Sorting edges by target ... " << flush;
|
||||
stxxl::sort(externalMemory.allEdges.begin(), externalMemory.allEdges.end(), CmpEdgeByTargetID(), memory_to_use);
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::cout << "[extractor] Setting target coords ... " << std::flush;
|
||||
cout << "[extractor] Setting target coords ... " << flush;
|
||||
// Traverse list of edges and nodes in parallel and set target coord
|
||||
nodesIT = allNodes.begin();
|
||||
edgeIT = allEdges.begin();
|
||||
while(edgeIT != allEdges.end() && nodesIT != allNodes.end()) {
|
||||
nodesIT = externalMemory.allNodes.begin();
|
||||
edgeIT = externalMemory.allEdges.begin();
|
||||
while(edgeIT != externalMemory.allEdges.end() && nodesIT != externalMemory.allNodes.end()) {
|
||||
if(edgeIT->target < nodesIT->id){
|
||||
edgeIT++;
|
||||
continue;
|
||||
@ -285,7 +373,7 @@ int main (int argc, char *argv[]) {
|
||||
fout.write((char*)&one, sizeof(short));
|
||||
break;
|
||||
default:
|
||||
std::cerr << "[error] edge with no direction: " << edgeIT->direction << std::endl;
|
||||
cerr << "[error] edge with no direction: " << edgeIT->direction << endl;
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
@ -298,25 +386,25 @@ int main (int argc, char *argv[]) {
|
||||
edgeIT++;
|
||||
}
|
||||
}
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
std::cout << "[extractor] setting number of edges ... " << std::flush;
|
||||
cout << "[extractor] setting number of edges ... " << flush;
|
||||
fout.seekp(positionInFile);
|
||||
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
|
||||
fout.close();
|
||||
std::cout << "ok" << std::endl;
|
||||
cout << "ok" << endl;
|
||||
time = get_timestamp();
|
||||
|
||||
|
||||
std::cout << "[extractor] writing street name index ... " << std::flush;
|
||||
std::vector<unsigned> * nameIndex = new std::vector<unsigned>(nameVector.size()+1, 0);
|
||||
cout << "[extractor] writing street name index ... " << flush;
|
||||
vector<unsigned> * nameIndex = new vector<unsigned>(externalMemory.nameVector.size()+1, 0);
|
||||
outputFileName.append(".names");
|
||||
std::ofstream nameOutFile(outputFileName.c_str(), std::ios::binary);
|
||||
ofstream nameOutFile(outputFileName.c_str(), ios::binary);
|
||||
unsigned sizeOfNameIndex = nameIndex->size();
|
||||
nameOutFile.write((char *)&(sizeOfNameIndex), sizeof(unsigned));
|
||||
|
||||
for(STXXLStringVector::iterator it = nameVector.begin(); it != nameVector.end(); it++) {
|
||||
for(STXXLStringVector::iterator it = externalMemory.nameVector.begin(); it != externalMemory.nameVector.end(); it++) {
|
||||
unsigned lengthOfRawString = strlen(it->c_str());
|
||||
nameOutFile.write((char *)&(lengthOfRawString), sizeof(unsigned));
|
||||
nameOutFile.write(it->c_str(), lengthOfRawString);
|
||||
@ -324,26 +412,26 @@ int main (int argc, char *argv[]) {
|
||||
|
||||
nameOutFile.close();
|
||||
delete nameIndex;
|
||||
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
// time = get_timestamp();
|
||||
// std::cout << "[extractor] writing address list ... " << std::flush;
|
||||
// cout << "[extractor] writing address list ... " << flush;
|
||||
//
|
||||
// adressFileName.append(".address");
|
||||
// std::ofstream addressOutFile(adressFileName.c_str());
|
||||
// ofstream addressOutFile(adressFileName.c_str());
|
||||
// for(STXXLAddressVector::iterator it = adressVector.begin(); it != adressVector.end(); it++) {
|
||||
// addressOutFile << it->node.id << "|" << it->node.lat << "|" << it->node.lon << "|" << it->city << "|" << it->street << "|" << it->housenumber << "|" << it->state << "|" << it->country << "\n";
|
||||
// }
|
||||
// addressOutFile.close();
|
||||
// std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
|
||||
// cout << "ok, after " << get_timestamp() - time << "s" << endl;
|
||||
|
||||
} catch ( const std::exception& e ) {
|
||||
std::cerr << "Caught Execption:" << e.what() << std::endl;
|
||||
} catch ( const exception& e ) {
|
||||
cerr << "Caught Execption:" << e.what() << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
delete extractCallBacks;
|
||||
std::cout << "[extractor] finished." << std::endl;
|
||||
cout << "[extractor] finished." << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -352,13 +440,14 @@ bool nodeFunction(_Node n) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool adressFunction(_Node n, HashTable<std::string, std::string> keyVals){
|
||||
bool adressFunction(_Node n, HashTable<string, string> keyVals){
|
||||
extractCallBacks->adressFunction(n, keyVals);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool relationFunction(_Relation r) {
|
||||
globalRelationCounter++;
|
||||
bool restrictionFunction(_RawRestrictionContainer r) {
|
||||
extractCallBacks->restrictionFunction(r);
|
||||
globalRestrictionCounter++;
|
||||
return true;
|
||||
}
|
||||
bool wayFunction(_Way w) {
|
||||
|
Loading…
Reference in New Issue
Block a user