Fixing data type issue that prevented large files on Windows. See issue

#55
This commit is contained in:
DennisOSRM 2012-12-29 14:14:01 +01:00
parent b869184c10
commit 943c15927a
4 changed files with 89 additions and 82 deletions

View File

@ -274,7 +274,7 @@ public:
TemporaryStorage & tempStorage = TemporaryStorage::GetInstance();
//Write dummy number of edges to temporary file
// std::ofstream temporaryEdgeStorage(temporaryEdgeStorageFilename.c_str(), std::ios::binary);
long initialFilePosition = tempStorage.tell(temporaryStorageSlotID);
uint64_t initialFilePosition = tempStorage.tell(temporaryStorageSlotID);
unsigned numberOfTemporaryEdges = 0;
tempStorage.writeToSlot(temporaryStorageSlotID, (char*)&numberOfTemporaryEdges, sizeof(unsigned));

View File

@ -21,10 +21,12 @@ or see http://www.gnu.org/licenses/agpl.txt.
#ifndef NNGRID_H_
#define NNGRID_H_
#include <algorithm>
#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstring>
#include <algorithm>
#include <fstream>
#include <limits>
#include <vector>
@ -58,7 +60,7 @@ template<bool WriteAccess = false>
class NNGrid {
public:
// Default constructor: sizes ramIndexTable to 1024*1024 slots, each filled with
// the maximum value of the slot type.
// The two resize lines are the pre-change (ULONG_MAX) and post-change
// (std::numeric_limits<uint64_t>::max()) forms of the same statement as shown
// by this diff hunk; only the latter belongs to the committed file.
NNGrid() /*: cellCache(500), fileCache(500)*/ {
ramIndexTable.resize((1024*1024), ULONG_MAX);
ramIndexTable.resize((1024*1024), std::numeric_limits<uint64_t>::max());
}
NNGrid(const char* rif, const char* _i) {
@ -66,7 +68,7 @@ public:
ERR("Not available in Write mode");
}
iif = std::string(_i);
ramIndexTable.resize((1024*1024), ULONG_MAX);
ramIndexTable.resize((1024*1024), std::numeric_limits<uint64_t>::max());
ramInFile.open(rif, std::ios::in | std::ios::binary);
if(!ramInFile) { ERR(rif << " not found"); }
@ -87,7 +89,7 @@ public:
// Reads the first-level index from ramInFile straight into ramIndexTable's
// storage, then closes the stream. ramInFile must already be open (asserted).
// The two read lines are the pre- and post-change forms of the same statement
// in this diff hunk: the byte count goes from sizeof(unsigned long)*1024*1024
// to sizeof(uint64_t)*1024*1024, so the entry width no longer depends on the
// platform's definition of unsigned long.
void OpenIndexFiles() {
assert(ramInFile.is_open());
ramInFile.read(static_cast<char*>(static_cast<void*>(&ramIndexTable[0]) ), sizeof(unsigned long)*1024*1024);
ramInFile.read(static_cast<char*>(static_cast<void*>(&ramIndexTable[0]) ), sizeof(uint64_t)*1024*1024);
ramInFile.close();
}
@ -114,8 +116,8 @@ public:
INFO("finished sorting after " << (get_timestamp() - timestamp) << "s");
std::vector<GridEntry> entriesInFileWithRAMSameIndex;
unsigned indexInRamTable = entries.begin()->ramIndex;
unsigned long lastPositionInIndexFile = 0;
cout << "writing data ..." << flush;
uint64_t lastPositionInIndexFile = 0;
std::cout << "writing data ..." << std::flush;
p.reinit(entries.size());
boost::unordered_map< unsigned, unsigned > cellMap(1024);
BOOST_FOREACH(GridEntry & gridEntry, entries) {
@ -143,9 +145,9 @@ public:
indexOutFile.close();
//Serialize RAM Index
ofstream ramFile(ramIndexOut, std::ios::out | std::ios::binary | std::ios::trunc);
std::ofstream ramFile(ramIndexOut, std::ios::out | std::ios::binary | std::ios::trunc);
//write the index table held in RAM (1024*1024 entries; 8 MB with 64-bit entries)
ramFile.write((char *)&ramIndexTable[0], sizeof(unsigned long)*1024*1024 );
ramFile.write((char *)&ramIndexTable[0], sizeof(uint64_t)*1024*1024 );
//close ram index file
ramFile.close();
}
@ -174,7 +176,7 @@ public:
// INFO("looked up " << candidates.size());
_GridEdge smallestEdge;
_Coordinate tmp, edgeStartCoord, edgeEndCoord;
double dist = numeric_limits<double>::max();
double dist = std::numeric_limits<double>::max();
double r, tmpDist;
BOOST_FOREACH(_GridEdge candidate, candidates) {
@ -314,13 +316,13 @@ private:
return (std::fabs(d1 - d2) < FLT_EPSILON);
}
inline unsigned FillCell(std::vector<GridEntry>& entriesWithSameRAMIndex, const unsigned long fileOffset, boost::unordered_map< unsigned, unsigned > & cellMap ) {
inline unsigned FillCell(std::vector<GridEntry>& entriesWithSameRAMIndex, const uint64_t fileOffset, boost::unordered_map< unsigned, unsigned > & cellMap ) {
std::vector<char> tmpBuffer(32*32*4096,0);
unsigned long indexIntoTmpBuffer = 0;
uint64_t indexIntoTmpBuffer = 0;
unsigned numberOfWrittenBytes = 0;
assert(indexOutFile.is_open());
std::vector<unsigned long> cellIndex(32*32,ULONG_MAX);
std::vector<uint64_t> cellIndex(32*32,std::numeric_limits<uint64_t>::max());
for(unsigned i = 0; i < entriesWithSameRAMIndex.size() -1; ++i) {
assert(entriesWithSameRAMIndex[i].ramIndex== entriesWithSameRAMIndex[i+1].ramIndex);
@ -356,8 +358,8 @@ private:
indexIntoTmpBuffer += FlushEntriesWithSameFileIndexToBuffer(entriesWithSameFileIndex, tmpBuffer, indexIntoTmpBuffer);
assert(entriesWithSameFileIndex.size() == 0);
indexOutFile.write(static_cast<char*>(static_cast<void*>(&cellIndex[0])),32*32*sizeof(unsigned long));
numberOfWrittenBytes += 32*32*sizeof(unsigned long);
indexOutFile.write(static_cast<char*>(static_cast<void*>(&cellIndex[0])),32*32*sizeof(uint64_t));
numberOfWrittenBytes += 32*32*sizeof(uint64_t);
//write contents of tmpbuffer to disk
indexOutFile.write(&tmpBuffer[0], indexIntoTmpBuffer*sizeof(char));
@ -366,7 +368,7 @@ private:
return numberOfWrittenBytes;
}
inline unsigned FlushEntriesWithSameFileIndexToBuffer( std::vector<GridEntry> &vectorWithSameFileIndex, std::vector<char> & tmpBuffer, const unsigned long index) const {
inline unsigned FlushEntriesWithSameFileIndexToBuffer( std::vector<GridEntry> &vectorWithSameFileIndex, std::vector<char> & tmpBuffer, const uint64_t index) const {
sort( vectorWithSameFileIndex.begin(), vectorWithSameFileIndex.end() );
vectorWithSameFileIndex.erase(unique(vectorWithSameFileIndex.begin(), vectorWithSameFileIndex.end()), vectorWithSameFileIndex.end());
const unsigned lengthOfBucket = vectorWithSameFileIndex.size();
@ -394,8 +396,8 @@ private:
inline void GetContentsOfFileBucketEnumerated(const unsigned fileIndex, std::vector<_GridEdge>& result) const {
unsigned ramIndex = GetRAMIndexFromFileIndex(fileIndex);
unsigned long startIndexInFile = ramIndexTable[ramIndex];
if(startIndexInFile == ULONG_MAX) {
uint64_t startIndexInFile = ramIndexTable[ramIndex];
if(startIndexInFile == std::numeric_limits<uint64_t>::max()) {
return;
}
unsigned enumeratedIndex = GetCellIndexFromRAMAndFileIndex(ramIndex, fileIndex);
@ -409,14 +411,14 @@ private:
}
//only read the single necessary cell index
localStream->seekg(startIndexInFile+(enumeratedIndex*sizeof(unsigned long)));
unsigned long fetchedIndex = 0;
localStream->read(static_cast<char*>( static_cast<void*>(&fetchedIndex)), sizeof(unsigned long));
localStream->seekg(startIndexInFile+(enumeratedIndex*sizeof(uint64_t)));
uint64_t fetchedIndex = 0;
localStream->read(static_cast<char*>( static_cast<void*>(&fetchedIndex)), sizeof(uint64_t));
if(fetchedIndex == ULONG_MAX) {
if(fetchedIndex == std::numeric_limits<uint64_t>::max()) {
return;
}
const unsigned long position = fetchedIndex + 32*32*sizeof(unsigned long) ;
const uint64_t position = fetchedIndex + 32*32*sizeof(uint64_t) ;
unsigned lengthOfBucket;
unsigned currentSizeOfResult = result.size();
@ -428,12 +430,12 @@ private:
inline void GetContentsOfFileBucket(const unsigned fileIndex, std::vector<_GridEdge>& result, boost::unordered_map< unsigned, unsigned> & cellMap) {
unsigned ramIndex = GetRAMIndexFromFileIndex(fileIndex);
unsigned long startIndexInFile = ramIndexTable[ramIndex];
if(startIndexInFile == ULONG_MAX) {
uint64_t startIndexInFile = ramIndexTable[ramIndex];
if(startIndexInFile == std::numeric_limits<uint64_t>::max()) {
return;
}
unsigned long cellIndex[32*32];
uint64_t cellIndex[32*32];
cellMap.clear();
BuildCellIndexToFileIndexMap(ramIndex, cellMap);
@ -446,12 +448,12 @@ private:
}
localStream->seekg(startIndexInFile);
localStream->read(static_cast<char*>(static_cast<void*>( cellIndex)), 32*32*sizeof(unsigned long));
localStream->read(static_cast<char*>(static_cast<void*>( cellIndex)), 32*32*sizeof(uint64_t));
assert(cellMap.find(fileIndex) != cellMap.end());
if(cellIndex[cellMap[fileIndex]] == ULONG_MAX) {
if(cellIndex[cellMap[fileIndex]] == std::numeric_limits<uint64_t>::max()) {
return;
}
const unsigned long position = cellIndex[cellMap[fileIndex]] + 32*32*sizeof(unsigned long) ;
const uint64_t position = cellIndex[cellMap[fileIndex]] + 32*32*sizeof(uint64_t) ;
unsigned lengthOfBucket;
unsigned currentSizeOfResult = result.size();
@ -574,14 +576,14 @@ private:
return ramIndex;
}
const static unsigned long END_OF_BUCKET_DELIMITER = UINT_MAX;
const static uint64_t END_OF_BUCKET_DELIMITER = boost::integer_traits<uint64_t>::const_max;
std::ofstream indexOutFile;
std::ifstream ramInFile;
#ifndef ROUTED
stxxl::vector<GridEntry> entries;
#endif
std::vector<unsigned long> ramIndexTable; //8 MB for first level index in RAM
std::vector<uint64_t> ramIndexTable; //8 MB for first level index in RAM
std::string iif;
// LRUCache<int,std::vector<unsigned> > cellCache;
// LRUCache<int,std::vector<_Edge> > fileCache;

View File

@ -27,31 +27,31 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
double time = get_timestamp();
boost::uint64_t memory_to_use = static_cast<boost::uint64_t>(amountOfRAM) * 1024 * 1024 * 1024;
cout << "[extractor] Sorting used nodes ... " << flush;
std::cout << "[extractor] Sorting used nodes ... " << std::flush;
stxxl::sort(usedNodeIDs.begin(), usedNodeIDs.end(), Cmp(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Erasing duplicate nodes ... " << flush;
stxxl::vector<NodeID>::iterator NewEnd = unique ( usedNodeIDs.begin(),usedNodeIDs.end() ) ;
std::cout << "[extractor] Erasing duplicate nodes ... " << std::flush;
stxxl::vector<NodeID>::iterator NewEnd = std::unique ( usedNodeIDs.begin(),usedNodeIDs.end() ) ;
usedNodeIDs.resize ( NewEnd - usedNodeIDs.begin() );
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Sorting all nodes ... " << flush;
std::cout << "[extractor] Sorting all nodes ... " << std::flush;
stxxl::sort(allNodes.begin(), allNodes.end(), CmpNodeByID(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Sorting used ways ... " << flush;
std::cout << "[extractor] Sorting used ways ... " << std::flush;
stxxl::sort(wayStartEndVector.begin(), wayStartEndVector.end(), CmpWayByID(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
cout << "[extractor] Sorting restrctns. by from... " << flush;
std::cout << "[extractor] Sorting restrctns. by from... " << std::flush;
stxxl::sort(restrictionsVector.begin(), restrictionsVector.end(), CmpRestrictionContainerByFrom(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
cout << "[extractor] Fixing restriction starts ... " << flush;
std::cout << "[extractor] Fixing restriction starts ... " << std::flush;
STXXLRestrictionsVector::iterator restrictionsIT = restrictionsVector.begin();
STXXLWayIDStartEndVector::iterator wayStartAndEndEdgeIT = wayStartEndVector.begin();
@ -79,16 +79,16 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
++restrictionsIT;
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Sorting restrctns. by to ... " << flush;
std::cout << "[extractor] Sorting restrctns. by to ... " << std::flush;
stxxl::sort(restrictionsVector.begin(), restrictionsVector.end(), CmpRestrictionContainerByTo(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
unsigned usableRestrictionsCounter(0);
cout << "[extractor] Fixing restriction ends ... " << flush;
std::cout << "[extractor] Fixing restriction ends ... " << std::flush;
restrictionsIT = restrictionsVector.begin();
wayStartAndEndEdgeIT = wayStartEndVector.begin();
while(wayStartAndEndEdgeIT != wayStartEndVector.end() && restrictionsIT != restrictionsVector.end()) {
@ -116,11 +116,11 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
}
++restrictionsIT;
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
INFO("usable restrictions: " << usableRestrictionsCounter );
//serialize restrictions
ofstream restrictionsOutstream;
restrictionsOutstream.open(restrictionsFileName.c_str(), ios::binary);
std::ofstream restrictionsOutstream;
restrictionsOutstream.open(restrictionsFileName.c_str(), std::ios::binary);
restrictionsOutstream.write((char*)&usableRestrictionsCounter, sizeof(unsigned));
for(restrictionsIT = restrictionsVector.begin(); restrictionsIT != restrictionsVector.end(); ++restrictionsIT) {
if(UINT_MAX != restrictionsIT->restriction.fromNode && UINT_MAX != restrictionsIT->restriction.toNode) {
@ -129,11 +129,11 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
}
restrictionsOutstream.close();
ofstream fout;
fout.open(outputFileName.c_str(), ios::binary);
std::ofstream fout;
fout.open(outputFileName.c_str(), std::ios::binary);
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
time = get_timestamp();
cout << "[extractor] Confirming/Writing used nodes ... " << flush;
std::cout << "[extractor] Confirming/Writing used nodes ... " << std::flush;
STXXLNodeVector::iterator nodesIT = allNodes.begin();
STXXLNodeIDVector::iterator usedNodeIDsIT = usedNodeIDs.begin();
@ -154,24 +154,24 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
}
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
cout << "[extractor] setting number of nodes ... " << flush;
ios::pos_type positionInFile = fout.tellp();
fout.seekp(ios::beg);
std::cout << "[extractor] setting number of nodes ... " << std::flush;
std::ios::pos_type positionInFile = fout.tellp();
fout.seekp(std::ios::beg);
fout.write((char*)&usedNodeCounter, sizeof(unsigned));
fout.seekp(positionInFile);
cout << "ok" << endl;
std::cout << "ok" << std::endl;
time = get_timestamp();
// Sort edges by start.
cout << "[extractor] Sorting edges by start ... " << flush;
std::cout << "[extractor] Sorting edges by start ... " << std::flush;
stxxl::sort(allEdges.begin(), allEdges.end(), CmpEdgeByStartID(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Setting start coords ... " << flush;
std::cout << "[extractor] Setting start coords ... " << std::flush;
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
// Traverse list of edges and nodes in parallel and set start coord
nodesIT = allNodes.begin();
@ -191,16 +191,16 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
++edgeIT;
}
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
// Sort Edges by target
cout << "[extractor] Sorting edges by target ... " << flush;
std::cout << "[extractor] Sorting edges by target ... " << std::flush;
stxxl::sort(allEdges.begin(), allEdges.end(), CmpEdgeByTargetID(), memory_to_use);
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
time = get_timestamp();
cout << "[extractor] Setting target coords ... " << flush;
std::cout << "[extractor] Setting target coords ... " << std::flush;
// Traverse list of edges and nodes in parallel and set target coord
nodesIT = allNodes.begin();
edgeIT = allEdges.begin();
@ -245,7 +245,7 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
fout.write((char*)&one, sizeof(short));
break;
default:
cerr << "[error] edge with no direction: " << edgeIT->direction << endl;
std::cerr << "[error] edge with no direction: " << edgeIT->direction << std::endl;
assert(false);
break;
}
@ -261,28 +261,28 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
++edgeIT;
}
}
cout << "ok, after " << get_timestamp() - time << "s" << endl;
cout << "[extractor] setting number of edges ... " << flush;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
std::cout << "[extractor] setting number of edges ... " << std::flush;
fout.seekp(positionInFile);
fout.write((char*)&usedEdgeCounter, sizeof(unsigned));
fout.close();
cout << "ok" << endl;
std::cout << "ok" << std::endl;
time = get_timestamp();
cout << "[extractor] writing street name index ... " << flush;
std::cout << "[extractor] writing street name index ... " << std::flush;
std::string nameOutFileName = (outputFileName + ".names");
ofstream nameOutFile(nameOutFileName.c_str(), ios::binary);
std::ofstream nameOutFile(nameOutFileName.c_str(), std::ios::binary);
unsigned sizeOfNameIndex = nameVector.size();
nameOutFile.write((char *)&(sizeOfNameIndex), sizeof(unsigned));
BOOST_FOREACH(string str, nameVector) {
BOOST_FOREACH(const std::string & str, nameVector) {
unsigned lengthOfRawString = strlen(str.c_str());
nameOutFile.write((char *)&(lengthOfRawString), sizeof(unsigned));
nameOutFile.write(str.c_str(), lengthOfRawString);
}
nameOutFile.close();
cout << "ok, after " << get_timestamp() - time << "s" << endl;
std::cout << "ok, after " << get_timestamp() - time << "s" << std::endl;
// time = get_timestamp();
// cout << "[extractor] writing address list ... " << flush;
@ -298,8 +298,8 @@ void ExtractionContainers::PrepareData(const std::string & outputFileName, const
INFO("Processed " << usedNodeCounter << " nodes and " << usedEdgeCounter << " edges");
} catch ( const exception& e ) {
cerr << "Caught Execption:" << e.what() << endl;
} catch ( const std::exception& e ) {
std::cerr << "Caught Execption:" << e.what() << std::endl;
}
}

View File

@ -21,13 +21,18 @@ or see http://www.gnu.org/licenses/agpl.txt.
#ifndef TYPEDEFS_H_
#define TYPEDEFS_H_
#include <cmath>
#include <climits>
#include <cstdlib>
// To fix long and long long woes
#include <boost/integer.hpp>
#include <boost/integer_traits.hpp>
#ifdef __APPLE__
#include <signal.h>
#endif
#include <cmath>
#include <climits>
#include <cstdlib>
#include <iostream>
using namespace std;
@ -66,7 +71,7 @@ typedef unsigned int NodeID;
typedef unsigned int EdgeID;
typedef unsigned int EdgeWeight;
static const NodeID SPECIAL_NODEID = UINT_MAX;
static const EdgeID SPECIAL_EDGEID = UINT_MAX;
static const NodeID SPECIAL_NODEID = boost::integer_traits<uint32_t>::const_max;
static const EdgeID SPECIAL_EDGEID = boost::integer_traits<uint32_t>::const_max;
#endif /* TYPEDEFS_H_ */