Refactor file loading to use a common class that has proper error handling.

This commit is contained in:
Daniel Patterson
2016-11-11 05:52:21 -08:00
parent e226b52f21
commit 4ad6d88888
6 changed files with 288 additions and 336 deletions
+141 -29
View File
@@ -8,10 +8,14 @@
#include "util/fingerprint.hpp"
#include "util/simple_logger.hpp"
#include "util/static_graph.hpp"
#include "util/exception.hpp"
#include <boost/filesystem/fstream.hpp>
#include <boost/iostreams/seek.hpp>
#include <tuple>
#include <cstring>
#include <cerrno>
namespace osrm
{
@@ -20,6 +24,122 @@ namespace storage
namespace io
{
class File
{
private:
std::string filename;
boost::filesystem::ifstream input_stream;
public:
File(const std::string &filename, const bool check_fingerprint = false)
: File(boost::filesystem::path(filename), check_fingerprint)
{
}
File(const boost::filesystem::path &filename_, const bool check_fingerprint = false)
{
filename = filename_.string();
input_stream.open(filename_, std::ios::binary);
if (!input_stream)
throw util::exception("Error opening " + filename + ":" + std::strerror(errno));
if (check_fingerprint && !readAndCheckFingerprint())
{
throw util::exception("Fingerprint mismatch in " + filename);
}
}
/* Read count objects of type T into pointer dest */
template <typename T> void readInto(T *dest, const std::size_t count)
{
static_assert(std::is_trivially_copyable<T>::value,
"bytewise reading requires trivially copyable type");
if (count == 0)
return;
input_stream.read(reinterpret_cast<char *>(dest), count * sizeof(T));
// safe to cast here, according to CPP docs, negative values for gcount
// are never used.
const unsigned long bytes_read = static_cast<unsigned long>(input_stream.gcount());
const auto expected_bytes = count * sizeof(T);
if (bytes_read == 0)
{
throw util::exception("Error reading from " + filename + ": " + std::strerror(errno));
}
else if (bytes_read < expected_bytes)
{
throw util::exception("Error reading from " + filename + ": Unexpected end of file");
}
}
template <typename T> void readInto(T &target) { readInto(&target, 1); }
template <typename T> T readOne()
{
T tmp;
readInto(tmp);
return tmp;
}
template <typename T> void skip(const std::size_t element_count)
{
boost::iostreams::seek(input_stream, element_count * sizeof(T), BOOST_IOS::cur);
}
/*******************************************/
std::uint32_t readElementCount32() { return readOne<std::uint32_t>(); }
std::uint64_t readElementCount64() { return readOne<std::uint64_t>(); }
template <typename T> void deserializeVector(std::vector<T> &data)
{
const auto count = readElementCount64();
data.resize(count);
readInto(data.data(), count);
}
bool readAndCheckFingerprint()
{
auto fingerprint = readOne<util::FingerPrint>();
const auto valid = util::FingerPrint::GetValid();
// compare the compilation state stored in the fingerprint
return valid.IsMagicNumberOK(fingerprint) && valid.TestContractor(fingerprint) &&
valid.TestGraphUtil(fingerprint) && valid.TestRTree(fingerprint) &&
valid.TestQueryObjects(fingerprint);
}
std::size_t size()
{
auto current_pos = input_stream.tellg();
input_stream.seekg(0, input_stream.end);
auto length = input_stream.tellg();
input_stream.seekg(current_pos, input_stream.beg);
return length;
}
std::vector<std::string> readLines()
{
std::vector<std::string> result;
std::string thisline;
try
{
while (std::getline(input_stream, thisline))
{
std::clog << "Read " << thisline << std::endl;
result.push_back(thisline);
}
}
catch (const std::ios_base::failure &e)
{
// EOF is OK here, everything else, re-throw
if (!input_stream.eof())
throw;
}
return result;
}
};
// Reads the count of elements that is written in the file header and returns the number
inline std::uint64_t readElementCount64(boost::filesystem::ifstream &input_stream)
{
@@ -59,11 +179,10 @@ static_assert(sizeof(HSGRHeader) == 20, "HSGRHeader is not packed");
// Reads the checksum, number of nodes and number of edges written in the header file of a `.hsgr`
// file and returns them in a HSGRHeader struct
inline HSGRHeader readHSGRHeader(boost::filesystem::ifstream &input_stream)
inline HSGRHeader readHSGRHeader(io::File &input_file)
{
const util::FingerPrint fingerprint_valid = util::FingerPrint::GetValid();
util::FingerPrint fingerprint_loaded;
input_stream.read(reinterpret_cast<char *>(&fingerprint_loaded), sizeof(util::FingerPrint));
const auto fingerprint_loaded = input_file.readOne<util::FingerPrint>();
if (!fingerprint_loaded.TestGraphUtil(fingerprint_valid))
{
util::SimpleLogger().Write(logWARNING) << ".hsgr was prepared with different build.\n"
@@ -71,11 +190,9 @@ inline HSGRHeader readHSGRHeader(boost::filesystem::ifstream &input_stream)
}
HSGRHeader header;
input_stream.read(reinterpret_cast<char *>(&header.checksum), sizeof(header.checksum));
input_stream.read(reinterpret_cast<char *>(&header.number_of_nodes),
sizeof(header.number_of_nodes));
input_stream.read(reinterpret_cast<char *>(&header.number_of_edges),
sizeof(header.number_of_edges));
input_file.readInto(header.checksum);
input_file.readInto(header.number_of_nodes);
input_file.readInto(header.number_of_edges);
BOOST_ASSERT_MSG(0 != header.number_of_nodes, "number of nodes is zero");
// number of edges can be zero, this is the case in a few test fixtures
@@ -87,7 +204,7 @@ inline HSGRHeader readHSGRHeader(boost::filesystem::ifstream &input_stream)
// Needs to be called after readHSGRHeader() to get the correct offset in the stream
using NodeT = typename util::StaticGraph<contractor::QueryEdge::EdgeData>::NodeArrayEntry;
using EdgeT = typename util::StaticGraph<contractor::QueryEdge::EdgeData>::EdgeArrayEntry;
inline void readHSGR(boost::filesystem::ifstream &input_stream,
inline void readHSGR(File &input_file,
NodeT *node_buffer,
const std::uint64_t number_of_nodes,
EdgeT *edge_buffer,
@@ -95,17 +212,17 @@ inline void readHSGR(boost::filesystem::ifstream &input_stream,
{
BOOST_ASSERT(node_buffer);
BOOST_ASSERT(edge_buffer);
input_stream.read(reinterpret_cast<char *>(node_buffer), number_of_nodes * sizeof(NodeT));
input_stream.read(reinterpret_cast<char *>(edge_buffer), number_of_edges * sizeof(EdgeT));
input_file.readInto(node_buffer, number_of_nodes);
input_file.readInto(edge_buffer, number_of_edges);
}
// Loads properties from a `.properties` file into memory
inline void readProperties(boost::filesystem::ifstream &properties_stream,
inline void readProperties(File &properties_file,
extractor::ProfileProperties *properties,
const std::size_t properties_size)
{
BOOST_ASSERT(properties);
properties_stream.read(reinterpret_cast<char *>(properties), properties_size);
properties_file.readInto(properties, properties_size);
}
// Reads the timestamp in a `.timestamp` file
@@ -120,19 +237,18 @@ inline void readTimestamp(boost::filesystem::ifstream &timestamp_input_stream,
// Loads datasource_indexes from .datasource_indexes into memory
// Needs to be called after readElementCount() to get the correct offset in the stream
inline void readDatasourceIndexes(boost::filesystem::ifstream &datasource_indexes_input_stream,
inline void readDatasourceIndexes(File &datasource_indexes_file,
uint8_t *datasource_buffer,
const std::uint64_t number_of_datasource_indexes)
{
BOOST_ASSERT(datasource_buffer);
datasource_indexes_input_stream.read(reinterpret_cast<char *>(datasource_buffer),
number_of_datasource_indexes * sizeof(std::uint8_t));
datasource_indexes_file.readInto(datasource_buffer, number_of_datasource_indexes);
}
// Loads edge data from .edge files into memory which includes its
// geometry, name ID, turn instruction, lane data ID, travel mode, entry class ID
// Needs to be called after readElementCount() to get the correct offset in the stream
inline void readEdges(boost::filesystem::ifstream &edges_input_stream,
inline void readEdges(File &edges_input_file,
GeometryID *geometry_list,
NameID *name_id_list,
extractor::guidance::TurnInstruction *turn_instruction_list,
@@ -152,7 +268,7 @@ inline void readEdges(boost::filesystem::ifstream &edges_input_stream,
extractor::OriginalEdgeData current_edge_data;
for (std::uint64_t i = 0; i < number_of_edges; ++i)
{
edges_input_stream.read((char *)&(current_edge_data), sizeof(extractor::OriginalEdgeData));
edges_input_file.readInto(current_edge_data);
geometry_list[i] = current_edge_data.via_geometry;
name_id_list[i] = current_edge_data.name_id;
@@ -168,7 +284,7 @@ inline void readEdges(boost::filesystem::ifstream &edges_input_stream,
// Loads coordinates and OSM node IDs from .nodes files into memory
// Needs to be called after readElementCount() to get the correct offset in the stream
template <typename OSMNodeIDVectorT>
void readNodes(boost::filesystem::ifstream &nodes_input_stream,
void readNodes(io::File &nodes_file,
util::Coordinate *coordinate_list,
OSMNodeIDVectorT &osmnodeid_list,
const std::uint64_t number_of_coordinates)
@@ -177,7 +293,7 @@ void readNodes(boost::filesystem::ifstream &nodes_input_stream,
extractor::QueryNode current_node;
for (std::uint64_t i = 0; i < number_of_coordinates; ++i)
{
nodes_input_stream.read((char *)&current_node, sizeof(extractor::QueryNode));
nodes_file.readInto(current_node);
coordinate_list[i] = util::Coordinate(current_node.lon, current_node.lat);
osmnodeid_list.push_back(current_node.node_id);
BOOST_ASSERT(coordinate_list[i].IsValid());
@@ -192,12 +308,11 @@ struct DatasourceNamesData
std::vector<std::size_t> offsets;
std::vector<std::size_t> lengths;
};
inline DatasourceNamesData
readDatasourceNames(boost::filesystem::ifstream &datasource_names_input_stream)
inline DatasourceNamesData readDatasourceNames(io::File &datasource_names_file)
{
DatasourceNamesData datasource_names_data;
std::string name;
while (std::getline(datasource_names_input_stream, name))
std::vector<std::string> lines = datasource_names_file.readLines();
for (const auto &name : lines)
{
datasource_names_data.offsets.push_back(datasource_names_data.names.size());
datasource_names_data.lengths.push_back(name.size());
@@ -214,13 +329,10 @@ readDatasourceNames(boost::filesystem::ifstream &datasource_names_input_stream)
// NB Cannot be written without templated type because of cyclic dependencies between
// `static_rtree.hpp` and `io.hpp`
template <typename RTreeNodeT>
void readRamIndex(boost::filesystem::ifstream &ram_index_input_stream,
RTreeNodeT *rtree_buffer,
const std::uint64_t tree_size)
void readRamIndex(File &ram_index_file, RTreeNodeT *rtree_buffer, const std::uint64_t tree_size)
{
BOOST_ASSERT(rtree_buffer);
ram_index_input_stream.read(reinterpret_cast<char *>(rtree_buffer),
sizeof(RTreeNodeT) * tree_size);
ram_index_file.readInto(rtree_buffer, tree_size);
}
}
}
+23 -1
View File
@@ -17,6 +17,7 @@ namespace util
namespace guidance
{
class LaneTuple;
class LaneTupleIdPair;
} // namespace guidance
} // namespace util
} // namespace osrm
@@ -27,6 +28,11 @@ template <> struct hash<::osrm::util::guidance::LaneTuple>
{
inline std::size_t operator()(const ::osrm::util::guidance::LaneTuple &bearing_class) const;
};
// std::hash specialization for LaneTupleIdPair. Only the call operator is declared here;
// its definition is not part of this declaration.
template <> struct hash<::osrm::util::guidance::LaneTupleIdPair>
{
    inline std::size_t
    operator()(const ::osrm::util::guidance::LaneTupleIdPair &lane_tuple_id_pair) const;
};
} // namespace std
namespace osrm
@@ -73,7 +79,23 @@ class LaneTuple
}
};
using LaneTupleIdPair = std::pair<util::guidance::LaneTuple, LaneDescriptionID>;
// A lane tuple together with a lane description ID, with public members named `first`
// and `second`.
class LaneTupleIdPair
{
  public:
    util::guidance::LaneTuple first;
    LaneDescriptionID second;

    // Declared here, defined elsewhere.
    bool operator==(const LaneTupleIdPair &other) const;

    // Folds the hash of `first` and then the hash of `second` into a seed that starts at zero.
    friend std::size_t hash_value(const LaneTupleIdPair &pair)
    {
        std::size_t combined = 0;
        boost::hash_combine(combined, pair.first);
        boost::hash_combine(combined, pair.second);
        return combined;
    }
};
} // namespace guidance
} // namespace util
} // namespace osrm
+9 -26
View File
@@ -15,6 +15,7 @@
#include <vector>
#include "util/fingerprint.hpp"
#include "storage/io.hpp"
namespace osrm
{
@@ -28,17 +29,6 @@ inline bool writeFingerprint(std::ostream &stream)
return static_cast<bool>(stream);
}
inline bool readAndCheckFingerprint(std::istream &stream)
{
FingerPrint fingerprint;
const auto valid = FingerPrint::GetValid();
stream.read(reinterpret_cast<char *>(&fingerprint), sizeof(fingerprint));
// compare the compilation state stored in the fingerprint
return static_cast<bool>(stream) && valid.IsMagicNumberOK(fingerprint) &&
valid.TestContractor(fingerprint) && valid.TestGraphUtil(fingerprint) &&
valid.TestRTree(fingerprint) && valid.TestQueryObjects(fingerprint);
}
template <typename simple_type>
bool serializeVector(const std::string &filename, const std::vector<simple_type> &data)
{
@@ -66,17 +56,14 @@ bool serializeVector(std::ostream &stream, const std::vector<simple_type> &data)
template <typename simple_type>
bool deserializeVector(const std::string &filename, std::vector<simple_type> &data)
{
std::ifstream stream(filename, std::ios::binary);
if (!readAndCheckFingerprint(stream))
return false;
storage::io::File file(filename, true);
std::uint64_t count = 0;
stream.read(reinterpret_cast<char *>(&count), sizeof(count));
const auto count = file.readElementCount64();
data.resize(count);
if (count)
stream.read(reinterpret_cast<char *>(&data[0]), sizeof(simple_type) * count);
return static_cast<bool>(stream);
file.readInto(data.data(), count);
return true;
}
template <typename simple_type>
@@ -199,13 +186,9 @@ inline bool serializeFlags(const boost::filesystem::path &path, const std::vecto
inline bool deserializeFlags(const boost::filesystem::path &path, std::vector<bool> &flags)
{
SimpleLogger().Write() << "Reading flags from " << path;
std::ifstream flag_stream(path.string(), std::ios::binary);
storage::io::File flag_file(path, true);
if (!readAndCheckFingerprint(flag_stream))
return false;
std::uint32_t number_of_bits;
flag_stream.read(reinterpret_cast<char *>(&number_of_bits), sizeof(number_of_bits));
const auto number_of_bits = flag_file.readOne<std::uint32_t>();
flags.resize(number_of_bits);
// putting bits in ints
std::uint32_t chunks = (number_of_bits + 31) / 32;
@@ -213,14 +196,14 @@ inline bool deserializeFlags(const boost::filesystem::path &path, std::vector<bo
std::uint32_t chunk;
for (std::size_t chunk_id = 0; chunk_id < chunks; ++chunk_id)
{
flag_stream.read(reinterpret_cast<char *>(&chunk), sizeof(chunk));
flag_file.readInto(chunk);
std::bitset<32> chunk_bits(chunk);
for (std::size_t bit = 0; bit < 32 && bit_position < number_of_bits; ++bit, ++bit_position)
flags[bit_position] = chunk_bits[bit];
}
SimpleLogger().Write() << "Read " << number_of_bits << " bits in " << chunks
<< " Chunks from disk.";
return static_cast<bool>(flag_stream);
return true;
}
} // namespace util
} // namespace osrm
+2 -11
View File
@@ -346,18 +346,9 @@ class StaticRTree
const CoordinateListT &coordinate_list)
: m_coordinate_list(coordinate_list)
{
// open tree node file and load into RAM.
if (!boost::filesystem::exists(node_file))
{
throw exception("ram index file does not exist");
}
if (boost::filesystem::file_size(node_file) == 0)
{
throw exception("ram index file is empty");
}
boost::filesystem::ifstream tree_node_file(node_file, std::ios::binary);
storage::io::File tree_node_file(node_file);
const auto tree_size = storage::io::readElementCount64(tree_node_file);
const auto tree_size = tree_node_file.readElementCount64();
m_search_tree.resize(tree_size);
storage::io::readRamIndex(tree_node_file, &m_search_tree[0], tree_size);