Enable just-before-contraction updates to edge weights. For small
datasets, this enables things like traffic-based updates in the shortest possible processing turnaround time.
This commit is contained in:
parent
578d947e2d
commit
6228412e61
@ -130,4 +130,6 @@ void ContractorOptions::GenerateOutputFilesNames(ContractorConfig &contractor_co
|
|||||||
contractor_config.core_output_path = contractor_config.osrm_input_path.string() + ".core";
|
contractor_config.core_output_path = contractor_config.osrm_input_path.string() + ".core";
|
||||||
contractor_config.graph_output_path = contractor_config.osrm_input_path.string() + ".hsgr";
|
contractor_config.graph_output_path = contractor_config.osrm_input_path.string() + ".hsgr";
|
||||||
contractor_config.edge_based_graph_path = contractor_config.osrm_input_path.string() + ".ebg";
|
contractor_config.edge_based_graph_path = contractor_config.osrm_input_path.string() + ".ebg";
|
||||||
|
contractor_config.edge_segment_lookup_path = contractor_config.osrm_input_path.string() + ".edge_segment_lookup";
|
||||||
|
contractor_config.edge_penalty_path = contractor_config.osrm_input_path.string() + ".edge_penalties";
|
||||||
}
|
}
|
||||||
|
@ -52,6 +52,8 @@ struct ContractorConfig
|
|||||||
std::string graph_output_path;
|
std::string graph_output_path;
|
||||||
std::string edge_based_graph_path;
|
std::string edge_based_graph_path;
|
||||||
|
|
||||||
|
std::string edge_segment_lookup_path;
|
||||||
|
std::string edge_penalty_path;
|
||||||
bool use_cached_priority;
|
bool use_cached_priority;
|
||||||
|
|
||||||
unsigned requested_num_threads;
|
unsigned requested_num_threads;
|
||||||
@ -61,6 +63,9 @@ struct ContractorConfig
|
|||||||
//The remaining vertices form the core of the hierarchy
|
//The remaining vertices form the core of the hierarchy
|
||||||
//(e.g. 0.8 contracts 80 percent of the hierarchy, leaving a core of 20%)
|
//(e.g. 0.8 contracts 80 percent of the hierarchy, leaving a core of 20%)
|
||||||
double core_factor;
|
double core_factor;
|
||||||
|
|
||||||
|
std::string segment_speed_lookup_path;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ContractorOptions
|
struct ContractorOptions
|
||||||
|
@ -28,6 +28,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "processing_chain.hpp"
|
#include "processing_chain.hpp"
|
||||||
#include "contractor.hpp"
|
#include "contractor.hpp"
|
||||||
|
|
||||||
|
#include "contractor.hpp"
|
||||||
|
|
||||||
#include "../data_structures/deallocating_vector.hpp"
|
#include "../data_structures/deallocating_vector.hpp"
|
||||||
|
|
||||||
#include "../algorithms/crc32_processor.hpp"
|
#include "../algorithms/crc32_processor.hpp"
|
||||||
@ -40,6 +42,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "../util/timing_util.hpp"
|
#include "../util/timing_util.hpp"
|
||||||
#include "../typedefs.h"
|
#include "../typedefs.h"
|
||||||
|
|
||||||
|
#include <fast-cpp-csv-parser/csv.h>
|
||||||
|
|
||||||
#include <boost/filesystem/fstream.hpp>
|
#include <boost/filesystem/fstream.hpp>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
@ -64,6 +68,11 @@ int Prepare::Run()
|
|||||||
"changing EdgeBasedEdge type has influence on memory consumption!");
|
"changing EdgeBasedEdge type has influence on memory consumption!");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (config.core_factor > 1.0 || config.core_factor < 0)
|
||||||
|
{
|
||||||
|
throw osrm::exception("Core factor must be between 0.0 to 1.0 (inclusive)");
|
||||||
|
}
|
||||||
|
|
||||||
TIMER_START(preparing);
|
TIMER_START(preparing);
|
||||||
|
|
||||||
// Create a new lua state
|
// Create a new lua state
|
||||||
@ -72,7 +81,9 @@ int Prepare::Run()
|
|||||||
|
|
||||||
DeallocatingVector<EdgeBasedEdge> edge_based_edge_list;
|
DeallocatingVector<EdgeBasedEdge> edge_based_edge_list;
|
||||||
|
|
||||||
size_t max_edge_id = LoadEdgeExpandedGraph(config.edge_based_graph_path, edge_based_edge_list);
|
size_t max_edge_id = LoadEdgeExpandedGraph(
|
||||||
|
config.edge_based_graph_path, edge_based_edge_list, config.edge_segment_lookup_path,
|
||||||
|
config.edge_penalty_path, config.segment_speed_lookup_path);
|
||||||
|
|
||||||
// Contracting the edge-expanded graph
|
// Contracting the edge-expanded graph
|
||||||
|
|
||||||
@ -89,8 +100,7 @@ int Prepare::Run()
|
|||||||
|
|
||||||
SimpleLogger().Write() << "Contraction took " << TIMER_SEC(contraction) << " sec";
|
SimpleLogger().Write() << "Contraction took " << TIMER_SEC(contraction) << " sec";
|
||||||
|
|
||||||
std::size_t number_of_used_edges =
|
std::size_t number_of_used_edges = WriteContractedGraph(max_edge_id, contracted_edge_list);
|
||||||
WriteContractedGraph(max_edge_id, contracted_edge_list);
|
|
||||||
WriteCoreNodeMarker(std::move(is_core_node));
|
WriteCoreNodeMarker(std::move(is_core_node));
|
||||||
if (!config.use_cached_priority)
|
if (!config.use_cached_priority)
|
||||||
{
|
{
|
||||||
@ -109,12 +119,42 @@ int Prepare::Run()
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t Prepare::LoadEdgeExpandedGraph(
|
namespace std
|
||||||
std::string const & edge_based_graph_filename,
|
{
|
||||||
DeallocatingVector<EdgeBasedEdge> & edge_based_edge_list)
|
|
||||||
|
template <> struct hash<std::pair<unsigned, unsigned>>
|
||||||
|
{
|
||||||
|
std::size_t operator()(const std::pair<unsigned, unsigned> &k) const
|
||||||
|
{
|
||||||
|
return k.first ^ (k.second << 12);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t Prepare::LoadEdgeExpandedGraph(std::string const &edge_based_graph_filename,
|
||||||
|
DeallocatingVector<EdgeBasedEdge> &edge_based_edge_list,
|
||||||
|
const std::string &edge_segment_lookup_filename,
|
||||||
|
const std::string &edge_penalty_filename,
|
||||||
|
const std::string &segment_speed_filename)
|
||||||
{
|
{
|
||||||
SimpleLogger().Write() << "Opening " << edge_based_graph_filename;
|
SimpleLogger().Write() << "Opening " << edge_based_graph_filename;
|
||||||
boost::filesystem::ifstream input_stream(edge_based_graph_filename, std::ios::in | std::ios::binary);
|
boost::filesystem::ifstream input_stream(edge_based_graph_filename, std::ios::binary);
|
||||||
|
|
||||||
|
const bool update_edge_weights = segment_speed_filename != "";
|
||||||
|
|
||||||
|
boost::filesystem::ifstream edge_segment_input_stream;
|
||||||
|
boost::filesystem::ifstream edge_fixed_penalties_input_stream;
|
||||||
|
|
||||||
|
if (update_edge_weights)
|
||||||
|
{
|
||||||
|
edge_segment_input_stream.open(edge_segment_lookup_filename, std::ios::binary);
|
||||||
|
edge_fixed_penalties_input_stream.open(edge_penalty_filename, std::ios::binary);
|
||||||
|
if (!edge_segment_input_stream || !edge_fixed_penalties_input_stream)
|
||||||
|
{
|
||||||
|
throw osrm::exception("Could not load .edge_segment_lookup or .edge_penalties, did you "
|
||||||
|
"run osrm-extract with '--generate-edge-lookup'?");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const FingerPrint fingerprint_valid = FingerPrint::GetValid();
|
const FingerPrint fingerprint_valid = FingerPrint::GetValid();
|
||||||
FingerPrint fingerprint_loaded;
|
FingerPrint fingerprint_loaded;
|
||||||
@ -129,11 +169,82 @@ std::size_t Prepare::LoadEdgeExpandedGraph(
|
|||||||
edge_based_edge_list.resize(number_of_edges);
|
edge_based_edge_list.resize(number_of_edges);
|
||||||
SimpleLogger().Write() << "Reading " << number_of_edges << " edges from the edge based graph";
|
SimpleLogger().Write() << "Reading " << number_of_edges << " edges from the edge based graph";
|
||||||
|
|
||||||
|
std::unordered_map<std::pair<unsigned, unsigned>, unsigned> segment_speed_lookup;
|
||||||
|
|
||||||
|
if (update_edge_weights)
|
||||||
|
{
|
||||||
|
SimpleLogger().Write() << "Segment speed data supplied, will update edge weights from "
|
||||||
|
<< segment_speed_filename;
|
||||||
|
io::CSVReader<3> csv_in(segment_speed_filename);
|
||||||
|
csv_in.set_header("from_node", "to_node", "speed");
|
||||||
|
unsigned from_node_id;
|
||||||
|
unsigned to_node_id;
|
||||||
|
unsigned speed;
|
||||||
|
while (csv_in.read_row(from_node_id, to_node_id, speed))
|
||||||
|
{
|
||||||
|
segment_speed_lookup[std::pair<unsigned, unsigned>(from_node_id, to_node_id)] = speed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: can we read this in bulk? DeallocatingVector isn't necessarily
|
// TODO: can we read this in bulk? DeallocatingVector isn't necessarily
|
||||||
// all stored contiguously
|
// all stored contiguously
|
||||||
for (;number_of_edges > 0; --number_of_edges) {
|
for (; number_of_edges > 0; --number_of_edges)
|
||||||
|
{
|
||||||
EdgeBasedEdge inbuffer;
|
EdgeBasedEdge inbuffer;
|
||||||
input_stream.read((char *)&inbuffer, sizeof(EdgeBasedEdge));
|
input_stream.read((char *)&inbuffer, sizeof(EdgeBasedEdge));
|
||||||
|
|
||||||
|
if (update_edge_weights)
|
||||||
|
{
|
||||||
|
// Processing-time edge updates
|
||||||
|
unsigned fixed_penalty;
|
||||||
|
edge_fixed_penalties_input_stream.read(reinterpret_cast<char *>(&fixed_penalty),
|
||||||
|
sizeof(fixed_penalty));
|
||||||
|
|
||||||
|
int new_weight = 0;
|
||||||
|
|
||||||
|
unsigned num_osm_nodes = 0;
|
||||||
|
edge_segment_input_stream.read(reinterpret_cast<char *>(&num_osm_nodes),
|
||||||
|
sizeof(num_osm_nodes));
|
||||||
|
NodeID previous_osm_node_id;
|
||||||
|
edge_segment_input_stream.read(reinterpret_cast<char *>(&previous_osm_node_id),
|
||||||
|
sizeof(previous_osm_node_id));
|
||||||
|
NodeID this_osm_node_id;
|
||||||
|
double segment_length;
|
||||||
|
int segment_weight;
|
||||||
|
--num_osm_nodes;
|
||||||
|
for (; num_osm_nodes != 0; --num_osm_nodes)
|
||||||
|
{
|
||||||
|
edge_segment_input_stream.read(reinterpret_cast<char *>(&this_osm_node_id),
|
||||||
|
sizeof(this_osm_node_id));
|
||||||
|
edge_segment_input_stream.read(reinterpret_cast<char *>(&segment_length),
|
||||||
|
sizeof(segment_length));
|
||||||
|
edge_segment_input_stream.read(reinterpret_cast<char *>(&segment_weight),
|
||||||
|
sizeof(segment_weight));
|
||||||
|
|
||||||
|
auto speed_iter = segment_speed_lookup.find(
|
||||||
|
std::pair<unsigned, unsigned>(previous_osm_node_id, this_osm_node_id));
|
||||||
|
if (speed_iter != segment_speed_lookup.end())
|
||||||
|
{
|
||||||
|
// This sets the segment weight using the same formula as the
|
||||||
|
// EdgeBasedGraphFactory for consistency. The *why* of this formula
|
||||||
|
// is lost in the annals of time.
|
||||||
|
int new_segment_weight =
|
||||||
|
std::max(1, static_cast<int>(std::floor(
|
||||||
|
(segment_length * 10.) / (speed_iter->second / 3.6) + .5)));
|
||||||
|
new_weight += new_segment_weight;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// If no lookup found, use the original weight value for this segment
|
||||||
|
new_weight += segment_weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
previous_osm_node_id = this_osm_node_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
inbuffer.weight = fixed_penalty + new_weight;
|
||||||
|
}
|
||||||
|
|
||||||
edge_based_edge_list.emplace_back(std::move(inbuffer));
|
edge_based_edge_list.emplace_back(std::move(inbuffer));
|
||||||
}
|
}
|
||||||
SimpleLogger().Write() << "Done reading edges";
|
SimpleLogger().Write() << "Done reading edges";
|
||||||
@ -295,8 +406,6 @@ std::size_t Prepare::WriteContractedGraph(unsigned max_node_id,
|
|||||||
return number_of_used_edges;
|
return number_of_used_edges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
\brief Build contracted graph.
|
\brief Build contracted graph.
|
||||||
*/
|
*/
|
||||||
@ -315,5 +424,3 @@ void Prepare::ContractGraph(const unsigned max_edge_id,
|
|||||||
contractor.GetCoreMarker(is_core_node);
|
contractor.GetCoreMarker(is_core_node);
|
||||||
contractor.GetNodeLevels(inout_node_levels);
|
contractor.GetNodeLevels(inout_node_levels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,11 +70,14 @@ class Prepare
|
|||||||
void FindComponents(unsigned max_edge_id,
|
void FindComponents(unsigned max_edge_id,
|
||||||
const DeallocatingVector<EdgeBasedEdge> &edges,
|
const DeallocatingVector<EdgeBasedEdge> &edges,
|
||||||
std::vector<EdgeBasedNode> &nodes) const;
|
std::vector<EdgeBasedNode> &nodes) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ContractorConfig config;
|
ContractorConfig config;
|
||||||
std::size_t LoadEdgeExpandedGraph(
|
std::size_t LoadEdgeExpandedGraph(const std::string &edge_based_graph_path,
|
||||||
const std::string & edge_based_graph_path,
|
DeallocatingVector<EdgeBasedEdge> &edge_based_edge_list,
|
||||||
DeallocatingVector<EdgeBasedEdge> & edge_based_edge_list);
|
const std::string &edge_segment_lookup_path,
|
||||||
|
const std::string &edge_penalty_path,
|
||||||
|
const std::string &segment_speed_path);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // PROCESSING_CHAIN_HPP
|
#endif // PROCESSING_CHAIN_HPP
|
||||||
|
@ -26,12 +26,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "edge_based_graph_factory.hpp"
|
#include "edge_based_graph_factory.hpp"
|
||||||
|
#include "../algorithms/coordinate_calculation.hpp"
|
||||||
#include "../data_structures/percent.hpp"
|
#include "../data_structures/percent.hpp"
|
||||||
#include "../util/compute_angle.hpp"
|
#include "../util/compute_angle.hpp"
|
||||||
#include "../util/integer_range.hpp"
|
#include "../util/integer_range.hpp"
|
||||||
#include "../util/lua_util.hpp"
|
#include "../util/lua_util.hpp"
|
||||||
#include "../util/simple_logger.hpp"
|
#include "../util/simple_logger.hpp"
|
||||||
#include "../util/timing_util.hpp"
|
#include "../util/timing_util.hpp"
|
||||||
|
#include "../util/osrm_exception.hpp"
|
||||||
|
|
||||||
#include <boost/assert.hpp>
|
#include <boost/assert.hpp>
|
||||||
|
|
||||||
@ -221,7 +223,11 @@ void EdgeBasedGraphFactory::FlushVectorToStream(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void EdgeBasedGraphFactory::Run(const std::string &original_edge_data_filename,
|
void EdgeBasedGraphFactory::Run(const std::string &original_edge_data_filename,
|
||||||
lua_State *lua_state)
|
lua_State *lua_state,
|
||||||
|
const std::string &edge_segment_lookup_filename,
|
||||||
|
const std::string &edge_penalty_filename,
|
||||||
|
const bool generate_edge_lookup
|
||||||
|
)
|
||||||
{
|
{
|
||||||
TIMER_START(renumber);
|
TIMER_START(renumber);
|
||||||
m_max_edge_id = RenumberEdges() - 1;
|
m_max_edge_id = RenumberEdges() - 1;
|
||||||
@ -232,7 +238,9 @@ void EdgeBasedGraphFactory::Run(const std::string &original_edge_data_filename,
|
|||||||
TIMER_STOP(generate_nodes);
|
TIMER_STOP(generate_nodes);
|
||||||
|
|
||||||
TIMER_START(generate_edges);
|
TIMER_START(generate_edges);
|
||||||
GenerateEdgeExpandedEdges(original_edge_data_filename, lua_state);
|
GenerateEdgeExpandedEdges(original_edge_data_filename, lua_state,
|
||||||
|
edge_segment_lookup_filename,edge_penalty_filename, generate_edge_lookup
|
||||||
|
);
|
||||||
TIMER_STOP(generate_edges);
|
TIMER_STOP(generate_edges);
|
||||||
|
|
||||||
SimpleLogger().Write() << "Timing statistics for edge-expanded graph:";
|
SimpleLogger().Write() << "Timing statistics for edge-expanded graph:";
|
||||||
@ -317,7 +325,11 @@ void EdgeBasedGraphFactory::GenerateEdgeExpandedNodes()
|
|||||||
|
|
||||||
/// Actually it also generates OriginalEdgeData and serializes them...
|
/// Actually it also generates OriginalEdgeData and serializes them...
|
||||||
void EdgeBasedGraphFactory::GenerateEdgeExpandedEdges(
|
void EdgeBasedGraphFactory::GenerateEdgeExpandedEdges(
|
||||||
const std::string &original_edge_data_filename, lua_State *lua_state)
|
const std::string &original_edge_data_filename, lua_State *lua_state,
|
||||||
|
const std::string &edge_segment_lookup_filename,
|
||||||
|
const std::string &edge_fixed_penalties_filename,
|
||||||
|
const bool generate_edge_lookup
|
||||||
|
)
|
||||||
{
|
{
|
||||||
SimpleLogger().Write() << "generating edge-expanded edges";
|
SimpleLogger().Write() << "generating edge-expanded edges";
|
||||||
|
|
||||||
@ -325,6 +337,14 @@ void EdgeBasedGraphFactory::GenerateEdgeExpandedEdges(
|
|||||||
unsigned original_edges_counter = 0;
|
unsigned original_edges_counter = 0;
|
||||||
|
|
||||||
std::ofstream edge_data_file(original_edge_data_filename.c_str(), std::ios::binary);
|
std::ofstream edge_data_file(original_edge_data_filename.c_str(), std::ios::binary);
|
||||||
|
std::ofstream edge_segment_file;
|
||||||
|
std::ofstream edge_penalty_file;
|
||||||
|
|
||||||
|
if (generate_edge_lookup)
|
||||||
|
{
|
||||||
|
edge_segment_file.open(edge_segment_lookup_filename.c_str(), std::ios::binary);
|
||||||
|
edge_penalty_file.open(edge_fixed_penalties_filename.c_str(), std::ios::binary);
|
||||||
|
}
|
||||||
|
|
||||||
// writes a dummy value that is updated later
|
// writes a dummy value that is updated later
|
||||||
edge_data_file.write((char *)&original_edges_counter, sizeof(unsigned));
|
edge_data_file.write((char *)&original_edges_counter, sizeof(unsigned));
|
||||||
@ -438,6 +458,7 @@ void EdgeBasedGraphFactory::GenerateEdgeExpandedEdges(
|
|||||||
{
|
{
|
||||||
distance += speed_profile.u_turn_penalty;
|
distance += speed_profile.u_turn_penalty;
|
||||||
}
|
}
|
||||||
|
|
||||||
distance += turn_penalty;
|
distance += turn_penalty;
|
||||||
|
|
||||||
const bool edge_is_compressed = m_compressed_edge_container.HasEntryForID(e1);
|
const bool edge_is_compressed = m_compressed_edge_container.HasEntryForID(e1);
|
||||||
@ -464,6 +485,59 @@ void EdgeBasedGraphFactory::GenerateEdgeExpandedEdges(
|
|||||||
|
|
||||||
m_edge_based_edge_list.emplace_back(edge_data1.edge_id, edge_data2.edge_id,
|
m_edge_based_edge_list.emplace_back(edge_data1.edge_id, edge_data2.edge_id,
|
||||||
m_edge_based_edge_list.size(), distance, true, false);
|
m_edge_based_edge_list.size(), distance, true, false);
|
||||||
|
|
||||||
|
|
||||||
|
// Here is where we write out the mapping between the edge-expanded edges, and
|
||||||
|
// the node-based edges that are originally used to calculate the `distance`
|
||||||
|
// for the edge-expanded edges. About 40 lines back, there is:
|
||||||
|
//
|
||||||
|
// unsigned distance = edge_data1.distance;
|
||||||
|
//
|
||||||
|
// This tells us that the weight for an edge-expanded-edge is based on the weight
|
||||||
|
// of the *source* node-based edge. Therefore, we will look up the individual
|
||||||
|
// segments of the source node-based edge, and write out a mapping between
|
||||||
|
// those and the edge-based-edge ID.
|
||||||
|
// External programs can then use this mapping to quickly perform
|
||||||
|
// updates to the edge-expanded-edge based directly on its ID.
|
||||||
|
if (generate_edge_lookup)
|
||||||
|
{
|
||||||
|
unsigned fixed_penalty = distance - edge_data1.distance;
|
||||||
|
edge_penalty_file.write(reinterpret_cast<const char *>(&fixed_penalty), sizeof(fixed_penalty));
|
||||||
|
if (edge_is_compressed)
|
||||||
|
{
|
||||||
|
const auto node_based_edges = m_compressed_edge_container.GetBucketReference(e1);
|
||||||
|
NodeID previous = node_u;
|
||||||
|
|
||||||
|
const unsigned node_count = node_based_edges.size()+1;
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&node_count), sizeof(node_count));
|
||||||
|
const QueryNode &first_node = m_node_info_list[previous];
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&first_node.node_id), sizeof(first_node.node_id));
|
||||||
|
|
||||||
|
for (auto target_node : node_based_edges)
|
||||||
|
{
|
||||||
|
const QueryNode &from = m_node_info_list[previous];
|
||||||
|
const QueryNode &to = m_node_info_list[target_node.first];
|
||||||
|
const double segment_length = coordinate_calculation::great_circle_distance(from.lat, from.lon, to.lat, to.lon);
|
||||||
|
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&to.node_id), sizeof(to.node_id));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&segment_length), sizeof(segment_length));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&target_node.second), sizeof(target_node.second));
|
||||||
|
previous = target_node.first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
static const unsigned node_count = 2;
|
||||||
|
const QueryNode from = m_node_info_list[node_u];
|
||||||
|
const QueryNode to = m_node_info_list[node_v];
|
||||||
|
const double segment_length = coordinate_calculation::great_circle_distance(from.lat, from.lon, to.lat, to.lon);
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&node_count), sizeof(node_count));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&from.node_id), sizeof(from.node_id));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&to.node_id), sizeof(to.node_id));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&segment_length), sizeof(segment_length));
|
||||||
|
edge_segment_file.write(reinterpret_cast<const char *>(&edge_data1.distance), sizeof(edge_data1.distance));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,11 @@ class EdgeBasedGraphFactory
|
|||||||
SpeedProfileProperties speed_profile);
|
SpeedProfileProperties speed_profile);
|
||||||
|
|
||||||
void Run(const std::string &original_edge_data_filename,
|
void Run(const std::string &original_edge_data_filename,
|
||||||
lua_State *lua_state);
|
lua_State *lua_state,
|
||||||
|
const std::string &edge_segment_lookup_filename,
|
||||||
|
const std::string &edge_penalty_filename,
|
||||||
|
const bool generate_edge_lookup
|
||||||
|
);
|
||||||
|
|
||||||
void GetEdgeBasedEdges(DeallocatingVector<EdgeBasedEdge> &edges);
|
void GetEdgeBasedEdges(DeallocatingVector<EdgeBasedEdge> &edges);
|
||||||
|
|
||||||
@ -100,7 +104,11 @@ class EdgeBasedGraphFactory
|
|||||||
unsigned RenumberEdges();
|
unsigned RenumberEdges();
|
||||||
void GenerateEdgeExpandedNodes();
|
void GenerateEdgeExpandedNodes();
|
||||||
void GenerateEdgeExpandedEdges(const std::string &original_edge_data_filename,
|
void GenerateEdgeExpandedEdges(const std::string &original_edge_data_filename,
|
||||||
lua_State *lua_state);
|
lua_State *lua_state,
|
||||||
|
const std::string &edge_segment_lookup_filename,
|
||||||
|
const std::string &edge_fixed_penalties_filename,
|
||||||
|
const bool generate_edge_lookup
|
||||||
|
);
|
||||||
|
|
||||||
void InsertEdgeBasedNode(const NodeID u, const NodeID v);
|
void InsertEdgeBasedNode(const NodeID u, const NodeID v);
|
||||||
|
|
||||||
|
@ -532,7 +532,11 @@ extractor::BuildEdgeExpandedGraph(std::vector<QueryNode> &internal_to_external_n
|
|||||||
|
|
||||||
compressed_edge_container.SerializeInternalVector(config.geometry_output_path);
|
compressed_edge_container.SerializeInternalVector(config.geometry_output_path);
|
||||||
|
|
||||||
edge_based_graph_factory.Run(config.edge_output_path, lua_state);
|
edge_based_graph_factory.Run(config.edge_output_path, lua_state,
|
||||||
|
config.edge_segment_lookup_path,
|
||||||
|
config.edge_penalty_path,
|
||||||
|
config.generate_edge_lookup
|
||||||
|
);
|
||||||
lua_close(lua_state);
|
lua_close(lua_state);
|
||||||
|
|
||||||
edge_based_graph_factory.GetEdgeBasedEdges(edge_based_edge_list);
|
edge_based_graph_factory.GetEdgeBasedEdges(edge_based_edge_list);
|
||||||
|
@ -61,7 +61,10 @@ ExtractorOptions::ParseArguments(int argc, char *argv[], ExtractorConfig &extrac
|
|||||||
"threads,t",
|
"threads,t",
|
||||||
boost::program_options::value<unsigned int>(&extractor_config.requested_num_threads)
|
boost::program_options::value<unsigned int>(&extractor_config.requested_num_threads)
|
||||||
->default_value(tbb::task_scheduler_init::default_num_threads()),
|
->default_value(tbb::task_scheduler_init::default_num_threads()),
|
||||||
"Number of threads to use");
|
"Number of threads to use")(
|
||||||
|
"generate-edge-lookup",boost::program_options::value<bool>(
|
||||||
|
&extractor_config.generate_edge_lookup)->implicit_value(true)->default_value(false),
|
||||||
|
"Generate a lookup table for internal edge-expanded-edge IDs to OSM node pairs");
|
||||||
|
|
||||||
// hidden options, will be allowed both on command line and in config file, but will not be
|
// hidden options, will be allowed both on command line and in config file, but will not be
|
||||||
// shown to the user
|
// shown to the user
|
||||||
@ -70,6 +73,7 @@ ExtractorOptions::ParseArguments(int argc, char *argv[], ExtractorConfig &extrac
|
|||||||
&extractor_config.input_path),
|
&extractor_config.input_path),
|
||||||
"Input file in .osm, .osm.bz2 or .osm.pbf format");
|
"Input file in .osm, .osm.bz2 or .osm.pbf format");
|
||||||
|
|
||||||
|
|
||||||
// positional option
|
// positional option
|
||||||
boost::program_options::positional_options_description positional_options;
|
boost::program_options::positional_options_description positional_options;
|
||||||
positional_options.add("input", 1);
|
positional_options.add("input", 1);
|
||||||
@ -149,6 +153,8 @@ void ExtractorOptions::GenerateOutputFilesNames(ExtractorConfig &extractor_confi
|
|||||||
extractor_config.node_output_path = input_path.string();
|
extractor_config.node_output_path = input_path.string();
|
||||||
extractor_config.rtree_nodes_output_path = input_path.string();
|
extractor_config.rtree_nodes_output_path = input_path.string();
|
||||||
extractor_config.rtree_leafs_output_path = input_path.string();
|
extractor_config.rtree_leafs_output_path = input_path.string();
|
||||||
|
extractor_config.edge_segment_lookup_path = input_path.string();
|
||||||
|
extractor_config.edge_penalty_path = input_path.string();
|
||||||
std::string::size_type pos = extractor_config.output_file_name.find(".osm.bz2");
|
std::string::size_type pos = extractor_config.output_file_name.find(".osm.bz2");
|
||||||
if (pos == std::string::npos)
|
if (pos == std::string::npos)
|
||||||
{
|
{
|
||||||
@ -177,6 +183,8 @@ void ExtractorOptions::GenerateOutputFilesNames(ExtractorConfig &extractor_confi
|
|||||||
extractor_config.edge_graph_output_path.append(".osrm.ebg");
|
extractor_config.edge_graph_output_path.append(".osrm.ebg");
|
||||||
extractor_config.rtree_nodes_output_path.append(".osrm.ramIndex");
|
extractor_config.rtree_nodes_output_path.append(".osrm.ramIndex");
|
||||||
extractor_config.rtree_leafs_output_path.append(".osrm.fileIndex");
|
extractor_config.rtree_leafs_output_path.append(".osrm.fileIndex");
|
||||||
|
extractor_config.edge_segment_lookup_path.append(".osrm.edge_segment_lookup");
|
||||||
|
extractor_config.edge_penalty_path.append(".osrm.edge_penalties");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -190,6 +198,8 @@ void ExtractorOptions::GenerateOutputFilesNames(ExtractorConfig &extractor_confi
|
|||||||
extractor_config.edge_graph_output_path.replace(pos, 5, ".osrm.ebg");
|
extractor_config.edge_graph_output_path.replace(pos, 5, ".osrm.ebg");
|
||||||
extractor_config.rtree_nodes_output_path.replace(pos, 5, ".osrm.ramIndex");
|
extractor_config.rtree_nodes_output_path.replace(pos, 5, ".osrm.ramIndex");
|
||||||
extractor_config.rtree_leafs_output_path.replace(pos, 5, ".osrm.fileIndex");
|
extractor_config.rtree_leafs_output_path.replace(pos, 5, ".osrm.fileIndex");
|
||||||
|
extractor_config.edge_segment_lookup_path.replace(pos,5, ".osrm.edge_segment_lookup");
|
||||||
|
extractor_config.edge_penalty_path.replace(pos,5, ".osrm.edge_penalties");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -204,5 +214,7 @@ void ExtractorOptions::GenerateOutputFilesNames(ExtractorConfig &extractor_confi
|
|||||||
extractor_config.edge_graph_output_path.replace(pos, 8, ".osrm.ebg");
|
extractor_config.edge_graph_output_path.replace(pos, 8, ".osrm.ebg");
|
||||||
extractor_config.rtree_nodes_output_path.replace(pos, 8, ".osrm.ramIndex");
|
extractor_config.rtree_nodes_output_path.replace(pos, 8, ".osrm.ramIndex");
|
||||||
extractor_config.rtree_leafs_output_path.replace(pos, 8, ".osrm.fileIndex");
|
extractor_config.rtree_leafs_output_path.replace(pos, 8, ".osrm.fileIndex");
|
||||||
|
extractor_config.edge_segment_lookup_path.replace(pos,8, ".osrm.edge_segment_lookup");
|
||||||
|
extractor_config.edge_penalty_path.replace(pos,8, ".osrm.edge_penalties");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -58,6 +58,10 @@ struct ExtractorConfig
|
|||||||
std::string rtree_leafs_output_path;
|
std::string rtree_leafs_output_path;
|
||||||
|
|
||||||
unsigned requested_num_threads;
|
unsigned requested_num_threads;
|
||||||
|
|
||||||
|
bool generate_edge_lookup;
|
||||||
|
std::string edge_penalty_path;
|
||||||
|
std::string edge_segment_lookup_path;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ExtractorOptions
|
struct ExtractorOptions
|
||||||
|
@ -15,7 +15,8 @@ Feature: osrm-extract command line options: help
|
|||||||
And stdout should contain "Configuration:"
|
And stdout should contain "Configuration:"
|
||||||
And stdout should contain "--profile"
|
And stdout should contain "--profile"
|
||||||
And stdout should contain "--threads"
|
And stdout should contain "--threads"
|
||||||
And stdout should contain 12 lines
|
And stdout should contain "--generate-edge-lookup"
|
||||||
|
And stdout should contain 16 lines
|
||||||
And it should exit with code 0
|
And it should exit with code 0
|
||||||
|
|
||||||
Scenario: osrm-extract - Help, short
|
Scenario: osrm-extract - Help, short
|
||||||
@ -29,7 +30,8 @@ Feature: osrm-extract command line options: help
|
|||||||
And stdout should contain "Configuration:"
|
And stdout should contain "Configuration:"
|
||||||
And stdout should contain "--profile"
|
And stdout should contain "--profile"
|
||||||
And stdout should contain "--threads"
|
And stdout should contain "--threads"
|
||||||
And stdout should contain 12 lines
|
And stdout should contain "--generate-edge-lookup"
|
||||||
|
And stdout should contain 16 lines
|
||||||
And it should exit with code 0
|
And it should exit with code 0
|
||||||
|
|
||||||
Scenario: osrm-extract - Help, long
|
Scenario: osrm-extract - Help, long
|
||||||
@ -43,5 +45,6 @@ Feature: osrm-extract command line options: help
|
|||||||
And stdout should contain "Configuration:"
|
And stdout should contain "Configuration:"
|
||||||
And stdout should contain "--profile"
|
And stdout should contain "--profile"
|
||||||
And stdout should contain "--threads"
|
And stdout should contain "--threads"
|
||||||
And stdout should contain 12 lines
|
And stdout should contain "--generate-edge-lookup"
|
||||||
|
And stdout should contain 16 lines
|
||||||
And it should exit with code 0
|
And it should exit with code 0
|
||||||
|
28
third_party/fast-cpp-csv-parser/LICENSE
vendored
Normal file
28
third_party/fast-cpp-csv-parser/LICENSE
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
Copyright (c) 2015, ben-strasser
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of fast-cpp-csv-parser nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
252
third_party/fast-cpp-csv-parser/README.md
vendored
Normal file
252
third_party/fast-cpp-csv-parser/README.md
vendored
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
# Fast C++ Csv Parser
|
||||||
|
|
||||||
|
This is a small, easy-to-use and fast header-only library for reading comma separated value (CSV) files.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
* Automatically rearranges columns by parsing the header line.
|
||||||
|
* Disk I/O and CSV-parsing are overlapped using threads for efficiency.
|
||||||
|
* Parsing features such as escaped strings can be enabled and disabled at compile time using templates. You only pay in speed for the features you actually use.
|
||||||
|
* Can read multiple GB files in reasonable time.
|
||||||
|
* Support for custom columns separators (i.e. Tab separated value files are supported), quote escaped strings, automatic space trimming.
|
||||||
|
* Works with `*`nix and Windows newlines and automatically ignores UTF-8 BOMs.
|
||||||
|
* Exception classes with enough context to format useful error messages. what() returns error messages ready to be shown to a user.
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
The following small example should contain most of the syntax you need to use the library.
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
# include "csv.h"
|
||||||
|
|
||||||
|
int main(){
|
||||||
|
io::CSVReader<3> in("ram.csv");
|
||||||
|
in.read_header(io::ignore_extra_column, "vendor", "size", "speed");
|
||||||
|
std::string vendor; int size; double speed;
|
||||||
|
while(in.read_row(vendor, size, speed)){
|
||||||
|
// do stuff with the data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
The library only needs a standard conformant C++11 compiler. It has no further dependencies. The library is completely contained inside a single header file and therefore it is sufficient to copy this file to some place on your include path. The library does not have to be explicitly build.
|
||||||
|
|
||||||
|
Note however, that std::future is used and some compiler (f.e. GCC) require you to link against additional libraries (i.e. -lpthread) to make it work. With GCC it is important to add -lpthread as the last item when linking, i.e. the order in
|
||||||
|
|
||||||
|
```
|
||||||
|
g++ a.o b.o -o prog -lpthread
|
||||||
|
```
|
||||||
|
|
||||||
|
is important.
|
||||||
|
|
||||||
|
Remember that the library makes use of C++11 features and therefore you have to enable support for it (f.e. add -std=C++0x or -std=gnu++0x).
|
||||||
|
|
||||||
|
The library was developed and tested with GCC 4.6.1
|
||||||
|
|
||||||
|
Note that VS2013 is not C++11 compliant and will therefore not work out of the box. See [here](https://code.google.com/p/fast-cpp-csv-parser/issues/detail?id=6) for what needs to be adjusted to make the code work.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
The library provides two classes:
|
||||||
|
|
||||||
|
* `LineReader`: A class to efficiently read large files line by line.
|
||||||
|
* `CSVReader`: A class that efficiently reads large CSV files.
|
||||||
|
|
||||||
|
Note that everything is contained in the `io` namespace.
|
||||||
|
|
||||||
|
### `LineReader`
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
class LineReader{
|
||||||
|
public:
|
||||||
|
// Constructors
|
||||||
|
LineReader(some_string_type file_name);
|
||||||
|
LineReader(some_string_type file_name, std::FILE*file);
|
||||||
|
|
||||||
|
// Reading
|
||||||
|
char*next_line();
|
||||||
|
|
||||||
|
// File Location
|
||||||
|
void set_file_line(unsigned);
|
||||||
|
unsigned get_file_line(unsigned)const;
|
||||||
|
void set_file_name(some_string_type file_name);
|
||||||
|
const char*get_truncated_file_name()const;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
The constructor takes a file name and optionally a `stdio.h` file handle. If no file handle is provided the class tries to open the file and throws an `error::can_not_open_file exception` on failure. If a file handle is provided then the file name is only used to format error messages. The library will call `std::fclose` on the file handle. `some_string_type` can be a `std::string` or a `char*`.
|
||||||
|
|
||||||
|
Lines are read by calling the `next_line` function. It returns a pointer to a null terminated C-string that contains the line. If the end of file is reached a null pointer is returned. The newline character is not included in the string. You may modify the string as long as you do not write past the null terminator. The string stays valid until the destructor is called or until next_line is called again. Windows and `*`nix newlines are handled transparently. UTF-8 BOMs are automatically ignored and missing newlines at the end of the file are no problem.
|
||||||
|
|
||||||
|
**Important:** There is a limit of 2^24-1 characters per line. If this limit is exceeded a `error::line_length_limit_exceeded` exception is thrown.
|
||||||
|
|
||||||
|
Looping over all the lines in a file can be done in the following way.
|
||||||
|
```cpp
|
||||||
|
LineReader in(...);
|
||||||
|
while(char*line = in.next_line()){
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The remaining functions are mainly used to format error messages. The file line indicates the current position in the file, i.e., after the first `next_line` call it is 1 and after the second 2. Before the first call it is 0. The file name is truncated as internally C-strings are used to avoid `std::bad_alloc` exceptions during error reporting.
|
||||||
|
|
||||||
|
**Note:** It is not possible to exchange the line termination character.
|
||||||
|
|
||||||
|
### `CSVReader`
|
||||||
|
|
||||||
|
`CSVReader` uses policies. These are classes with only static members to allow core functionality to be exchanged in an efficient way.
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
template<
|
||||||
|
unsigned column_count,
|
||||||
|
class trim_policy = trim_chars<' ', '\t'>,
|
||||||
|
class quote_policy = no_quote_escape<','>,
|
||||||
|
class overflow_policy = throw_on_overflow,
|
||||||
|
class comment_policy = no_comment
|
||||||
|
>
|
||||||
|
class CSVReader{
|
||||||
|
public:
|
||||||
|
// Constructors
|
||||||
|
CSVReader(some_string_type file_name);
|
||||||
|
CSVReader(some_string_type file_name, std::FILE*file);
|
||||||
|
|
||||||
|
// Parsing Header
|
||||||
|
void read_header(ignore_column ignore_policy, some_string_type col_name1, some_string_type col_name2, ...);
|
||||||
|
void set_header(some_string_type col_name1, some_string_type col_name2, ...);
|
||||||
|
bool has_column(some_string_type col_name)const;
|
||||||
|
|
||||||
|
// Read
|
||||||
|
bool read_row(ColType1&col1, ColType2&col2, ...);
|
||||||
|
|
||||||
|
// File Location
|
||||||
|
void set_file_line(unsigned);
|
||||||
|
unsigned get_file_line(unsigned)const;
|
||||||
|
void set_file_name(some_string_type file_name);
|
||||||
|
const char*get_truncated_file_name()const;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
The `column_count` template parameter indicates how many columns you want to read from the CSV file. This must not necessarily coincide with the actual number of columns in the file. The three policies govern various aspects of the parsing.
|
||||||
|
|
||||||
|
The trim policy indicates what characters should be ignored at the begin and the end of every column. The default ignores spaces and tabs. This makes sure that
|
||||||
|
|
||||||
|
```
|
||||||
|
a,b,c
|
||||||
|
1,2,3
|
||||||
|
```
|
||||||
|
|
||||||
|
is interpreted in the same way as
|
||||||
|
|
||||||
|
```
|
||||||
|
a, b, c
|
||||||
|
1 , 2, 3
|
||||||
|
```
|
||||||
|
|
||||||
|
The trim_chars can take any number of template parameters. For example `trim_chars<' ', '\t', '_'> `is also valid. If no character should be trimmed use `trim_chars<>`.
|
||||||
|
|
||||||
|
The quote policy indicates how string should be escaped. It also specifies the column separator. The predefined policies are:
|
||||||
|
|
||||||
|
* `no_quote_escape<sep>` : Strings are not escaped. "`sep`" is used as column separator.
|
||||||
|
* `double_quote_escape<sep, quote>` : Strings are escaped using quotes. Quotes are escaped using two consecutive quotes. "`sep`" is used as column separator and "`quote`" as quoting character.
|
||||||
|
|
||||||
|
**Important**: When combining trimming and quoting the rows are first trimmed and then unquoted. A consequence is that spaces inside the quotes will be conserved. If you want to get rid of spaces inside the quotes, you need to remove them yourself.
|
||||||
|
|
||||||
|
**Important**: Quoting can be quite expensive. Disable it if you do not need it.
|
||||||
|
|
||||||
|
The overflow policy indicates what should be done if the integers in the input are too large to fit into the variables. The following policies are predefined:
|
||||||
|
|
||||||
|
* `throw_on_overflow` : Throw an `error::integer_overflow` or `error::integer_underflow` exception.
|
||||||
|
* `ignore_overflow` : Do nothing and let the overflow happen.
|
||||||
|
* `set_to_max_on_overflow` : Set the value to `numeric_limits<...>::max()` (or to the min-pendant).
|
||||||
|
|
||||||
|
The comment policy allows to skip lines based on some criteria. Valid predefined policies are:
|
||||||
|
|
||||||
|
* `no_comment` : Do not ignore any line.
|
||||||
|
* `empty_line_comment` : Ignore all lines that are empty or only contains spaces and tabs.
|
||||||
|
* `single_line_comment<com1, com2, ...>` : Ignore all lines that start with com1 or com2 or ... as the first character. There may not be any space between the beginning of the line and the comment character.
|
||||||
|
* `single_and_empty_line_comment<com1, com2, ...>` : Ignore all empty lines and single line comments.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
* `CSVReader<4, trim_chars<' '>, double_quote_escape<',','\"'> >` reads 4 columns from a normal CSV file with string escaping enabled.
|
||||||
|
* `CSVReader<3, trim_chars<' '>, no_quote_escape<'\t'>, single_line_comment<'#'> >` reads 3 columns from a tab separated file with string escaping disabled. Lines starting with a # are ignored.
|
||||||
|
|
||||||
|
The constructors and the file location functions are exactly the same as for `LineReader`. See its documentation for details.
|
||||||
|
|
||||||
|
There are three methods that deal with headers. The `read_header` methods reads a line from the file and rearranges the columns to match that order. It also checks whether all necessary columns are present. The `set_header` method does *not* read any input. Use it if the file does not have any header. Obviously it is impossible to rearrange columns or check for their availability when using it. The order in the file and in the program must match when using `set_header`. The `has_column` method checks whether a column is present in the file. The first argument of `read_header` is a bitfield that determines how the function should react to column mismatches. The default behavior is to throw an `error::extra_column_in_header` exception if the file contains more columns than expected and an `error::missing_column_in_header` when there are not enough. This behavior can be altered using the following flags.
|
||||||
|
|
||||||
|
* `ignore_no_column`: The default behavior, no flags are set
|
||||||
|
* `ignore_extra_column`: If a column with a name is in the file but not in the argument list, then it is silently ignored.
|
||||||
|
* `ignore_missing_column`: If a column with a name is not in the file but is in the argument list, then `read_row` will not modify the corresponding variable.
|
||||||
|
|
||||||
|
When using `ignore_missing_column` it is a good idea to initialize the variables passed to `read_row` with a default value, for example:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
// The file only contains column "a"
|
||||||
|
CSVReader<2>in(...);
|
||||||
|
in.read_header(ignore_missing_column, "a", "b");
|
||||||
|
int a,b = 42;
|
||||||
|
while(in.read_row(a,b)){
|
||||||
|
// a contains the value from the file
|
||||||
|
// b is left unchanged by read_row, i.e., it is 42
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If only some columns are optional or their default value depends on other columns you have to use `has_column`, for example:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
// The file only contains the columns "a" and "b"
|
||||||
|
CSVReader<2>in(...);
|
||||||
|
in.read_header(ignore_missing_column, "a", "b", "sum");
|
||||||
|
if(!in.has_column("a") || !in.has_column("b"))
|
||||||
|
throw my_neat_error_class();
|
||||||
|
bool has_sum = in.has_column("sum");
|
||||||
|
int a,b,sum;
|
||||||
|
while(in.read_row(a,b,sum)){
|
||||||
|
if(!has_sum)
|
||||||
|
sum = a+b;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important**: Do not call `has_column` from within the read-loop. It would work correctly but significantly slowdown processing.
|
||||||
|
|
||||||
|
If two columns have the same name an error::duplicated_column_in_header exception is thrown. If `read_header` is called but the file is empty a `error::header_missing` exception is thrown.
|
||||||
|
|
||||||
|
The `read_row` function reads a line, splits it into the columns and arranges them correctly. It trims the entries and unescapes them. If requested the content is interpreted as integer or as floating point. The variables passed to read_row may be of the following types.
|
||||||
|
|
||||||
|
* builtin signed integer: These are `signed char`, `short`, `int`, `long` and `long long`. The input must be encoded as a base 10 ASCII number optionally preceded by a + or -. The function detects whether the integer is too large and would overflow (or underflow) and behaves as indicated by overflow_policy.
|
||||||
|
* builtin unsigned integer: Just as the signed counterparts except that a leading + or - is not allowed.
|
||||||
|
* builtin floating point: These are `float`, `double` and `long double`. The input may have a leading + or -. The number must be base 10 encoded. The decimal point may either be a dot or a comma. (Note that a comma will only work if it is not also used as column separator or the number is escaped.) A base 10 exponent may be specified using the "1e10" syntax. The "e" may be lower- or uppercase. Examples for valid floating points are "1", "-42.42" and "+123.456E789". The input is rounded to the next floating point or infinity if it is too large or small.
|
||||||
|
* `char`: The column content must be a single character.
|
||||||
|
* `std::string`: The column content is assigned to the string. The std::string is filled with the trimmed and unescaped version.
|
||||||
|
* `char*`: A pointer directly into the buffer. The string is trimmed and unescaped and null terminated. This pointer stays valid until read_row is called again or the CSVReader is destroyed. Use this for user defined types.
|
||||||
|
|
||||||
|
Note that there is no inherent overhead to using `char*` and then interpreting it compared to using one of the parsers directly build into `CSVReader`. The builtin number parsers are pure convenience. If you need a slightly different syntax then use `char*` and do the parsing yourself.
|
||||||
|
|
||||||
|
## FAQ
|
||||||
|
|
||||||
|
Q: The library is throwing a std::system_error with code -1. How to get it to work?
|
||||||
|
|
||||||
|
A: Your compiler's std::thread implementation is broken. Define CSV\_IO\_NO\_THREAD to disable threading support.
|
||||||
|
|
||||||
|
|
||||||
|
Q: My values are not just ints or strings. I want to parse my customized type. Is this possible?
|
||||||
|
|
||||||
|
A: Read a `char*` and parse the string. At first this seems expensive but it is not as the pointer you get points directly into the memory buffer. In fact there is no inherent reason why a custom int-parser realized this way must be any slower than the int-parser build into the library. By reading a `char*` the library takes care of column reordering and quote escaping and leaves the actual parsing to you. Note that using a std::string is slower as it involves a memory copy.
|
||||||
|
|
||||||
|
|
||||||
|
Q: I get lots of compiler errors when compiling the header! Please fix it. :(
|
||||||
|
|
||||||
|
A: Have you enabled the C++11 mode of your compiler? If you use GCC you have to add -std=c++0x to the commandline. If this does not resolve the problem, then please open a ticket.
|
||||||
|
|
||||||
|
|
||||||
|
Q: The library crashes when parsing large files! Please fix it. :(
|
||||||
|
|
||||||
|
A: When using GCC have you linked against -lpthread? Read the installation section for details on how to do this. If this does not resolve the issue then please open a ticket. (The reason why it crashes only on large files is that the first chunk is read synchronously and if the whole file fits into this chunk then no asynchronous call is performed.) Alternatively you can define CSV\_IO\_NO\_THREAD.
|
||||||
|
|
||||||
|
|
||||||
|
Q: Does the library support UTF?
|
||||||
|
|
||||||
|
A: The library has basic UTF-8 support, or to be more precise it does not break when passing UTF-8 strings through it. If you read a `char*` then you get a pointer to the UTF-8 string. You will have to decode the string on your own. The separator, quoting, and commenting characters used by the library can only be ASCII characters.
|
1068
third_party/fast-cpp-csv-parser/csv.h
vendored
Normal file
1068
third_party/fast-cpp-csv-parser/csv.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user