From 8709fb6f7ccf2cf4598fc2bab635e757340f39c2 Mon Sep 17 00:00:00 2001 From: artemp Date: Mon, 24 Aug 2015 16:35:32 +0200 Subject: [PATCH] CSV - optimise parsing by providing num_columns hint --- include/mapnik/csv/csv_grammar.hpp | 13 +++++++------ plugins/input/csv/csv_featureset.cpp | 2 +- plugins/input/csv/csv_featureset.hpp | 2 +- plugins/input/csv/csv_utils.hpp | 7 ++++--- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/mapnik/csv/csv_grammar.hpp b/include/mapnik/csv/csv_grammar.hpp index 62bfc4166..d6996a96e 100644 --- a/include/mapnik/csv/csv_grammar.hpp +++ b/include/mapnik/csv/csv_grammar.hpp @@ -37,13 +37,14 @@ using csv_line = columns; using csv_data = std::vector; template -struct csv_line_grammar : qi::grammar +struct csv_line_grammar : qi::grammar { csv_line_grammar() : csv_line_grammar::base_type(line) { using namespace qi; qi::_a_type _a; qi::_r1_type _r1; + qi::_r2_type _r2; qi::lit_type lit; //qi::eol_type eol; qi::_val_type _val; @@ -65,18 +66,18 @@ struct csv_line_grammar : qi::grammar text(_a)[boost::phoenix::swap(_val,_1)] > -lit(_a) ; - quoted = omit[char_("\"'")[_a = _1]] >> text(_a)[_val = _1] >> -lit(_a) + text = *(unesc_char | (char_ - char_(_r1))) ; - BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); + //BOOST_SPIRIT_DEBUG_NODES((line)(column)(quoted)); } private: - qi::rule line; + qi::rule line; qi::rule column; // no-skip qi::rule text; qi::rule, std::string()> quoted; diff --git a/plugins/input/csv/csv_featureset.cpp b/plugins/input/csv/csv_featureset.cpp index 9219dec03..4a9e74a9f 100644 --- a/plugins/input/csv/csv_featureset.cpp +++ b/plugins/input/csv/csv_featureset.cpp @@ -55,7 +55,7 @@ csv_featureset::~csv_featureset() {} mapnik::feature_ptr csv_featureset::parse_feature(char const* beg, char const* end) { - auto values = csv_utils::parse_line(beg, end, separator_); + auto values = csv_utils::parse_line(beg, end, separator_, headers_.size()); auto geom = detail::extract_geometry(values, locator_); if (!geom.is()) { diff --git a/plugins/input/csv/csv_featureset.hpp b/plugins/input/csv/csv_featureset.hpp index 380a582bf..1fc2103f2 100644 --- a/plugins/input/csv/csv_featureset.hpp +++ b/plugins/input/csv/csv_featureset.hpp @@ -48,7 +48,7 @@ private: mapnik::feature_ptr parse_feature(char const* beg, char const* end); file_ptr file_; std::string const& separator_; - std::vector headers_; + std::vector const& headers_; const array_type index_array_; array_type::const_iterator index_itr_; array_type::const_iterator index_end_; diff --git a/plugins/input/csv/csv_utils.hpp b/plugins/input/csv/csv_utils.hpp index 79653e273..67bb47864 100644 --- a/plugins/input/csv/csv_utils.hpp +++ b/plugins/input/csv/csv_utils.hpp @@ -49,11 +49,12 @@ namespace csv_utils static const mapnik::csv_line_grammar line_g; -static mapnik::csv_line parse_line(char const* start, char const* end, std::string const& separator) +static mapnik::csv_line parse_line(char const* start, char const* end, std::string const& separator, std::size_t num_columns) { mapnik::csv_line values; + if (num_columns > 0) values.reserve(num_columns); boost::spirit::standard::blank_type blank; - if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::cref(separator)), blank,values)) + if (!boost::spirit::qi::phrase_parse(start, end, (line_g)(boost::phoenix::ref(values), boost::phoenix::cref(separator)), blank)) { throw std::runtime_error("Failed to parse CSV line:\n" + std::string(start, end)); } @@ -64,7 +65,7 @@ static mapnik::csv_line parse_line(std::string const& line_str, std::string cons { auto start = line_str.c_str(); auto end = start + line_str.length(); - return parse_line(start, end, separator); + return parse_line(start, end, separator, 0); } static inline bool is_likely_number(std::string const& value)