Vince's CSV Parser
csv_format.hpp
Go to the documentation of this file.
1 
5 #pragma once
6 #include <iterator>
7 #include <stdexcept>
8 #include <string>
9 #include <vector>
10 
11 #include "common.hpp"
12 
13 namespace csv {
namespace internals {
    // Forward declaration only; CSVFormat names this parser type as a friend.
    class IBasicCSVParser;
}

// Forward declaration only; CSVFormat names this reader type as a friend.
class CSVReader;
19 
/** Determines how to handle rows that are shorter or longer than the majority.
 *
 *  IGNORE_ROW (0) and KEEP (1) are the values that `false` and `true` map to
 *  in CSVFormat::variable_columns(bool), so their numeric values must not change.
 */
enum class VariableColumnPolicy {
    THROW = -1,
    IGNORE_ROW = 0,
    KEEP = 1
};
26 
/** Stores the inferred format of a CSV file.
 *
 *  Deliberately a plain aggregate with no default member initializers, so it
 *  can be brace-initialized as `CSVGuessResult{ delim, header_row }`.
 */
struct CSVGuessResult {
    char delim;      // Inferred delimiter character.
    int header_row;  // Inferred header row number.
};
32 
36  class CSVFormat {
37  public:
39  CSVFormat() = default;
40 
45  CSVFormat& delimiter(char delim);
46 
52  CSVFormat& delimiter(const std::vector<char> & delim);
53 
59  CSVFormat& trim(const std::vector<char> & ws);
60 
65  CSVFormat& quote(char quote);
66 
71  CSVFormat& column_names(const std::vector<std::string>& names);
72 
77  CSVFormat& header_row(int row);
78 
85  this->header_row(-1);
86  return *this;
87  }
88 
90  CSVFormat& quote(bool use_quote) {
91  this->no_quote = !use_quote;
92  return *this;
93  }
94 
96  CONSTEXPR_14 CSVFormat& variable_columns(VariableColumnPolicy policy = VariableColumnPolicy::IGNORE_ROW) {
97  this->variable_column_policy = policy;
98  return *this;
99  }
100 
102  CONSTEXPR_14 CSVFormat& variable_columns(bool policy) {
103  this->variable_column_policy = (VariableColumnPolicy)policy;
104  return *this;
105  }
106 
107  #ifndef DOXYGEN_SHOULD_SKIP_THIS
108  char get_delim() const {
109  // This error should never be received by end users.
110  if (this->possible_delimiters.size() > 1) {
111  throw std::runtime_error("There is more than one possible delimiter.");
112  }
113 
114  return this->possible_delimiters.at(0);
115  }
116 
117  CONSTEXPR bool is_quoting_enabled() const { return !this->no_quote; }
118  CONSTEXPR char get_quote_char() const { return this->quote_char; }
119  CONSTEXPR int get_header() const { return this->header; }
120  std::vector<char> get_possible_delims() const { return this->possible_delimiters; }
121  std::vector<char> get_trim_chars() const { return this->trim_chars; }
122  CONSTEXPR VariableColumnPolicy get_variable_column_policy() const { return this->variable_column_policy; }
123  #endif
124 
127  CSVFormat format;
128  format.delimiter({ ',', '|', '\t', ';', '^' })
129  .quote('"')
130  .header_row(0);
131 
132  return format;
133  }
134 
135  bool guess_delim() {
136  return this->possible_delimiters.size() > 1;
137  }
138 
139  friend CSVReader;
140  friend internals::IBasicCSVParser;
141 
142  private:
144  void assert_no_char_overlap();
145 
147  std::vector<char> possible_delimiters = { ',' };
148 
150  std::vector<char> trim_chars = {};
151 
153  int header = 0;
154 
156  bool no_quote = false;
157 
159  char quote_char = '"';
160 
162  std::vector<std::string> col_names = {};
163 
165  VariableColumnPolicy variable_column_policy = VariableColumnPolicy::IGNORE_ROW;
166  };
167 }
Stores information about how to parse a CSV file.
Definition: csv_format.hpp:36
CSVFormat & column_names(const std::vector< std::string > &names)
Sets the column names.
Definition: csv_format.cpp:36
CSVFormat & quote(bool use_quote)
Turn quoting on or off.
Definition: csv_format.hpp:90
CSVFormat()=default
Settings for parsing an RFC 4180 CSV file.
static CSVFormat guess_csv()
CSVFormat for guessing the delimiter.
Definition: csv_format.hpp:126
CONSTEXPR_14 CSVFormat & variable_columns(VariableColumnPolicy policy=VariableColumnPolicy::IGNORE_ROW)
Tells the parser how to handle rows whose length differs from the others.
Definition: csv_format.hpp:96
CSVFormat & trim(const std::vector< char > &ws)
Sets the whitespace characters to be trimmed.
Definition: csv_format.cpp:30
CSVFormat & delimiter(char delim)
Sets the delimiter of the CSV file.
Definition: csv_format.cpp:11
CSVFormat & no_header()
Tells the parser that this CSV has no header row.
Definition: csv_format.hpp:84
CSVFormat & header_row(int row)
Sets the header row.
Definition: csv_format.cpp:42
CSVFormat & quote(char quote)
Sets the quote character.
Definition: csv_format.cpp:23
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Definition: common.hpp:117
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Definition: common.hpp:26
The all-encompassing namespace.
VariableColumnPolicy
Determines how to handle rows that are shorter or longer than the majority.
Definition: csv_format.hpp:21
Stores the inferred format of a CSV file.
Definition: csv_format.hpp:28