Vince's CSV Parser
csv_row.cpp
Go to the documentation of this file.
1 
5 #include <cassert>
6 #include <functional>
7 #include "csv_row.hpp"
8 
9 namespace csv {
10  namespace internals {
11  CSV_INLINE RawCSVField& CSVFieldList::operator[](size_t n) const {
12  const size_t page_no = n / _single_buffer_capacity;
13  const size_t buffer_idx = (page_no < 1) ? n : n % _single_buffer_capacity;
14  return this->buffers[page_no][buffer_idx];
15  }
16 
17  CSV_INLINE void CSVFieldList::allocate() {
18  RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
19  buffers.push_back(buffer);
20  _current_buffer_size = 0;
21  _back = &(buffers.back()[0]);
22  }
23  }
24 
35  return CSVField(this->get_field(n));
36  }
37 
47  CSV_INLINE CSVField CSVRow::operator[](const std::string& col_name) const {
48  auto & col_names = this->data->col_names;
49  auto col_pos = col_names->index_of(col_name);
50  if (col_pos > -1) {
51  return this->operator[](col_pos);
52  }
53 
54  throw std::runtime_error("Can't find a column named " + col_name);
55  }
56 
57  CSV_INLINE CSVRow::operator std::vector<std::string>() const {
58  std::vector<std::string> ret;
59  for (size_t i = 0; i < size(); i++)
60  ret.push_back(std::string(this->get_field(i)));
61 
62  return ret;
63  }
64 
65  CSV_INLINE csv::string_view CSVRow::get_field(size_t index) const
66  {
68 
69  if (index >= this->size())
70  throw std::runtime_error("Index out of bounds.");
71 
72  const size_t field_index = this->fields_start + index;
73  auto& field = this->data->fields[field_index];
74  auto field_str = csv::string_view(this->data->data).substr(this->data_start + field.start);
75 
76  if (field.has_double_quote) {
77  auto& value = this->data->double_quote_fields[field_index];
78  if (value.empty()) {
79  bool prev_ch_quote = false;
80  for (size_t i = 0; i < field.length; i++) {
81  if (this->data->parse_flags[field_str[i] + 128] == ParseFlags::QUOTE) {
82  if (prev_ch_quote) {
83  prev_ch_quote = false;
84  continue;
85  }
86  else {
87  prev_ch_quote = true;
88  }
89  }
90 
91  value += field_str[i];
92  }
93  }
94 
95  return csv::string_view(value);
96  }
97 
98  return field_str.substr(0, field.length);
99  }
100 
101  CSV_INLINE bool CSVField::try_parse_hex(int& parsedValue) {
102  size_t start = 0, end = 0;
103 
104  // Trim out whitespace chars
105  for (; start < this->sv.size() && this->sv[start] == ' '; start++);
106  for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++);
107 
108  int value_ = 0;
109 
110  size_t digits = (end - start);
111  size_t base16_exponent = digits - 1;
112 
113  if (digits == 0) return false;
114 
115  for (const auto& ch : this->sv.substr(start, digits)) {
116  int digit = 0;
117 
118  switch (ch) {
119  case '0':
120  case '1':
121  case '2':
122  case '3':
123  case '4':
124  case '5':
125  case '6':
126  case '7':
127  case '8':
128  case '9':
129  digit = static_cast<int>(ch - '0');
130  break;
131  case 'a':
132  case 'A':
133  digit = 10;
134  break;
135  case 'b':
136  case 'B':
137  digit = 11;
138  break;
139  case 'c':
140  case 'C':
141  digit = 12;
142  break;
143  case 'd':
144  case 'D':
145  digit = 13;
146  break;
147  case 'e':
148  case 'E':
149  digit = 14;
150  break;
151  case 'f':
152  case 'F':
153  digit = 15;
154  break;
155  default:
156  return false;
157  }
158 
159  value_ += digit * (int)pow(16, (double)base16_exponent);
160  base16_exponent--;
161  }
162 
163  parsedValue = value_;
164  return true;
165  }
166 
167  CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
168  // If field has already been parsed to empty, no need to do it aagin:
169  if (this->_type == DataType::CSV_NULL)
170  return false;
171 
172  // Not yet parsed or possibly parsed with other decimalSymbol
173  if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE)
174  this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again
175 
176  // Integral types are not affected by decimalSymbol and need not be parsed again
177 
178  // Either we already had an integral type before, or we we just got any numeric type now.
179  if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) {
180  dVal = this->value;
181  return true;
182  }
183 
184  // CSV_NULL or CSV_STRING, not numeric
185  return false;
186  }
187 
188 #ifdef _MSC_VER
189 #pragma region CSVRow Iterator
190 #endif
193  return CSVRow::iterator(this, 0);
194  }
195 
202  return CSVRow::iterator(this, (int)this->size());
203  }
204 
205  CSV_INLINE CSVRow::reverse_iterator CSVRow::rbegin() const noexcept {
206  return std::reverse_iterator<CSVRow::iterator>(this->end());
207  }
208 
209  CSV_INLINE CSVRow::reverse_iterator CSVRow::rend() const {
210  return std::reverse_iterator<CSVRow::iterator>(this->begin());
211  }
212 
213  CSV_INLINE HEDLEY_NON_NULL(2)
214  CSVRow::iterator::iterator(const CSVRow* _reader, int _i)
215  : daddy(_reader), i(_i) {
216  if (_i < (int)this->daddy->size())
217  this->field = std::make_shared<CSVField>(
218  this->daddy->operator[](_i));
219  else
220  this->field = nullptr;
221  }
222 
223  CSV_INLINE CSVRow::iterator::reference CSVRow::iterator::operator*() const {
224  return *(this->field.get());
225  }
226 
227  CSV_INLINE CSVRow::iterator::pointer CSVRow::iterator::operator->() const {
228  return this->field;
229  }
230 
231  CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator++() {
232  // Pre-increment operator
233  this->i++;
234  if (this->i < (int)this->daddy->size())
235  this->field = std::make_shared<CSVField>(
236  this->daddy->operator[](i));
237  else // Reached the end of row
238  this->field = nullptr;
239  return *this;
240  }
241 
242  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator++(int) {
243  // Post-increment operator
244  auto temp = *this;
245  this->operator++();
246  return temp;
247  }
248 
249  CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator--() {
250  // Pre-decrement operator
251  this->i--;
252  this->field = std::make_shared<CSVField>(
253  this->daddy->operator[](this->i));
254  return *this;
255  }
256 
257  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator--(int) {
258  // Post-decrement operator
259  auto temp = *this;
260  this->operator--();
261  return temp;
262  }
263 
264  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator+(difference_type n) const {
265  // Allows for iterator arithmetic
266  return CSVRow::iterator(this->daddy, i + (int)n);
267  }
268 
269  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator-(difference_type n) const {
270  // Allows for iterator arithmetic
271  return CSVRow::iterator::operator+(-n);
272  }
273 #ifdef _MSC_VER
274 #pragma endregion CSVRow Iterator
275 #endif
276 }
Data type representing individual CSV values.
Definition: csv_row.hpp:143
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
Definition: csv_row.cpp:167
bool try_parse_hex(int &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
Definition: csv_row.cpp:101
A random access iterator over the contents of a CSV row.
Definition: csv_row.hpp:343
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
Definition: csv_row.cpp:201
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
Definition: csv_row.hpp:382
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
Definition: csv_row.hpp:319
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
Definition: csv_row.cpp:34
iterator begin() const
Return an iterator pointing to the first field.
Definition: csv_row.cpp:192
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Definition: common.hpp:26
Defines the data type used for storing information about a CSV row.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
Definition: data_type.hpp:242
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition: common.hpp:166
The all encompassing namespace.
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
nonstd::string_view string_view
The string_view class used by this library.
Definition: common.hpp:75