Vince's CSV Parser
csv_row.cpp
Go to the documentation of this file.
1 
#include <cassert>
#include <climits>
#include <functional>
#include "csv_row.hpp"
8 
9 namespace csv {
10  namespace internals {
11  CSV_INLINE RawCSVField& CSVFieldList::operator[](size_t n) const {
12  const size_t page_no = n / _single_buffer_capacity;
13  const size_t buffer_idx = (page_no < 1) ? n : n % _single_buffer_capacity;
14  return this->buffers[page_no][buffer_idx];
15  }
16 
17  CSV_INLINE void CSVFieldList::allocate() {
18  RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
19  buffers.push_back(buffer);
20  _current_buffer_size = 0;
21  _back = &(buffers.back()[0]);
22  }
23  }
24 
35  return CSVField(this->get_field(n));
36  }
37 
47  CSV_INLINE CSVField CSVRow::operator[](const std::string& col_name) const {
48  auto & col_names = this->data->col_names;
49  auto col_pos = col_names->index_of(col_name);
50  if (col_pos > -1) {
51  return this->operator[](col_pos);
52  }
53 
54  throw std::runtime_error("Can't find a column named " + col_name);
55  }
56 
57  CSV_INLINE CSVRow::operator std::vector<std::string>() const {
58  std::vector<std::string> ret;
59  for (size_t i = 0; i < size(); i++)
60  ret.push_back(std::string(this->get_field(i)));
61 
62  return ret;
63  }
64 
65  CSV_INLINE csv::string_view CSVRow::get_field(size_t index) const
66  {
68 
69  if (index >= this->size())
70  throw std::runtime_error("Index out of bounds.");
71 
72  const size_t field_index = this->fields_start + index;
73  auto& field = this->data->fields[field_index];
74  auto field_str = csv::string_view(this->data->data).substr(this->data_start + field.start);
75 
76  if (field.has_double_quote) {
77  auto& value = this->data->double_quote_fields[field_index];
78  if (value.empty()) {
79  bool prev_ch_quote = false;
80  for (size_t i = 0; i < field.length; i++) {
81  if (this->data->parse_flags[field_str[i] + 128] == ParseFlags::QUOTE) {
82  if (prev_ch_quote) {
83  prev_ch_quote = false;
84  continue;
85  }
86  else {
87  prev_ch_quote = true;
88  }
89  }
90 
91  value += field_str[i];
92  }
93  }
94 
95  return csv::string_view(value);
96  }
97 
98  return field_str.substr(0, field.length);
99  }
100 
101  CSV_INLINE bool CSVField::try_parse_hex(int& parsedValue) {
102  size_t start = 0, end = 0;
103 
104  // Trim out whitespace chars
105  for (; start < this->sv.size() && this->sv[start] == ' '; start++);
106  for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++);
107 
108  unsigned long long int value = 0;
109 
110  size_t digits = (end - start);
111  size_t base16_exponent = digits - 1;
112 
113  if (digits == 0) return false;
114 
115  for (const auto& ch : this->sv.substr(start, digits)) {
116  int digit = 0;
117 
118  switch (ch) {
119  case '0':
120  case '1':
121  case '2':
122  case '3':
123  case '4':
124  case '5':
125  case '6':
126  case '7':
127  case '8':
128  case '9':
129  digit = static_cast<int>(ch - '0');
130  break;
131  case 'a':
132  case 'A':
133  digit = 10;
134  break;
135  case 'b':
136  case 'B':
137  digit = 11;
138  break;
139  case 'c':
140  case 'C':
141  digit = 12;
142  break;
143  case 'd':
144  case 'D':
145  digit = 13;
146  break;
147  case 'e':
148  case 'E':
149  digit = 14;
150  break;
151  case 'f':
152  case 'F':
153  digit = 15;
154  break;
155  default:
156  return false;
157  }
158 
159  value += digit * pow(16, base16_exponent);
160  base16_exponent--;
161  }
162 
163  parsedValue = value;
164  return true;
165  }
166 
167 #ifdef _MSC_VER
168 #pragma region CSVRow Iterator
169 #endif
172  return CSVRow::iterator(this, 0);
173  }
174 
181  return CSVRow::iterator(this, (int)this->size());
182  }
183 
184  CSV_INLINE CSVRow::reverse_iterator CSVRow::rbegin() const noexcept {
185  return std::reverse_iterator<CSVRow::iterator>(this->end());
186  }
187 
188  CSV_INLINE CSVRow::reverse_iterator CSVRow::rend() const {
189  return std::reverse_iterator<CSVRow::iterator>(this->begin());
190  }
191 
192  CSV_INLINE HEDLEY_NON_NULL(2)
193  CSVRow::iterator::iterator(const CSVRow* _reader, int _i)
194  : daddy(_reader), i(_i) {
195  if (_i < (int)this->daddy->size())
196  this->field = std::make_shared<CSVField>(
197  this->daddy->operator[](_i));
198  else
199  this->field = nullptr;
200  }
201 
202  CSV_INLINE CSVRow::iterator::reference CSVRow::iterator::operator*() const {
203  return *(this->field.get());
204  }
205 
206  CSV_INLINE CSVRow::iterator::pointer CSVRow::iterator::operator->() const {
207  // Using CSVField * as pointer type causes segfaults in MSVC debug builds
208  #ifdef _MSC_BUILD
209  return this->field;
210  #else
211  return this->field.get();
212  #endif
213  }
214 
215  CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator++() {
216  // Pre-increment operator
217  this->i++;
218  if (this->i < (int)this->daddy->size())
219  this->field = std::make_shared<CSVField>(
220  this->daddy->operator[](i));
221  else // Reached the end of row
222  this->field = nullptr;
223  return *this;
224  }
225 
226  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator++(int) {
227  // Post-increment operator
228  auto temp = *this;
229  this->operator++();
230  return temp;
231  }
232 
233  CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator--() {
234  // Pre-decrement operator
235  this->i--;
236  this->field = std::make_shared<CSVField>(
237  this->daddy->operator[](this->i));
238  return *this;
239  }
240 
241  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator--(int) {
242  // Post-decrement operator
243  auto temp = *this;
244  this->operator--();
245  return temp;
246  }
247 
248  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator+(difference_type n) const {
249  // Allows for iterator arithmetic
250  return CSVRow::iterator(this->daddy, i + (int)n);
251  }
252 
253  CSV_INLINE CSVRow::iterator CSVRow::iterator::operator-(difference_type n) const {
254  // Allows for iterator arithmetic
255  return CSVRow::iterator::operator+(-n);
256  }
257 #ifdef _MSC_VER
258 #pragma endregion CSVRow Iterator
259 #endif
260 }
Data type representing individual CSV values.
Definition: csv_row.hpp:143
bool try_parse_hex(int &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
Definition: csv_row.cpp:101
A random access iterator over the contents of a CSV row.
Definition: csv_row.hpp:335
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
Definition: csv_row.cpp:180
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
Definition: csv_row.hpp:382
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
Definition: csv_row.hpp:311
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
Definition: csv_row.cpp:34
iterator begin() const
Return an iterator pointing to the first field.
Definition: csv_row.cpp:171
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Definition: common.hpp:26
Defines the data type used for storing information about a CSV row.
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition: common.hpp:166
The all encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.
Definition: common.hpp:75