Vince's CSV Parser
data_type.h
Go to the documentation of this file.
1 
5 #pragma once
6 #include <cmath>
7 #include <cctype>
8 #include <string>
9 #include <cassert>
10 
11 #include "common.hpp"
12 
13 namespace csv {
/** Enumerates the different CSV field types that are recognized by this library.
 *
 *  The enumerators are deliberately ordered: string before the integer types,
 *  smaller integer types before larger ones, and integers before floating point.
 *  Range comparisons on DataType values elsewhere in this file depend on that
 *  order, which the static_asserts below enforce at compile time.
 */
20  enum class DataType {
21  UNKNOWN = -1, /**< Placeholder; int_type_arr uses it for byte sizes with no matching integer type */
22  CSV_NULL,     /**< Empty string */
23  CSV_STRING,   /**< Non-numeric string */
24  CSV_INT8,     /**< 8-bit integer */
25  CSV_INT16,    /**< 16-bit integer (short on MSVC/GCC) */
26  CSV_INT32,    /**< 32-bit integer (int on MSVC/GCC) */
27  CSV_INT64,    /**< 64-bit integer (long long on MSVC/GCC) */
28  CSV_DOUBLE    /**< Floating point value */
29  };
30 
// Compile-time checks that the enumerator ordering described above is not broken
31  static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types.");
32  static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types.");
33  static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types.");
34 
35  namespace internals {
37  template<typename T>
38  HEDLEY_CONST CONSTEXPR_14
39  long double pow10(const T& n) noexcept {
40  long double multiplicand = n > 0 ? 10 : 0.1,
41  ret = 1;
42 
43  // Make all numbers positive
44  T iterations = n > 0 ? n : -n;
45 
46  for (T i = 0; i < iterations; i++) {
47  ret *= multiplicand;
48  }
49 
50  return ret;
51  }
52 
54  template<>
55  HEDLEY_CONST CONSTEXPR_14
56  long double pow10(const unsigned& n) noexcept {
57  long double multiplicand = n > 0 ? 10 : 0.1,
58  ret = 1;
59 
60  for (unsigned i = 0; i < n; i++) {
61  ret *= multiplicand;
62  }
63 
64  return ret;
65  }
66 
67 #ifndef DOXYGEN_SHOULD_SKIP_THIS
69  constexpr DataType int_type_arr[8] = {
70  DataType::CSV_INT8, // 1
72  DataType::UNKNOWN,
74  DataType::UNKNOWN,
75  DataType::UNKNOWN,
76  DataType::UNKNOWN,
78  };
79 
80  template<typename T>
81  inline DataType type_num() {
82  static_assert(std::is_integral<T>::value, "T should be an integral type.");
83  static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8.");
84  return int_type_arr[sizeof(T) - 1];
85  }
86 
87  template<> inline DataType type_num<float>() { return DataType::CSV_DOUBLE; }
88  template<> inline DataType type_num<double>() { return DataType::CSV_DOUBLE; }
89  template<> inline DataType type_num<long double>() { return DataType::CSV_DOUBLE; }
90  template<> inline DataType type_num<std::nullptr_t>() { return DataType::CSV_NULL; }
91  template<> inline DataType type_num<std::string>() { return DataType::CSV_STRING; }
92 
// Forward declaration of data_type(), which is defined further down in this file.
// It is needed because _process_potential_exponential() calls data_type() before
// that definition appears. The default argument lets callers omit `out` when
// they only want the classification and not the parsed value.
93  CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr);
94 #endif
95 
102  template<size_t Bytes>
103  CONSTEXPR_14 long double get_int_max() {
104  static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
105  "Bytes must be a power of 2 below 8.");
106 
107  IF_CONSTEXPR (sizeof(signed char) == Bytes) {
108  return (long double)std::numeric_limits<signed char>::max();
109  }
110 
111  IF_CONSTEXPR (sizeof(short) == Bytes) {
112  return (long double)std::numeric_limits<short>::max();
113  }
114 
115  IF_CONSTEXPR (sizeof(int) == Bytes) {
116  return (long double)std::numeric_limits<int>::max();
117  }
118 
119  IF_CONSTEXPR (sizeof(long int) == Bytes) {
120  return (long double)std::numeric_limits<long int>::max();
121  }
122 
123  IF_CONSTEXPR (sizeof(long long int) == Bytes) {
124  return (long double)std::numeric_limits<long long int>::max();
125  }
126 
127  HEDLEY_UNREACHABLE();
128  }
129 
133  template<size_t Bytes>
134  CONSTEXPR_14 long double get_uint_max() {
135  static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
136  "Bytes must be a power of 2 below 8.");
137 
138  IF_CONSTEXPR(sizeof(unsigned char) == Bytes) {
139  return (long double)std::numeric_limits<unsigned char>::max();
140  }
141 
142  IF_CONSTEXPR(sizeof(unsigned short) == Bytes) {
143  return (long double)std::numeric_limits<unsigned short>::max();
144  }
145 
146  IF_CONSTEXPR(sizeof(unsigned int) == Bytes) {
147  return (long double)std::numeric_limits<unsigned int>::max();
148  }
149 
150  IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) {
151  return (long double)std::numeric_limits<unsigned long int>::max();
152  }
153 
154  IF_CONSTEXPR(sizeof(unsigned long long int) == Bytes) {
155  return (long double)std::numeric_limits<unsigned long long int>::max();
156  }
157 
158  HEDLEY_UNREACHABLE();
159  }
160 
// Integer limits, stored as long double so they can be compared directly
// against the long double values produced while parsing.

/** Largest number that can be stored in an 8-bit integer */
162  CONSTEXPR_VALUE_14 long double CSV_INT8_MAX = get_int_max<1>();
163 
/** Largest number that can be stored in a 16-bit integer */
165  CONSTEXPR_VALUE_14 long double CSV_INT16_MAX = get_int_max<2>();
166 
/** Largest number that can be stored in a 32-bit integer */
168  CONSTEXPR_VALUE_14 long double CSV_INT32_MAX = get_int_max<4>();
169 
/** Largest number that can be stored in a 64-bit integer */
171  CONSTEXPR_VALUE_14 long double CSV_INT64_MAX = get_int_max<8>();
172 
/** Largest number that can be stored in an 8-bit unsigned integer */
174  CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX = get_uint_max<1>();
175 
/** Largest number that can be stored in a 16-bit unsigned integer */
177  CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX = get_uint_max<2>();
178 
/** Largest number that can be stored in a 32-bit unsigned integer */
180  CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX = get_uint_max<4>();
181 
/** Largest number that can be stored in a 64-bit unsigned integer */
183  CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX = get_uint_max<8>();
184 
189  HEDLEY_PRIVATE CONSTEXPR_14
191  csv::string_view exponential_part,
192  const long double& coeff,
193  long double * const out) {
194  long double exponent = 0;
195  auto result = data_type(exponential_part, &exponent);
196 
197  // Exponents in scientific notation should not be decimal numbers
198  if (result >= DataType::CSV_INT8 && result < DataType::CSV_DOUBLE) {
199  if (out) *out = coeff * pow10(exponent);
200  return DataType::CSV_DOUBLE;
201  }
202 
203  return DataType::CSV_STRING;
204  }
205 
209  HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR_14
210  DataType _determine_integral_type(const long double& number) noexcept {
211  // We can assume number is always non-negative
212  assert(number >= 0);
213 
214  if (number <= internals::CSV_INT8_MAX)
215  return DataType::CSV_INT8;
216  else if (number <= internals::CSV_INT16_MAX)
217  return DataType::CSV_INT16;
218  else if (number <= internals::CSV_INT32_MAX)
219  return DataType::CSV_INT32;
220  else if (number <= internals::CSV_INT64_MAX)
221  return DataType::CSV_INT64;
222  else // Conversion to long long will cause an overflow
223  return DataType::CSV_DOUBLE;
224  }
225 
237  CONSTEXPR_14
238  DataType data_type(csv::string_view in, long double* const out) {
239  // Empty string --> NULL
240  if (in.size() == 0)
241  return DataType::CSV_NULL;
242 
243  bool ws_allowed = true,
244  neg_allowed = true,
245  dot_allowed = true,
246  digit_allowed = true,
247  has_digit = false,
248  prob_float = false;
249 
250  unsigned places_after_decimal = 0;
251  long double integral_part = 0,
252  decimal_part = 0;
253 
254  for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
255  const char& current = in[i];
256 
257  switch (current) {
258  case ' ':
259  if (!ws_allowed) {
260  if (isdigit(in[i - 1])) {
261  digit_allowed = false;
262  ws_allowed = true;
263  }
264  else {
265  // Ex: '510 123 4567'
266  return DataType::CSV_STRING;
267  }
268  }
269  break;
270  case '-':
271  if (!neg_allowed) {
272  // Ex: '510-123-4567'
273  return DataType::CSV_STRING;
274  }
275 
276  neg_allowed = false;
277  break;
278  case '.':
279  if (!dot_allowed) {
280  return DataType::CSV_STRING;
281  }
282 
283  dot_allowed = false;
284  prob_float = true;
285  break;
286  case 'e':
287  case 'E':
288  // Process scientific notation
289  if (prob_float || (i && i + 1 < ilen && isdigit(in[i - 1]))) {
290  size_t exponent_start_idx = i + 1;
291  prob_float = true;
292 
293  // Strip out plus sign
294  if (in[i + 1] == '+') {
295  exponent_start_idx++;
296  }
297 
299  in.substr(exponent_start_idx),
300  neg_allowed ? integral_part + decimal_part : -(integral_part + decimal_part),
301  out
302  );
303  }
304 
305  return DataType::CSV_STRING;
306  break;
307  default:
308  short digit = static_cast<short>(current - '0');
309  if (digit >= 0 && digit <= 9) {
310  // Process digit
311  has_digit = true;
312 
313  if (!digit_allowed)
314  return DataType::CSV_STRING;
315  else if (ws_allowed) // Ex: '510 456'
316  ws_allowed = false;
317 
318  // Build current number
319  if (prob_float)
320  decimal_part += digit / pow10(++places_after_decimal);
321  else
322  integral_part = (integral_part * 10) + digit;
323  }
324  else {
325  return DataType::CSV_STRING;
326  }
327  }
328  }
329 
330  // No non-numeric/non-whitespace characters found
331  if (has_digit) {
332  long double number = integral_part + decimal_part;
333  if (out) {
334  *out = neg_allowed ? number : -number;
335  }
336 
337  return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number);
338  }
339 
340  // Just whitespace
341  return DataType::CSV_NULL;
342  }
343  }
344 }
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition: common.hpp:84
CONSTEXPR_VALUE_14 long double CSV_INT16_MAX
Largest number that can be stored in a 16-bit integer.
Definition: data_type.h:165
CONSTEXPR_VALUE_14 long double CSV_INT32_MAX
Largest number that can be stored in a 32-bit integer.
Definition: data_type.h:168
CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX
Largest number that can be stored in a 16-bit unsigned integer.
Definition: data_type.h:177
CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX
Largest number that can be stored in a 32-bit unsigned integer.
Definition: data_type.h:180
HEDLEY_PRIVATE CONSTEXPR_14 DataType _process_potential_exponential(csv::string_view exponential_part, const long double &coeff, long double *const out)
Given the start of what may be the exponential part of a number written (possibly) in scientific notation, parse the exponent.
Definition: data_type.h:190
CONSTEXPR_VALUE_14 long double CSV_INT64_MAX
Largest number that can be stored in a 64-bit integer.
Definition: data_type.h:171
CONSTEXPR_VALUE_14 long double CSV_INT8_MAX
Largest number that can be stored in an 8-bit integer.
Definition: data_type.h:162
CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX
Largest number that can be stored in a 64-bit unsigned integer.
Definition: data_type.h:183
CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX
Largest number that can be stored in an 8-bit unsigned integer.
Definition: data_type.h:174
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out)
Distinguishes numeric from other text values.
Definition: data_type.h:238
HEDLEY_CONST CONSTEXPR_14 long double pow10(const T &n) noexcept
Compute 10 to the power of n.
Definition: data_type.h:39
HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR_14 DataType _determine_integral_type(const long double &number) noexcept
Given the absolute value of an integer, determine what numeric type it fits in.
Definition: data_type.h:210
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
Definition: data_type.h:134
CONSTEXPR_14 long double get_int_max()
Given a byte size, return the largest number that can be stored in an integer of that size.
Definition: data_type.h:103
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition: data_type.h:20
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_INT16
16-bit integer (short on MSVC/GCC)
@ CSV_INT32
32-bit integer (int on MSVC/GCC)
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
nonstd::string_view string_view
The string_view class used by this library.
Definition: common.hpp:75