csv/data__type_8hpp_source.html

 #pragma once

 #include <cassert>
 #include <cctype>
 #include <cmath>
 #include <cstddef>
 #include <limits>
 #include <string>
 #include <type_traits>

 #include "common.hpp"


 namespace csv {

     enum class DataType {

         UNKNOWN = -1,

         CSV_NULL,

         CSV_STRING,

         CSV_INT8,

         CSV_INT16,

         CSV_INT32,

         CSV_INT64,

         CSV_BIGINT,

         CSV_DOUBLE

     };


     // Compile-time guards on the enumerator order of DataType. Code in this file
     // compares DataType values relationally, so an accidental reordering of the
     // enum must fail at compile time instead of silently changing behavior.
     static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types.");

     static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types.");

     static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types.");


     namespace internals {

         /** Compute 10 to the power of n.
          *
          *  @tparam T  Arithmetic type of the exponent (signed, unsigned or floating point)
          *  @param  n  Exponent; negative values produce fractional results
          *  @return    10^n as a long double (1 when n is zero)
          *
          *  The result is built with |n| scaling steps. Positive exponents multiply
          *  by 10 and negative exponents divide by 10. Dividing by the exactly
          *  representable value 10 (instead of multiplying by the double literal
          *  0.1, which is only accurate to double precision) keeps every step
          *  accurate to long double precision, so values such as 1e-3 round to the
          *  expected double.
          */
         template<typename T>
         HEDLEY_CONST CONSTEXPR_14
         long double pow10(const T& n) noexcept {
             const bool positive_exponent = n > 0;

             // Make the step count non-negative regardless of the sign of n
             const T steps = positive_exponent ? n : -n;

             long double ret = 1;

             if (positive_exponent) {
                 for (T i = 0; i < steps; i++) {
                     ret *= 10;
                 }
             }
             else {
                 for (T i = 0; i < steps; i++) {
                     ret /= 10;
                 }
             }

             return ret;
         }


         /** Compute 10 to the power of n (specialization for unsigned exponents).
          *
          *  @param n  Non-negative exponent
          *  @return   10^n as a long double (1 when n is zero)
          *
          *  An unsigned exponent can never be negative, so the result is only
          *  ever scaled upwards by factors of 10.
          */
         template<>
         HEDLEY_CONST CONSTEXPR_14
         long double pow10(const unsigned& n) noexcept {
             long double result = 1;

             // Count down the remaining factors of 10 to apply
             unsigned remaining = n;
             while (remaining > 0) {
                 result *= 10;
                 --remaining;
             }

             return result;
         }


 #ifndef DOXYGEN_SHOULD_SKIP_THIS

         // Lookup table from the byte size of an integral type to its DataType.
         // The slot for a type of N bytes is int_type_arr[N - 1] (this is how
         // type_num() indexes it); sizes without a matching enumerator hold
         // DataType::UNKNOWN.
         constexpr DataType int_type_arr[8] = {

             DataType::CSV_INT8,  // 1

             DataType::CSV_INT16, // 2

             DataType::UNKNOWN,   // 3 (no such type)

             DataType::CSV_INT32, // 4

             DataType::UNKNOWN,   // 5 (no such type)

             DataType::UNKNOWN,   // 6 (no such type)

             DataType::UNKNOWN,   // 7 (no such type)

             DataType::CSV_INT64  // 8

         };


         /** Map an integral C++ type to the DataType enumerator of the same byte size.
          *
          *  @tparam T  Integral type of at most 8 bytes (enforced at compile time)
          *  @return    The entry of int_type_arr for sizeof(T); DataType::UNKNOWN
          *             for sizes that have no dedicated enumerator
          */
         template<typename T>
         inline DataType type_num() {
             static_assert(std::is_integral<T>::value, "T should be an integral type.");
             static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8.");

             // The table is indexed by (byte size - 1)
             constexpr size_t slot = sizeof(T) - 1;
             return int_type_arr[slot];
         }


         /** All floating point types are reported as DataType::CSV_DOUBLE. */
         template<>
         inline DataType type_num<float>() {
             return DataType::CSV_DOUBLE;
         }

         template<>
         inline DataType type_num<double>() {
             return DataType::CSV_DOUBLE;
         }

         template<>
         inline DataType type_num<long double>() {
             return DataType::CSV_DOUBLE;
         }

         /** nullptr_t stands for a null field. */
         template<>
         inline DataType type_num<std::nullptr_t>() {
             return DataType::CSV_NULL;
         }

         /** std::string stands for a non-numeric (text) field. */
         template<>
         inline DataType type_num<std::string>() {
             return DataType::CSV_STRING;
         }


         // Forward declaration (carrying the default arguments) so that
         // _process_potential_exponential() can call data_type() before its
         // definition further down in this file.
         CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr,

             const char decimalsymbol = '.');

 #endif


         /** Given a byte size, return the largest number that can be stored in
          *  a signed integer of that size.
          *
          *  @tparam Bytes  Size in bytes; must be 1, 2, 4 or 8 (checked at compile time)
          *  @return        The maximum of the first built-in signed integer type whose
          *                 sizeof equals Bytes, converted to long double
          *
          *  Candidate types are tried from narrowest to widest, so the function
          *  does not assume a particular width for short, int or long.
          */
         template<size_t Bytes>
         CONSTEXPR_14 long double get_int_max() {
             static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
                 "Bytes must be 1, 2, 4 or 8.");

             IF_CONSTEXPR (sizeof(signed char) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<signed char>::max());
             }

             IF_CONSTEXPR (sizeof(short) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<short>::max());
             }

             IF_CONSTEXPR (sizeof(int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<int>::max());
             }

             IF_CONSTEXPR (sizeof(long int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<long int>::max());
             }

             IF_CONSTEXPR (sizeof(long long int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<long long int>::max());
             }

             // Only reached if the platform has no signed integer type of this size
             HEDLEY_UNREACHABLE();
         }


         /** Given a byte size, return the largest number that can be stored in
          *  an unsigned integer of that size.
          *
          *  @tparam Bytes  Size in bytes; must be 1, 2, 4 or 8 (checked at compile time)
          *  @return        The maximum of the first built-in unsigned integer type whose
          *                 sizeof equals Bytes, converted to long double
          *
          *  Candidate types are tried from narrowest to widest, so the function
          *  does not assume a particular width for short, int or long.
          */
         template<size_t Bytes>
         CONSTEXPR_14 long double get_uint_max() {
             static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
                 "Bytes must be 1, 2, 4 or 8.");

             IF_CONSTEXPR(sizeof(unsigned char) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<unsigned char>::max());
             }

             IF_CONSTEXPR(sizeof(unsigned short) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<unsigned short>::max());
             }

             IF_CONSTEXPR(sizeof(unsigned int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<unsigned int>::max());
             }

             IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<unsigned long int>::max());
             }

             IF_CONSTEXPR(sizeof(unsigned long long int) == Bytes) {
                 return static_cast<long double>(std::numeric_limits<unsigned long long int>::max());
             }

             // Only reached if the platform has no unsigned integer type of this size
             HEDLEY_UNREACHABLE();
         }


         /** Largest number that can be stored in an 8-bit integer */
         CONSTEXPR_VALUE_14 long double CSV_INT8_MAX = get_int_max<1>();


         /** Largest number that can be stored in a 16-bit integer */
         CONSTEXPR_VALUE_14 long double CSV_INT16_MAX = get_int_max<2>();


         /** Largest number that can be stored in a 32-bit integer */
         CONSTEXPR_VALUE_14 long double CSV_INT32_MAX = get_int_max<4>();


         /** Largest number that can be stored in a 64-bit integer */
         CONSTEXPR_VALUE_14 long double CSV_INT64_MAX = get_int_max<8>();


         /** Largest number that can be stored in an 8-bit unsigned integer */
         CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX = get_uint_max<1>();


         /** Largest number that can be stored in a 16-bit unsigned integer */
         CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX = get_uint_max<2>();


         /** Largest number that can be stored in a 32-bit unsigned integer */
         CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX = get_uint_max<4>();


         /** Largest number that can be stored in a 64-bit unsigned integer */
         CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX = get_uint_max<8>();


         /** Finish parsing a number that is (possibly) written in scientific notation.
          *
          *  @param exponential_part  Text following the 'e'/'E' (a leading '+' already removed)
          *  @param coeff             Signed coefficient parsed from the text before the 'e'/'E'
          *  @param out               If non-null, receives coeff * 10^exponent on success
          *  @return DataType::CSV_DOUBLE when exponential_part is an integer,
          *          DataType::CSV_STRING otherwise (out is then left untouched)
          */
         HEDLEY_PRIVATE CONSTEXPR_14
         DataType _process_potential_exponential(
             csv::string_view exponential_part,
             const long double& coeff,
             long double * const out) {
             long double exponent = 0;
             const DataType result = data_type(exponential_part, &exponent);

             // Exponents in scientific notation should not be decimal numbers
             if (result < DataType::CSV_INT8 || result >= DataType::CSV_DOUBLE) {
                 return DataType::CSV_STRING;
             }

             if (out) {
                 // pow10() performs |exponent| loop iterations, so an input such as
                 // "1e99999999999999999999" would otherwise keep the parser busy
                 // (practically) forever. Beyond twice the largest decimal exponent
                 // of long double the power has already saturated to infinity or
                 // zero, so clamping the exponent does not change the result.
                 const long double saturation =
                     2.0L * std::numeric_limits<long double>::max_exponent10;

                 long double bounded_exponent = exponent;
                 if (bounded_exponent > saturation) {
                     bounded_exponent = saturation;
                 }
                 else if (bounded_exponent < -saturation) {
                     bounded_exponent = -saturation;
                 }

                 *out = coeff * pow10(bounded_exponent);
             }

             return DataType::CSV_DOUBLE;
         }


         /** Given the absolute value of an integer, determine what numeric type it fits in.
          *
          *  @param number  Non-negative magnitude of the parsed integer
          *  @return        The narrowest of CSV_INT8 .. CSV_INT64 whose maximum is not
          *                 exceeded, or CSV_BIGINT when even CSV_INT64_MAX is too small
          */
         HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR_14
         DataType _determine_integral_type(const long double& number) noexcept {
             // We can assume number is always non-negative
             assert(number >= 0);

             // Upper bounds and their types, ordered from narrowest to widest
             const long double upper_bounds[4] = {
                 internals::CSV_INT8_MAX,
                 internals::CSV_INT16_MAX,
                 internals::CSV_INT32_MAX,
                 internals::CSV_INT64_MAX
             };
             const DataType fitting_types[4] = {
                 DataType::CSV_INT8,
                 DataType::CSV_INT16,
                 DataType::CSV_INT32,
                 DataType::CSV_INT64
             };

             for (size_t k = 0; k < 4; k++) {
                 if (number <= upper_bounds[k]) {
                     return fitting_types[k];
                 }
             }

             // Conversion to long long will cause an overflow
             return DataType::CSV_BIGINT;
         }


         /** Distinguishes numeric from other text values.
          *
          *  @param in             Field text to classify
          *  @param out            If non-null, receives the parsed value when the
          *                        field is numeric
          *  @param decimalSymbol  Character separating integral and fractional digits
          *  @return
          *    - DataType::CSV_NULL   for an empty or whitespace-only field
          *    - DataType::CSV_STRING for anything that is not a number, including a
          *      sign or decimal symbol that is not accompanied by any digit
          *    - DataType::CSV_INT8 .. DataType::CSV_BIGINT for integers, chosen by magnitude
          *    - DataType::CSV_DOUBLE for decimals and scientific notation
          *
          *  Leading and trailing spaces around a number are tolerated; spaces or
          *  signs inside a number (e.g. '510 123 4567', '510-123-4567') make the
          *  field a string.
          */
         CONSTEXPR_14
         DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
             // Empty string --> NULL
             if (in.size() == 0)
                 return DataType::CSV_NULL;

             bool ws_allowed = true,
                 dot_allowed = true,
                 digit_allowed = true,
                 is_negative = false,
                 has_digit = false,
                 has_symbol = false, // Saw a sign or a decimal symbol
                 prob_float = false;

             long double integral_part = 0,
                 decimal_part = 0,
                 decimal_scale = 1; // 10^(number of fractional digits seen so far)

             for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
                 const char current = in[i];

                 switch (current) {
                 case ' ':
                     if (!ws_allowed) {
                         // ws_allowed is only cleared after a digit was read, so i >= 1 here
                         const char previous = in[i - 1];
                         if (previous >= '0' && previous <= '9') {
                             // Trailing whitespace: no further digits may follow
                             digit_allowed = false;
                             ws_allowed = true;
                         }
                         else {
                             // Ex: '510 123 4567'
                             return DataType::CSV_STRING;
                         }
                     }
                     break;
                 case '+':
                     if (!ws_allowed) {
                         return DataType::CSV_STRING;
                     }

                     has_symbol = true;
                     break;
                 case '-':
                     if (!ws_allowed) {
                         // Ex: '510-123-4567'
                         return DataType::CSV_STRING;
                     }

                     has_symbol = true;
                     is_negative = true;
                     break;
                 // case decimalSymbol: not allowed because decimalSymbol is not a literal,
                 // it is handled in the default block
                 case 'e':
                 case 'E':
                     // Process scientific notation. A coefficient digit must have
                     // been read and at least one character must follow the 'e';
                     // checking the length first keeps in[i + 1] below in bounds
                     // (an input such as "1.5e" used to read past the end).
                     if (has_digit && i + 1 < ilen
                         && (prob_float || (in[i - 1] >= '0' && in[i - 1] <= '9'))) {
                         size_t exponent_start_idx = i + 1;

                         // Strip out plus sign
                         if (in[i + 1] == '+') {
                             exponent_start_idx++;
                         }

                         const long double coefficient = integral_part + decimal_part;
                         return _process_potential_exponential(
                             in.substr(exponent_start_idx),
                             is_negative ? -coefficient : coefficient,
                             out
                         );
                     }

                     return DataType::CSV_STRING;
                 default: {
                     const short digit = static_cast<short>(current - '0');
                     if (digit >= 0 && digit <= 9) {
                         // Process digit
                         has_digit = true;

                         if (!digit_allowed)
                             return DataType::CSV_STRING;
                         else if (ws_allowed) // Ex: '510 456'
                             ws_allowed = false;

                         // Build current number
                         if (prob_float) {
                             // Running power of ten; gives the same divisor as
                             // recomputing 10^k for every digit, without the
                             // quadratic cost
                             decimal_scale *= 10;
                             decimal_part += digit / decimal_scale;
                         }
                         else {
                             integral_part = (integral_part * 10) + digit;
                         }
                     }
                     // case decimalSymbol: not allowed because decimalSymbol is not a literal.
                     else if (dot_allowed && current == decimalSymbol) {
                         dot_allowed = false;
                         prob_float = true;
                         has_symbol = true;
                     }
                     else {
                         return DataType::CSV_STRING;
                     }
                 }
                 }
             }

             // No non-numeric/non-whitespace characters found
             if (has_digit) {
                 const long double number = integral_part + decimal_part;
                 if (out) {
                     *out = is_negative ? -number : number;
                 }

                 return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number);
             }

             // A sign or decimal symbol without any digit (e.g. "-", "+", ".")
             // is text, not an empty field
             if (has_symbol) {
                 return DataType::CSV_STRING;
             }

             // Just whitespace
             return DataType::CSV_NULL;
         }

     }

 }

common.hpp
A standalone header file containing shared code.

IF_CONSTEXPR
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition: common.hpp:84

csv::internals::CSV_INT16_MAX
CONSTEXPR_VALUE_14 long double CSV_INT16_MAX
Largest number that can be stored in a 16-bit integer.
Definition: data_type.hpp:167

csv::internals::CSV_INT32_MAX
CONSTEXPR_VALUE_14 long double CSV_INT32_MAX
Largest number that can be stored in a 32-bit integer.
Definition: data_type.hpp:170

csv::internals::CSV_UINT16_MAX
CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX
Largest number that can be stored in a 16-bit unsigned integer.
Definition: data_type.hpp:179

csv::internals::data_type
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
Definition: data_type.hpp:242

csv::internals::CSV_UINT32_MAX
CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX
Largest number that can be stored in a 32-bit unsigned integer.
Definition: data_type.hpp:182

csv::internals::_process_potential_exponential
HEDLEY_PRIVATE CONSTEXPR_14 DataType _process_potential_exponential(csv::string_view exponential_part, const long double &coeff, long double *const out)
Given a pointer to the start of the exponential part of a number written (possibly) ...
Definition: data_type.hpp:192

csv::internals::CSV_INT64_MAX
CONSTEXPR_VALUE_14 long double CSV_INT64_MAX
Largest number that can be stored in a 64-bit integer.
Definition: data_type.hpp:173

csv::internals::CSV_INT8_MAX
CONSTEXPR_VALUE_14 long double CSV_INT8_MAX
Largest number that can be stored in an 8-bit integer.
Definition: data_type.hpp:164

csv::internals::CSV_UINT64_MAX
CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX
Largest number that can be stored in a 64-bit unsigned integer.
Definition: data_type.hpp:185

csv::internals::CSV_UINT8_MAX
CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX
Largest number that can be stored in an 8-bit unsigned integer.
Definition: data_type.hpp:176

csv::internals::pow10
HEDLEY_CONST CONSTEXPR_14 long double pow10(const T &n) noexcept
Compute 10 to the power of n.
Definition: data_type.hpp:40

csv::internals::_determine_integral_type
HEDLEY_PRIVATE HEDLEY_PURE CONSTEXPR_14 DataType _determine_integral_type(const long double &number) noexcept
Given the absolute value of an integer, determine what numeric type it fits in.
Definition: data_type.hpp:212

csv::internals::get_uint_max
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
Definition: data_type.hpp:136

csv::internals::get_int_max
CONSTEXPR_14 long double get_int_max()
Given a byte size, return the largest number that can be stored in an integer of that size.
Definition: data_type.hpp:105

csv
The all encompassing namespace.
Definition: basic_csv_parser.cpp:3

csv::DataType
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition: data_type.hpp:20

csv::DataType::CSV_INT64
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)

csv::DataType::CSV_DOUBLE
@ CSV_DOUBLE
Floating point value.

csv::DataType::CSV_NULL
@ CSV_NULL
Empty string.

csv::DataType::CSV_BIGINT
@ CSV_BIGINT
Value too big to fit in a 64-bit integer.

csv::DataType::CSV_INT16
@ CSV_INT16
16-bit integer (short on MSVC/GCC)

csv::DataType::CSV_INT32
@ CSV_INT32
32-bit integer (int on MSVC/GCC)

csv::DataType::CSV_INT8
@ CSV_INT8
8-bit integer

csv::DataType::CSV_STRING
@ CSV_STRING
Non-numeric string.

csv::string_view
nonstd::string_view string_view
The string_view class used by this library.
Definition: common.hpp:75