10 #include <unordered_map>
11 #include <unordered_set>
18 #include "col_names.hpp"
22 class IBasicCSVParser;
// Error messages passed to the std::runtime_error exceptions thrown elsewhere
// in this file (referenced at the throw sites as internals::ERROR_*).
24 static const std::string ERROR_NAN =
"Not a number.";
25 static const std::string ERROR_OVERFLOW =
"Overflow error.";
26 static const std::string ERROR_FLOAT_TO_INT =
27 "Attempted to convert a floating point value to an integral type.";
28 static const std::string ERROR_NEG_TO_UNSIGNED =
"Negative numbers cannot be converted to unsigned types.";
35 RawCSVField(
size_t _start,
size_t _length,
bool _double_quote =
false) {
66 _single_buffer_capacity(single_buffer_capacity) {
75 _single_buffer_capacity(other._single_buffer_capacity) {
76 buffers = std::move(other.buffers);
77 _current_buffer_size = other._current_buffer_size;
82 for (
auto& buffer : buffers)
86 template <
class... Args>
87 void emplace_back(Args&&... args) {
88 if (this->_current_buffer_size == this->_single_buffer_capacity) {
92 *(_back++) = RawCSVField(std::forward<Args>(args)...);
93 _current_buffer_size++;
96 size_t size() const noexcept {
97 return this->_current_buffer_size + ((this->buffers.size() - 1) * this->_single_buffer_capacity);
100 RawCSVField& operator[](
size_t n)
const;
103 const size_t _single_buffer_capacity;
105 std::vector<RawCSVField*> buffers = {};
108 size_t _current_buffer_size = 0;
111 RawCSVField* _back =
nullptr;
120 std::shared_ptr<void> _data =
nullptr;
125 std::unordered_set<size_t> has_double_quotes = {};
128 std::unordered_map<size_t, std::string> double_quote_fields = {};
130 internals::ColNamesPtr col_names =
nullptr;
// Shared-ownership pointer to a RawCSVData instance.
135 using RawCSVDataPtr = std::shared_ptr<RawCSVData>;
148 operator std::string()
const {
149 return std::string(
"<CSVField> ") + std::string(this->sv);
180 template<
typename T = std::
string> T
get() {
184 throw std::runtime_error(internals::ERROR_NAN);
191 throw std::runtime_error(internals::ERROR_FLOAT_TO_INT);
195 if (this->value < 0) {
196 throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED);
206 throw std::runtime_error(internals::ERROR_OVERFLOW);
209 else if (internals::type_num<T>() < this->_type) {
210 throw std::runtime_error(internals::ERROR_OVERFLOW);
214 return static_cast<T
>(this->value);
244 static_assert(std::is_arithmetic<T>::value,
245 "T should be a numeric value.");
247 if (this->_type != DataType::UNKNOWN) {
290 long double value = 0;
293 CONSTEXPR_14
void get_value() noexcept {
297 if ((
int)_type < 0) {
// Construct a CSVRow that shares ownership of the given raw CSV data.
311 CSVRow(internals::RawCSVDataPtr _data) : data(_data) {}
// Construct a CSVRow over the given raw CSV data, also storing _data_start
// in data_start and _field_bounds in fields_start.
312 CSVRow(internals::RawCSVDataPtr _data,
size_t _data_start,
size_t _field_bounds)
313 : data(_data), data_start(_data_start), fields_start(_field_bounds) {}
325 std::string
to_json(
const std::vector<std::string>& subset = {})
const;
326 std::string
to_json_array(
const std::vector<std::string>& subset = {})
const;
330 return this->data->col_names->get_col_names();
337 operator std::vector<std::string>()
const;
345 #ifndef DOXYGEN_SHOULD_SKIP_THIS
347 using difference_type = int;
348 using pointer = std::shared_ptr<CSVField>;
350 using iterator_category = std::random_access_iterator_tag;
354 reference operator*()
const;
355 pointer operator->()
const;
361 iterator operator+(difference_type n)
const;
362 iterator operator-(difference_type n)
const;
366 return this->i == other.i;
376 const CSVRow * daddy =
nullptr;
377 std::shared_ptr<CSVField> field =
nullptr;
398 internals::RawCSVDataPtr data;
401 size_t data_start = 0;
404 size_t fields_start = 0;
407 size_t row_length = 0;
411 #pragma region CSVField::get Specializations
415 inline std::string CSVField::get<std::string>() {
416 return std::string(this->sv);
431 CONSTEXPR_14
long double CSVField::get<long double>() {
433 throw std::runtime_error(internals::ERROR_NAN);
438 #pragma endregion CSVField::get Specializations
445 return this->sv == other;
452 return this->sv == other;
456 inline std::ostream& operator << (std::ostream& os,
csv::CSVField const& value) {
457 os << std::string(value);
Data type representing individual CSV values.
CONSTEXPR_14 bool is_num() noexcept
Returns true if field is an integer or float.
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
CONSTEXPR_14 bool is_str() noexcept
Returns true if field is a non-numeric, non-empty string.
CONSTEXPR_14 bool is_int() noexcept
Returns true if field is an integer.
CONSTEXPR_14 bool is_null() noexcept
Returns true if field is an empty string or string of whitespace characters.
constexpr CSVField(csv::string_view _sv) noexcept
Constructs a CSVField from a string_view.
CONSTEXPR_14 DataType type() noexcept
Return the type of the underlying CSV data.
T get()
Returns the value cast to the requested type, performing type checking beforehand.
CONSTEXPR_14 bool operator==(T other) const noexcept
Compares the contents of this field to a numeric value.
CONSTEXPR_14 bool is_float() noexcept
Returns true if field is a floating point value.
CONSTEXPR csv::string_view get_sv() const noexcept
Return a string view over the field's contents.
bool try_parse_hex(int &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
A random access iterator over the contents of a CSV row.
CONSTEXPR bool operator==(const iterator &other) const noexcept
Two iterators are equal if they point to the same field.
Data structure for representing CSV rows.
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
std::string to_json(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON object.
CONSTEXPR bool empty() const noexcept
Indicates whether row is empty or not.
std::string to_json_array(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON array.
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
std::vector< std::string > get_col_names() const
Retrieve this row's associated column names.
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
iterator begin() const
Return an iterator pointing to the first field.
CSVRow(internals::RawCSVDataPtr _data)
Construct a CSVRow from a RawCSVDataPtr.
A class used for efficiently storing RawCSVField objects and expanding as necessary.
CSVFieldList(size_t single_buffer_capacity=(size_t)(internals::PAGE_SIZE/sizeof(RawCSVField)))
Construct a CSVFieldList which allocates blocks of a certain size.
Abstract base class which provides CSV parsing logic.
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Implements data type parsing functionality.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
bool is_equal(T a, T b, T epsilon=0.001)
const int PAGE_SIZE
Size of a memory page in bytes.
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_STRING
Non-numeric string.
nonstd::string_view string_view
The string_view class used by this library.
A class for storing raw CSV data and associated metadata.
A barebones class used for describing CSV fields.
size_t start
The start of the field, relative to the beginning of the row.
bool has_double_quote
Whether or not the field contains an escaped quote.
size_t length
The length of the field, ignoring quote escape characters.