8 #include <condition_variable> 
   13 #include <unordered_map> 
   14 #include <unordered_set> 
   18 #include "../external/mio.hpp" 
   19 #include "col_names.hpp" 
   31             std::array<ParseFlags, 256> ret = {};
 
   32             for (
int i = -128; i < 128; i++) {
 
   33                 const int arr_idx = i + 128;
 
   38                 else if (ch == 
'\r' || ch == 
'\n')
 
   62             std::array<bool, 256> ret = {};
 
   63             for (
int i = -128; i < 128; i++) {
 
   64                 const int arr_idx = i + 128;
 
   68                 for (
size_t j = 0; j < n_chars; j++) {
 
   69                     if (ws_chars[j] == ch) {
 
   96             ThreadSafeDeque(
size_t notify_size = 100) : _notify_size(notify_size) {};
 
   98                 this->data = other.data;
 
   99                 this->_notify_size = other._notify_size;
 
  106             void clear() noexcept { this->data.clear(); }
 
  108             bool empty() 
const noexcept {
 
  109                 return this->data.empty();
 
  112             T& front() noexcept {
 
  113                 return this->data.front();
 
  116             T& operator[](
size_t n) {
 
  117                 return this->data[n];
 
  120             void push_back(T&& item) {
 
  121                 std::lock_guard<std::mutex> lock{ this->_lock };
 
  122                 this->data.push_back(std::move(item));
 
  124                 if (this->size() >= _notify_size) {
 
  125                     this->_cond.notify_all();
 
  129             T pop_front() noexcept {
 
  130                 std::lock_guard<std::mutex> lock{ this->_lock };
 
  131                 T item = std::move(data.front());
 
  136             size_t size() 
const noexcept { 
return this->data.size(); }
 
  139             constexpr 
bool is_waitable() const noexcept { 
return this->_is_waitable; }
 
  147                 std::unique_lock<std::mutex> lock{ this->_lock };
 
  148                 this->_cond.wait(lock, [
this] { 
return this->size() >= _notify_size || !this->
is_waitable(); });
 
  152             typename std::deque<T>::iterator begin() noexcept {
 
  153                 return this->data.begin();
 
  156             typename std::deque<T>::iterator end() noexcept {
 
  157                 return this->data.end();
 
  162                 std::unique_lock<std::mutex> lock{ this->_lock };
 
  163                 this->_is_waitable = 
true;
 
  164                 this->_cond.notify_all();
 
  169                 std::unique_lock<std::mutex> lock{ this->_lock };
 
  170                 this->_is_waitable = 
false;
 
  171                 this->_cond.notify_all();
 
  175             bool _is_waitable = 
false;
 
  178             std::condition_variable _cond;
 
  182         constexpr 
const int UNINITIALIZED_FIELD = -1;
 
  188     namespace internals {
 
  205             bool eof() { 
return this->_eof; }
 
  208             virtual void next(
size_t bytes) = 0;
 
  213             CONSTEXPR_17 
ParseFlags parse_flag(
const char ch) 
const noexcept {
 
  217             CONSTEXPR_17 
ParseFlags compound_parse_flag(
const char ch) 
const noexcept {
 
  224             void set_output(
RowCollection& rows) { this->_records = &rows; }
 
  230             RawCSVDataPtr data_ptr = 
nullptr;
 
  231             ColNamesPtr _col_names = 
nullptr;
 
  232             CSVFieldList* fields = 
nullptr;
 
  233             int field_start = UNINITIALIZED_FIELD;
 
  234             size_t field_length = 0;
 
  264             bool quote_escape = 
false;
 
  265             bool field_has_double_quote = 
false;
 
  271             bool unicode_bom_scan = 
false;
 
  272             bool _utf8_bom = 
false;
 
  277             CONSTEXPR_17 
bool ws_flag(
const char ch) 
const noexcept {
 
  278                 return _ws_flags.data()[ch + 128];
 
  281             size_t& current_row_start() {
 
  282                 return this->current_row.data_start;
 
  285             void parse_field() noexcept;
 
  294             void trim_utf8_bom();
 
  300         template<typename TStream>
 
  307                 const ColNamesPtr& col_names = 
nullptr 
  315                 _source(std::move(source))
 
  321                 if (this->
eof()) 
return;
 
  324                 this->data_ptr->_data = std::make_shared<std::string>();
 
  327                     const auto start = _source.tellg();
 
  328                     _source.seekg(0, std::ios::end);
 
  329                     const auto end = _source.tellg();
 
  330                     _source.seekg(0, std::ios::beg);
 
  336                 size_t length = std::min(
source_size - stream_pos, bytes);
 
  337                 std::unique_ptr<char[]> buff(
new char[length]);
 
  338                 _source.seekg(stream_pos, std::ios::beg);
 
  339                 _source.read(buff.get(), length);
 
  340                 stream_pos = _source.tellg();
 
  341                 ((std::string*)(this->data_ptr->_data.get()))->assign(buff.get(), length);
 
  344                 this->data_ptr->data = *((std::string*)this->data_ptr->_data.get());
 
  347                 this->current_row = 
CSVRow(this->data_ptr);
 
  348                 size_t remainder = this->
parse();
 
  355                     this->stream_pos -= (length - remainder);
 
  361             size_t stream_pos = 0;
 
  377                 const ColNamesPtr& col_names = 
nullptr 
  379                 this->_filename = filename.data();
 
  380                 this->source_size = get_file_size(filename);
 
  385             void next(
size_t bytes) 
override;
 
  388             std::string _filename;
 
Data structure for representing CSV rows.
Abstract base class which provides CSV parsing logic.
CONSTEXPR bool no_chunk() const
Whether or not source needs to be read in chunks.
ParseFlagMap _parse_flags
An array where the (i + 128)th slot gives the ParseFlags for ASCII character i.
void reset_data_ptr()
Create a new RawCSVDataPtr for a new chunk of data.
bool eof()
Whether or not we have reached the end of source.
void end_feed()
Indicate the last block of data has been parsed.
size_t parse()
Parse the current chunk of data.
virtual void next(size_t bytes)=0
Parse the next block of data.
size_t source_size
The size of the incoming CSV.
CONSTEXPR bool utf8_bom() const
Whether or not this CSV has a UTF-8 byte order mark.
Parser for memory-mapped files.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
void next(size_t bytes=ITERATION_CHUNK_SIZE) override
Parse the next block of data.
A std::deque wrapper which allows multiple read and write threads to concurrently access it, along with letting read threads wait for items to become available.
void wait()
Wait for an item to become available.
void notify_all()
Tell listeners that this deque is actively being pushed to.
constexpr bool is_waitable() const noexcept
Returns true if a thread is actively pushing items to this deque.
void kill_all()
Tell all listeners to stop.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines the data type used for storing information about a CSV row.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter)
Create a vector v where each index i corresponds to the ASCII number for a character and v[i + 128] gives the ParseFlags value for that character.
constexpr size_t ITERATION_CHUNK_SIZE
For functions that lazy load a large CSV, this determines how many bytes are read at a time.
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char *ws_chars, size_t n_chars)
Create a vector v where each index i corresponds to the ASCII number for a character c and v[i + 128] is true if c is one of the given whitespace characters.
The all encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.