8 #include <condition_variable>
13 #include <unordered_map>
14 #include <unordered_set>
18 #include "../external/mio.hpp"
19 #include "col_names.hpp"
31 std::array<ParseFlags, 256> ret = {};
32 for (
int i = -128; i < 128; i++) {
33 const int arr_idx = i + 128;
38 else if (ch ==
'\r' || ch ==
'\n')
62 std::array<bool, 256> ret = {};
63 for (
int i = -128; i < 128; i++) {
64 const int arr_idx = i + 128;
68 for (
size_t j = 0; j < n_chars; j++) {
69 if (ws_chars[j] == ch) {
96 ThreadSafeDeque(
size_t notify_size = 100) : _notify_size(notify_size) {};
98 this->data = other.data;
99 this->_notify_size = other._notify_size;
106 void clear() noexcept { this->data.clear(); }
108 bool empty()
const noexcept {
109 return this->data.empty();
112 T& front() noexcept {
113 return this->data.front();
116 T& operator[](
size_t n) {
117 return this->data[n];
120 void push_back(T&& item) {
121 std::lock_guard<std::mutex> lock{ this->_lock };
122 this->data.push_back(std::move(item));
124 if (this->size() >= _notify_size) {
125 this->_cond.notify_all();
129 T pop_front() noexcept {
130 std::lock_guard<std::mutex> lock{ this->_lock };
131 T item = std::move(data.front());
136 size_t size()
const noexcept {
return this->data.size(); }
139 constexpr
bool is_waitable() const noexcept {
return this->_is_waitable; }
147 std::unique_lock<std::mutex> lock{ this->_lock };
148 this->_cond.wait(lock, [
this] {
return this->size() >= _notify_size || !this->
is_waitable(); });
152 typename std::deque<T>::iterator begin() noexcept {
153 return this->data.begin();
156 typename std::deque<T>::iterator end() noexcept {
157 return this->data.end();
162 std::unique_lock<std::mutex> lock{ this->_lock };
163 this->_is_waitable =
true;
164 this->_cond.notify_all();
169 std::unique_lock<std::mutex> lock{ this->_lock };
170 this->_is_waitable =
false;
171 this->_cond.notify_all();
175 bool _is_waitable =
false;
178 std::condition_variable _cond;
182 constexpr
const int UNINITIALIZED_FIELD = -1;
188 namespace internals {
205 bool eof() {
return this->_eof; }
208 virtual void next(
size_t bytes) = 0;
213 CONSTEXPR_17
ParseFlags parse_flag(
const char ch)
const noexcept {
217 CONSTEXPR_17
ParseFlags compound_parse_flag(
const char ch)
const noexcept {
224 void set_output(
RowCollection& rows) { this->_records = &rows; }
230 RawCSVDataPtr data_ptr =
nullptr;
231 ColNamesPtr _col_names =
nullptr;
232 CSVFieldList* fields =
nullptr;
233 int field_start = UNINITIALIZED_FIELD;
234 size_t field_length = 0;
264 bool quote_escape =
false;
265 bool field_has_double_quote =
false;
271 bool unicode_bom_scan =
false;
272 bool _utf8_bom =
false;
277 CONSTEXPR_17
bool ws_flag(
const char ch)
const noexcept {
278 return _ws_flags.data()[ch + 128];
281 size_t& current_row_start() {
282 return this->current_row.data_start;
285 void parse_field() noexcept;
294 void trim_utf8_bom();
300 template<typename TStream>
307 const ColNamesPtr& col_names =
nullptr
315 _source(std::move(source))
321 if (this->
eof())
return;
324 this->data_ptr->_data = std::make_shared<std::string>();
327 const auto start = _source.tellg();
328 _source.seekg(0, std::ios::end);
329 const auto end = _source.tellg();
330 _source.seekg(0, std::ios::beg);
336 size_t length = std::min(
source_size - stream_pos, bytes);
337 std::unique_ptr<char[]> buff(
new char[length]);
338 _source.seekg(stream_pos, std::ios::beg);
339 _source.read(buff.get(), length);
340 stream_pos = _source.tellg();
341 ((std::string*)(this->data_ptr->_data.get()))->assign(buff.get(), length);
344 this->data_ptr->data = *((std::string*)this->data_ptr->_data.get());
347 this->current_row =
CSVRow(this->data_ptr);
348 size_t remainder = this->
parse();
355 this->stream_pos -= (length - remainder);
361 size_t stream_pos = 0;
377 const ColNamesPtr& col_names =
nullptr
379 this->_filename = filename.data();
380 this->source_size = get_file_size(filename);
385 void next(
size_t bytes)
override;
388 std::string _filename;
Data structure for representing CSV rows.
Abstract base class which provides CSV parsing logic.
CONSTEXPR bool no_chunk() const
Whether or not source needs to be read in chunks.
ParseFlagMap _parse_flags
An array where the (i + 128)th slot gives the ParseFlags for ASCII character i.
void reset_data_ptr()
Create a new RawCSVDataPtr for a new chunk of data.
bool eof()
Whether or not we have reached the end of source.
void end_feed()
Indicate the last block of data has been parsed.
size_t parse()
Parse the current chunk of data.
virtual void next(size_t bytes)=0
Parse the next block of data.
size_t source_size
The size of the incoming CSV.
CONSTEXPR bool utf8_bom() const
Whether or not this CSV has a UTF-8 byte order mark.
Parser for memory-mapped files.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
void next(size_t bytes=ITERATION_CHUNK_SIZE) override
Parse the next block of data.
A std::deque wrapper which allows multiple read and write threads to concurrently access it along wit...
void wait()
Wait for an item to become available.
void notify_all()
Tell listeners that this deque is actively being pushed to.
constexpr bool is_waitable() const noexcept
Returns true if a thread is actively pushing items to this deque.
void kill_all()
Tell all listeners to stop.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines the data type used for storing information about a CSV row.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter)
Create a vector v where each index i corresponds to the ASCII number for a character and,...
constexpr size_t ITERATION_CHUNK_SIZE
For functions that lazy load a large CSV, this determines how many bytes are read at a time.
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char *ws_chars, size_t n_chars)
Create a vector v where each index i corresponds to the ASCII number for a character c and,...
The all-encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.