6 std::ifstream infile(std::string(filename), std::ios::binary);
7 const auto start = infile.tellg();
8 infile.seekg(0, std::ios::end);
9 const auto end = infile.tellg();
15 return get_csv_head(filename, get_file_size(filename));
19 const size_t bytes = 500000;
21 std::error_code error;
22 size_t length = std::min((
size_t)file_size, bytes);
23 auto mmap = mio::make_mmap_source(std::string(filename), 0, length, error);
26 throw std::runtime_error(
"Cannot open file " + std::string(filename));
29 return std::string(mmap.begin(), mmap.end());
33 #pragma region IBasicCVParser
37 const ColNamesPtr& col_names
38 ) : _col_names(col_names) {
39 if (format.no_quote) {
47 format.trim_chars.data(), format.trim_chars.size()
54 bool empty_last_field = this->data_ptr
55 && this->data_ptr->_data
56 && !this->data_ptr->data.empty()
57 && (parse_flag(this->data_ptr->data.back()) == ParseFlags::DELIMITER
58 || parse_flag(this->data_ptr->data.back()) == ParseFlags::QUOTE);
61 if (this->field_length > 0 || empty_last_field) {
66 if (this->current_row.size() > 0)
70 CSV_INLINE void IBasicCSVParser::parse_field() noexcept {
72 auto& in = this->data_ptr->data;
75 while (data_pos < in.size() && ws_flag(in[data_pos]))
78 if (field_start == UNINITIALIZED_FIELD)
79 field_start = (int)(data_pos - current_row_start());
84 while (data_pos < in.size() && compound_parse_flag(in[data_pos]) == ParseFlags::NOT_SPECIAL)
87 field_length = data_pos - (field_start + current_row_start());
91 for (
size_t j = data_pos - 1; ws_flag(in[j]) && this->field_length > 0; j--)
98 if (field_has_double_quote) {
100 field_start == UNINITIALIZED_FIELD ? 0 : (
unsigned int)field_start,
104 field_has_double_quote =
false;
108 fields->emplace_back(
109 field_start == UNINITIALIZED_FIELD ? 0 : (
unsigned int)field_start,
114 current_row.row_length++;
117 field_start = UNINITIALIZED_FIELD;
126 this->quote_escape =
false;
128 this->current_row_start() = 0;
129 this->trim_utf8_bom();
131 auto& in = this->data_ptr->data;
132 while (this->data_pos < in.size()) {
133 switch (compound_parse_flag(in[this->data_pos])) {
134 case ParseFlags::DELIMITER:
139 case ParseFlags::NEWLINE:
143 while (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE)
151 this->current_row =
CSVRow(data_ptr, this->data_pos, fields->size());
154 case ParseFlags::NOT_SPECIAL:
158 case ParseFlags::QUOTE_ESCAPE_QUOTE:
159 if (data_pos + 1 == in.size())
return this->current_row_start();
160 else if (data_pos + 1 < in.size()) {
161 auto next_ch = parse_flag(in[data_pos + 1]);
162 if (next_ch >= ParseFlags::DELIMITER) {
163 quote_escape =
false;
167 else if (next_ch == ParseFlags::QUOTE) {
170 this->field_length += 2;
171 this->field_has_double_quote =
true;
177 this->field_length++;
183 if (this->field_length == 0) {
186 if (field_start == UNINITIALIZED_FIELD && data_pos < in.size() && !ws_flag(in[data_pos]))
187 field_start = (int)(data_pos - current_row_start());
192 this->field_length++;
199 return this->current_row_start();
203 current_row.row_length = fields->size() - current_row.fields_start;
204 this->_records->push_back(std::move(current_row));
208 this->data_ptr = std::make_shared<RawCSVData>();
209 this->data_ptr->parse_flags = this->_parse_flags;
210 this->data_ptr->col_names = this->_col_names;
211 this->fields = &(this->data_ptr->fields);
214 CSV_INLINE void IBasicCSVParser::trim_utf8_bom() {
215 auto& data = this->data_ptr->data;
217 if (!this->unicode_bom_scan && data.size() >= 3) {
218 if (data[0] ==
'\xEF' && data[1] ==
'\xBB' && data[2] ==
'\xBF') {
220 this->_utf8_bom =
true;
223 this->unicode_bom_scan =
true;
231 #pragma region Specializations
235 this->field_start = UNINITIALIZED_FIELD;
236 this->field_length = 0;
237 this->reset_data_ptr();
240 size_t length = std::min(this->source_size - this->mmap_pos, bytes);
241 std::error_code error;
242 this->data_ptr->_data = std::make_shared<mio::basic_mmap_source<char>>(mio::make_mmap_source(this->_filename, this->mmap_pos, length, error));
243 this->mmap_pos += length;
244 if (error)
throw error;
246 auto mmap_ptr = (mio::basic_mmap_source<char>*)(this->data_ptr->_data.get());
249 this->data_ptr->data =
csv::string_view(mmap_ptr->data(), mmap_ptr->length());
252 this->current_row =
CSVRow(this->data_ptr);
253 size_t remainder = this->
parse();
255 if (this->mmap_pos == this->source_size || no_chunk()) {
260 this->mmap_pos -= (length - remainder);
Contains the main CSV parsing algorithm and various utility functions.
Data structure for representing CSV rows.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter)
Create a vector v where each index i corresponds to the ASCII number for a character and,...
constexpr size_t ITERATION_CHUNK_SIZE
For functions that lazy-load a large CSV, this determines how many bytes are read at a time.
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char *ws_chars, size_t n_chars)
Create a vector v where each index i corresponds to the ASCII number for a character c and,...
The all-encompassing namespace.
CSVReader parse(csv::string_view in, CSVFormat format)
Shorthand function for parsing an in-memory CSV string.
nonstd::string_view string_view
The string_view class used by this library.