11             std::stringstream ret;
 
   12             for (
size_t i = 0; i < row.size(); i++) {
 
   14                 if (i + 1 < row.size()) ret << delim;
 
   30             auto trim_chars = format.get_trim_chars();
 
   31             std::stringstream source(head.data());
 
   35             parser.set_output(rows);
 
   38             return CSVRow(std::move(rows[format.get_header()]));
 
   43             std::unordered_map<size_t, size_t> row_tally = { { 0, 0 } };
 
   46             std::unordered_map<size_t, size_t> row_when = { { 0, 0 } };
 
   49             std::stringstream source(head.data());
 
   52             StreamParser<std::stringstream> parser(source, format);
 
   53             parser.set_output(rows);
 
   56             for (
size_t i = 0; i < rows.size(); i++) {
 
   61                     if (row_tally.find(row.size()) != row_tally.end()) {
 
   62                         row_tally[row.size()]++;
 
   65                         row_tally[row.size()] = 1;
 
   66                         row_when[row.size()] = i;
 
   71             double final_score = 0;
 
   72             size_t header_row = 0;
 
   76             for (
auto& pair : row_tally) {
 
   77                 auto row_size = pair.first;
 
   78                 auto row_count = pair.second;
 
   79                 double score = (double)(row_size * row_count);
 
   80                 if (score > final_score) {
 
   82                     header_row = row_when[row_size];
 
  101             size_t max_score = 0,
 
  103             char current_delim = delims[0];
 
  105             for (
char cand_delim : delims) {
 
  106                 auto result = calculate_score(head, format.
delimiter(cand_delim));
 
  108                 if ((
size_t)result.score > max_score) {
 
  109                     max_score = (size_t)result.score;
 
  110                     current_delim = cand_delim;
 
  111                     header = result.header;
 
  115             return { current_delim, (int)header };
 
  126         auto head = internals::get_csv_head(filename);
 
  129         if (format.guess_delim()) {
 
  130             auto guess_result = 
guess_format(filename, format.get_possible_delims());
 
  139         auto head = internals::get_csv_head(filename);
 
  155         auto head = internals::get_csv_head(filename);
 
  159         if (format.guess_delim()) {
 
  162             format.header = guess_result.header_row;
 
  163             this->_format = format;
 
  166         if (!format.col_names.empty())
 
  169         this->
parser = std::unique_ptr<Parser>(
new Parser(filename, format, this->
col_names)); 
 
  170         this->initial_read();
 
  180         new_format.col_names = this->
col_names->get_col_names();
 
  181         new_format.header = this->_format.header;
 
  192         return std::vector<std::string>();
 
  200         for (
size_t i = 0; i < _col_names.size(); i++)
 
  201             if (_col_names[i] == col_name) 
return (
int)i;
 
  207         if (!this->header_trimmed) {
 
  208             for (
int i = 0; i <= this->_format.header && !this->
records->empty(); i++) {
 
  209                 if (i == this->_format.header && this->col_names->empty()) {
 
  217             this->header_trimmed = 
true;
 
  227         this->
n_cols = names.size();
 
  246         this->
parser->next(bytes);
 
  248         if (!this->header_trimmed) {
 
  275                 if (this->
records->is_waitable())
 
  278                 else if (this->
parser->eof())
 
  283                     if (this->read_csv_worker.joinable())
 
  284                         this->read_csv_worker.join();
 
  289             else if (this->
records->front().size() != this->n_cols &&
 
  290                 this->_format.variable_column_policy != VariableColumnPolicy::KEEP) {
 
  291                 auto errored_row = this->
records->pop_front();
 
  293                 if (this->_format.variable_column_policy == VariableColumnPolicy::THROW) {
 
  294                     if (errored_row.size() < this->n_cols)
 
  301                 row = this->
records->pop_front();
 
CSVFormat get_format() const
Return the format of the original raw CSV.
int index_of(csv::string_view col_name) const
Return the index of the column name if found or csv::CSV_NOT_FOUND otherwise.
bool read_row(CSVRow &row)
Retrieve rows as CSVRow objects, returning true if more rows are available.
std::vector< std::string > get_col_names() const
Return the CSV's column names as a vector of strings.
CSVReader(csv::string_view filename, CSVFormat format=CSVFormat::guess_csv())
Reads an arbitrarily large CSV file using memory-mapped IO.
Data structure for representing CSV rows.
Parser for memory-mapped files.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
void next(size_t bytes=ITERATION_CHUNK_SIZE) override
Parse the next block of data.
A std::deque wrapper which allows multiple read and write threads to concurrently access it along wit...
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines functionality needed for basic CSV parsing.
std::unique_ptr< RowCollection > records
Queue of parsed CSV rows.
size_t _n_rows
How many rows (minus header) have been read so far.
bool read_csv(size_t bytes=internals::ITERATION_CHUNK_SIZE)
Read a chunk of CSV data.
internals::ColNamesPtr col_names
Pointer to an object containing column information.
void set_col_names(const std::vector< std::string > &)
Sets this reader's column names and associated data.
std::unique_ptr< internals::IBasicCSVParser > parser
Helper class which actually does the parsing.
size_t n_cols
The number of columns in this CSV.
constexpr size_t ITERATION_CHUNK_SIZE
For functions that lazy load a large CSV, this determines how many bytes are read at a time.
std::vector< std::string > _get_col_names(csv::string_view head, CSVFormat format)
Return a CSV's column names.
std::string format_row(const std::vector< std::string > &row, csv::string_view delim)
CSVGuessResult _guess_format(csv::string_view head, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
The all encompassing namespace.
std::vector< std::string > get_col_names(csv::string_view filename, CSVFormat format)
Return a CSV's column names.
internals::ThreadSafeDeque< CSVRow > RowCollection
Standard type for storing collection of rows.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
CSVGuessResult guess_format(csv::string_view filename, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
nonstd::string_view string_view
The string_view class used by this library.
Stores the inferred format of a CSV file.