Vince's CSV Parser
csv_stat.hpp
Go to the documentation of this file.
1 
5 #pragma once
6 #include <unordered_map>
7 #include <sstream>
8 #include <vector>
9 #include "csv_reader.hpp"
10 
11 namespace csv {
18  class CSVStat {
19  public:
20  using FreqCount = std::unordered_map<std::string, size_t>;
21  using TypeCount = std::unordered_map<DataType, size_t>;
22 
23  std::vector<long double> get_mean() const;
24  std::vector<long double> get_variance() const;
25  std::vector<long double> get_mins() const;
26  std::vector<long double> get_maxes() const;
27  std::vector<FreqCount> get_counts() const;
28  std::vector<TypeCount> get_dtypes() const;
29 
30  std::vector<std::string> get_col_names() const {
31  return this->reader.get_col_names();
32  }
33 
35  CSVStat(std::stringstream& source, CSVFormat format = CSVFormat());
36  private:
37  // An array of rolling averages
38  // Each index corresponds to the rolling mean for the column at said index
39  std::vector<long double> rolling_means;
40  std::vector<long double> rolling_vars;
41  std::vector<long double> mins;
42  std::vector<long double> maxes;
43  std::vector<FreqCount> counts;
44  std::vector<TypeCount> dtypes;
45  std::vector<long double> n;
46 
47  // Statistic calculators
48  void variance(const long double&, const size_t&);
49  void count(CSVField&, const size_t&);
50  void min_max(const long double&, const size_t&);
51  void dtype(CSVField&, const size_t&);
52 
53  void calc();
54  void calc_chunk();
55  void calc_worker(const size_t&);
56 
57  CSVReader reader;
58  std::deque<CSVRow> records = {};
59  };
60 }
Data type representing individual CSV values.
Definition: csv_row.hpp:143
Stores information about how to parse a CSV file.
Definition: csv_format.hpp:36
static CSVFormat guess_csv()
CSVFormat for guessing the delimiter.
Definition: csv_format.hpp:126
Main class for parsing CSVs from files and in-memory sources.
Definition: csv_reader.hpp:57
std::vector< std::string > get_col_names() const
Return the CSV's column names as a vector of strings.
Definition: csv_reader.cpp:187
Class for calculating statistics from CSV files and in-memory sources.
Definition: csv_stat.hpp:18
std::vector< long double > get_mean() const
Return current means.
Definition: csv_stat.cpp:25
std::vector< long double > get_variance() const
Return current variances.
Definition: csv_stat.cpp:34
CSVStat(csv::string_view filename, CSVFormat format=CSVFormat::guess_csv())
Calculate statistics for an arbitrarily large file.
Definition: csv_stat.cpp:13
std::vector< long double > get_mins() const
Return current mins.
Definition: csv_stat.cpp:43
std::vector< TypeCount > get_dtypes() const
Get data type counts for each column.
Definition: csv_stat.cpp:70
std::vector< long double > get_maxes() const
Return current maxes.
Definition: csv_stat.cpp:52
std::vector< FreqCount > get_counts() const
Get counts for each column.
Definition: csv_stat.cpp:61
Defines functionality needed for basic CSV parsing.
The all-encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.
Definition: common.hpp:75