diff options
author | Loek Le Blansch <loek@pipeframe.xyz> | 2024-10-09 21:04:57 +0200 |
---|---|---|
committer | Loek Le Blansch <loek@pipeframe.xyz> | 2024-10-09 21:04:57 +0200 |
commit | f20e49a71a7ee186057f5b79aeb58f9f22b352f4 (patch) | |
tree | 2dc11aa28c1226a2baf90b182435dbdc596a8553 /CSVParser.cpp | |
parent | 76950c6100371c28f82f2e130dcfdb7d7dac97df (diff) |
parse artists.csv
Diffstat (limited to 'CSVParser.cpp')
-rw-r--r-- | CSVParser.cpp | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/CSVParser.cpp b/CSVParser.cpp new file mode 100644 index 0000000..3fbe804 --- /dev/null +++ b/CSVParser.cpp @@ -0,0 +1,82 @@ +#include <cstdlib> +#include <string> +#include <sstream> +#include <vector> +#include <algorithm> + +#include "CSVParser.h" +#include "Exception.h" +#include "Parser.h" + +using namespace std; + +CSVParser CSVParser::instance {}; +CSVParser::CSVParser() { + Parser::register_strategy(this); +} + +unsigned int CSVParser::heuristic(File & f) { + const string content = f.read(); + int global_columns = 0; + int columns = 1; + int rows = 0; + int penalty = 1; + for (char c : content) { + if (c == ',') columns++; + if (c == '\n') { + rows++; + if (global_columns == 0) global_columns = columns; + penalty += abs(global_columns - columns); + columns = 1; + } + } + if (global_columns == 1) penalty += 1000; + return (rows + global_columns) / penalty; +} + +static size_t header_idx(vector<string> header, string field) { + auto iter = find(header.begin(), header.end(), field); + if (iter == header.end()) + throw Exception("CSV file is missing \"%s\" column", field.c_str()); + return iter - header.begin(); +} + +void CSVParser::parse(File & f, Deserializer & d) { + vector<vector<string>> table = {}; + + istringstream rows(f.read()); + string row; + while (getline(rows, row)) { + // ignore windows line endings + if (row.back() == '\r') row.pop_back(); + istringstream columns(row); + string column; + + vector<string> table_row = {}; + while (getline(columns, column, ',')) { + table_row.push_back(column); + } + table.push_back(table_row); + } + + if (table.size() < 1) + throw Exception("not enough data rows in CSV file"); + + vector<string> table_header = table[0]; + table.erase(table.begin()); + + size_t x_idx = header_idx(table_header, "x"); + size_t y_idx = header_idx(table_header, "y"); + size_t vx_idx = header_idx(table_header, "vx"); + size_t vy_idx = header_idx(table_header, "vy"); + + for (vector<string> row : table) { + d.add_artist({ + .x = stof(row[x_idx]), + .y = stof(row[y_idx]), + .vx = stof(row[vx_idx]), + .vy = stof(row[vy_idx]), + }); + } +} + |