aboutsummaryrefslogtreecommitdiff
path: root/CSVParser.cpp
diff options
context:
space:
mode:
author Loek Le Blansch <loek@pipeframe.xyz> 2024-10-09 21:04:57 +0200
committer Loek Le Blansch <loek@pipeframe.xyz> 2024-10-09 21:04:57 +0200
commit f20e49a71a7ee186057f5b79aeb58f9f22b352f4 (patch)
tree 2dc11aa28c1226a2baf90b182435dbdc596a8553 /CSVParser.cpp
parent 76950c6100371c28f82f2e130dcfdb7d7dac97df (diff)
parse artists.csv
Diffstat (limited to 'CSVParser.cpp')
-rw-r--r-- CSVParser.cpp 82
1 files changed, 82 insertions, 0 deletions
diff --git a/CSVParser.cpp b/CSVParser.cpp
new file mode 100644
index 0000000..3fbe804
--- /dev/null
+++ b/CSVParser.cpp
@@ -0,0 +1,82 @@
+#include <cstdlib>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+
+#include "CSVParser.h"
+#include "Exception.h"
+#include "Parser.h"
+
+using namespace std;
+
+// Hands the freshly constructed parser to Parser::register_strategy.
+CSVParser::CSVParser() {
+	Parser::register_strategy(this);
+}
+
+// Definition of the static member `instance`. It is default-constructed, so
+// the constructor above (and with it the registration call) runs during
+// static initialisation of this translation unit.
+CSVParser CSVParser::instance;
+
+// Scores the content of `f` by how consistently its lines split on commas.
+//
+// The column count of the first newline-terminated line is the reference.
+// Every newline-terminated line adds |reference - own column count| to a
+// penalty that starts at 1, and a reference of exactly one column (no commas
+// on the first line) adds a further 1000. The result is the integer quotient
+// (line count + reference column count) / penalty.
+//
+// Only '\n' ends a line, so text after the last newline is not counted.
+unsigned int CSVParser::heuristic(File & f) {
+	const string text = f.read();
+
+	int reference_width = 0; // stays 0 until the first '\n' is seen
+	int width = 1;           // a line without commas still has one column
+	int line_count = 0;
+	int mismatch = 1;        // starts at 1 so the division below is always defined
+
+	for (const char ch : text) {
+		switch (ch) {
+			case ',':
+				width++;
+				break;
+			case '\n':
+				line_count++;
+				if (reference_width == 0) reference_width = width;
+				mismatch += abs(reference_width - width);
+				width = 1;
+				break;
+			default:
+				break;
+		}
+	}
+
+	if (reference_width == 1) mismatch += 1000;
+	return (line_count + reference_width) / mismatch;
+}
+
+// Returns the position of the column named `field` within a CSV header row.
+//
+// header: cells of the header row, in file order
+// field:  exact column name to look for
+//
+// Throws Exception when no header cell equals `field`.
+//
+// Both arguments are taken by const reference so that a lookup does not copy
+// the whole header row and the field name on every call.
+static size_t header_idx(const vector<string> & header, const string & field) {
+	const auto iter = find(header.begin(), header.end(), field);
+	if (iter == header.end())
+		throw Exception("CSV file is missing \"%s\" column", field.c_str());
+	// iter never precedes begin(), so the distance is non-negative
+	return static_cast<size_t>(iter - header.begin());
+}
+
+// Reads the content of `f` as comma-separated text and calls d.add_artist()
+// once per data row.
+//
+// The first non-blank line is the header and must contain the columns "x",
+// "y", "vx" and "vy" (in any order, other columns are ignored). Each following
+// non-blank line supplies one artist; the four cells are converted with stof.
+//
+// Throws Exception when the file has no non-blank line, when one of the four
+// columns is missing from the header, or when a data row is too short to
+// contain all four cells. Exceptions raised by stof for malformed numbers
+// (std::invalid_argument, std::out_of_range) are passed through unchanged.
+void CSVParser::parse(File & f, Deserializer & d) {
+	vector<vector<string>> table;
+
+	istringstream rows(f.read());
+	string row;
+	while (getline(rows, row)) {
+		// ignore windows line endings; the empty() check matters because
+		// calling back() on an empty string is undefined behaviour
+		if (!row.empty() && row.back() == '\r') row.pop_back();
+		// blank lines hold no cells and would only produce unusable rows
+		if (row.empty()) continue;
+
+		istringstream columns(row);
+		string column;
+
+		// build the row in place instead of copying a temporary into the table
+		table.emplace_back();
+		vector<string> & table_row = table.back();
+		while (getline(columns, column, ','))
+			table_row.push_back(column);
+	}
+
+	if (table.empty())
+		throw Exception("not enough data rows in CSV file");
+
+	// the header stays in slot 0; data rows are iterated from slot 1 so the
+	// table never has to be shifted
+	const vector<string> & table_header = table.front();
+
+	const size_t x_idx = header_idx(table_header, "x");
+	const size_t y_idx = header_idx(table_header, "y");
+	const size_t vx_idx = header_idx(table_header, "vx");
+	const size_t vy_idx = header_idx(table_header, "vy");
+
+	// a data row needs at least this many cells for all four lookups to be
+	// within bounds
+	const size_t min_columns = max({x_idx, y_idx, vx_idx, vy_idx}) + 1;
+
+	for (size_t i = 1; i < table.size(); i++) {
+		const vector<string> & cells = table[i];
+		if (cells.size() < min_columns)
+			throw Exception("CSV data row %s has too few columns", to_string(i).c_str());
+		d.add_artist({
+			.x = stof(cells[x_idx]),
+			.y = stof(cells[y_idx]),
+			.vx = stof(cells[vx_idx]),
+			.vy = stof(cells[vy_idx]),
+		});
+	}
+}
+