summary refs log tree commit diff
path: root/algo1w4d1
diff options
context:
space:
mode:
Diffstat (limited to 'algo1w4d1')
-rw-r--r-- algo1w4d1/Stack.cpp 23
-rw-r--r-- algo1w4d1/Stack.h 19
-rw-r--r-- algo1w4d1/ValidXML.cpp 49
-rw-r--r-- algo1w4d1/ValidXML.h 36
-rw-r--r-- algo1w4d1/correct.xml 5
-rw-r--r-- algo1w4d1/invalid.xml 4
-rw-r--r-- algo1w4d1/main.cpp 17
l--------- algo1w4d1/makefile 1
-rw-r--r-- algo1w4d1/readme.md 35
9 files changed, 189 insertions, 0 deletions
diff --git a/algo1w4d1/Stack.cpp b/algo1w4d1/Stack.cpp
new file mode 100644
index 0000000..a83dd50
--- /dev/null
+++ b/algo1w4d1/Stack.cpp
@@ -0,0 +1,23 @@
+#include "Stack.h"
+
+// No manual setup or teardown: the only data member is a std::vector.
+Stack::Stack() = default;
+Stack::~Stack() = default;
+
+/** @brief append a copy of @p s as the new last value on the stack */
+void Stack::push(const std::string& s) {
+    _stack.emplace_back(s);
+}
+
+/**
+ * @brief remove the last value from the stack and return it
+ *
+ * The declared return type is a reference, so the removed value is parked in
+ * per-thread storage before the vector element is destroyed. Returning a
+ * reference to the element itself would dangle as soon as pop_back() runs.
+ *
+ * @return reference to the removed value; it stays valid until the next call
+ *         to pop() (on any Stack) from the same thread. When the stack is
+ *         empty, a reference to an empty string is returned.
+ */
+const std::string& Stack::pop() {
+    // Function-local storage keeps the fix inside this definition (no new
+    // data member is needed in the class declaration).
+    thread_local std::string popped;
+
+    if (_stack.empty()) {
+        // back()/pop_back() on an empty vector is undefined behaviour
+        popped.clear();
+        return popped;
+    }
+
+    // Take over the element's contents; the element (now holding the stale
+    // previous value) is destroyed by pop_back().
+    popped.swap(_stack.back());
+    _stack.pop_back();
+    return popped;
+}
+
+/**
+ * @brief return the last value on the stack without removing it
+ *
+ * vector::back() is called unconditionally, so the stack must not be empty.
+ */
+const std::string& Stack::peek() {
+    const std::string& top = _stack.back();
+    return top;
+}
+
+/** @brief number of values currently on the stack */
+unsigned Stack::size() {
+    // vector::size() yields a size_t; spell out the narrowing to the
+    // declared return type instead of leaving it implicit.
+    return static_cast<unsigned>(_stack.size());
+}
+
diff --git a/algo1w4d1/Stack.h b/algo1w4d1/Stack.h
new file mode 100644
index 0000000..a842bba
--- /dev/null
+++ b/algo1w4d1/Stack.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+/** @brief last-in-first-out stack of strings, stored in a std::vector */
+class Stack {
+public:
+    Stack();
+    virtual ~Stack();
+
+    /** @brief append to stack */
+    void push(const std::string&);
+
+    /** @brief remove and return last value on stack */
+    const std::string& pop();
+
+    /** @brief return but keep last value on stack */
+    const std::string& peek();
+
+    /** @brief get size of stack */
+    unsigned size();
+
+private:
+    /** @brief stored values; the back of the vector is the top of the stack */
+    std::vector<std::string> _stack;
+};
diff --git a/algo1w4d1/ValidXML.cpp b/algo1w4d1/ValidXML.cpp
new file mode 100644
index 0000000..4629b43
--- /dev/null
+++ b/algo1w4d1/ValidXML.cpp
@@ -0,0 +1,49 @@
+#include "ValidXML.h"
+#include <iostream>
+
+// Construction and destruction need no hand-written work here.
+ValidXML::ValidXML() = default;
+ValidXML::~ValidXML() = default;
+
+/**
+ * @brief handle a finished tag (parse() calls this when '>' ends a tag)
+ *
+ * An opening tag pushes its name onto the tag stack. A closing tag must carry
+ * the same name as the entry on top of the stack; a mismatch, or a closing
+ * tag while nothing is open, marks the input as invalid. Afterwards the
+ * per-tag parser state is reset for the next tag.
+ */
+void ValidXML::tag_finished() {
+    if (_tag_is_closing) {
+        if (_tag_stack.size() == 0) {
+            _valid = false; // closing tag without any open tag
+            return; // no reset needed: parse() ignores everything once invalid
+        }
+
+        // Compare while the entry is still on the stack, rather than relying
+        // on the lifetime of the reference that pop() returns.
+        if (_tag_name != _tag_stack.peek()) _valid = false;
+        _tag_stack.pop();
+    } else {
+        _tag_stack.push(_tag_name);
+    }
+
+    // reset tag parser state
+    _tag_name.clear();
+    _tag_is_closing = false;
+    _parsing_tag = false;
+    _tag_counter = 0;
+}
+
+/**
+ * @brief feed one character of input to the validator
+ *
+ * Outside a tag, '<' starts a tag, a stray '>' invalidates the input and all
+ * other text is ignored. Inside a tag, '/' is only accepted directly after
+ * '<' (closing tag), a second '<' invalidates the input, and '>' finishes the
+ * tag. The tag name is everything up to the first whitespace character, so
+ * attributes are not part of the name.
+ *
+ * @param input next character of the XML input
+ */
+void ValidXML::parse(char input) {
+    if (!_valid) return; // invalid XML can never be valid again
+
+    if (!_parsing_tag) {
+        if (input == '<') _parsing_tag = true; // < starts tag
+        if (input == '>') _valid = false; // double tag close
+        return; // ignore text outside tags
+    }
+
+    if (input == '/') { // closing tag marker
+        if (_tag_counter == 0) _tag_is_closing = true; // only allowed directly after <
+        else _valid = false;
+        return; // prevent increment of tag counter
+    }
+
+    if (input == '>') { // > closes tag
+        tag_finished();
+        return;
+    }
+
+    if (input == '<') {
+        _valid = false; // double tag open
+    } else {
+        // XML allows space, tab, carriage return and line feed between the
+        // tag name and its attributes; any of them ends the name.
+        const bool is_whitespace =
+            input == ' ' || input == '\t' || input == '\n' || input == '\r';
+
+        // _tag_counter only equals the name length while every character
+        // since '<' has been appended, i.e. while the name is still running.
+        if (_tag_counter == _tag_name.size() && !is_whitespace) _tag_name += input;
+    }
+
+    _tag_counter++;
+}
+
+/**
+ * @brief report whether the input parsed so far is valid
+ *
+ * Tags that are still open do not count as an error here; the disabled check
+ * below would additionally require the tag stack to be empty.
+ *
+ * @return true while no error has been recorded
+ */
+bool ValidXML::input_valid() {
+    // if (_tag_stack.size() > 0) return false;
+    const bool still_valid = _valid;
+    return still_valid;
+}
diff --git a/algo1w4d1/ValidXML.h b/algo1w4d1/ValidXML.h
new file mode 100644
index 0000000..0810130
--- /dev/null
+++ b/algo1w4d1/ValidXML.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "Stack.h"
+
+/** @brief character-by-character validator for tag hierarchy and bracket syntax */
+class ValidXML {
+public:
+    /**
+     * @brief parse XML byte by byte (todo: utf-8 safe)
+     *
+     * XML is considered 'valid' when no hierarchy errors or invalid XML syntax
+     * is supplied (double tag opening/closing brackets). This means that partial
+     * or unfinished XML is considered valid. To check if XML is finished,
+     * uncomment the stack size check in the ValidXML::input_valid function.
+     *
+     * Each ValidXML instance parses one XML input, and cannot be reused.
+     */
+    virtual void parse(char);
+
+    /** @brief return if input is valid (true when valid) */
+    virtual bool input_valid();
+
+public:
+    ValidXML();
+    virtual ~ValidXML();
+
+private:
+    bool _valid = true; /**< @brief if parsed XML is still valid */
+    Stack _tag_stack; /**< @brief tag stack (used to validate tag hierarchy) */
+
+    std::string _tag_name; /**< @brief tag name (without attributes) */
+
+    // The three fields below need a defined start value: parse() reads
+    // _parsing_tag on its very first call, before tag_finished() has had a
+    // chance to reset any of them.
+    bool _tag_is_closing = false; /**< @brief current tag is closing tag (starts with "</" instead of "<") */
+    bool _parsing_tag = false; /**< @brief parsing text or tag */
+    unsigned _tag_counter = 0; /**< @brief characters since tag open character */
+
+private:
+    void tag_finished(); /**< @brief handle finished tag */
+};
+
diff --git a/algo1w4d1/correct.xml b/algo1w4d1/correct.xml
new file mode 100644
index 0000000..73edeff
--- /dev/null
+++ b/algo1w4d1/correct.xml
@@ -0,0 +1,5 @@
+<persoon>
+ <naam attr="test">Donald Duck</naam>
+ <adres>Eendenplein 17</adres>
+ <woonplaats>Duckstad</woonplaats>
+</persoon>
diff --git a/algo1w4d1/invalid.xml b/algo1w4d1/invalid.xml
new file mode 100644
index 0000000..91a0296
--- /dev/null
+++ b/algo1w4d1/invalid.xml
@@ -0,0 +1,4 @@
+<persoon>
+ <naam><adres>Donald Duck</naam>
+ Eendenplein 17</adres>
+ <woonplaats>Duckstad</woonplaats>
diff --git a/algo1w4d1/main.cpp b/algo1w4d1/main.cpp
new file mode 100644
index 0000000..d641db0
--- /dev/null
+++ b/algo1w4d1/main.cpp
@@ -0,0 +1,17 @@
+#include <iostream>
+#include <string>
+
+#include "ValidXML.h"
+
+/**
+ * @brief read XML from stdin until EOF, validate it and print the verdict
+ *
+ * Command-line arguments are not used. The process always exits with 0; the
+ * result is only reported on stdout.
+ */
+int main(int argc, char** argv) {
+    // input is always read from stdin until EOF (ctrl-d when a terminal is attached)
+    const std::istreambuf_iterator<char> stdin_begin(std::cin);
+    const std::istreambuf_iterator<char> stdin_end;
+    const std::string xml_text(stdin_begin, stdin_end);
+
+    // feed the validator one character at a time
+    ValidXML parser;
+    for (std::string::size_type i = 0; i < xml_text.size(); ++i) {
+        parser.parse(xml_text[i]);
+    }
+
+    const char* verdict = parser.input_valid() ? "valid" : "invalid";
+    std::cout << "XML is " << verdict << std::endl;
+
+    return 0;
+}
diff --git a/algo1w4d1/makefile b/algo1w4d1/makefile
new file mode 120000
index 0000000..a4e84c6
--- /dev/null
+++ b/algo1w4d1/makefile
@@ -0,0 +1 @@
+../week.mk
\ No newline at end of file
diff --git a/algo1w4d1/readme.md b/algo1w4d1/readme.md
new file mode 100644
index 0000000..bfeff41
--- /dev/null
+++ b/algo1w4d1/readme.md
@@ -0,0 +1,35 @@
+# week 4 deel 1
+
+De criteria in de opdrachtbeschrijving zijn een beetje vaag, dus het
+standaardgedrag van de XML parser staat in Doxygen-formaat in ValidXML.h
+uitgelegd. De parser controleert letter voor letter of de XML nog geldig is,
+waarbij gecontroleerd wordt op hiërarchie (d.m.v. Stack-klasse), en juiste
+XML-syntax (geen dubbele `<` of `>`, en geen misplaatste `/`). Juiste
+hiërarchie betekent voor deze parser alleen dat een bovenliggende tag niet kan
+worden gesloten vóór een onderliggende tag. De main.cpp zorgt ervoor dat de
+parser altijd vanuit stdin leest:
+
+```bash
+$ make
+g++ -c Stack.cpp -o Stack.o
+g++ -c ValidXML.cpp -o ValidXML.o
+g++ -c main.cpp -o main.o
+g++ Stack.o ValidXML.o main.o -lstdc++ -o main
+$ cat correct.xml
+<persoon>
+ <naam attr="test">Donald Duck</naam>
+ <adres>Eendenplein 17</adres>
+ <woonplaats>Duckstad</woonplaats>
+</persoon>
+$ cat invalid.xml
+<persoon>
+ <naam><adres>Donald Duck</naam>
+ Eendenplein 17</adres>
+ <woonplaats>Duckstad</woonplaats>
+$ ./main < correct.xml
+XML is valid
+$ ./main < invalid.xml
+XML is invalid
+$
+```
+