From bdf6588d9154889d897d6b102fc32f49bf92fa7b Mon Sep 17 00:00:00 2001
From: lonkaars
Date: Sun, 5 Mar 2023 16:29:54 +0100
Subject: algo1w4d1

---
 .gitignore             |  1 +
 algo1w4d1/Stack.cpp    | 23 +++++++++++++++++++++++
 algo1w4d1/Stack.h      | 19 +++++++++++++++++++
 algo1w4d1/ValidXML.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 algo1w4d1/ValidXML.h   | 36 ++++++++++++++++++++++++++++++++++++
 algo1w4d1/correct.xml  |  5 +++++
 algo1w4d1/invalid.xml  |  4 ++++
 algo1w4d1/main.cpp     | 17 +++++++++++++++++
 algo1w4d1/makefile     |  1 +
 algo1w4d1/readme.md    | 35 +++++++++++++++++++++++++++++++++++
 week.mk                |  2 +-
 11 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 algo1w4d1/Stack.cpp
 create mode 100644 algo1w4d1/Stack.h
 create mode 100644 algo1w4d1/ValidXML.cpp
 create mode 100644 algo1w4d1/ValidXML.h
 create mode 100644 algo1w4d1/correct.xml
 create mode 100644 algo1w4d1/invalid.xml
 create mode 100644 algo1w4d1/main.cpp
 create mode 120000 algo1w4d1/makefile
 create mode 100644 algo1w4d1/readme.md

diff --git a/.gitignore b/.gitignore
index 4cf3fc4..2994961 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@
 **/main
 **/compile_commands.json
 **/.cache
+**/*.zip
 
diff --git a/algo1w4d1/Stack.cpp b/algo1w4d1/Stack.cpp
new file mode 100644
index 0000000..a83dd50
--- /dev/null
+++ b/algo1w4d1/Stack.cpp
@@ -0,0 +1,23 @@
+#include "Stack.h"
+
+Stack::Stack() {}
+Stack::~Stack() {}
+
+void Stack::push(const std::string& s) {
+	_stack.push_back(s);
+}
+
+std::string Stack::pop() {
+	std::string s = _stack.back(); // copy first: pop_back() destroys the element, so a reference to it would dangle
+	_stack.pop_back();
+	return s;
+}
+
+const std::string& Stack::peek() {
+	return _stack.back();
+}
+
+unsigned Stack::size() {
+	return _stack.size();
+}
+
diff --git a/algo1w4d1/Stack.h b/algo1w4d1/Stack.h
new file mode 100644
index 0000000..a842bba
--- /dev/null
+++ b/algo1w4d1/Stack.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+class Stack {
+public:
+	void push(const std::string&); /** @brief append to stack */
+	std::string pop(); /** @brief remove and return (by value) last value on stack; stack must not be empty */
+	const std::string& peek(); /** @brief return but keep last value on stack */
+	unsigned size(); /** @brief get size of stack */
+
+public:
+	Stack();
+	virtual ~Stack();
+
+private:
+	std::vector<std::string> _stack;
+};
diff --git a/algo1w4d1/ValidXML.cpp b/algo1w4d1/ValidXML.cpp
new file mode 100644
index 0000000..4629b43
--- /dev/null
+++ b/algo1w4d1/ValidXML.cpp
@@ -0,0 +1,49 @@
+#include "ValidXML.h"
+#include <string>
+
+ValidXML::ValidXML() {}
+ValidXML::~ValidXML() {}
+
+void ValidXML::tag_finished() {
+	if (_tag_is_closing) {
+		if (_tag_stack.size() == 0) {
+			_valid = false;
+			return;
+		}
+		if (_tag_name.compare(_tag_stack.pop()) != 0) _valid = false;
+	} else {
+		_tag_stack.push(_tag_name);
+	}
+
+	// reset tag parser state
+	_tag_name = "";
+	_tag_is_closing = false;
+	_parsing_tag = false;
+	_tag_counter = 0;
+}
+
+void ValidXML::parse(char input) {
+	if (!_valid) return; // invalid XML can never be valid again
+
+	if (!_parsing_tag) {
+		if (input == '<') _parsing_tag = true; // < starts tag
+		if (input == '>') _valid = false; // double tag close
+		return; // ignore text outside tags
+	} else {
+		if (input == '/') { // closing tag marker
+			if (_tag_counter == 0) _tag_is_closing = true; // only allowed directly after <
+			else _valid = false;
+			return; // prevent increment of tag counter
+		}
+		else if (input == '<') _valid = false; // double tag open
+		else if (input == '>') return tag_finished(); // > closes tag
+		else if (_tag_counter == _tag_name.size() && input != ' ') _tag_name += input; // name is content of tag until space
+
+		_tag_counter++;
+	}
+}
+
+bool ValidXML::input_valid() {
+	// if (_tag_stack.size() > 0) return false;
+	return _valid;
+}
diff --git a/algo1w4d1/ValidXML.h b/algo1w4d1/ValidXML.h
new file mode 100644
index 0000000..0810130
--- /dev/null
+++ b/algo1w4d1/ValidXML.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "Stack.h"
+
+class ValidXML {
+public:
+	/**
+	 * @brief parse XML byte by byte (todo: utf-8 safe)
+	 *
+	 * XML is considered 'valid' when no hierarchy errors or invalid XML syntax
+	 * is supplied (double tag opening/closing brackets). This means that partial
+	 * or unfinished XML is considered valid. To check if XML is finished,
+	 * uncomment the stack size check in the ValidXML::input_valid function.
+	 *
+	 * Each ValidXML instance parses one XML input, and cannot be reused.
+	 */
+	virtual void parse(char);
+	virtual bool input_valid(); /** @brief return if input is valid (true when valid) */
+
+public:
+	ValidXML();
+	virtual ~ValidXML();
+
+private:
+	bool _valid = true; /** @brief if parsed XML is still valid */
+	Stack _tag_stack; /** @brief tag stack (used to validate tag hierarchy) */
+
+	std::string _tag_name; /** @brief tag name (without attributes) */
+	bool _tag_is_closing = false; /** @brief current tag is closing tag (starts with "
+	Donald Duck
+	Eendenplein 17
+	Duckstad
+
diff --git a/algo1w4d1/invalid.xml b/algo1w4d1/invalid.xml
new file mode 100644
index 0000000..91a0296
--- /dev/null
+++ b/algo1w4d1/invalid.xml
@@ -0,0 +1,4 @@
+
+	Donald Duck
+	Eendenplein 17
+	Duckstad
diff --git a/algo1w4d1/main.cpp b/algo1w4d1/main.cpp
new file mode 100644
index 0000000..d641db0
--- /dev/null
+++ b/algo1w4d1/main.cpp
@@ -0,0 +1,17 @@
+#include <iostream>
+#include <iterator>
+
+#include "ValidXML.h"
+
+int main(int argc, char** argv) {
+	// input is always read from stdin until EOF (ctrl-d when tty is connected to terminal)
+	std::string input((std::istreambuf_iterator<char>(std::cin)), std::istreambuf_iterator<char>());
+
+	ValidXML parser;
+	for (char c : input)
+		parser.parse(c);
+
+	std::cout << "XML is " << (parser.input_valid() ? "valid" : "invalid") << std::endl;
+
+	return 0;
+}
diff --git a/algo1w4d1/makefile b/algo1w4d1/makefile
new file mode 120000
index 0000000..a4e84c6
--- /dev/null
+++ b/algo1w4d1/makefile
@@ -0,0 +1 @@
+../week.mk
\ No newline at end of file
diff --git a/algo1w4d1/readme.md b/algo1w4d1/readme.md
new file mode 100644
index 0000000..bfeff41
--- /dev/null
+++ b/algo1w4d1/readme.md
@@ -0,0 +1,35 @@
+# week 4 deel 1
+
+De criteria in de opdrachtbeschrijving zijn een beetje vaag, dus het
+standaardgedrag van de XML parser staat in Doxygen-formaat in ValidXML.h
+uitgelegd. De parser controleert letter voor letter of de XML nog geldig is,
+waarbij gecontroleerd wordt op hiërarchie (d.m.v. Stack-klasse), en juiste
+XML-syntax (geen dubbele `<` of `>`, en geen misplaatste `/`). Juiste
+hiërarchie betekent voor deze parser alleen dat een bovenliggende tag niet kan
+worden gesloten vóór een onderliggende tag. De main.cpp zorgt ervoor dat de
+parser altijd vanuit stdin leest:
+
+```bash
+$ make
+g++ -c Stack.cpp -o Stack.o
+g++ -c ValidXML.cpp -o ValidXML.o
+g++ -c main.cpp -o main.o
+g++ Stack.o ValidXML.o main.o -lstdc++ -o main
+$ cat correct.xml
+
+	Donald Duck
+	Eendenplein 17
+	Duckstad
+
+$ cat invalid.xml
+
+	Donald Duck
+	Eendenplein 17
+	Duckstad
+$ ./main < correct.xml
+XML is valid
+$ ./main < invalid.xml
+XML is invalid
+$
+```
+
diff --git a/week.mk b/week.mk
index c414a31..851ae8e 100644
--- a/week.mk
+++ b/week.mk
@@ -33,5 +33,5 @@ compile_commands: clean
 	compiledb make -Bn
 
 zip: all
-	zip -q $(OUTPUT_ZIP) makefile $(wildcard *.cpp) $(wildcard *.h) $(wildcard *.hpp) $(wildcard *.c) $(wildcard *.svg)
+	zip -q $(OUTPUT_ZIP) makefile $(wildcard *.cpp) $(wildcard *.h) $(wildcard *.hpp) $(wildcard *.c) $(wildcard *.svg) $(wildcard *.xml)
 
-- 
cgit v1.2.3