diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-03-05 16:29:54 +0100 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-03-05 16:37:21 +0100 |
commit | bdf6588d9154889d897d6b102fc32f49bf92fa7b (patch) | |
tree | c8da7df7e7a4eb3931c001361a47ae90c92f36e5 | |
parent | 44ce67109256d497758fb95e0469e52c3e006b8e (diff) |
algo1w4d1
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | algo1w4d1/Stack.cpp | 23 | ||||
-rw-r--r-- | algo1w4d1/Stack.h | 19 | ||||
-rw-r--r-- | algo1w4d1/ValidXML.cpp | 49 | ||||
-rw-r--r-- | algo1w4d1/ValidXML.h | 36 | ||||
-rw-r--r-- | algo1w4d1/correct.xml | 5 | ||||
-rw-r--r-- | algo1w4d1/invalid.xml | 4 | ||||
-rw-r--r-- | algo1w4d1/main.cpp | 17 | ||||
l--------- | algo1w4d1/makefile | 1 | ||||
-rw-r--r-- | algo1w4d1/readme.md | 35 | ||||
-rw-r--r-- | week.mk | 2 |
11 files changed, 191 insertions, 1 deletions
// ============================================================================
// Consolidated listing of the files added by this commit (algo1w4d1).
// Each section below is introduced by the path it had in the diff. Headers are
// listed before their implementation so the listing reads (and compiles) as a
// single translation unit; when split back into separate files, Stack.h and
// ValidXML.h each start with `#pragma once`, and the .cpp files / main.cpp
// include "Stack.h" / "ValidXML.h" as in the original commit.
// ============================================================================

// ---- .gitignore (hunk @@ -3,4 +3,5 @@) -------------------------------------
//  **/main
//  **/compile_commands.json
//  **/.cache
// +**/*.zip

#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

// ---- algo1w4d1/Stack.h ------------------------------------------------------

/** @brief LIFO stack of strings, stored in a std::vector (back == top) */
class Stack {
public:
    /** @brief append to stack */
    void push(const std::string&);
    /**
     * @brief remove and return last value on stack
     *
     * Returned by value: the element is destroyed by the removal, so a
     * reference to it (as returned previously) would dangle. Returns an empty
     * string when the stack is empty.
     */
    std::string pop();
    /** @brief return but keep last value on stack (empty string when stack is empty) */
    const std::string& peek() const;
    /** @brief get size of stack */
    unsigned size() const;

public:
    Stack();
    virtual ~Stack();

private:
    std::vector<std::string> _stack;
};

// ---- algo1w4d1/Stack.cpp ----------------------------------------------------

Stack::Stack() = default;
Stack::~Stack() = default;

void Stack::push(const std::string& s) {
    _stack.push_back(s); // push_back already copies; no explicit temporary needed
}

std::string Stack::pop() {
    if (_stack.empty()) return std::string();

    // take the value out *before* pop_back() destroys the element
    std::string top = std::move(_stack.back());
    _stack.pop_back();
    return top;
}

const std::string& Stack::peek() const {
    // calling back() on an empty vector is undefined, so hand out a shared
    // empty string instead
    static const std::string empty;
    return _stack.empty() ? empty : _stack.back();
}

unsigned Stack::size() const {
    return static_cast<unsigned>(_stack.size());
}

// ---- algo1w4d1/ValidXML.h ---------------------------------------------------

class ValidXML {
public:
    /**
     * @brief parse XML byte by byte (todo: utf-8 safe)
     *
     * XML is considered 'valid' when no hierarchy errors or invalid XML syntax
     * is supplied (double tag opening/closing brackets). This means that partial
     * or unfinished XML is considered valid. To check if XML is finished,
     * uncomment the stack size check in the ValidXML::input_valid function.
     *
     * Each ValidXML instance parses one XML input, and cannot be reused.
     */
    virtual void parse(char);
    /** @brief return if input is valid (true when valid) */
    virtual bool input_valid();

public:
    ValidXML();
    virtual ~ValidXML();

private:
    /** @brief if parsed XML is still valid */
    bool _valid = true;
    /** @brief tag stack (used to validate tag hierarchy) */
    Stack _tag_stack;

    /** @brief tag name (without attributes) */
    std::string _tag_name;
    // The three fields below are read by parse() before tag_finished() ever
    // assigns them, so they need explicit initial values.
    /** @brief current tag is closing tag (starts with "</" instead of "<") */
    bool _tag_is_closing = false;
    /** @brief parsing text or tag */
    bool _parsing_tag = false;
    /** @brief characters since tag open character */
    unsigned _tag_counter = 0;

private:
    /** @brief handle finished tag */
    void tag_finished();
};

// ---- algo1w4d1/ValidXML.cpp -------------------------------------------------

ValidXML::ValidXML() = default;
ValidXML::~ValidXML() = default;

void ValidXML::tag_finished() {
    if (_tag_is_closing) {
        // closing tag without any open tag
        if (_tag_stack.size() == 0) {
            _valid = false;
            return;
        }
        // closing tag must match the most recently opened tag
        if (_tag_name != _tag_stack.pop()) _valid = false;
    } else {
        _tag_stack.push(_tag_name);
    }

    // reset tag parser state
    _tag_name.clear();
    _tag_is_closing = false;
    _parsing_tag = false;
    _tag_counter = 0;
}

void ValidXML::parse(char input) {
    if (!_valid) return; // invalid XML can never be valid again

    if (!_parsing_tag) {
        if (input == '<') _parsing_tag = true; // < starts tag
        if (input == '>') _valid = false; // double tag close
        return; // ignore text outside tags
    }

    if (input == '/') { // closing tag marker
        if (_tag_counter == 0) _tag_is_closing = true; // only allowed directly after <
        else _valid = false;
        return; // prevent increment of tag counter
    }

    if (input == '<') {
        _valid = false; // double tag open
    } else if (input == '>') {
        tag_finished(); // > closes tag
        return;
    } else if (_tag_counter == _tag_name.size() && input != ' ') {
        _tag_name += input; // name is content of tag until space
    }

    _tag_counter++;
}

bool ValidXML::input_valid() {
    // if (_tag_stack.size() > 0) return false;
    return _valid;
}

// ---- algo1w4d1/correct.xml --------------------------------------------------
// <persoon>
//     <naam attr="test">Donald Duck</naam>
//     <adres>Eendenplein 17</adres>
//     <woonplaats>Duckstad</woonplaats>
// </persoon>

// ---- algo1w4d1/invalid.xml --------------------------------------------------
// <persoon>
//     <naam><adres>Donald Duck</naam>
//     Eendenplein 17</adres>
//     <woonplaats>Duckstad</woonplaats>

// ---- algo1w4d1/main.cpp -----------------------------------------------------

/** @brief read all of stdin, feed it to a ValidXML parser and print the verdict */
int main(int /*argc*/, char** /*argv*/) {
    // input is always read from stdin until EOF (ctrl-d when tty is connected to terminal)
    std::string input((std::istreambuf_iterator<char>(std::cin)), std::istreambuf_iterator<char>());

    ValidXML parser;
    for (char c : input)
        parser.parse(c);

    std::cout << "XML is " << (parser.input_valid() ? "valid" : "invalid") << '\n';

    return 0;
}

// ---- algo1w4d1/makefile (new file mode 120000, symlink) ---------------------
// ../week.mk
\ No newline at end of file diff --git a/algo1w4d1/readme.md b/algo1w4d1/readme.md new file mode 100644 index 0000000..bfeff41 --- /dev/null +++ b/algo1w4d1/readme.md @@ -0,0 +1,35 @@ +# week 4 deel 1 + +De criteria in de opdrachtbeschrijving zijn een beetje vaag, dus het +standaardgedrag van de XML parser staat in Doxygen-formaat in ValidXML.h +uitgelegd. De parser controleert letter voor letter of de XML nog geldig is, +waarbij gecontroleerd wordt op hiërarchie (d.m.v. Stack-klasse), en juiste +XML-syntax (geen dubbele `<` of `>`, en geen misplaatste `/`). Juiste +hiërarchie betekent voor deze parser alleen dat een bovenliggende tag niet kan +worden gesloten vóór een onderliggende tag. De main.cpp zorgt ervoor dat de +parser altijd vanuit stdin leest: + +```bash +$ make +g++ -c Stack.cpp -o Stack.o +g++ -c ValidXML.cpp -o ValidXML.o +g++ -c main.cpp -o main.o +g++ Stack.o ValidXML.o main.o -lstdc++ -o main +$ cat correct.xml +<persoon> + <naam attr="test">Donald Duck</naam> + <adres>Eendenplein 17</adres> + <woonplaats>Duckstad</woonplaats> +</persoon> +$ cat invalid.xml +<persoon> + <naam><adres>Donald Duck</naam> + Eendenplein 17</adres> + <woonplaats>Duckstad</woonplaats> +$ ./main < correct.xml +XML is valid +$ ./main < invalid.xml +XML is invalid +$ +``` + @@ -33,5 +33,5 @@ compile_commands: clean compiledb make -Bn zip: all - zip -q $(OUTPUT_ZIP) makefile $(wildcard *.cpp) $(wildcard *.h) $(wildcard *.hpp) $(wildcard *.c) $(wildcard *.svg) + zip -q $(OUTPUT_ZIP) makefile $(wildcard *.cpp) $(wildcard *.h) $(wildcard *.hpp) $(wildcard *.c) $(wildcard *.svg) $(wildcard *.xml) |