From 6cb0ea50e1829c0c6c2e0179d3c6b7573c4a1b24 Mon Sep 17 00:00:00 2001 From: Loek Le Blansch Date: Sat, 25 May 2024 20:10:45 +0200 Subject: split up hex string parser and number parser + small refactoring --- client/parse.cpp | 136 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 58 deletions(-) diff --git a/client/parse.cpp b/client/parse.cpp index 16a7afc..6eca774 100644 --- a/client/parse.cpp +++ b/client/parse.cpp @@ -5,7 +5,7 @@ #include "parse.h" -static int parse_str(const char* str, char* data, size_t* size) { +static int parse_string(const char* str, char* data, size_t* offset) { char closing = str[0]; char escape = false; bool scan = data == NULL; @@ -28,18 +28,49 @@ static int parse_str(const char* str, char* data, size_t* size) { // TODO: handle escaped characters - if (c == closing) { - if (scan) printf("string%s of length %d\n", escape ? " (w/ escape)" : "", i - 1); + if (c == closing) return i + 1; // +1 for closing quote - } - if (scan) *size += 1; + *offset += 1; } return -i; } -static int parse_num(const char* str, char* data, size_t* size) { +static int parse_hexstr(const char* str, char* data, size_t* offset) { + const char* ifs = IFS; + size_t len = strcspn(str, ifs); + bool scan = data == NULL; + int i = 0; + + // check if token contains at least one colon + const char* colon = strchr(str, ':'); + if (colon == NULL) return -i; + if (colon >= str + len) return -i; + + // check if token only contains allowed characters [0-9a-fA-F:] + size_t len_ok = strspn(str + i, SET_HEX_STR) + i; + if (len != len_ok) return -len_ok; + + size_t c = 0; + while (c < len) { // count bytes in bytestring + if (strspn(str + c, SET_HEX) != 2) + return -i -c; + c += 2; + *offset += 1; + + if (str[c] == ':') { + c += 1; + continue; + } + break; + } + + i += len; + return i; +} + +static int parse_number(const char* str, char* data, size_t* offset) { const char* ifs = IFS; size_t len = strcspn(str, ifs); bool scan = data == NULL; @@ -47,11 +78,7 @@ static int parse_num(const char* str, char* data, size_t* size) { int base = 10; bool bytestring = false; - const char* colon = strchr(str, ':'); - if (colon != NULL && colon < str + len) { // byte string - base = 16; - bytestring = true; - } else if (len > 2 && strncmp(str, "0x", 2) == 0) { // hexadecimal prefix + if (len > 2 && strncmp(str, "0x", 2) == 0) { // hexadecimal prefix base = 16; i += 2; }/* else if (len > 1 && strncmp(str, "0", 1) == 0) { // octal prefix @@ -62,73 +89,66 @@ static int parse_num(const char* str, char* data, size_t* size) { const char* set; // if (base == 8) set = SET_OCT; if (base == 10) set = SET_DEC; - if (base == 16) { - if (bytestring) set = SET_HEX_STR; - else set = SET_HEX; - } + if (base == 16) set = SET_HEX; size_t len_ok = strspn(str + i, set) + i; if (len != len_ok) return -len_ok; - if (scan) { - if (base == 10) *size += 1; - else if (base == 16) { - if (!bytestring) { - size_t prefixless = len - i; - switch (prefixless) { - case 2: // 8-bit (2 hex characters) - case 4: // 16-bit - case 8: // 32-bit - case 16: // 64-bit - break; - default: - return -i; - } - *size += prefixless / 2; - } else { // if bytestring - size_t c = 0, field = strcspn(str, ifs); // length until end of field - while (c < field) { // count bytes in bytestring - if (strspn(str + c, SET_HEX) != 2) - return -i -c; - c += 2; - *size += 1; - - if (str[c] == ':') { - c += 1; - continue; - } - break; - } - } + if (base == 10) *offset += 1; + else if (base == 16) { + size_t prefixless = len - i; + switch (prefixless) { + case 2: // 8-bit (2 hex characters) + case 4: // 16-bit + case 8: // 32-bit + case 16: // 64-bit + break; + default: + return -i; } + *offset += prefixless / 2; } i += len; return i; } -int strtodata(const char* str, char** data, size_t* size) { +static int _strtodata_main(const char* str, char* _data, size_t* offset) { const char* ifs = IFS; - *size = 0; - size_t i = 0; size_t len = strlen(str); + size_t i = 0; + while (i < len) { - // skip whitespace - i += strspn(&str[i], ifs); - // end of string - if (str[i] == '\0') break; + i += strspn(&str[i], ifs); // skip whitespace + if (str[i] == '\0') break; // end of string int run; - if ((run = parse_str(str + i, NULL, size)) > 0) { i += run; continue; } - if ((run = parse_num(str + i, NULL, size)) > 0) { i += run; continue; } + char* data = _data == NULL ? NULL : _data + *offset; + if ((run = parse_string(str + i, data, offset)) > 0) goto format_ok; + if ((run = parse_hexstr(str + i, data, offset)) > 0) goto format_ok; + if ((run = parse_number(str + i, data, offset)) > 0) goto format_ok; + + return -i + run; // no format detected - // no format detected - return -i + run; +format_ok: + i += run; + continue; } - *data = (char*) malloc(*size); + return i; +} + +int strtodata(const char* str, char** data, size_t* size) { + *size = 0; - return *size; + // 1st pass: check data format + int ret = _strtodata_main(str, NULL, size); + if (ret <= 0) return ret; // on error + + // 2nd pass: convert string literals into binary data + *data = (char*) malloc(*size); + size_t written = 0; + return _strtodata_main(str, *data, &written); } -- cgit v1.2.3