diff options
author | Přemysl Janouch <p.janouch@gmail.com> | 2015-05-02 04:58:08 +0200 |
---|---|---|
committer | Přemysl Janouch <p.janouch@gmail.com> | 2015-05-02 04:59:55 +0200 |
commit | c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79 (patch) | |
tree | a923b78a57fe9bfe65bf12e5e757f7807b160006 | |
parent | 83e159d9451fce521a04c2bf09bb6abe866c6231 (diff) | |
download | xK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.tar.gz xK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.tar.xz xK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.zip |
config: implement string tokenizing
-rw-r--r-- | common.c | 125 |
1 files changed, 123 insertions, 2 deletions
```diff
@@ -1009,6 +1009,120 @@ config_tokenizer_error (struct config_tokenizer *self,
 	str_free (&description);
 }
 
+static bool
+config_tokenizer_hexa_escape (struct config_tokenizer *self, struct str *output)
+{
+	int i;
+	unsigned char code = 0;
+
+	for (i = 0; self->len && i < 2; i++)
+	{
+		unsigned char c = tolower_ascii (*self->p);
+		if (c >= '0' && c <= '9')
+			code = (code << 4) | (c - '0');
+		else if (c >= 'a' && c <= 'f')
+			code = (code << 4) | (c - 'a' + 10);
+		else
+			break;
+
+		config_tokenizer_advance (self);
+	}
+
+	if (!i)
+		return false;
+
+	str_append_c (output, code);
+	return true;
+}
+
+static bool
+config_tokenizer_octal_escape
+	(struct config_tokenizer *self, struct str *output)
+{
+	int i;
+	unsigned char code = 0;
+
+	for (i = 0; self->len && i < 3; i++)
+	{
+		unsigned char c = *self->p;
+		if (c >= '0' && c <= '7')
+			code = (code << 3) | (c - '0');
+		else
+			break;
+
+		config_tokenizer_advance (self);
+	}
+
+	if (!i)
+		return false;
+
+	str_append_c (output, code);
+	return true;
+}
+
+static bool
+config_tokenizer_escape_sequence
+	(struct config_tokenizer *self, struct str *output, struct error **e)
+{
+	if (!self->len)
+	{
+		config_tokenizer_error (self, e, "premature end of escape sequence");
+		return false;
+	}
+
+	unsigned char c;
+	switch ((c = *self->p))
+	{
+	case '"': break;
+	case '\\': break;
+	case 'a': c = '\a'; break;
+	case 'b': c = '\b'; break;
+	case 'f': c = '\f'; break;
+	case 'n': c = '\n'; break;
+	case 'r': c = '\r'; break;
+	case 't': c = '\t'; break;
+	case 'v': c = '\v'; break;
+
+	case 'x':
+	case 'X':
+		config_tokenizer_advance (self);
+		if (config_tokenizer_hexa_escape (self, output))
+			return true;
+
+		config_tokenizer_error (self, e, "invalid hexadecimal escape");
+		return false;
+
+	default:
+		if (config_tokenizer_octal_escape (self, output))
+			return true;
+
+		config_tokenizer_error (self, e, "unknown escape sequence");
+		return false;
+	}
+
+	str_append_c (output, c);
+	config_tokenizer_advance (self);
+	return true;
+}
+
+static bool
+config_tokenizer_string
+	(struct config_tokenizer *self, struct str *output, struct error **e)
+{
+	unsigned char c;
+	while (self->len)
+	{
+		if ((c = config_tokenizer_advance (self)) == '"')
+			return true;
+		if (c != '\\')
+			str_append_c (output, c);
+		else if (!config_tokenizer_escape_sequence (self, output, e))
+			return false;
+	}
+	config_tokenizer_error (self, e, "premature end of string");
+	return false;
+}
+
 static enum config_token
 config_tokenizer_next (struct config_tokenizer *self, struct error **e)
 {
@@ -1033,8 +1147,15 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
 		return CONFIG_T_ABORT;
 
 	case '"':
-		// TODO: string, validate as UTF-8
-		break;
+		config_tokenizer_advance (self);
+		str_reset (&self->string);
+		if (!config_tokenizer_string (self, &self->string, e))
+			return CONFIG_T_ABORT;
+		if (!utf8_validate (self->string.str, self->string.len))
+		{
+			config_tokenizer_error (self, e, "not a valid UTF-8 string");
+			return CONFIG_T_ABORT;
+		}
 	}
 
 	bool is_word = false;
```