aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Přemysl Janouch <p.janouch@gmail.com> 2015-05-02 04:58:08 +0200
committer Přemysl Janouch <p.janouch@gmail.com> 2015-05-02 04:59:55 +0200
commit c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79 (patch)
tree a923b78a57fe9bfe65bf12e5e757f7807b160006
parent 83e159d9451fce521a04c2bf09bb6abe866c6231 (diff)
downloadxK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.tar.gz
xK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.tar.xz
xK-c4ea0e28fdc3b1be8c8f69f816acabfbe007dc79.zip
config: implement string tokenizing
-rw-r--r-- common.c 125
1 files changed, 123 insertions, 2 deletions
diff --git a/common.c b/common.c
index 67781dc..7bfe9fe 100644
--- a/common.c
+++ b/common.c
@@ -1009,6 +1009,120 @@ config_tokenizer_error (struct config_tokenizer *self,
str_free (&description);
}
+// Decodes the digits of a hexadecimal escape: consumes at most two
+// hexadecimal digits (either letter case) from the current position and
+// appends the byte they encode to "output".  Returns false, leaving both
+// the tokenizer and "output" untouched, when not even one digit is present.
+static bool
+config_tokenizer_hexa_escape (struct config_tokenizer *self, struct str *output)
+{
+	unsigned char value = 0;
+	int digits = 0;
+
+	while (digits < 2 && self->len)
+	{
+		unsigned char ch = tolower_ascii (*self->p);
+		unsigned char nibble;
+
+		if (ch >= '0' && ch <= '9')
+			nibble = ch - '0';
+		else if (ch >= 'a' && ch <= 'f')
+			nibble = ch - 'a' + 10;
+		else
+			break;
+
+		// Two nibbles at most, so the result always fits into one byte
+		value = (value << 4) | nibble;
+		config_tokenizer_advance (self);
+		digits++;
+	}
+
+	if (digits == 0)
+		return false;
+
+	str_append_c (output, value);
+	return true;
+}
+
+// Decodes an octal escape: consumes one to three octal digits from the
+// current position and appends the byte they encode to "output".
+// Returns false without appending anything when there is no octal digit
+// to read, or when the digits encode a value that doesn't fit into a single
+// byte (anything above \377); in the latter case the digit that would
+// overflow is left unconsumed.
+static bool
+config_tokenizer_octal_escape
+	(struct config_tokenizer *self, struct str *output)
+{
+	// Deliberately wider than a byte: three octal digits can encode values
+	// up to 0777, which an unsigned char would silently truncate
+	unsigned code = 0;
+	int i;
+
+	for (i = 0; self->len && i < 3; i++)
+	{
+		unsigned char c = *self->p;
+		if (c < '0' || c > '7')
+			break;
+
+		code = (code << 3) | (unsigned) (c - '0');
+		if (code > 0xFF)
+			return false;
+
+		config_tokenizer_advance (self);
+	}
+
+	if (!i)
+		return false;
+
+	str_append_c (output, (unsigned char) code);
+	return true;
+}
+
+// Decodes whatever follows a backslash inside a quoted string and appends
+// the resulting byte to "output".  Handles the quote and backslash
+// themselves, the single-letter escapes a, b, f, n, r, t, v, hexadecimal
+// escapes introduced by x or X, and falls back to octal escapes for
+// anything else.  On failure an error is reported through
+// config_tokenizer_error() and false is returned.
+static bool
+config_tokenizer_escape_sequence
+	(struct config_tokenizer *self, struct str *output, struct error **e)
+{
+	if (self->len == 0)
+	{
+		config_tokenizer_error (self, e, "premature end of escape sequence");
+		return false;
+	}
+
+	unsigned char introducer = *self->p;
+	if (introducer == 'x' || introducer == 'X')
+	{
+		// The introducer itself has to be skipped before reading digits
+		config_tokenizer_advance (self);
+		if (!config_tokenizer_hexa_escape (self, output))
+		{
+			config_tokenizer_error (self, e, "invalid hexadecimal escape");
+			return false;
+		}
+		return true;
+	}
+
+	unsigned char replacement;
+	switch (introducer)
+	{
+	case '"':
+	case '\\':
+		// These two stand for themselves
+		replacement = introducer;
+		break;
+
+	case 'a': replacement = '\a'; break;
+	case 'b': replacement = '\b'; break;
+	case 'f': replacement = '\f'; break;
+	case 'n': replacement = '\n'; break;
+	case 'r': replacement = '\r'; break;
+	case 't': replacement = '\t'; break;
+	case 'v': replacement = '\v'; break;
+
+	default:
+		// Octal escapes have no introducer, the digits follow directly
+		if (!config_tokenizer_octal_escape (self, output))
+		{
+			config_tokenizer_error (self, e, "unknown escape sequence");
+			return false;
+		}
+		return true;
+	}
+
+	str_append_c (output, replacement);
+	config_tokenizer_advance (self);
+	return true;
+}
+
+// Reads the body of a quoted string up to and including the closing double
+// quote (the caller advances past the opening one beforehand), appending
+// the decoded bytes to "output".  Returns false after reporting an error
+// when an escape sequence is invalid or the input ends before the closing
+// quote.
+static bool
+config_tokenizer_string
+	(struct config_tokenizer *self, struct str *output, struct error **e)
+{
+	while (self->len)
+	{
+		unsigned char ch = config_tokenizer_advance (self);
+		if (ch == '"')
+			return true;
+
+		if (ch == '\\')
+		{
+			if (!config_tokenizer_escape_sequence (self, output, e))
+				return false;
+			continue;
+		}
+
+		str_append_c (output, ch);
+	}
+
+	config_tokenizer_error (self, e, "premature end of string");
+	return false;
+}
+
static enum config_token
config_tokenizer_next (struct config_tokenizer *self, struct error **e)
{
@@ -1033,8 +1147,15 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
return CONFIG_T_ABORT;
case '"':
- // TODO: string, validate as UTF-8
- break;
+ config_tokenizer_advance (self);
+ str_reset (&self->string);
+ if (!config_tokenizer_string (self, &self->string, e))
+ return CONFIG_T_ABORT;
+ if (!utf8_validate (self->string.str, self->string.len))
+ {
+ config_tokenizer_error (self, e, "not a valid UTF-8 string");
+ return CONFIG_T_ABORT;
+ }
}
bool is_word = false;