diff options
author | Přemysl Janouch <p.janouch@gmail.com> | 2015-12-10 19:34:10 +0100 |
---|---|---|
committer | Přemysl Janouch <p.janouch@gmail.com> | 2015-12-10 19:54:45 +0100 |
commit | 75d063e363f1f6c74c80246ffed71f180b8c10d5 (patch) | |
tree | b17915b750a848c3ad0ec75be832a0c5c79eda7c | |
parent | 122ab355a6ba4c2b2d2186aaa0a37d2f5bd8aa73 (diff) | |
download | liberty-75d063e363f1f6c74c80246ffed71f180b8c10d5.tar.gz liberty-75d063e363f1f6c74c80246ffed71f180b8c10d5.tar.xz liberty-75d063e363f1f6c74c80246ffed71f180b8c10d5.zip |
Fix a nasty bug in utf8_next()
Uppercase ASCII was read incorrectly.
-rw-r--r-- | liberty.c | 20 |
1 file changed, 9 insertions, 11 deletions
@@ -2242,14 +2242,11 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
 	if (!len)
 		return NULL;
 
-	// In the middle of a character -> error
-	const uint8_t *p = (const unsigned char *) s;
-	if ((*p & 0xC0) == 0x80)
-		return NULL;
+	// Find out how long the sequence is (0 for ASCII)
+	unsigned mask = 0x80;
+	unsigned sequence_len = 0;
 
-	// Find out how long the sequence is
-	unsigned mask = 0xC0;
-	unsigned tail_len = 0;
+	const uint8_t *p = (const uint8_t *) s;
 	while ((*p & mask) == mask)
 	{
 		// Invalid start of sequence
@@ -2257,15 +2254,16 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
 			return NULL;
 
 		mask |= mask >> 1;
-		tail_len++;
+		sequence_len++;
 	}
 
-	// Check the rest of the sequence
-	if (tail_len > --len)
+	// In the middle of a character or the input is too short
+	if (sequence_len == 1 || sequence_len > len)
 		return NULL;
 
+	// Check the rest of the sequence
 	uint32_t cp = *p++ & ~mask;
-	while (tail_len--)
+	while (sequence_len && --sequence_len)
 	{
 		if ((*p & 0xC0) != 0x80)
 			return NULL;