diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2020-10-21 05:20:20 +0200 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2020-10-21 05:20:20 +0200 |
commit | 53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385 (patch) | |
tree | 120b8d5685f1e507a61c82b58c21f40d74ae808b | |
parent | b08cf6c29f94373823a910e015c62d437f83dbfd (diff) | |
download | liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.tar.gz liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.tar.xz liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.zip |
Split out utf8_validate_cp(), adhere to RFC 3629
-rw-r--r-- | liberty.c | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
@@ -2770,6 +2770,13 @@ utf8_decode (const char **s, size_t len)
 	return cp;
 }
 
+static inline bool
+utf8_validate_cp (int32_t cp)
+{
+	// RFC 3629, CESU-8 not allowed
+	return cp >= 0 && cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF);
+}
+
 /// Very rough UTF-8 validation, just makes sure codepoints can be iterated
 static bool
 utf8_validate (const char *s, size_t len)
@@ -2777,7 +2784,7 @@ utf8_validate (const char *s, size_t len)
 	const char *end = s + len;
 	int32_t codepoint;
 	while ((codepoint = utf8_decode (&s, end - s)) >= 0
-		&& codepoint <= 0x10FFFF /* TODO: better validations */)
+		&& utf8_validate_cp (codepoint))
 		;
 	return s == end;
 }
```