aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Přemysl Eric Janouch <p@janouch.name> 2020-10-21 05:20:20 +0200
committer: Přemysl Eric Janouch <p@janouch.name> 2020-10-21 05:20:20 +0200
commit: 53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385 (patch)
tree: 120b8d5685f1e507a61c82b58c21f40d74ae808b
parent: b08cf6c29f94373823a910e015c62d437f83dbfd (diff)
download: liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.tar.gz
liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.tar.xz
liberty-53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385.zip
Split out utf8_validate_cp(), adhere to RFC 3629
-rw-r--r-- liberty.c 9
1 files changed, 8 insertions, 1 deletions
diff --git a/liberty.c b/liberty.c
index c0b6bb4..d3c6c25 100644
--- a/liberty.c
+++ b/liberty.c
@@ -2770,6 +2770,13 @@ utf8_decode (const char **s, size_t len)
return cp;
}
+static inline bool
+utf8_validate_cp (int32_t cp)
+{
+ // RFC 3629: accept only U+0000..U+10FFFF minus the surrogates U+D800..U+DFFF, hence CESU-8 is not allowed
+ return cp >= 0 && cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF);
+}
+
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
static bool
utf8_validate (const char *s, size_t len)
@@ -2777,7 +2784,7 @@ utf8_validate (const char *s, size_t len)
const char *end = s + len;
int32_t codepoint;
while ((codepoint = utf8_decode (&s, end - s)) >= 0
- && codepoint <= 0x10FFFF /* TODO: better validations */)
+ && utf8_validate_cp (codepoint))
;
return s == end;
}