about summary refs log tree commit diff
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2020-10-21 04:52:24 +0200
committer Přemysl Eric Janouch <p@janouch.name> 2020-10-21 05:08:59 +0200
commit b08cf6c29f94373823a910e015c62d437f83dbfd (patch)
tree 23a5fc7979f05d625b42d921b27accd0f6434094
parent 69101eb1554ad2fca6de30cdbaccac076210d7e3 (diff)
download liberty-b08cf6c29f94373823a910e015c62d437f83dbfd.tar.gz
liberty-b08cf6c29f94373823a910e015c62d437f83dbfd.tar.xz
liberty-b08cf6c29f94373823a910e015c62d437f83dbfd.zip
Reject overlong UTF-8 sequences
-rw-r--r-- liberty.c 5
-rw-r--r-- tests/liberty.c 10
2 files changed, 11 insertions, 4 deletions
diff --git a/liberty.c b/liberty.c
index 3aadc29..c0b6bb4 100644
--- a/liberty.c
+++ b/liberty.c
@@ -2753,6 +2753,11 @@ utf8_decode (const char **s, size_t len)
// Check the rest of the sequence
uint32_t cp = *p++ & ~mask;
+
+ // Overlong sequence (possibly MUTF-8, not supported)
+ if (!cp && sequence_len)
+ return -1;
+
while (sequence_len && --sequence_len)
{
if (p == end)
diff --git a/tests/liberty.c b/tests/liberty.c
index b55fe2c..3f0bd3f 100644
--- a/tests/liberty.c
+++ b/tests/liberty.c
@@ -331,10 +331,12 @@ test_utf8 (void)
soft_assert (utf8_decode (&partial, 1) == -2);
soft_assert (utf8_decode (&empty, 0) == -1);
- const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
- const char invalid[] = "\xf0\x90\x28\xbc";
- soft_assert ( utf8_validate (valid, sizeof valid));
- soft_assert (!utf8_validate (invalid, sizeof invalid));
+ const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
+ const char invalid_1[] = "\xf0\x90\x28\xbc";
+ const char invalid_2[] = "\xc0\x80";
+ soft_assert ( utf8_validate (valid, sizeof valid));
+ soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
+ soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
struct utf8_iter iter = utf8_iter_make ("fóọ");
size_t ch_len;