diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2020-10-24 19:06:41 +0200 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2020-10-24 19:09:09 +0200 |
commit | 96397778144722f7983774e9fc88521f4d36d3c7 (patch) | |
tree | 897544359c20130582fab849a0f22a0983dd84ca | |
parent | 929229a1d7f64ce345157443525dc2410e5e2381 (diff) | |
download | liberty-96397778144722f7983774e9fc88521f4d36d3c7.tar.gz liberty-96397778144722f7983774e9fc88521f4d36d3c7.tar.xz liberty-96397778144722f7983774e9fc88521f4d36d3c7.zip |
Fix validation of overlong UTF-8
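The overlong check was too strict: it rejected every sequence whose lead byte carries no payload bits, including valid ones such as U+130B9 (an Egyptian hieroglyph, encoded as F0 93 82 B9),
so the check now covers only a subset of overlong forms: sequences starting with 0xC0 or 0xC1.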
-rw-r--r-- | liberty.c | 8 | ||||
-rw-r--r-- | tests/liberty.c | 6 |
2 files changed, 6 insertions, 8 deletions
@@ -2748,16 +2748,12 @@ utf8_decode (const char **s, size_t len) } // In the middle of a character - if (sequence_len == 1) + // or an overlong sequence (subset, possibly MUTF-8, not supported) + if (sequence_len == 1 || *p == 0xC0 || *p == 0xC1) return -1; // Check the rest of the sequence uint32_t cp = *p++ & ~mask; - - // Overlong sequence (possibly MUTF-8, not supported) - if (!cp && sequence_len) - return -1; - while (sequence_len && --sequence_len) { if (p == end) diff --git a/tests/liberty.c b/tests/liberty.c index 3f0bd3f..dc445d8 100644 --- a/tests/liberty.c +++ b/tests/liberty.c @@ -331,10 +331,12 @@ test_utf8 (void) soft_assert (utf8_decode (&partial, 1) == -2); soft_assert (utf8_decode (&empty, 0) == -1); - const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"; + const char valid_1[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"; + const char valid_2[] = "\xf0\x93\x82\xb9"; const char invalid_1[] = "\xf0\x90\x28\xbc"; const char invalid_2[] = "\xc0\x80"; - soft_assert ( utf8_validate (valid, sizeof valid)); + soft_assert ( utf8_validate (valid_1, sizeof valid_1)); + soft_assert ( utf8_validate (valid_2, sizeof valid_2)); soft_assert (!utf8_validate (invalid_1, sizeof invalid_1)); soft_assert (!utf8_validate (invalid_2, sizeof invalid_2)); |