From 29bc035ecfed9e37ce63b42de982b95569753463 Mon Sep 17 00:00:00 2001 From: Přemysl Janouch Date: Mon, 10 Oct 2016 08:39:31 +0200 Subject: Ensure UTF-8 when parsing playlists --- nncmpp.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/nncmpp.c b/nncmpp.c index cf54119..29a82b3 100644 --- a/nncmpp.c +++ b/nncmpp.c @@ -142,6 +142,25 @@ clock_msec (clockid_t clock) return (int64_t) tp.tv_sec * 1000 + (int64_t) tp.tv_nsec / 1000000; } +static char * +latin1_to_utf8 (const char *latin1) +{ + struct str converted; + str_init (&converted); + while (*latin1) + { + uint8_t c = *latin1++; + if (c < 0x80) + str_append_c (&converted, c); + else + { + str_append_c (&converted, 0xC0 | (c >> 6)); + str_append_c (&converted, 0x80 | (c & 0x3F)); + } + } + return str_steal (&converted); +} + // --- cURL async wrapper ------------------------------------------------------ // You are meant to subclass this structure, no user_data pointers needed @@ -1888,11 +1907,18 @@ parse_playlist (const char *playlist, const char *content_type, regmatch_t groups[2]; for (size_t i = 0; i < lines.len; i++) - { if (regexec (re, lines.vector[i], 2, groups, 0) != REG_NOMATCH) - str_vector_add (out, xstrndup (lines.vector[i] + groups[1].rm_so, - groups[1].rm_eo - groups[1].rm_so)); - } + { + char *target = xstrndup (lines.vector[i] + groups[1].rm_so, + groups[1].rm_eo - groups[1].rm_so); + if (utf8_validate (target, strlen (target))) + str_vector_add_owned (out, target); + else + { + str_vector_add_owned (out, latin1_to_utf8 (target)); + free (target); + } + } regex_free (re); str_vector_free (&lines); } -- cgit v1.2.3-70-g09d2