From 29bc035ecfed9e37ce63b42de982b95569753463 Mon Sep 17 00:00:00 2001
From: Přemysl Janouch
Date: Mon, 10 Oct 2016 08:39:31 +0200
Subject: Ensure UTF-8 when parsing playlists
---
nncmpp.c | 34 ++++++++++++++++++++++++++++++----
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/nncmpp.c b/nncmpp.c
index cf54119..29a82b3 100644
--- a/nncmpp.c
+++ b/nncmpp.c
@@ -142,6 +142,25 @@ clock_msec (clockid_t clock)
return (int64_t) tp.tv_sec * 1000 + (int64_t) tp.tv_nsec / 1000000;
}
+static char * // returns whatever str_steal() yields; presumably a heap string the caller must free -- TODO confirm
+latin1_to_utf8 (const char *latin1) // re-encode a NUL-terminated Latin-1 (one byte per character) string as UTF-8
+{
+ struct str converted; // output accumulator, filled one byte at a time
+ str_init (&converted);
+ while (*latin1) // walk the input up to its terminating NUL
+ {
+ uint8_t c = *latin1++; // read as an unsigned byte so values >= 0x80 stay in 0x80..0xFF
+ if (c < 0x80)
+ str_append_c (&converted, c); // below 0x80: the byte is appended unchanged
+ else
+ {
+ str_append_c (&converted, 0xC0 | (c >> 6)); // lead byte 110000xx: carries the top two bits of c (yields 0xC2 or 0xC3)
+ str_append_c (&converted, 0x80 | (c & 0x3F)); // continuation byte 10xxxxxx: carries the low six bits of c
+ }
+ }
+ return str_steal (&converted); // presumably detaches the built buffer from `converted` -- verify against str_steal
+}
+
// --- cURL async wrapper ------------------------------------------------------
// You are meant to subclass this structure, no user_data pointers needed
@@ -1888,11 +1907,18 @@ parse_playlist (const char *playlist, const char *content_type,
regmatch_t groups[2];
for (size_t i = 0; i < lines.len; i++)
- {
if (regexec (re, lines.vector[i], 2, groups, 0) != REG_NOMATCH)
- str_vector_add (out, xstrndup (lines.vector[i] + groups[1].rm_so,
- groups[1].rm_eo - groups[1].rm_so));
- }
+ {
+ char *target = xstrndup (lines.vector[i] + groups[1].rm_so,
+ groups[1].rm_eo - groups[1].rm_so);
+ if (utf8_validate (target, strlen (target)))
+ str_vector_add_owned (out, target);
+ else
+ {
+ str_vector_add_owned (out, latin1_to_utf8 (target));
+ free (target);
+ }
+ }
regex_free (re);
str_vector_free (&lines);
}
--
cgit v1.2.3-70-g09d2