From 3a8d70de66bb38a7af7d6bb38f01f154ac413650 Mon Sep 17 00:00:00 2001
From: Přemysl Janouch
Date: Sun, 3 Apr 2016 04:05:04 +0200
Subject: degesch: fix crash on invalid cp1252 characters
We don't even really need iconv here.
---
degesch.c | 50 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 36 insertions(+), 14 deletions(-)
diff --git a/degesch.c b/degesch.c
index 8932d5a..9f5ad7b 100644
--- a/degesch.c
+++ b/degesch.c
@@ -1973,7 +1973,6 @@ struct app_context
iconv_t term_to_utf8; ///< Terminal encoding to UTF-8
iconv_t term_from_utf8; ///< UTF-8 to terminal encoding
- iconv_t latin1_to_utf8; ///< ISO Latin 1 to UTF-8
struct input *input; ///< User interface
@@ -2054,12 +2053,9 @@ app_context_init (struct app_context *self)
self->backlog_limit = 1000;
self->last_displayed_msg_time = time (NULL);
- // Windows 1252 redefines several silly control characters as glyphs
char *native = nl_langinfo (CODESET);
if (!app_iconv_open (&self->term_from_utf8, native, "UTF-8")
- || !app_iconv_open (&self->term_to_utf8, "UTF-8", native)
- || (!app_iconv_open (&self->latin1_to_utf8, "UTF-8", "WINDOWS-1252")
- && !app_iconv_open (&self->latin1_to_utf8, "UTF-8", "ISO-8859-1")))
+ || !app_iconv_open (&self->term_to_utf8, "UTF-8", native))
exit_fatal ("creating the UTF-8 conversion object failed: %s",
strerror (errno));
@@ -2100,7 +2096,6 @@ app_context_free (struct app_context *self)
str_map_free (&self->servers);
poller_free (&self->poller);
- iconv_close (self->latin1_to_utf8);
iconv_close (self->term_from_utf8);
iconv_close (self->term_to_utf8);
@@ -2915,14 +2910,41 @@ irc_skip_statusmsg (struct server *s, const char *target)
// As of 2015, everything should be in UTF-8. And if it's not, we'll decode it
// as ISO Latin 1. This function should not be called on the whole message.
static char *
-irc_to_utf8 (struct app_context *ctx, const char *text)
+irc_to_utf8 (const char *text)
{
if (!text)
return NULL;
size_t len = strlen (text) + 1;
if (utf8_validate (text, len))
return xstrdup (text);
- return iconv_xstrdup (ctx->latin1_to_utf8, (char *) text, len, NULL);
+
+ // Windows 1252 redefines several silly C1 control characters as glyphs
+ static const char *c1[32] =
+ {
+ "\xe2\x82\xac", "\xc2\x81", "\xe2\x80\x9a", "\xc6\x92",
+ "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
+ "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9",
+ "\xc5\x92", "\xc2\x8d", "\xc5\xbd", "\xc2\x8f",
+ "\xc2\x90", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c",
+ "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
+ "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba",
+ "\xc5\x93", "\xc2\x9d", "\xc5\xbe", "\xc5\xb8",
+ };
+
+ struct str s;
+ str_init (&s);
+ for (const char *p = text; *p; p++)
+ {
+ int c = *(unsigned char *) p;
+ if (c < 0x80)
+ str_append_c (&s, c);
+ else if (c < 0xA0)
+ str_append (&s, c1[c & 0x1f]);
+ else
+ str_append_data (&s,
+ (char[]) {0xc0 | (c >> 6), 0x80 | (c & 0x3f)}, 2);
+ }
+ return str_steal (&s);
}
// This function is used to output debugging IRC traffic to the terminal.
@@ -2931,7 +2953,7 @@ irc_to_utf8 (struct app_context *ctx, const char *text)
static char *
irc_to_term (struct app_context *ctx, const char *text)
{
- char *utf8 = irc_to_utf8 (ctx, text);
+ char *utf8 = irc_to_utf8 (text);
char *term = iconv_xstrdup (ctx->term_from_utf8, utf8, -1, NULL);
free (utf8);
return term;
@@ -3096,7 +3118,7 @@ formatter_parse_nick (struct formatter *self, char *s)
// which would also make us not cut off the userhost part, ever
if (irc_is_channel (self->s, irc_skip_statusmsg (self->s, s)))
{
- char *tmp = irc_to_utf8 (self->ctx, s);
+ char *tmp = irc_to_utf8 (s);
FORMATTER_ADD_TEXT (self, tmp);
free (tmp);
return;
@@ -3120,7 +3142,7 @@ formatter_parse_nick (struct formatter *self, char *s)
FORMATTER_ADD_ITEM (self, FG_COLOR, .color = color);
- char *x = irc_to_utf8 (self->ctx, nick);
+ char *x = irc_to_utf8 (nick);
free (nick);
FORMATTER_ADD_TEXT (self, x);
free (x);
@@ -3141,7 +3163,7 @@ formatter_parse_nick_full (struct formatter *self, char *s)
FORMATTER_ADD_TEXT (self, " (");
FORMATTER_ADD_ITEM (self, ATTR, .attribute = ATTR_USERHOST);
- char *x = irc_to_utf8 (self->ctx, userhost);
+ char *x = irc_to_utf8 (userhost);
FORMATTER_ADD_TEXT (self, x);
free (x);
@@ -3181,12 +3203,12 @@ restart:
break;
case 'S':
- tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *)));
+ tmp = irc_to_utf8 ((s = va_arg (*ap, char *)));
str_append (buf, tmp);
free (tmp);
break;
case 'm':
- tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *)));
+ tmp = irc_to_utf8 ((s = va_arg (*ap, char *)));
formatter_parse_mirc (self, tmp);
free (tmp);
break;
--
cgit v1.2.3-70-g09d2