From 3a8d70de66bb38a7af7d6bb38f01f154ac413650 Mon Sep 17 00:00:00 2001 From: Přemysl Janouch Date: Sun, 3 Apr 2016 04:05:04 +0200 Subject: degesch: fix crash on invalid cp1252 characters We don't even really need iconv here. --- degesch.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/degesch.c b/degesch.c index 8932d5a..9f5ad7b 100644 --- a/degesch.c +++ b/degesch.c @@ -1973,7 +1973,6 @@ struct app_context iconv_t term_to_utf8; ///< Terminal encoding to UTF-8 iconv_t term_from_utf8; ///< UTF-8 to terminal encoding - iconv_t latin1_to_utf8; ///< ISO Latin 1 to UTF-8 struct input *input; ///< User interface @@ -2054,12 +2053,9 @@ app_context_init (struct app_context *self) self->backlog_limit = 1000; self->last_displayed_msg_time = time (NULL); - // Windows 1252 redefines several silly control characters as glyphs char *native = nl_langinfo (CODESET); if (!app_iconv_open (&self->term_from_utf8, native, "UTF-8") - || !app_iconv_open (&self->term_to_utf8, "UTF-8", native) - || (!app_iconv_open (&self->latin1_to_utf8, "UTF-8", "WINDOWS-1252") - && !app_iconv_open (&self->latin1_to_utf8, "UTF-8", "ISO-8859-1"))) + || !app_iconv_open (&self->term_to_utf8, "UTF-8", native)) exit_fatal ("creating the UTF-8 conversion object failed: %s", strerror (errno)); @@ -2100,7 +2096,6 @@ app_context_free (struct app_context *self) str_map_free (&self->servers); poller_free (&self->poller); - iconv_close (self->latin1_to_utf8); iconv_close (self->term_from_utf8); iconv_close (self->term_to_utf8); @@ -2915,14 +2910,41 @@ irc_skip_statusmsg (struct server *s, const char *target) // As of 2015, everything should be in UTF-8. And if it's not, we'll decode it // as ISO Latin 1. This function should not be called on the whole message. static char * -irc_to_utf8 (struct app_context *ctx, const char *text) +irc_to_utf8 (const char *text) { if (!text) return NULL; size_t len = strlen (text) + 1; if (utf8_validate (text, len)) return xstrdup (text); - return iconv_xstrdup (ctx->latin1_to_utf8, (char *) text, len, NULL); + + // Windows 1252 redefines several silly C1 control characters as glyphs + static const char *c1[32] = + { + "\xe2\x82\xac", "\xc2\x81", "\xe2\x80\x9a", "\xc6\x92", + "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", + "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", + "\xc5\x92", "\xc2\x8d", "\xc5\xbd", "\xc2\x8f", + "\xc2\x90", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", + "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", + "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", + "\xc5\x93", "\xc2\x9d", "\xc5\xbe", "\xc5\xb8", + }; + + struct str s; + str_init (&s); + for (const char *p = text; *p; p++) + { + int c = *(unsigned char *) p; + if (c < 0x80) + str_append_c (&s, c); + else if (c < 0xA0) + str_append (&s, c1[c & 0x1f]); + else + str_append_data (&s, + (char[]) {0xc0 | (c >> 6), 0x80 | (c & 0x3f)}, 2); + } + return str_steal (&s); } // This function is used to output debugging IRC traffic to the terminal. @@ -2931,7 +2953,7 @@ irc_to_utf8 (struct app_context *ctx, const char *text) static char * irc_to_term (struct app_context *ctx, const char *text) { - char *utf8 = irc_to_utf8 (ctx, text); + char *utf8 = irc_to_utf8 (text); char *term = iconv_xstrdup (ctx->term_from_utf8, utf8, -1, NULL); free (utf8); return term; @@ -3096,7 +3118,7 @@ formatter_parse_nick (struct formatter *self, char *s) // which would also make us not cut off the userhost part, ever if (irc_is_channel (self->s, irc_skip_statusmsg (self->s, s))) { - char *tmp = irc_to_utf8 (self->ctx, s); + char *tmp = irc_to_utf8 (s); FORMATTER_ADD_TEXT (self, tmp); free (tmp); return; @@ -3120,7 +3142,7 @@ formatter_parse_nick (struct formatter *self, char *s) FORMATTER_ADD_ITEM (self, FG_COLOR, .color = color); - char *x = irc_to_utf8 (self->ctx, nick); + char *x = irc_to_utf8 (nick); free (nick); FORMATTER_ADD_TEXT (self, x); free (x); @@ -3141,7 +3163,7 @@ formatter_parse_nick_full (struct formatter *self, char *s) FORMATTER_ADD_TEXT (self, " ("); FORMATTER_ADD_ITEM (self, ATTR, .attribute = ATTR_USERHOST); - char *x = irc_to_utf8 (self->ctx, userhost); + char *x = irc_to_utf8 (userhost); FORMATTER_ADD_TEXT (self, x); free (x); @@ -3181,12 +3203,12 @@ restart: break; case 'S': - tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *))); + tmp = irc_to_utf8 ((s = va_arg (*ap, char *))); str_append (buf, tmp); free (tmp); break; case 'm': - tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *))); + tmp = irc_to_utf8 ((s = va_arg (*ap, char *))); formatter_parse_mirc (self, tmp); free (tmp); break; -- cgit v1.2.3-70-g09d2