about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- degesch.c 23
1 files changed, 22 insertions, 1 deletions
diff --git a/degesch.c b/degesch.c
index 76721e5..10f96af 100644
--- a/degesch.c
+++ b/degesch.c
@@ -3022,7 +3022,7 @@ irc_skip_statusmsg (struct server *s, const char *target)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// As of 2015, everything should be in UTF-8. And if it's not, we'll decode it
+// As of 2020, everything should be in UTF-8. And if it's not, we'll decode it
// as ISO Latin 1. This function should not be called on the whole message.
static char *
irc_to_utf8 (const char *text)
@@ -7812,8 +7812,29 @@ irc_process_numeric (struct server *s,
}
static void
+irc_fix_cut_off_utf8 (char **line)
+{
+ // A variation on utf8_validate(): here we need to detect utf8_decode() returning -2
+ const char *p = *line, *end = strchr (p, 0);
+ int32_t codepoint;
+ while ((codepoint = utf8_decode (&p, end - p)) >= 0
+ && codepoint <= 0x10FFFF /* TODO: move this check into a function */)
+ ;
+ if (codepoint != -2)
+ return;
+
+ struct str fixed_up = str_make ();
+ str_append_data (&fixed_up, *line, p - *line);
+ str_append (&fixed_up, "\xEF\xBF\xBD" /* U+FFFD */);
+ cstr_set (line, str_steal (&fixed_up));
+}
+
+static void
irc_process_message (const struct irc_message *msg, struct server *s)
{
+ if (msg->params.len)
+ irc_fix_cut_off_utf8 (&msg->params.vector[msg->params.len - 1]);
+
// TODO: make use of IRCv3.2 server-time (with fallback to unixtime_msec())
// -> change all calls to log_{server,nick,outcoming,ctcp}*() to take
// an extra argument specifying time