summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2020-10-12 23:32:58 +0200
committerPřemysl Eric Janouch <p@janouch.name>2020-10-12 23:45:27 +0200
commit9d8a7a10d0a4c13dab9b7c407621839035707aa3 (patch)
tree195a876760e669737169132796f8c9df00905804
parent73c3ca3633dc0f94f6f7d7e3dab1f2d9fa7bb449 (diff)
downloadxK-9d8a7a10d0a4c13dab9b7c407621839035707aa3.tar.gz
xK-9d8a7a10d0a4c13dab9b7c407621839035707aa3.tar.xz
xK-9d8a7a10d0a4c13dab9b7c407621839035707aa3.zip
Tolerate cut-off UTF-8 messages
I've had this happen to me on Russian channels, and it's highly annoying because you lose the entire message. With this change, by contrast, at worst the last few characters of it get garbled. Closes #2
-rw-r--r--degesch.c23
1 files changed, 22 insertions, 1 deletions
diff --git a/degesch.c b/degesch.c
index 76721e5..10f96af 100644
--- a/degesch.c
+++ b/degesch.c
@@ -3022,7 +3022,7 @@ irc_skip_statusmsg (struct server *s, const char *target)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// As of 2015, everything should be in UTF-8. And if it's not, we'll decode it
+// As of 2020, everything should be in UTF-8. And if it's not, we'll decode it
// as ISO Latin 1. This function should not be called on the whole message.
static char *
irc_to_utf8 (const char *text)
@@ -7812,8 +7812,29 @@ irc_process_numeric (struct server *s,
}
static void
+irc_fix_cut_off_utf8 (char **line)
+{
+ // A variation on utf8_validate(), we need to detect the -2 return; presumably that is how utf8_decode() reports a sequence cut off by the end of the input -- verify against its definition
+ const char *p = *line, *end = strchr (p, 0); // end points at the terminating NUL of *line
+ int32_t codepoint;
+ while ((codepoint = utf8_decode (&p, end - p)) >= 0
+ && codepoint <= 0x10FFFF /* TODO: move this check into a function */)
+ ; // empty body: keep decoding until a negative result or a value above 0x10FFFF
+ if (codepoint != -2)
+ return; // any other stop reason leaves *line untouched
+
+ struct str fixed_up = str_make ();
+ str_append_data (&fixed_up, *line, p - *line); // copy the bytes before p; assumes p was not advanced past the failing sequence -- TODO confirm
+ str_append (&fixed_up, "\xEF\xBF\xBD" /* U+FFFD */);
+ cstr_set (line, str_steal (&fixed_up)); // hand the rebuilt string to *line; presumably cstr_set() releases the old one -- verify
+}
+
+static void
irc_process_message (const struct irc_message *msg, struct server *s)
{
+ if (msg->params.len)
+ irc_fix_cut_off_utf8 (&msg->params.vector[msg->params.len - 1]);
+
// TODO: make use of IRCv3.2 server-time (with fallback to unixtime_msec())
// -> change all calls to log_{server,nick,outcoming,ctcp}*() to take
// an extra argument specifying time