From b8dbc70a9c1b9160c07e696b3a64655abc6b7d9d Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch
Date: Sat, 28 Aug 2021 14:44:38 +0200 Subject: xC: respect text formatting when autosplitting --- NEWS | 5 +++ xC.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 133 insertions(+), 29 deletions(-) diff --git a/NEWS b/NEWS index a037978..9edef71 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +1.4.0 (xxxx-xx-xx) + + * xC: made message autosplitting respect text formatting + + 1.3.0 (2021-08-07) "New World Order" * xC: made nick autocompletion offer recent speakers first diff --git a/xC.c b/xC.c index 438cf59..2b2bf73 100644 --- a/xC.c +++ b/xC.c @@ -2797,7 +2797,8 @@ enum TEXT_UNDERLINE = 1 << 2, TEXT_INVERSE = 1 << 3, TEXT_BLINK = 1 << 4, - TEXT_CROSSED_OUT = 1 << 5 + TEXT_CROSSED_OUT = 1 << 5, + TEXT_MONOSPACE = 1 << 6 }; struct attr_printer @@ -8227,12 +8228,99 @@ irc_process_message (const struct irc_message *msg, struct server *s) // --- Message autosplitting magic --------------------------------------------- -// This is the most basic acceptable algorithm; something like ICU with proper +// This is a rather basic algorithm; something like ICU with proper // locale specification would be needed to make it work better. +struct irc_char_attrs +{ + uint8_t fg, bg; ///< {Fore,back}ground colour or 99 + uint8_t attributes; ///< TEXT_* flags, except TEXT_BLINK + uint8_t starts_at_boundary; ///< Possible to split here? +}; + +static void +irc_serialize_char_attrs (const struct irc_char_attrs *attrs, struct str *out) +{ + soft_assert (attrs->fg < 100 && attrs->bg < 100); + + if (attrs->fg != 99 || attrs->bg != 99) + { + str_append_printf (out, "\x03%u", attrs->fg); + if (attrs->bg != 99) + str_append_printf (out, ",%02u", attrs->bg); + } + if (attrs->attributes & TEXT_BOLD) str_append_c (out, '\x02'); + if (attrs->attributes & TEXT_ITALIC) str_append_c (out, '\x1d'); + if (attrs->attributes & TEXT_UNDERLINE) str_append_c (out, '\x1f'); + if (attrs->attributes & TEXT_INVERSE) str_append_c (out, '\x16'); + if (attrs->attributes & TEXT_CROSSED_OUT) str_append_c (out, '\x1e'); + if (attrs->attributes & TEXT_MONOSPACE) str_append_c (out, '\x11'); +} + +static const char * +irc_analyze_mirc_color (const char *s, uint8_t *fg, uint8_t *bg) +{ + if (!isdigit_ascii (*s)) + { + *fg = *bg = 99; + return s; + } + + *fg = *s++ - '0'; + if (isdigit_ascii (*s)) + *fg = *fg * 10 + (*s++ - '0'); + + if (*s != ',' || !isdigit_ascii (s[1])) + return s; + s++; + + *bg = *s++ - '0'; + if (isdigit_ascii (*s)) + *bg = *bg * 10 + (*s++ - '0'); + return s; +} + +// The text needs to be NUL-terminated +// TODO: try to deduplicate analogous code in formatter_parse_mirc() +static struct irc_char_attrs * +irc_analyze_text (const char *text, size_t len) +{ + struct irc_char_attrs *attrs = xcalloc (len, sizeof *attrs), + blank = { .fg = 99, .bg = 99, .starts_at_boundary = true }, + next = blank, cur = next; + + for (size_t i = 0; i != len; cur = next) + { + const char *start = text; + hard_assert (utf8_decode (&text, len - i) >= 0); + switch (*start) + { + case '\x02': next.attributes ^= TEXT_BOLD; break; + case '\x11': next.attributes ^= TEXT_MONOSPACE; break; + case '\x1d': next.attributes ^= TEXT_ITALIC; break; + case '\x1e': next.attributes ^= TEXT_CROSSED_OUT; break; + case '\x1f': next.attributes ^= TEXT_UNDERLINE; break; + case '\x16': next.attributes ^= TEXT_INVERSE; break; + + case '\x03': + text = irc_analyze_mirc_color (text, &next.fg, &next.bg); + break; + case '\x0f': + next = blank; + } + + while (start++ != text) + { + attrs[i++] = cur; + cur.starts_at_boundary = false; + } + } + return attrs; +} + static size_t -wrap_text_for_single_line (const char *text, size_t text_len, - size_t line_len, struct str *output) +wrap_text_for_single_line (const char *text, struct irc_char_attrs *attrs, + size_t text_len, size_t target_len, struct str *output) { size_t eaten = 0; @@ -8240,7 +8328,7 @@ wrap_text_for_single_line (const char *text, size_t text_len, const char *word_start; const char *word_end = text + strcspn (text, " "); size_t word_len = word_end - text; - while (line_len && word_len <= line_len) + while (target_len && word_len <= target_len) { if (word_len) { @@ -8248,7 +8336,7 @@ wrap_text_for_single_line (const char *text, size_t text_len, text += word_len; eaten += word_len; - line_len -= word_len; + target_len -= word_len; } // Find the next word's end @@ -8262,53 +8350,62 @@ wrap_text_for_single_line (const char *text, size_t text_len, return eaten + (word_start - text); // And if that doesn't help, cut the longest valid block of characters - for (const char *p = text; (size_t) (p - text) <= line_len; ) - { - eaten = p - text; - hard_assert (utf8_decode (&p, text_len - eaten) >= 0); - } + for (size_t i = 1; i <= text_len && i <= target_len; i++) + if (i == text_len || attrs[i].starts_at_boundary) + eaten = i; + str_append_data (output, text, eaten); return eaten; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// In practice, this should never fail at all, although it's not guaranteed static bool wrap_message (const char *message, int line_max, struct strv *output, struct error **e) { + size_t message_left = strlen (message), i = 0; + struct irc_char_attrs *attrs = irc_analyze_text (message, message_left); + struct str m = str_make (); if (line_max <= 0) goto error; - int message_left = strlen (message); - while (message_left > line_max) + while (m.len + message_left > (size_t) line_max) { - struct str m = str_make (); - size_t eaten = wrap_text_for_single_line - (message, message_left, line_max, &m); + (message + i, attrs + i, message_left, line_max - m.len, &m); if (!eaten) - { - str_free (&m); goto error; - } strv_append_owned (output, str_steal (&m)); - message += eaten; - message_left -= eaten; - } + m = str_make (); + + i += eaten; + if (!(message_left -= eaten)) + break; + irc_serialize_char_attrs (attrs + i, &m); + if (m.len >= (size_t) line_max) + { + print_debug ("formatting continuation too long"); + str_reset (&m); + } + } if (message_left) - strv_append (output, message); + strv_append_owned (output, + xstrdup_printf ("%s%s", m.str, message + i)); + free (attrs); + str_free (&m); return true; error: - // Well, that's just weird - error_set (e, + free (attrs); + str_free (&m); + return error_set (e, "Message splitting was unsuccessful as there was " "too little room for UTF-8 characters"); - return false; } /// Automatically splits messages that arrive at other clients with our prefix @@ -14303,9 +14400,11 @@ test_aliases (void) static void test_wrapping (void) { - static const char *message = " foo bar foobar fóóbárbáz"; - static const char *split[] = - { " foo", "bar", "foob", "ar", "fó", "ób", "árb", "áz" }; + static const char *message = " foo bar foobar fóóbárbáz\002 a\0031 b"; + // XXX: formatting continuation order is implementation-dependent here + // (irc_serialize_char_attrs() makes a choice in serialization) + static const char *split[] = { " foo", "bar", "foob", "ar", + "fó", "ób", "árb", "áz\x02", "\002a\0031", "\0031\002b" }; struct strv v = strv_make (); hard_assert (wrap_message (message, 4, &v, NULL)); -- cgit v1.2.3-70-g09d2