diff options
| author | Přemysl Eric Janouch <p@janouch.name> | 2021-08-28 14:44:38 +0200 | 
|---|---|---|
| committer | Přemysl Eric Janouch <p@janouch.name> | 2021-08-28 18:24:20 +0200 | 
| commit | b8dbc70a9c1b9160c07e696b3a64655abc6b7d9d (patch) | |
| tree | 103f0537b49b253519ded434e399a2137d573fbd | |
| parent | e0ad67a921277608579c84182bafd7ebc006a889 (diff) | |
| download | xK-b8dbc70a9c1b9160c07e696b3a64655abc6b7d9d.tar.gz xK-b8dbc70a9c1b9160c07e696b3a64655abc6b7d9d.tar.xz xK-b8dbc70a9c1b9160c07e696b3a64655abc6b7d9d.zip  | |
xC: respect text formatting when autosplitting
| -rw-r--r-- | NEWS | 5 | ||||
| -rw-r--r-- | xC.c | 157 | 
2 files changed, 133 insertions, 29 deletions
@@ -1,3 +1,8 @@ +1.4.0 (xxxx-xx-xx) + + * xC: made message autosplitting respect text formatting + +  1.3.0 (2021-08-07) "New World Order"   * xC: made nick autocompletion offer recent speakers first @@ -2797,7 +2797,8 @@ enum  	TEXT_UNDERLINE   = 1 << 2,  	TEXT_INVERSE     = 1 << 3,  	TEXT_BLINK       = 1 << 4, -	TEXT_CROSSED_OUT = 1 << 5 +	TEXT_CROSSED_OUT = 1 << 5, +	TEXT_MONOSPACE   = 1 << 6  };  struct attr_printer @@ -8227,12 +8228,99 @@ irc_process_message (const struct irc_message *msg, struct server *s)  // --- Message autosplitting magic --------------------------------------------- -// This is the most basic acceptable algorithm; something like ICU with proper +// This is a rather basic algorithm; something like ICU with proper  // locale specification would be needed to make it work better. +struct irc_char_attrs +{ +	uint8_t fg, bg;                     ///< {Fore,back}ground colour or 99 +	uint8_t attributes;                 ///< TEXT_* flags, except TEXT_BLINK +	uint8_t starts_at_boundary;         ///< Possible to split here? +}; + +static void +irc_serialize_char_attrs (const struct irc_char_attrs *attrs, struct str *out) +{ +	soft_assert (attrs->fg < 100 && attrs->bg < 100); + +	if (attrs->fg != 99 || attrs->bg != 99) +	{ +		str_append_printf (out, "\x03%u", attrs->fg); +		if (attrs->bg != 99) +			str_append_printf (out, ",%02u", attrs->bg); +	} +	if (attrs->attributes & TEXT_BOLD)        str_append_c (out, '\x02'); +	if (attrs->attributes & TEXT_ITALIC)      str_append_c (out, '\x1d'); +	if (attrs->attributes & TEXT_UNDERLINE)   str_append_c (out, '\x1f'); +	if (attrs->attributes & TEXT_INVERSE)     str_append_c (out, '\x16'); +	if (attrs->attributes & TEXT_CROSSED_OUT) str_append_c (out, '\x1e'); +	if (attrs->attributes & TEXT_MONOSPACE)   str_append_c (out, '\x11'); +} + +static const char * +irc_analyze_mirc_color (const char *s, uint8_t *fg, uint8_t *bg) +{ +	if (!isdigit_ascii (*s)) +	{ +		*fg = *bg = 99; +		return s; +	} + +	*fg = *s++ - '0'; +	if (isdigit_ascii (*s)) +		*fg = *fg * 10 + (*s++ - '0'); + +	if (*s != ',' || !isdigit_ascii (s[1])) +		return s; +	s++; + +	*bg = *s++ - '0'; +	if (isdigit_ascii (*s)) +		*bg = *bg * 10 + (*s++ - '0'); +	return s; +} + +// The text needs to be NUL-terminated +// TODO: try to deduplicate analogous code in formatter_parse_mirc() +static struct irc_char_attrs * +irc_analyze_text (const char *text, size_t len) +{ +	struct irc_char_attrs *attrs = xcalloc (len, sizeof *attrs), +		blank = { .fg = 99, .bg = 99, .starts_at_boundary = true }, +		next = blank, cur = next; + +	for (size_t i = 0; i != len; cur = next) +	{ +		const char *start = text; +		hard_assert (utf8_decode (&text, len - i) >= 0); +		switch (*start) +		{ +		case '\x02': next.attributes ^= TEXT_BOLD;        break; +		case '\x11': next.attributes ^= TEXT_MONOSPACE;   break; +		case '\x1d': next.attributes ^= TEXT_ITALIC;      break; +		case '\x1e': next.attributes ^= TEXT_CROSSED_OUT; break; +		case '\x1f': next.attributes ^= TEXT_UNDERLINE;   break; +		case '\x16': next.attributes ^= TEXT_INVERSE;     break; + +		case '\x03': +			text = irc_analyze_mirc_color (text, &next.fg, &next.bg); +			break; +		case '\x0f': +			next = blank; +		} + +		while (start++ != text) +		{ +			attrs[i++] = cur; +			cur.starts_at_boundary = false; +		} +	} +	return attrs; +} +  static size_t -wrap_text_for_single_line (const char *text, size_t text_len, -	size_t line_len, struct str *output) +wrap_text_for_single_line (const char *text, struct irc_char_attrs *attrs, +	size_t text_len, size_t target_len, struct str *output)  {  	size_t eaten = 0; @@ -8240,7 +8328,7 @@ wrap_text_for_single_line (const char *text, size_t text_len,  	const char *word_start;  	const char *word_end = text + strcspn (text, " ");  	size_t word_len = word_end - text; -	while (line_len && word_len <= line_len) +	while (target_len && word_len <= target_len)  	{  		if (word_len)  		{ @@ -8248,7 +8336,7 @@ wrap_text_for_single_line (const char *text, size_t text_len,  			text += word_len;  			eaten += word_len; -			line_len -= word_len; +			target_len -= word_len;  		}  		// Find the next word's end @@ -8262,53 +8350,62 @@ wrap_text_for_single_line (const char *text, size_t text_len,  		return eaten + (word_start - text);  	// And if that doesn't help, cut the longest valid block of characters -	for (const char *p = text; (size_t) (p - text) <= line_len; ) -	{ -		eaten = p - text; -		hard_assert (utf8_decode (&p, text_len - eaten) >= 0); -	} +	for (size_t i = 1; i <= text_len && i <= target_len; i++) +		if (i == text_len || attrs[i].starts_at_boundary) +			eaten = i; +  	str_append_data (output, text, eaten);  	return eaten;  }  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// In practice, this should never fail at all, although it's not guaranteed  static bool  wrap_message (const char *message,  	int line_max, struct strv *output, struct error **e)  { +	size_t message_left = strlen (message), i = 0; +	struct irc_char_attrs *attrs = irc_analyze_text (message, message_left); +	struct str m = str_make ();  	if (line_max <= 0)  		goto error; -	int message_left = strlen (message); -	while (message_left > line_max) +	while (m.len + message_left > (size_t) line_max)  	{ -		struct str m = str_make (); -  		size_t eaten = wrap_text_for_single_line -			(message, message_left, line_max, &m); +			(message + i, attrs + i, message_left, line_max - m.len, &m);  		if (!eaten) -		{ -			str_free (&m);  			goto error; -		}  		strv_append_owned (output, str_steal (&m)); -		message += eaten; -		message_left -= eaten; -	} +		m = str_make (); + +		i += eaten; +		if (!(message_left -= eaten)) +			break; +		irc_serialize_char_attrs (attrs + i, &m); +		if (m.len >= (size_t) line_max) +		{ +			print_debug ("formatting continuation too long"); +			str_reset (&m); +		} +	}  	if (message_left) -		strv_append (output, message); +		strv_append_owned (output, +			xstrdup_printf ("%s%s", m.str, message + i)); +	free (attrs); +	str_free (&m);  	return true;  error: -	// Well, that's just weird -	error_set (e, +	free (attrs); +	str_free (&m); +	return error_set (e,  		"Message splitting was unsuccessful as there was "  		"too little room for UTF-8 characters"); -	return false;  }  /// Automatically splits messages that arrive at other clients with our prefix @@ -14303,9 +14400,11 @@ test_aliases (void)  static void  test_wrapping (void)  { -	static const char *message = " foo bar foobar fóóbárbáz"; -	static const char *split[] = -		{ " foo", "bar", "foob", "ar", "fó", "ób", "árb", "áz" }; +	static const char *message = " foo bar foobar fóóbárbáz\002 a\0031 b"; +	// XXX: formatting continuation order is implementation-dependent here +	//   (irc_serialize_char_attrs() makes a choice in serialization) +	static const char *split[] = { " foo", "bar", "foob", "ar", +		"fó", "ób", "árb", "áz\x02", "\002a\0031", "\0031\002b" };  	struct strv v = strv_make ();  	hard_assert (wrap_message (message, 4, &v, NULL));  | 
