aboutsummaryrefslogtreecommitdiff
path: root/degesch.c
diff options
context:
space:
mode:
author Přemysl Janouch <p.janouch@gmail.com> 2015-04-24 22:34:44 +0200
committer Přemysl Janouch <p.janouch@gmail.com> 2015-04-24 22:34:44 +0200
commit d4413627e68325e954406e35c76b51325bb0e5d1 (patch)
tree 570d246e533ca088f5bfd4a19787174ce822e6b8 /degesch.c
parent 2a0dcc2addc2c949fa090f1e7f1e0de4f58cccb9 (diff)
download xK-d4413627e68325e954406e35c76b51325bb0e5d1.tar.gz
xK-d4413627e68325e954406e35c76b51325bb0e5d1.tar.xz
xK-d4413627e68325e954406e35c76b51325bb0e5d1.zip
degesch: better & working text wrapping
Now we respect word boundaries.
Diffstat (limited to 'degesch.c')
-rw-r--r-- degesch.c 63
1 files changed, 49 insertions, 14 deletions
diff --git a/degesch.c b/degesch.c
index 962b587..1808de2 100644
--- a/degesch.c
+++ b/degesch.c
@@ -2398,15 +2398,15 @@ irc_process_message (const struct irc_message *msg,
// --- Message autosplitting magic ---------------------------------------------
+// This is the most basic acceptable algorithm; something like ICU with proper
+// locale specification would be needed to make it work better.
+
static bool
wrap_text (const char *message,
int line_max, struct str_vector *output, struct error **e)
{
- // Attempt to split the message if it doesn't completely fit into a single
- // IRC protocol message while trying not to break UTF-8. Unicode can still
- // end up being wrong, though. As well as any mIRC formatting.
- //
- // TODO: at least try to word-wrap if nothing else
+ // Initialize to the first word, even if it's empty
+ const char *word_end = message + strcspn (message, " ");
for (int message_left = strlen (message); message_left; )
{
@@ -2415,12 +2415,46 @@ wrap_text (const char *message,
int part_left = MIN (line_max, message_left);
bool empty = true;
+
+ // First try going word by word
+ const char *word_start;
+ int word_len = word_end - message;
+ while (part_left && word_len <= part_left)
+ {
+ if (word_len)
+ {
+ str_append_data (&m, message, word_len);
+ message += word_len;
+ message_left -= word_len;
+ part_left -= word_len;
+ empty = false;
+ }
+
+ // Find the next word's end
+ word_start = message + strspn (message, " ");
+ word_end = word_start + strcspn (word_start, " ");
+ word_len = word_end - message;
+ }
+
+ if (!empty)
+ {
+ // Discard whitespace between words if split
+ message_left -= word_start - message;
+ message = word_start;
+
+ str_vector_add (output, m.str);
+ str_free (&m);
+ continue;
+ }
+
+ // And if that doesn't help, cut the longest valid block of characters.
+ // Note that we never get to the end of the word, so "word_end" stays.
while (true)
{
const char *next = utf8_next (message, message_left);
hard_assert (next);
- int char_len = message - next;
+ int char_len = next - message;
if (char_len > part_left)
break;
@@ -2428,6 +2462,7 @@ wrap_text (const char *message,
message += char_len;
message_left -= char_len;
+ part_left -= char_len;
empty = false;
}
@@ -2436,14 +2471,14 @@ wrap_text (const char *message,
str_free (&m);
- if (empty)
- {
- // Well, that's just weird
- error_set (e,
- "Message splitting was unsuccessful as there was "
- "too little room for UTF-8 characters");
- return false;
- }
+ if (!empty)
+ continue;
+
+ // Well, that's just weird
+ error_set (e,
+ "Message splitting was unsuccessful as there was "
+ "too little room for UTF-8 characters");
+ return false;
}
return true;
}