diff options
| author | Přemysl Janouch <p.janouch@gmail.com> | 2016-09-26 15:59:26 +0200 | 
|---|---|---|
| committer | Přemysl Janouch <p.janouch@gmail.com> | 2016-09-26 15:59:26 +0200 | 
| commit | a59104191270b74d0a1312a6b83884ffb9334691 (patch) | |
| tree | f2cfc11f27110e8570871773db0236a08bff7778 /src | |
| parent | a87aca9c76c57770a69740b7e1ac9a56eb73f22c (diff) | |
| download | tdv-a59104191270b74d0a1312a6b83884ffb9334691.tar.gz tdv-a59104191270b74d0a1312a6b83884ffb9334691.tar.xz tdv-a59104191270b74d0a1312a6b83884ffb9334691.zip | |
Highlight the longest matching prefix of entries
Diffstat (limited to 'src')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/sdtui.c | 29 |
| -rw-r--r-- | src/stardict.c | 68 |
| -rw-r--r-- | src/stardict.h | 3 |
3 files changed, 97 insertions, 3 deletions
| diff --git a/src/sdtui.c b/src/sdtui.c index d41058b..f54d5f5 100644 --- a/src/sdtui.c +++ b/src/sdtui.c @@ -606,6 +606,9 @@ app_redraw_view (Application *self)  	move (TOP_BAR_CUTOFF, 0); +	gchar *input_utf8 = g_ucs4_to_utf8 +		((gunichar *) self->input->data, -1, NULL, NULL, NULL); +  	guint i, k = self->top_offset, shown = 0;  	for (i = 0; i < self->entries->len; i++)  	{ @@ -617,9 +620,27 @@ app_redraw_view (Application *self)  			if (k + 1 == ve->definitions_length)  attrs |= A_UNDERLINE;  			attrset (attrs); -			guint left_width = app_get_left_column_width (self); -			app_add_utf8_string (self, ve->word, 0, left_width); -			addstr (" "); +			RowBuffer buf; +			row_buffer_init (&buf, self); + +			size_t common = stardict_longest_common_collation_prefix +				(self->dict, ve->word, input_utf8); + +			gchar *prefix = g_strndup (ve->word, common); +			row_buffer_append (&buf, prefix, A_BOLD); +			g_free (prefix); + +			row_buffer_append (&buf, ve->word + common, 0); + +			gint left_width = app_get_left_column_width (self); +			if (buf.total_width > left_width) +				row_buffer_ellipsis (&buf, left_width, attrs); + +			row_buffer_flush (&buf); +			for (int i = buf.total_width; i < left_width + 1; i++) +				addch (' '); +			row_buffer_free (&buf); +  			app_add_utf8_string (self,  				ve->definitions[k], 0, COLS - left_width - 1); @@ -631,6 +652,8 @@ app_redraw_view (Application *self)  	}  done: +	free (input_utf8); +  	attrset (0);  	clrtobot ();  	refresh (); diff --git a/src/stardict.c b/src/stardict.c index 42c7548..9ce6059 100644 --- a/src/stardict.c +++ b/src/stardict.c @@ -29,6 +29,7 @@  #include <unicode/ucol.h>  #include <unicode/ustring.h> +#include <unicode/ubrk.h>  #include "stardict.h"  #include "stardict-private.h" @@ -934,6 +935,73 @@ stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success)  	return stardict_iterator_new (sd, imin);  } +/// Return the longest sequence of bytes from @a s1 that form a common prefix +/// with @a s2 
wrt. collation rules for this dictionary. +size_t +stardict_longest_common_collation_prefix (StardictDict *sd, +	const gchar *s1, const gchar *s2) +{ +	UErrorCode error; +	int32_t uc1_len = 0; +	int32_t uc2_len = 0; + +	// It sets the error to overflow each time, even during pre-flight +	error = U_ZERO_ERROR; +	u_strFromUTF8 (NULL, 0, &uc1_len, s1, -1, &error); +	error = U_ZERO_ERROR; +	u_strFromUTF8 (NULL, 0, &uc2_len, s2, -1, &error); +	error = U_ZERO_ERROR; + +	UChar uc1[uc1_len]; +	UChar uc2[uc2_len]; +	u_strFromUTF8 (uc1, uc1_len, NULL, s1, -1, &error); +	u_strFromUTF8 (uc2, uc2_len, NULL, s2, -1, &error); + +	// Both inputs need to be valid UTF-8 because of all the iteration mess +	if (U_FAILURE (error)) +		return 0; + +	// ucol_getSortKey() can't be used for these purposes, so the only +	// reasonable thing remaining is iterating by full graphemes.  It doesn't +	// work entirely correctly (e.g. Czech "ch" should be regarded as a single +	// unit, and punctuation could be ignored).  It's just good enough. +	// +	// In theory we could set the strength to UCOL_PRIMARY and ignore accents +	// but that's likely not what the user wants most of the time. +	// +	// Locale shouldn't matter much with graphemes, let's use the default. 
+	UBreakIterator *it1 = +		ubrk_open (UBRK_CHARACTER, NULL, uc1, uc1_len, &error); +	UBreakIterator *it2 = +		ubrk_open (UBRK_CHARACTER, NULL, uc2, uc2_len, &error); + +	int32_t longest = 0; +	int32_t pos1, pos2; +	while ((pos1 = ubrk_next (it1)) != UBRK_DONE +		&& (pos2 = ubrk_next (it2)) != UBRK_DONE) +	{ +		if (!ucol_strcoll (sd->priv->collator, uc1, pos1, uc2, pos2)) +			longest = pos1; +	} +	ubrk_close (it1); +	ubrk_close (it2); + +	if (!longest) +		return 0; + +	int32_t common_len = 0; +	u_strToUTF8 (NULL, 0, &common_len, uc1, longest, &error); + +	// Since this heavily depends on UTF-16 <-> UTF-8 not modifying the chars +	// (surrogate pairs interference?), let's add some paranoia here +	char common[common_len]; +	error = U_ZERO_ERROR; +	u_strToUTF8 (common, common_len, NULL, uc1, longest, &error); +	g_return_val_if_fail (!memcmp (s1, common, common_len), 0); + +	return (size_t) common_len; +} +  static void  stardict_entry_field_free (StardictEntryField *sef)  { diff --git a/src/stardict.h b/src/stardict.h index 7b8322a..25c7939 100644 --- a/src/stardict.h +++ b/src/stardict.h @@ -138,6 +138,9 @@ gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word);  StardictIterator *stardict_dict_search  	(StardictDict *sd, const gchar *word, gboolean *success); +size_t stardict_longest_common_collation_prefix +	(StardictDict *sd, const gchar *w1, const gchar *w2); +  // --- Dictionary iterators ----------------------------------------------------  struct stardict_iterator | 
