From 627c296057a91e73d3cd1631caa1e61ad4f2d124 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch <p@janouch.name>
Date: Tue, 12 Oct 2021 01:26:06 +0200
Subject: query-tool: support more field types

Add options to format the output for the terminal, or IRC messages.

Changed the output format to separate dictionary name with a tab,
so it's now rather similar to tabfiles.
---
 src/query-tool.c | 221 ++++++++++++++++++++++++++++++++++++++++++++-----------
 src/sdtui.c      |  20 +----
 src/stardict.c   |   2 +-
 src/utils.c      |  21 ++++++
 src/utils.h      |   2 +
 5 files changed, 207 insertions(+), 59 deletions(-)

(limited to 'src')

diff --git a/src/query-tool.c b/src/query-tool.c
index 63817ba..825bada 100644
--- a/src/query-tool.c
+++ b/src/query-tool.c
@@ -1,14 +1,14 @@
 /*
  * A tool to query multiple dictionaries for the specified word
  *
- * Intended for use in IRC bots and similar silly things---words go in, one
- * on a line, and entries come out, one dictionary at a time, finalised with
- * an empty line.  Newlines are escaped with `\n', backslashes with `\\'.
+ * Intended for use in IRC bots and similar silly things---words go in,
+ * one per line, and entries come out, one dictionary at a time,
+ * finalised with an empty line.  Newlines are escaped with `\n',
+ * backslashes with `\\'.
  *
- * So far only the `m' field is supported.  Feel free to extend the program
- * according to your needs, it's not very complicated.
+ * So far only the `m', `g', and `x' fields are supported, as in sdtui.
  *
- * Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
+ * Copyright (c) 2013 - 2021, Přemysl Eric Janouch <p@janouch.name>
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted.
@@ -30,10 +30,124 @@
 
 #include <glib.h>
 #include <gio/gio.h>
+#include <pango/pango.h>
 
 #include "stardict.h"
 #include "stardict-private.h"
 #include "generator.h"
+#include "utils.h"
+
+
+// --- Output formatting -------------------------------------------------------
+
+/// Transform Pango attributes to in-line formatting sequences (non-reentrant)
+typedef const gchar *(*FormatterFunc) (PangoAttrIterator *);
+
+static const gchar *
+pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator)
+{
+	return "";  // plain-text mode: never emit any formatting sequence
+}
+
+static const gchar *
+pango_attrs_to_irc (PangoAttrIterator *iterator)
+{
+	static gchar buf[5];  // 0x0f + at most three codes + NUL
+	gchar *p = buf;
+	*p++ = 0x0f;  // IRC reset code: always clear prior formatting first
+
+	if (!iterator)
+		goto reset_formatting;
+
+	PangoAttrInt *attr = NULL;
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
+		*p++ = 0x02;  // bold
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
+		*p++ = 0x1f;  // underline
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
+		*p++ = 0x1d;  // italic
+
+reset_formatting:
+	*p++ = 0;
+	return buf;  // static storage, overwritten by the next call
+}
+
+static const gchar *
+pango_attrs_to_ansi (PangoAttrIterator *iterator)
+{
+	static gchar buf[16];  // longest result "\x1b[0;1;4;3m" is 11 bytes
+	g_strlcpy (buf, "\x1b[0", sizeof buf);  // leading 0 resets attributes
+	if (!iterator)
+		goto reset_formatting;
+
+	PangoAttrInt *attr = NULL;
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
+		g_strlcat (buf, ";1", sizeof buf);  // bold
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
+		g_strlcat (buf, ";4", sizeof buf);  // underline
+	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+			PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
+		g_strlcat (buf, ";3", sizeof buf);  // italic
+
+reset_formatting:
+	g_strlcat (buf, "m", sizeof buf);  // terminate the escape sequence
+	return buf;  // static storage, overwritten by the next call
+}
+
+static gchar *
+pango_to_output_text (const gchar *markup, FormatterFunc formatter)
+{
+	// This function skips leading whitespace, but it's the canonical one
+	gchar *text = NULL;
+	PangoAttrList *attrs = NULL;
+	if (!pango_parse_markup (markup, -1, 0, &attrs, &text, NULL, NULL))
+		return g_strdup_printf ("<%s>", ("error in entry"));  // placeholder
+
+	PangoAttrIterator *iterator = pango_attr_list_get_iterator (attrs);
+	GString *result = g_string_new ("");
+	do
+	{
+		gint start = 0, end = 0;
+		pango_attr_iterator_range (iterator, &start, &end);
+		if (end == G_MAXINT)  // open-ended range: clamp to the text length
+			end = strlen (text);
+
+		g_string_append (result, formatter (iterator));  // codes for this run
+		g_string_append_len (result, text + start, end - start);
+	}
+	while (pango_attr_iterator_next (iterator));
+	g_string_append (result, formatter (NULL));  // NULL: closing sequence
+
+	g_free (text);
+	pango_attr_iterator_destroy (iterator);
+	pango_attr_list_unref (attrs);
+	return g_string_free (result, FALSE);  // caller must g_free() this
+}
+
+static gchar *
+field_to_output_text (const StardictEntryField *field, FormatterFunc formatter)
+{
+	const gchar *definition = field->data;
+	if (field->type == STARDICT_FIELD_MEANING)
+		return g_strdup (definition);  // plain text: copied unformatted
+	if (field->type == STARDICT_FIELD_PANGO)
+		return pango_to_output_text (definition, formatter);
+	if (field->type == STARDICT_FIELD_XDXF)
+	{
+		gchar *markup = xdxf_to_pango_markup_with_reduced_effort (definition);
+		gchar *result = pango_to_output_text (markup, formatter);
+		g_free (markup);  // intermediate markup is no longer needed
+		return result;
+	}
+	return NULL;  // any other field type: nothing to output
+}
+
+// --- Main --------------------------------------------------------------------
 
 static guint
 count_equal_chars (const gchar *a, const gchar *b)
@@ -46,15 +160,16 @@ count_equal_chars (const gchar *a, const gchar *b)
 }
 
 static void
-do_dictionary (StardictDict *dict, const gchar *word)
+do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter)
 {
 	gboolean found;
 	StardictIterator *iter = stardict_dict_search (dict, word, &found);
 	if (!found)
 		goto out;
 
-	// Default Stardict ordering is ASCII case-insensitive.
-	// Try to find a better matching entry based on letter case:
+	// Default Stardict ordering is ASCII case-insensitive,
+	// which may be further exacerbated by our own collation feature.
+	// Try to find a better matching entry:
 
 	gint64 best_offset = stardict_iterator_get_offset (iter);
 	guint best_score = count_equal_chars
@@ -86,50 +201,76 @@ do_dictionary (StardictDict *dict, const gchar *word)
 	for (; list; list = list->next)
 	{
 		StardictEntryField *field = list->data;
-		if (field->type == STARDICT_FIELD_MEANING)
+		gchar *definitions = field_to_output_text (field, formatter);
+		if (!definitions)
+			continue;
+
+		printf ("%s\t", info->book_name);
+		for (const gchar *p = definitions; *p; p++)
 		{
-			const gchar *desc = field->data;
-			printf ("%s:", info->book_name);
-			for (; *desc; desc++)
-			{
-				if (*desc == '\\')
-					printf ("\\\\");
-				else if (*desc == '\n')
-					printf ("\\n");
-				else
-					putchar (*desc);
-			}
-			putchar ('\n');
+			if (*p == '\\')
+				printf ("\\\\");
+			else if (*p == '\n')
+				printf ("\\n");
+			else
+				putchar (*p);
 		}
+		putchar ('\n');
+		g_free (definitions);
 	}
 	g_object_unref (entry);
 out:
 	g_object_unref (iter);
 }
 
-int
-main (int argc, char *argv[])
+static FormatterFunc
+parse_options (int *argc, char ***argv)  // returns the selected formatter
 {
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
-	if (glib_check_version (2, 36, 0))
-		g_type_init ();
-G_GNUC_END_IGNORE_DEPRECATIONS
-
 	GError *error = NULL;
 	GOptionContext *ctx = g_option_context_new
 		("DICTIONARY.ifo... - query multiple dictionaries");
-	if (!g_option_context_parse (ctx, &argc, &argv, &error))
+
+	gboolean format_with_ansi = FALSE;
+	gboolean format_with_irc = FALSE;
+	GOptionEntry entries[] =
+	{
+		{ "ansi", 'a', 0, G_OPTION_ARG_NONE, &format_with_ansi,
+		  "Format with ANSI sequences", NULL },
+		{ "irc", 'i', 0, G_OPTION_ARG_NONE, &format_with_irc,
+		  "Format with IRC codes", NULL },
+		{ NULL }  // terminator; a bare "{ }" is not valid ISO C before C23
+	};
+
+	g_option_context_add_main_entries (ctx, entries, NULL);
+	if (!g_option_context_parse (ctx, argc, argv, &error))
 	{
 		g_printerr ("Error: option parsing failed: %s\n", error->message);
 		exit (EXIT_FAILURE);
 	}
-	g_option_context_free (ctx);
-
-	if (argc < 2)
+	if (*argc < 2)  // nothing but the program name: no dictionaries given
 	{
-		g_printerr ("Error: no dictionaries given\n");
+		g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL));
 		exit (EXIT_FAILURE);
 	}
+	g_option_context_free (ctx);
+
+	if (format_with_ansi)  // --ansi takes precedence when both are given
+		return pango_attrs_to_ansi;
+	if (format_with_irc)
+		return pango_attrs_to_irc;
+
+	return pango_attrs_ignore;  // default: strip all formatting
+}
+
+int
+main (int argc, char *argv[])
+{
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+	if (glib_check_version (2, 36, 0))
+		g_type_init ();
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+	FormatterFunc formatter = parse_options (&argc, &argv);
 
 	guint n_dicts = argc - 1;
 	StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts);
@@ -137,6 +278,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS
 	guint i;
 	for (i = 1; i <= n_dicts; i++)
 	{
+		GError *error = NULL;
 		dicts[i - 1] = stardict_dict_new (argv[i], &error);
 		if (error)
 		{
@@ -146,26 +288,23 @@ G_GNUC_END_IGNORE_DEPRECATIONS
 		}
 	}
 
-	while (TRUE)
+	gint c;
+	do
 	{
 		GString *s = g_string_new (NULL);
-
-		gint c;
 		while ((c = getchar ()) != EOF && c != '\n')
 			if (c != '\r')
 				g_string_append_c (s, c);
 
 		if (s->len)
 			for (i = 0; i < n_dicts; i++)
-				do_dictionary (dicts[i], s->str);
+				do_dictionary (dicts[i], s->str, formatter);
 
 		printf ("\n");
 		fflush (NULL);
 		g_string_free (s, TRUE);
-
-		if (c == EOF)
-			break;
 	}
+	while (c != EOF);
 
 	for (i = 0; i < n_dicts; i++)
 		g_object_unref (dicts[i]);
diff --git a/src/sdtui.c b/src/sdtui.c
index 5e00d7c..d64f1d1 100644
--- a/src/sdtui.c
+++ b/src/sdtui.c
@@ -348,23 +348,9 @@ view_entry_split_add_pango (ViewEntry *ve, const gchar *markup)
 static void
 view_entry_split_add_xdxf (ViewEntry *ve, const gchar *xml)
 {
-	// Trivially filter out all tags we can't quite handle,
-	// then parse the reduced XML as Pango markup--this seems to work well.
-	// Given the nature of our display, also skip keyword elements.
-	GString *filtered = g_string_new ("");
-	while (*xml)
-	{
-		// GMarkup can read some of the wilder XML constructs, Pango skips them
-		const gchar *p = NULL;
-		if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
-		 || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
-		 || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
-			g_string_append_c (filtered, *xml++);
-		else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
-			xml = ++p;
-	}
-	view_entry_split_add_pango (ve, filtered->str);
-	g_string_free (filtered, TRUE);
+	gchar *markup = xdxf_to_pango_markup_with_reduced_effort (xml);
+	view_entry_split_add_pango (ve, markup);  // reduced XML as Pango markup
+	g_free (markup);  // the filtered copy is ours to release
 }
 
 /// Decomposes a dictionary entry into the format we want.
diff --git a/src/stardict.c b/src/stardict.c
index d371eb1..8b55f99 100644
--- a/src/stardict.c
+++ b/src/stardict.c
@@ -1354,7 +1354,7 @@ stardict_iterator_get_entry (StardictIterator *sdi)
 {
 	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
 	if (!stardict_iterator_is_valid (sdi))
-		return FALSE;
+		return NULL;
 	return stardict_dict_get_entry (sdi->owner, sdi->offset);
 }
 
diff --git a/src/utils.c b/src/utils.c
index 8c63548..275e4e1 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -33,6 +33,27 @@
 #include "utils.h"
 
 
+/// Trivially filter out all tags that aren't part of the Pango markup language,
+/// or no frontend can quite handle--this seems to work well.
+/// Given the nature of our display, also skip whole keyword elements.
+gchar *
+xdxf_to_pango_markup_with_reduced_effort (const gchar *xml)
+{
+	GString *filtered = g_string_new ("");
+	while (*xml)
+	{
+		// GMarkup can read some of the wilder XML constructs, Pango skips them
+		const gchar *p = NULL;
+		if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
+		 || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
+		 || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
+			g_string_append_c (filtered, *xml++);  // pass through verbatim
+		else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
+			xml = ++p;  // drop just this tag (a matched <k> jumped to </k>)
+	}
+	return g_string_free (filtered, FALSE);  // caller must g_free() this
+}
+
 /// Read the whole stream into a byte array.
 gboolean
 stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
diff --git a/src/utils.h b/src/utils.h
index 178a1d9..99ad19a 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -36,6 +36,8 @@
 		}                                                                     \
 	}
 
+gchar *xdxf_to_pango_markup_with_reduced_effort (const gchar *xml);
+
 gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error);
 gchar *stream_read_string (GDataInputStream *dis, GError **error);
 gboolean xstrtoul (unsigned long *out, const char *s, int base);
-- 
cgit v1.3.1