From 627c296057a91e73d3cd1631caa1e61ad4f2d124 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch
Date: Tue, 12 Oct 2021 01:26:06 +0200
Subject: query-tool: support more field types
Add options to format the output for the terminal, or IRC messages.
Changed the output format to separate dictionary name with a tab,
so it's now rather similar to tabfiles.
---
src/query-tool.c | 221 ++++++++++++++++++++++++++++++++++++++++++++-----------
src/sdtui.c | 20 +----
src/stardict.c | 2 +-
src/utils.c | 21 ++++++
src/utils.h | 2 +
5 files changed, 207 insertions(+), 59 deletions(-)
(limited to 'src')
diff --git a/src/query-tool.c b/src/query-tool.c
index 63817ba..825bada 100644
--- a/src/query-tool.c
+++ b/src/query-tool.c
@@ -1,14 +1,14 @@
/*
* A tool to query multiple dictionaries for the specified word
*
- * Intended for use in IRC bots and similar silly things---words go in, one
- * on a line, and entries come out, one dictionary at a time, finalised with
- * an empty line. Newlines are escaped with `\n', backslashes with `\\'.
+ * Intended for use in IRC bots and similar silly things---words go in,
+ * one per line, and entries come out, one dictionary at a time,
+ * finalised with an empty line. Newlines are escaped with `\n',
+ * backslashes with `\\'.
*
- * So far only the `m' field is supported. Feel free to extend the program
- * according to your needs, it's not very complicated.
+ * So far only the `m', `g', and `x' fields are supported, as in sdtui.
*
- * Copyright (c) 2013, Přemysl Eric Janouch
+ * Copyright (c) 2013 - 2021, Přemysl Eric Janouch
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@@ -30,10 +30,124 @@
#include <glib.h>
#include <gio/gio.h>
+#include <pango/pango.h>
#include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
+#include "utils.h"
+
+
+// --- Output formatting -------------------------------------------------------
+
+/// Transform Pango attributes to in-line formatting sequences (non-reentrant)
+typedef const gchar *(*FormatterFunc) (PangoAttrIterator *);
+
+static const gchar *
+pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator)
+{
+ return "";
+}
+
+static const gchar *
+pango_attrs_to_irc (PangoAttrIterator *iterator)
+{
+ static gchar buf[5];
+ gchar *p = buf;
+ *p++ = 0x0f;
+
+ if (!iterator)
+ goto reset_formatting;
+
+ PangoAttrInt *attr = NULL;
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
+ *p++ = 0x02;
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
+ *p++ = 0x1f;
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
+ *p++ = 0x1d;
+
+reset_formatting:
+ *p++ = 0;
+ return buf;
+}
+
+static const gchar *
+pango_attrs_to_ansi (PangoAttrIterator *iterator)
+{
+ static gchar buf[16];
+ g_strlcpy (buf, "\x1b[0", sizeof buf);
+ if (!iterator)
+ goto reset_formatting;
+
+ PangoAttrInt *attr = NULL;
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
+ g_strlcat (buf, ";1", sizeof buf);
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
+ g_strlcat (buf, ";4", sizeof buf);
+ if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
+ PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
+ g_strlcat (buf, ";3", sizeof buf);
+
+reset_formatting:
+ g_strlcat (buf, "m", sizeof buf);
+ return buf;
+}
+
+static gchar *
+pango_to_output_text (const gchar *markup, FormatterFunc formatter)
+{
+ // This function skips leading whitespace, but it's the canonical one
+ gchar *text = NULL;
+ PangoAttrList *attrs = NULL;
+ if (!pango_parse_markup (markup, -1, 0, &attrs, &text, NULL, NULL))
+ return g_strdup_printf ("<%s>", ("error in entry"));
+
+ PangoAttrIterator *iterator = pango_attr_list_get_iterator (attrs);
+ GString *result = g_string_new ("");
+ do
+ {
+ gint start = 0, end = 0;
+ pango_attr_iterator_range (iterator, &start, &end);
+ if (end == G_MAXINT)
+ end = strlen (text);
+
+ g_string_append (result, formatter (iterator));
+ g_string_append_len (result, text + start, end - start);
+ }
+ while (pango_attr_iterator_next (iterator));
+ g_string_append (result, formatter (NULL));
+
+ g_free (text);
+ pango_attr_iterator_destroy (iterator);
+ pango_attr_list_unref (attrs);
+ return g_string_free (result, FALSE);
+}
+
+static gchar *
+field_to_output_text (const StardictEntryField *field, FormatterFunc formatter)
+{
+ const gchar *definition = field->data;
+ if (field->type == STARDICT_FIELD_MEANING)
+ return g_strdup (definition);
+ if (field->type == STARDICT_FIELD_PANGO)
+ return pango_to_output_text (definition, formatter);
+ if (field->type == STARDICT_FIELD_XDXF)
+ {
+ gchar *markup = xdxf_to_pango_markup_with_reduced_effort (definition);
+ gchar *result = pango_to_output_text (markup, formatter);
+ g_free (markup);
+ return result;
+ }
+ return NULL;
+}
+
+// --- Main --------------------------------------------------------------------
static guint
count_equal_chars (const gchar *a, const gchar *b)
@@ -46,15 +160,16 @@ count_equal_chars (const gchar *a, const gchar *b)
}
static void
-do_dictionary (StardictDict *dict, const gchar *word)
+do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter)
{
gboolean found;
StardictIterator *iter = stardict_dict_search (dict, word, &found);
if (!found)
goto out;
- // Default Stardict ordering is ASCII case-insensitive.
- // Try to find a better matching entry based on letter case:
+ // Default Stardict ordering is ASCII case-insensitive,
+ // which may be further exacerbated by our own collation feature.
+ // Try to find a better matching entry:
gint64 best_offset = stardict_iterator_get_offset (iter);
guint best_score = count_equal_chars
@@ -86,50 +201,76 @@ do_dictionary (StardictDict *dict, const gchar *word)
for (; list; list = list->next)
{
StardictEntryField *field = list->data;
- if (field->type == STARDICT_FIELD_MEANING)
+ gchar *definitions = field_to_output_text (field, formatter);
+ if (!definitions)
+ continue;
+
+ printf ("%s\t", info->book_name);
+ for (const gchar *p = definitions; *p; p++)
{
- const gchar *desc = field->data;
- printf ("%s:", info->book_name);
- for (; *desc; desc++)
- {
- if (*desc == '\\')
- printf ("\\\\");
- else if (*desc == '\n')
- printf ("\\n");
- else
- putchar (*desc);
- }
- putchar ('\n');
+ if (*p == '\\')
+ printf ("\\\\");
+ else if (*p == '\n')
+ printf ("\\n");
+ else
+ putchar (*p);
}
+ putchar ('\n');
+ g_free (definitions);
}
g_object_unref (entry);
out:
g_object_unref (iter);
}
-int
-main (int argc, char *argv[])
+static FormatterFunc
+parse_options (int *argc, char ***argv)
{
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
- if (glib_check_version (2, 36, 0))
- g_type_init ();
-G_GNUC_END_IGNORE_DEPRECATIONS
-
GError *error = NULL;
GOptionContext *ctx = g_option_context_new
("DICTIONARY.ifo... - query multiple dictionaries");
- if (!g_option_context_parse (ctx, &argc, &argv, &error))
+
+ gboolean format_with_ansi = FALSE;
+ gboolean format_with_irc = FALSE;
+ GOptionEntry entries[] =
+ {
+ { "ansi", 'a', 0, G_OPTION_ARG_NONE, &format_with_ansi,
+ "Format with ANSI sequences", NULL },
+ { "irc", 'i', 0, G_OPTION_ARG_NONE, &format_with_irc,
+ "Format with IRC codes", NULL },
+ { }
+ };
+
+ g_option_context_add_main_entries (ctx, entries, NULL);
+ if (!g_option_context_parse (ctx, argc, argv, &error))
{
g_printerr ("Error: option parsing failed: %s\n", error->message);
exit (EXIT_FAILURE);
}
- g_option_context_free (ctx);
-
- if (argc < 2)
+ if (*argc < 2)
{
- g_printerr ("Error: no dictionaries given\n");
+ g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL));
exit (EXIT_FAILURE);
}
+ g_option_context_free (ctx);
+
+ if (format_with_ansi)
+ return pango_attrs_to_ansi;
+ if (format_with_irc)
+ return pango_attrs_to_irc;
+
+ return pango_attrs_ignore;
+}
+
+int
+main (int argc, char *argv[])
+{
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+ if (glib_check_version (2, 36, 0))
+ g_type_init ();
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+ FormatterFunc formatter = parse_options (&argc, &argv);
guint n_dicts = argc - 1;
StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts);
@@ -137,6 +278,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS
guint i;
for (i = 1; i <= n_dicts; i++)
{
+ GError *error = NULL;
dicts[i - 1] = stardict_dict_new (argv[i], &error);
if (error)
{
@@ -146,26 +288,23 @@ G_GNUC_END_IGNORE_DEPRECATIONS
}
}
- while (TRUE)
+ gint c;
+ do
{
GString *s = g_string_new (NULL);
-
- gint c;
while ((c = getchar ()) != EOF && c != '\n')
if (c != '\r')
g_string_append_c (s, c);
if (s->len)
for (i = 0; i < n_dicts; i++)
- do_dictionary (dicts[i], s->str);
+ do_dictionary (dicts[i], s->str, formatter);
printf ("\n");
fflush (NULL);
g_string_free (s, TRUE);
-
- if (c == EOF)
- break;
}
+ while (c != EOF);
for (i = 0; i < n_dicts; i++)
g_object_unref (dicts[i]);
diff --git a/src/sdtui.c b/src/sdtui.c
index 5e00d7c..d64f1d1 100644
--- a/src/sdtui.c
+++ b/src/sdtui.c
@@ -348,23 +348,9 @@ view_entry_split_add_pango (ViewEntry *ve, const gchar *markup)
static void
view_entry_split_add_xdxf (ViewEntry *ve, const gchar *xml)
{
- // Trivially filter out all tags we can't quite handle,
- // then parse the reduced XML as Pango markup--this seems to work well.
- // Given the nature of our display, also skip keyword elements.
- GString *filtered = g_string_new ("");
- while (*xml)
- {
- // GMarkup can read some of the wilder XML constructs, Pango skips them
- const gchar *p = NULL;
- if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
- || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
- || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
- g_string_append_c (filtered, *xml++);
- else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
- xml = ++p;
- }
- view_entry_split_add_pango (ve, filtered->str);
- g_string_free (filtered, TRUE);
+ gchar *markup = xdxf_to_pango_markup_with_reduced_effort (xml);
+ view_entry_split_add_pango (ve, markup);
+ g_free (markup);
}
/// Decomposes a dictionary entry into the format we want.
diff --git a/src/stardict.c b/src/stardict.c
index d371eb1..8b55f99 100644
--- a/src/stardict.c
+++ b/src/stardict.c
@@ -1354,7 +1354,7 @@ stardict_iterator_get_entry (StardictIterator *sdi)
{
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
if (!stardict_iterator_is_valid (sdi))
- return FALSE;
+ return NULL;
return stardict_dict_get_entry (sdi->owner, sdi->offset);
}
diff --git a/src/utils.c b/src/utils.c
index 8c63548..275e4e1 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -33,6 +33,27 @@
#include "utils.h"
+/// Trivially filter out all tags that aren't part of the Pango markup language,
+/// or no frontend can quite handle--this seems to work well.
+/// Given the nature of our display, also skip whole keyword elements.
+gchar *
+xdxf_to_pango_markup_with_reduced_effort (const gchar *xml)
+{
+ GString *filtered = g_string_new ("");
+ while (*xml)
+ {
+ // GMarkup can read some of the wilder XML constructs, Pango skips them
+ const gchar *p = NULL;
+ if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
+ || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
+ || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
+ g_string_append_c (filtered, *xml++);
+ else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
+ xml = ++p;
+ }
+ return g_string_free (filtered, FALSE);
+}
+
/// Read the whole stream into a byte array.
gboolean
stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
diff --git a/src/utils.h b/src/utils.h
index 178a1d9..99ad19a 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -36,6 +36,8 @@
} \
}
+gchar *xdxf_to_pango_markup_with_reduced_effort (const gchar *xml);
+
gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error);
gchar *stream_read_string (GDataInputStream *dis, GError **error);
gboolean xstrtoul (unsigned long *out, const char *s, int base);
--
cgit v1.2.3-70-g09d2