From c77d994dc44a9ef8f87dd36661201f499877fc34 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch <p@janouch.name>
Date: Sun, 11 Jun 2023 17:45:38 +0200
Subject: Rename tools, make them installable

---
 CMakeLists.txt              |  10 +-
 README.adoc                 |   8 +-
 src/add-pronunciation.c     | 469 --------------------------------------------
 src/query-tool.c            | 313 -----------------------------
 src/tabfile.c               | 223 ---------------------
 src/tdv-add-pronunciation.c | 469 ++++++++++++++++++++++++++++++++++++++++++++
 src/tdv-query-tool.c        | 313 +++++++++++++++++++++++++++++
 src/tdv-tabfile.c           | 223 +++++++++++++++++++++
 src/tdv-transform.c         | 226 +++++++++++++++++++++
 src/transform.c             | 226 ---------------------
 10 files changed, 1243 insertions(+), 1237 deletions(-)
 delete mode 100644 src/add-pronunciation.c
 delete mode 100644 src/query-tool.c
 delete mode 100644 src/tabfile.c
 create mode 100644 src/tdv-add-pronunciation.c
 create mode 100644 src/tdv-query-tool.c
 create mode 100644 src/tdv-tabfile.c
 create mode 100644 src/tdv-transform.c
 delete mode 100644 src/transform.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9d4c494..f995dd4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -285,14 +285,22 @@ endif ()
 target_link_libraries (${PROJECT_NAME} ${project_libraries})
 
 # Tools
-set (tools tabfile add-pronunciation query-tool transform)
+set (tools tdv-tabfile tdv-add-pronunciation tdv-query-tool tdv-transform)
 foreach (tool ${tools})
 	add_executable (${tool} EXCLUDE_FROM_ALL
 		src/${tool}.c ${project_common_sources})
 	target_link_libraries (${tool} ${project_common_libraries})
 endforeach ()
 
 add_custom_target (tools DEPENDS ${tools})
+
+# With WITH_TOOLS, the tools join the default build, so that they exist
+# by the time they get installed; otherwise they stay behind "make tools".
+# Installing targets that are still EXCLUDE_FROM_ALL is undefined in CMake.
+option (WITH_TOOLS "Build and install some StarDict tools" ${UNIX})
+if (WITH_TOOLS)
+	set_target_properties (${tools} PROPERTIES EXCLUDE_FROM_ALL OFF)
+endif ()
 
 # Example dictionaries
 file (GLOB dicts_scripts "${PROJECT_SOURCE_DIR}/dicts/*.*")
@@ -315,6 +323,9 @@ if (NOT WIN32)
 	install (TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
 	install (FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR})
 
+	if (WITH_TOOLS)
+		install (TARGETS ${tools} DESTINATION ${CMAKE_INSTALL_BINDIR})
+	endif ()
 	if (WITH_GUI)
 		install (FILES ${PROJECT_NAME}.svg
 			DESTINATION ${CMAKE_INSTALL_DATADIR}/icons/hicolor/scalable/apps)
diff --git a/README.adoc b/README.adoc
index 0d9ca4e..ab2b4be 100644
--- a/README.adoc
+++ b/README.adoc
@@ -81,10 +81,10 @@ The `make dicts` command will build some examples from freely available sources:
  - Czech foreign words (the site's export is broken as of 2022/08, no response)
  - Czech WordNet 1.9 PDT (synonyms, hypernyms, hyponyms)
 
-You can use the included 'transform' tool to convert already existing StarDict
-dictionaries that are nearly good as they are.  Remember that you can change
-the `sametypesequence` of the resulting '.ifo' file to another format, or run
-'dictzip' on '.dict' files to make them compact.
+You can use the included 'tdv-transform' tool to convert already existing
+StarDict dictionaries that are nearly good as they are.  Remember that you can
+change the `sametypesequence` of the resulting '.ifo' file to another format,
+or run 'dictzip' on '.dict' files to make them compact.
 
 https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[CZ <--> EN/DE/PL/RU dictionaries]
 
diff --git a/src/add-pronunciation.c b/src/add-pronunciation.c
deleted file mode 100644
index 90d9673..0000000
--- a/src/add-pronunciation.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * A tool to add eSpeak-generated pronunciation to dictionaries
- *
- * Here I use the `espeak' process rather than libespeak because of the GPL.
- * It's far from ideal, rather good as a starting point.
- *
- * Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-
-#include "stardict.h"
-#include "stardict-private.h"
-#include "generator.h"
-#include "utils.h"
-
-
-// --- Pronunciation generator -------------------------------------------------
-
-typedef struct worker_data WorkerData;
-
-struct worker_data
-{
-	gchar **cmdline;                    ///< eSpeak command line
-	guint ignore_acronyms : 1;          ///< Don't spell out acronyms
-	GRegex *re_stop;                    ///< Regex for stop sequences
-	GRegex *re_acronym;                 ///< Regex for ACRONYMS
-
-	guint32 start_entry;                ///< The first entry to be processed
-	guint32 end_entry;                  ///< Past the last entry to be processed
-
-	// Reader, writer
-	GMutex *dict_mutex;                 ///< Locks the dictionary object
-
-	// Reader
-	GThread *main_thread;               ///< A handle to the reader thread
-	StardictDict *dict;                 ///< The dictionary object
-	gpointer output;                    ///< Linked-list of pronunciation data
-
-	GMutex *remaining_mutex;            ///< Locks the progress stats
-	GCond *remaining_cond;              ///< Signals a change in progress
-	guint32 remaining;                  ///< How many entries remain
-	guint32 total;                      ///< Total number of entries
-
-	// Writer
-	StardictIterator *iterator;         ///< Iterates over the dictionary
-	FILE *child_stdin;                  ///< Standard input of eSpeak
-};
-
-/// eSpeak splits the output on certain characters.
-#define LINE_SPLITTING_CHARS            ".,:;?!"
-
-/// We don't want to include brackets either.
-#define OTHER_STOP_CHARS                "([{<"
-
-/// A void word used to make a unique "no pronunciation available" mark.
-#define VOID_ENTRY                      "not present in any dictionary"
-
-
-/// Adds dots between characters.
-static gboolean
-writer_acronym_cb (const GMatchInfo *info, GString *res,
-	G_GNUC_UNUSED gpointer data)
-{
-	gchar *preceding = g_match_info_fetch (info, 1);
-	g_string_append (res, preceding);
-	g_free (preceding);
-
-	gchar *word = g_match_info_fetch (info, 2);
-
-	g_string_append_c (res, *word);
-	const gchar *p;
-	for (p = word + 1; *p; p++)
-	{
-		g_string_append_c (res, '.');
-		g_string_append_c (res, *p);
-	}
-
-	g_free (word);
-	return FALSE;
-}
-
-/// Writes to espeak's stdin.
-static gpointer
-worker_writer (WorkerData *data)
-{
-	GError *error = NULL;
-	GMatchInfo *match_info;
-	while (stardict_iterator_get_offset (data->iterator) != data->end_entry)
-	{
-		g_mutex_lock (data->dict_mutex);
-		const gchar *word = stardict_iterator_get_word (data->iterator);
-		g_mutex_unlock (data->dict_mutex);
-
-		word += strspn (word, LINE_SPLITTING_CHARS " \t");
-		gchar *x = g_strdup (word);
-
-		// Cut the word if needed be
-		error = NULL;
-		if (g_regex_match_full (data->re_stop,
-			x, -1, 0, 0, &match_info, &error))
-		{
-			gint start_pos;
-			g_match_info_fetch_pos (match_info, 0, &start_pos, NULL);
-			x[start_pos] = 0;
-		}
-		g_match_info_free (match_info);
-
-		// Change acronyms so that they're not pronounced as words
-		if (!error && !data->ignore_acronyms)
-		{
-			char *tmp = g_regex_replace_eval (data->re_acronym,
-				x, -1, 0, 0, writer_acronym_cb, NULL, &error);
-			g_free (x);
-			x = tmp;
-		}
-
-		if (error)
-		{
-			g_printerr ("Notice: error processing '%s': %s\n",
-				word, error->message);
-			g_clear_error (&error);
-			*x = 0;
-		}
-
-		// We might have accidentally cut off everything
-		if (!*x)
-		{
-			g_free (x);
-			x = g_strdup (VOID_ENTRY);
-		}
-
-		stardict_iterator_next (data->iterator);
-		if (fprintf (data->child_stdin, "%s\n", x) < 0)
-			fatal ("write to eSpeak failed: %s\n", g_strerror (errno));
-
-		g_free (x);
-	}
-
-	g_object_unref (data->iterator);
-	return GINT_TO_POINTER (fclose (data->child_stdin));
-}
-
-/// Get the void entry (and test if espeak works).
-static gchar *
-get_void_entry (gchar *cmdline[])
-{
-	gchar *output;
-	gint exit_status;
-
-	GError *error = NULL;
-	if (!g_spawn_sync (NULL, cmdline, NULL,
-		G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, NULL, NULL,
-		&output, NULL, &exit_status, &error))
-		fatal ("Error: couldn't spawn espeak: %s\n", error->message);
-
-	if (exit_status)
-		fatal ("Error: espeak returned %d\n", exit_status);
-
-	return output;
-}
-
-/// Reads from espeak's stdout.
-static gpointer
-worker (WorkerData *data)
-{
-	// Spawn eSpeak
-	GError *error = NULL;
-	gint child_in, child_out;
-	if (!g_spawn_async_with_pipes (NULL, data->cmdline, NULL,
-		G_SPAWN_SEARCH_PATH, NULL, NULL,
-		NULL, &child_in, &child_out, NULL, &error))
-		fatal ("g_spawn: %s\n", error->message);
-
-	data->child_stdin = fdopen (child_in, "wb");
-	if (!data->child_stdin)
-		perror ("fdopen");
-
-	FILE *child_stdout = fdopen (child_out, "rb");
-	if (!child_stdout)
-		perror ("fdopen");
-
-	// Spawn a writer thread
-	g_mutex_lock (data->dict_mutex);
-	data->iterator = stardict_iterator_new (data->dict, data->start_entry);
-	g_mutex_unlock (data->dict_mutex);
-
-	GThread *writer = g_thread_new ("write worker",
-		(GThreadFunc) worker_writer, data);
-
-	// Read the output
-	g_mutex_lock (data->remaining_mutex);
-	guint32 remaining = data->remaining;
-	g_mutex_unlock (data->remaining_mutex);
-
-	data->output = NULL;
-	gpointer *output_end = &data->output;
-	while (remaining)
-	{
-		static gchar next[sizeof (gpointer)];
-		GString *s = g_string_new (NULL);
-		g_string_append_len (s, next, sizeof next);
-
-		gint c;
-		while ((c = fgetc (child_stdout)) != EOF && c != '\n')
-			g_string_append_c (s, c);
-		if (c == EOF)
-			fatal ("eSpeak process died too soon\n");
-
-		gchar *translation = g_string_free (s, FALSE);
-		*output_end = translation;
-		output_end = (gpointer *) translation;
-
-		// We limit progress reporting so that
-		// the mutex doesn't spin like crazy
-		if ((--remaining & 255) != 0)
-			continue;
-
-		g_mutex_lock (data->remaining_mutex);
-		data->remaining = remaining;
-		g_cond_broadcast (data->remaining_cond);
-		g_mutex_unlock (data->remaining_mutex);
-	}
-
-	if (fgetc (child_stdout) != EOF)
-		fatal ("Error: eSpeak has written more lines than it should. "
-			"The output would be corrupt, aborting.\n");
-
-	fclose (child_stdout);
-	return g_thread_join (writer);
-}
-
-// --- Main --------------------------------------------------------------------
-
-int
-main (int argc, char *argv[])
-{
-	gint n_processes = 1;
-	gchar *voice = NULL;
-	gboolean ignore_acronyms = FALSE;
-
-	GOptionEntry entries[] =
-	{
-		{ "processes", 'N', G_OPTION_FLAG_IN_MAIN,
-		  G_OPTION_ARG_INT, &n_processes,
-		  "The number of espeak processes run in parallel", "PROCESSES" },
-		{ "voice", 'v', G_OPTION_FLAG_IN_MAIN,
-		  G_OPTION_ARG_STRING, &voice,
-		  "The voice to be used by eSpeak to pronounce the words", "VOICE" },
-		{ "ignore-acronyms", 0, G_OPTION_FLAG_IN_MAIN,
-		  G_OPTION_ARG_NONE, &ignore_acronyms,
-		  "Don't spell out words composed of big letters only", NULL },
-		{ NULL }
-	};
-
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
-	if (glib_check_version (2, 36, 0))
-		g_type_init ();
-G_GNUC_END_IGNORE_DEPRECATIONS
-
-	GError *error = NULL;
-	GOptionContext *ctx = g_option_context_new
-		("input.ifo output-basename - add pronunciation to dictionaries");
-	g_option_context_add_main_entries (ctx, entries, NULL);
-	if (!g_option_context_parse (ctx, &argc, &argv, &error))
-		fatal ("Error: option parsing failed: %s\n", error->message);
-
-	if (argc != 3)
-		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
-
-	g_option_context_free (ctx);
-
-	// See if we can run espeak
-	static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL, NULL, NULL };
-
-	if (voice)
-	{
-		cmdline[3] = "-v";
-		cmdline[4] = voice;
-	}
-
-	gchar *void_entry = g_strstrip (get_void_entry (cmdline));
-
-	// Load the dictionary
-	printf ("Loading the original dictionary...\n");
-	StardictDict *dict = stardict_dict_new (argv[1], &error);
-	if (!dict)
-		fatal ("Error: opening the dictionary failed: %s\n", error->message);
-
-	gsize n_words = stardict_info_get_word_count
-		(stardict_dict_get_info (dict));
-
-	if (n_processes <= 0)
-		fatal ("Error: there must be at least one process\n");
-
-	if ((gsize) n_processes > n_words * 1024)
-	{
-		n_processes = n_words / 1024;
-		if (!n_processes)
-			n_processes = 1;
-		g_printerr ("Warning: too many processes, reducing to %d\n",
-			n_processes);
-	}
-
-	// Spawn worker threads to generate pronunciation data
-	static GMutex dict_mutex;
-
-	static GMutex remaining_mutex;
-	static GCond remaining_cond;
-
-	WorkerData *data = g_alloca (sizeof *data * n_processes);
-
-	GRegex *re_stop = g_regex_new ("[" LINE_SPLITTING_CHARS "][ ?]"
-		"|\\.\\.\\.|[" OTHER_STOP_CHARS "]", G_REGEX_OPTIMIZE, 0, &error);
-	g_assert (re_stop != NULL);
-
-	GRegex *re_acronym = g_regex_new ("(^|\\pZ)(\\p{Lu}+)(?=\\pZ|$)",
-		G_REGEX_OPTIMIZE, 0, &error);
-	g_assert (re_acronym != NULL);
-
-	gint i;
-	for (i = 0; i < n_processes; i++)
-	{
-		data[i].start_entry = n_words *  i      / n_processes;
-		data[i].end_entry   = n_words * (i + 1) / n_processes;
-
-		data[i].total = data[i].remaining =
-			data[i].end_entry - data[i].start_entry;
-		data[i].remaining_mutex = &remaining_mutex;
-		data[i].remaining_cond = &remaining_cond;
-
-		data[i].dict = dict;
-		data[i].dict_mutex = &dict_mutex;
-
-		data[i].re_stop = re_stop;
-		data[i].re_acronym = re_acronym;
-
-		data[i].cmdline = cmdline;
-		data[i].ignore_acronyms = ignore_acronyms;
-		data[i].main_thread =
-			g_thread_new ("worker", (GThreadFunc) worker, &data[i]);
-	}
-
-	// Loop while the threads still have some work to do and report status
-	g_mutex_lock (&remaining_mutex);
-	for (;;)
-	{
-		gboolean all_finished = TRUE;
-		printf ("\rRetrieving pronunciation... ");
-		for (i = 0; i < n_processes; i++)
-		{
-			printf ("%3u%% ", 100 - data[i].remaining * 100 / data[i].total);
-			if (data[i].remaining)
-				all_finished = FALSE;
-		}
-
-		if (all_finished)
-			break;
-		g_cond_wait (&remaining_cond, &remaining_mutex);
-	}
-	g_mutex_unlock (&remaining_mutex);
-
-	putchar ('\n');
-	for (i = 0; i < n_processes; i++)
-		g_thread_join (data[i].main_thread);
-
-	g_regex_unref (re_stop);
-	g_regex_unref (re_acronym);
-
-	// Put extended entries into a new dictionary
-	Generator *generator = generator_new (argv[2], &error);
-	if (!generator)
-		fatal ("Error: failed to create the output dictionary: %s\n",
-			error->message);
-
-	StardictInfo *info = generator->info;
-	stardict_info_copy (info, stardict_dict_get_info (dict));
-
-	// This gets incremented each time an entry is finished
-	info->word_count = 0;
-
-	if (info->same_type_sequence)
-	{
-		gchar *new_sts = g_strconcat ("t", info->same_type_sequence, NULL);
-		g_free (info->same_type_sequence);
-		info->same_type_sequence = new_sts;
-	}
-
-	// Write out all the entries together with the pronunciation
-	for (i = 0; i < n_processes; i++)
-	{
-		StardictIterator *iterator =
-			stardict_iterator_new (dict, data[i].start_entry);
-
-		gpointer *output = data[i].output;
-		while (stardict_iterator_get_offset (iterator) != data[i].end_entry)
-		{
-			printf ("\rCreating a new dictionary... %3lu%%",
-				(gulong) stardict_iterator_get_offset (iterator) * 100
-				/ stardict_dict_get_info (dict)->word_count);
-
-			g_assert (output != NULL);
-
-			gchar *pronunciation = g_strstrip ((gchar *) (output + 1));
-			StardictEntry *entry = stardict_iterator_get_entry (iterator);
-
-			generator_begin_entry (generator);
-
-			if (!strcmp (pronunciation, void_entry))
-				*pronunciation = 0;
-
-//			g_printerr ("%s /%s/\n",
-//				stardict_iterator_get_word (iterator), pronunciation);
-
-			// For the sake of simplicity we fake a new start;
-			// write_fields() only iterates the list in one direction.
-			StardictEntryField field;
-			field.type = 't';
-			field.data = pronunciation;
-
-			GList start_link;
-			start_link.next = entry->fields;
-			start_link.data = &field;
-
-			if (!generator_write_fields (generator, &start_link, &error)
-			 || !generator_finish_entry (generator,
-					stardict_iterator_get_word (iterator), &error))
-				fatal ("Error: write failed: %s\n", error->message);
-
-			g_object_unref (entry);
-
-			gpointer *tmp = output;
-			output = *output;
-			g_free (tmp);
-
-			stardict_iterator_next (iterator);
-		}
-
-		g_assert (output == NULL);
-		g_object_unref (iterator);
-	}
-
-	putchar ('\n');
-	if (!generator_finish (generator, &error))
-		fatal ("Error: failed to write the dictionary: %s\n", error->message);
-
-	generator_free (generator);
-	g_object_unref (dict);
-	g_free (void_entry);
-	return 0;
-}
diff --git a/src/query-tool.c b/src/query-tool.c
deleted file mode 100644
index 6cfdc66..0000000
--- a/src/query-tool.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * A tool to query multiple dictionaries for the specified word
- *
- * Intended for use in IRC bots and similar silly things---words go in,
- * one per each line, and entries come out, one dictionary at a time,
- * finalised with an empty line.  Newlines are escaped with `\n',
- * backslashes with `\\'.
- *
- * So far only the `m', `g`, and `x` fields are supported, as in tdv.
- *
- * Copyright (c) 2013 - 2021, Přemysl Eric Janouch <p@janouch.name>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-#include <pango/pango.h>
-
-#include "stardict.h"
-#include "stardict-private.h"
-#include "generator.h"
-#include "utils.h"
-
-
-// --- Output formatting -------------------------------------------------------
-
-/// Transform Pango attributes to in-line formatting sequences (non-reentrant)
-typedef const gchar *(*FormatterFunc) (PangoAttrIterator *);
-
-static const gchar *
-pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator)
-{
-	return "";
-}
-
-static const gchar *
-pango_attrs_to_irc (PangoAttrIterator *iterator)
-{
-	static gchar buf[5];
-	gchar *p = buf;
-	*p++ = 0x0f;
-
-	if (!iterator)
-		goto reset_formatting;
-
-	PangoAttrInt *attr = NULL;
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
-		*p++ = 0x02;
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
-		*p++ = 0x1f;
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
-		*p++ = 0x1d;
-
-reset_formatting:
-	*p++ = 0;
-	return buf;
-}
-
-static const gchar *
-pango_attrs_to_ansi (PangoAttrIterator *iterator)
-{
-	static gchar buf[16];
-	g_strlcpy (buf, "\x1b[0", sizeof buf);
-	if (!iterator)
-		goto reset_formatting;
-
-	PangoAttrInt *attr = NULL;
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD)
-		g_strlcat (buf, ";1", sizeof buf);
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE)
-		g_strlcat (buf, ";4", sizeof buf);
-	if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator,
-			PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC)
-		g_strlcat (buf, ";3", sizeof buf);
-
-reset_formatting:
-	g_strlcat (buf, "m", sizeof buf);
-	return buf;
-}
-
-static gchar *
-pango_to_output_text (const gchar *markup, FormatterFunc formatter)
-{
-	// This function skips leading whitespace, but it's the canonical one
-	gchar *text = NULL;
-	PangoAttrList *attrs = NULL;
-	if (!pango_parse_markup (markup, -1, 0, &attrs, &text, NULL, NULL))
-		return g_strdup_printf ("<%s>", ("error in entry"));
-
-	PangoAttrIterator *iterator = pango_attr_list_get_iterator (attrs);
-	GString *result = g_string_new ("");
-	do
-	{
-		gint start = 0, end = 0;
-		pango_attr_iterator_range (iterator, &start, &end);
-		if (end == G_MAXINT)
-			end = strlen (text);
-
-		g_string_append (result, formatter (iterator));
-		g_string_append_len (result, text + start, end - start);
-	}
-	while (pango_attr_iterator_next (iterator));
-	g_string_append (result, formatter (NULL));
-
-	g_free (text);
-	pango_attr_iterator_destroy (iterator);
-	pango_attr_list_unref (attrs);
-	return g_string_free (result, FALSE);
-}
-
-static gchar *
-field_to_output_text (const StardictEntryField *field, FormatterFunc formatter)
-{
-	const gchar *definition = field->data;
-	if (field->type == STARDICT_FIELD_MEANING)
-		return g_strdup (definition);
-	if (field->type == STARDICT_FIELD_PANGO)
-		return pango_to_output_text (definition, formatter);
-	if (field->type == STARDICT_FIELD_XDXF)
-	{
-		gchar *markup = xdxf_to_pango_markup_with_reduced_effort (definition);
-		gchar *result = pango_to_output_text (markup, formatter);
-		g_free (markup);
-		return result;
-	}
-	return NULL;
-}
-
-// --- Main --------------------------------------------------------------------
-
-static guint
-count_equal_chars (const gchar *a, const gchar *b)
-{
-	guint count = 0;
-	while (*a && *b)
-		if (*a++ == *b++)
-			count++;
-	return count;
-}
-
-static void
-do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter)
-{
-	gboolean found;
-	StardictIterator *iter = stardict_dict_search (dict, word, &found);
-	if (!found)
-		goto out;
-
-	// Default Stardict ordering is ASCII case-insensitive,
-	// which may be further exacerbated by our own collation feature.
-	// Try to find a better matching entry:
-
-	gint64 best_offset = stardict_iterator_get_offset (iter);
-	guint best_score = count_equal_chars
-		(stardict_iterator_get_word (iter), word);
-
-	while (TRUE)
-	{
-		stardict_iterator_next (iter);
-		if (!stardict_iterator_is_valid (iter))
-			break;
-
-		const gchar *iter_word = stardict_iterator_get_word (iter);
-		if (g_ascii_strcasecmp (iter_word, word))
-			break;
-
-		guint score = count_equal_chars (iter_word, word);
-		if (score > best_score)
-		{
-			best_offset = stardict_iterator_get_offset (iter);
-			best_score = score;
-		}
-	}
-
-	stardict_iterator_set_offset (iter, best_offset, FALSE);
-
-	StardictEntry *entry = stardict_iterator_get_entry (iter);
-	StardictInfo *info = stardict_dict_get_info (dict);
-	const GList *list = stardict_entry_get_fields (entry);
-	for (; list; list = list->next)
-	{
-		StardictEntryField *field = list->data;
-		gchar *definitions = field_to_output_text (field, formatter);
-		if (!definitions)
-			continue;
-
-		printf ("%s\t", info->book_name);
-		for (const gchar *p = definitions; *p; p++)
-		{
-			if (*p == '\\')
-				printf ("\\\\");
-			else if (*p == '\n')
-				printf ("\\n");
-			else
-				putchar (*p);
-		}
-		putchar ('\n');
-		g_free (definitions);
-	}
-	g_object_unref (entry);
-out:
-	g_object_unref (iter);
-}
-
-static FormatterFunc
-parse_options (int *argc, char ***argv)
-{
-	GError *error = NULL;
-	GOptionContext *ctx = g_option_context_new
-		("DICTIONARY.ifo... - query multiple dictionaries");
-
-	gboolean format_with_ansi = FALSE;
-	gboolean format_with_irc = FALSE;
-	GOptionEntry entries[] =
-	{
-		{ "ansi", 'a', 0, G_OPTION_ARG_NONE, &format_with_ansi,
-		  "Format with ANSI sequences", NULL },
-		{ "irc", 'i', 0, G_OPTION_ARG_NONE, &format_with_irc,
-		  "Format with IRC codes", NULL },
-		{ }
-	};
-
-	g_option_context_add_main_entries (ctx, entries, NULL);
-	if (!g_option_context_parse (ctx, argc, argv, &error))
-	{
-		g_printerr ("Error: option parsing failed: %s\n", error->message);
-		exit (EXIT_FAILURE);
-	}
-	if (*argc < 2)
-	{
-		g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL));
-		exit (EXIT_FAILURE);
-	}
-	g_option_context_free (ctx);
-
-	if (format_with_ansi)
-		return pango_attrs_to_ansi;
-	if (format_with_irc)
-		return pango_attrs_to_irc;
-
-	return pango_attrs_ignore;
-}
-
-int
-main (int argc, char *argv[])
-{
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
-	if (glib_check_version (2, 36, 0))
-		g_type_init ();
-G_GNUC_END_IGNORE_DEPRECATIONS
-
-	FormatterFunc formatter = parse_options (&argc, &argv);
-
-	guint n_dicts = argc - 1;
-	StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts);
-
-	guint i;
-	for (i = 1; i <= n_dicts; i++)
-	{
-		GError *error = NULL;
-		dicts[i - 1] = stardict_dict_new (argv[i], &error);
-		if (error)
-		{
-			g_printerr ("Error: opening dictionary `%s' failed: %s\n",
-				argv[i], error->message);
-			exit (EXIT_FAILURE);
-		}
-	}
-
-	gint c;
-	do
-	{
-		GString *s = g_string_new (NULL);
-		while ((c = getchar ()) != EOF && c != '\n')
-			if (c != '\r')
-				g_string_append_c (s, c);
-
-		if (s->len)
-			for (i = 0; i < n_dicts; i++)
-				do_dictionary (dicts[i], s->str, formatter);
-
-		printf ("\n");
-		fflush (NULL);
-		g_string_free (s, TRUE);
-	}
-	while (c != EOF);
-
-	for (i = 0; i < n_dicts; i++)
-		g_object_unref (dicts[i]);
-
-	return 0;
-}
diff --git a/src/tabfile.c b/src/tabfile.c
deleted file mode 100644
index fab0ef2..0000000
--- a/src/tabfile.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * A clean reimplementation of StarDict's tabfile
- *
- * Copyright (c) 2020 - 2021, Přemysl Eric Janouch <p@janouch.name>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <locale.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-#include <pango/pango.h>
-
-#include <unicode/ucol.h>
-
-#include "config.h"
-#include "stardict.h"
-#include "stardict-private.h"
-#include "generator.h"
-#include "utils.h"
-
-
-static gboolean
-set_data_error (GError **error, const gchar *message)
-{
-	g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA, message);
-	return FALSE;
-}
-
-static const gchar escapes[256] = { ['n'] = '\n', ['t'] = '\t', ['\\'] = '\\' };
-
-static gboolean
-inplace_unescape (gchar *line, GError **error)
-{
-	gboolean escape = FALSE;
-	gchar *dest = line;
-	for (gchar *src = line; *src; src++)
-	{
-		if (escape)
-		{
-			escape = FALSE;
-			if (!(*dest++ = escapes[(guchar) *src]))
-				return set_data_error (error, "unsupported escape");
-		}
-		else if (*src == '\\')
-			escape = TRUE;
-		else
-			*dest++ = *src;
-	}
-	if (escape)
-		return set_data_error (error, "trailing escape character");
-
-	*dest = 0;
-	return TRUE;
-}
-
-static gboolean
-import_line (Generator *generator, gchar *line, gsize len, GError **error)
-{
-	if (!len)
-		return TRUE;
-	if (!g_utf8_validate_len (line, len, NULL))
-		return set_data_error (error, "not valid UTF-8");
-
-	gchar *separator = strchr (line, '\t');
-	if (!separator)
-		return set_data_error (error, "keyword separator not found");
-
-	*separator++ = 0;
-	if (strchr (line, '\\'))
-		// The index wouldn't be sorted correctly with our method
-		return set_data_error (error, "escapes not allowed in keywords");
-
-	gchar *newline = strpbrk (separator, "\r\n");
-	if (newline)
-		*newline = 0;
-
-	if (!inplace_unescape (line, error)
-	 || !inplace_unescape (separator, error))
-		return FALSE;
-
-	if (generator->info->same_type_sequence
-	 && *generator->info->same_type_sequence == STARDICT_FIELD_PANGO
-	 && !pango_parse_markup (separator, -1, 0, NULL, NULL, NULL, error))
-		return FALSE;
-
-	generator_begin_entry (generator);
-	return generator_write_string (generator, separator, TRUE, error)
-		&& generator_finish_entry (generator, line, error);
-}
-
-static gboolean
-transform (FILE *fsorted, Generator *generator, GError **error)
-{
-	gchar *line = NULL;
-	gsize size = 0, ln = 1;
-	for (ssize_t read; (read = getline (&line, &size, fsorted)) >= 0; ln++)
-		if (!import_line (generator, line, read, error))
-			break;
-
-	free (line);
-	if (ferror (fsorted))
-	{
-		g_set_error_literal (error, G_IO_ERROR,
-			g_io_error_from_errno (errno), g_strerror (errno));
-		return FALSE;
-	}
-	if (!feof (fsorted))
-	{
-		// You'll only get good line number output with presorted input!
-		g_prefix_error (error, "line %zu: ", ln);
-		return FALSE;
-	}
-	return TRUE;
-}
-
-static void
-validate_collation_locale (const gchar *locale)
-{
-	UErrorCode error = U_ZERO_ERROR;
-	UCollator *collator = ucol_open (locale, &error);
-	if (!collator)
-		fatal ("failed to create a collator for %s: %s\n",
-			locale, u_errorName (error));
-	ucol_close (collator);
-}
-
-int
-main (int argc, char *argv[])
-{
-	// The GLib help includes an ellipsis character, for some reason
-	(void) setlocale (LC_ALL, "");
-
-	GError *error = NULL;
-	GOptionContext *ctx = g_option_context_new ("output-basename < input");
-	g_option_context_set_summary (ctx,
-		"Create a StarDict dictionary from plaintext.");
-
-	gboolean pango_markup = FALSE;
-	StardictInfo template = {};
-	GOptionEntry entries[] =
-	{
-		{ "pango",       'p', 0, G_OPTION_ARG_NONE,   &pango_markup,
-		  "Entries use Pango markup", NULL },
-
-		{ "book-name",   'b', 0, G_OPTION_ARG_STRING, &template.book_name,
-		  "Set the book name field", "TEXT" },
-		{ "author",      'a', 0, G_OPTION_ARG_STRING, &template.author,
-		  "Set the author field ", "NAME" },
-		{ "e-mail",      'e', 0, G_OPTION_ARG_STRING, &template.email,
-		  "Set the e-mail field", "ADDRESS" },
-		{ "website",     'w', 0, G_OPTION_ARG_STRING, &template.website,
-		  "Set the website field", "LINK" },
-		{ "description", 'd', 0, G_OPTION_ARG_STRING, &template.description,
-		  "Set the description field (newlines supported)", "TEXT" },
-		{ "date",        'D', 0, G_OPTION_ARG_STRING, &template.date,
-		  "Set the date field", "DATE" },
-		{ "collation",   'c', 0, G_OPTION_ARG_STRING, &template.collation,
-		  "Set the collation field (for ICU)", "LOCALE" },
-		{ }
-	};
-
-	g_option_context_add_main_entries (ctx, entries, GETTEXT_PACKAGE);
-	if (!g_option_context_parse (ctx, &argc, &argv, &error))
-		fatal ("Error: option parsing failed: %s\n", error->message);
-	if (argc != 2)
-		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
-	g_option_context_free (ctx);
-
-	template.version = SD_VERSION_3_0_0;
-	template.same_type_sequence = pango_markup
-		? (char[]) { STARDICT_FIELD_PANGO, 0 }
-		: (char[]) { STARDICT_FIELD_MEANING, 0 };
-
-	if (!template.book_name)
-		template.book_name = argv[1];
-	if (template.description)
-	{
-		gchar **lines = g_strsplit (template.description, "\n", -1);
-		g_free (template.description);
-		gchar *in_one_line = g_strjoinv ("<br>", lines);
-		g_strfreev (lines);
-		template.description = in_one_line;
-	}
-	if (template.collation)
-		validate_collation_locale (template.collation);
-
-	// This actually implements stardict_strcmp(), POSIX-compatibly.
-	// Your sort(1) is not expected to be stable by default, like bsdsort is.
-	FILE *fsorted = popen ("LC_ALL=C sort -t'\t' -k1f,1", "r");
-	if (!fsorted)
-		fatal ("%s: %s\n", "popen", g_strerror (errno));
-
-	Generator *generator = generator_new (argv[1], &error);
-	if (!generator)
-		fatal ("Error: failed to create the output dictionary: %s\n",
-			error->message);
-
-	StardictInfo *info = generator->info;
-	stardict_info_copy (info, &template);
-	if (!transform (fsorted, generator, &error)
-	 || !generator_finish (generator, &error))
-		fatal ("Error: failed to write the dictionary: %s\n", error->message);
-
-	generator_free (generator);
-	pclose (fsorted);
-	return 0;
-}
diff --git a/src/tdv-add-pronunciation.c b/src/tdv-add-pronunciation.c
new file mode 100644
index 0000000..90d9673
--- /dev/null
+++ b/src/tdv-add-pronunciation.c
@@ -0,0 +1,469 @@
+/*
+ * A tool to add eSpeak-generated pronunciation to dictionaries
+ *
+ * Here I use the `espeak' process rather than libespeak because of the GPL.
+ * It's far from ideal, rather good as a starting point.
+ *
+ * Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include "stardict.h"
+#include "stardict-private.h"
+#include "generator.h"
+#include "utils.h"
+
+
+// --- Pronunciation generator -------------------------------------------------
+
+typedef struct worker_data WorkerData;
+
+struct worker_data
+{
+	gchar **cmdline;                    ///< eSpeak command line
+	guint ignore_acronyms : 1;          ///< Don't spell out acronyms
+	GRegex *re_stop;                    ///< Regex for stop sequences
+	GRegex *re_acronym;                 ///< Regex for ACRONYMS
+
+	guint32 start_entry;                ///< The first entry to be processed
+	guint32 end_entry;                  ///< Past the last entry to be processed
+
+	// Reader, writer
+	GMutex *dict_mutex;                 ///< Locks the dictionary object
+
+	// Reader
+	GThread *main_thread;               ///< A handle to the reader thread
+	StardictDict *dict;                 ///< The dictionary object
+	gpointer output;                    ///< List of (next pointer, text) chunks
+
+	GMutex *remaining_mutex;            ///< Locks the progress stats
+	GCond *remaining_cond;              ///< Signals a change in progress
+	guint32 remaining;                  ///< How many entries remain
+	guint32 total;                      ///< Total number of entries
+
+	// Writer
+	StardictIterator *iterator;         ///< Iterates over the dictionary
+	FILE *child_stdin;                  ///< Standard input of eSpeak
+};
+
+/// eSpeak splits the output on certain characters.
+#define LINE_SPLITTING_CHARS            ".,:;?!"
+
+/// We don't want to include brackets either.
+#define OTHER_STOP_CHARS                "([{<"
+
+/// A void word used to make a unique "no pronunciation available" mark.
+#define VOID_ENTRY                      "not present in any dictionary"
+
+
+/// Regex callback: copies the prefix, then the acronym with dotted characters.
+static gboolean
+writer_acronym_cb (const GMatchInfo *info, GString *res,
+	G_GNUC_UNUSED gpointer data)
+{
+	gchar *preceding = g_match_info_fetch (info, 1);
+	g_string_append (res, preceding);
+	g_free (preceding);
+
+	// Step by UTF-8 characters, \p{Lu} also matches multi-byte letters
+	gchar *word = g_match_info_fetch (info, 2);
+	for (const gchar *p = word; *p; p = g_utf8_next_char (p))
+	{
+		if (p != word)
+			g_string_append_c (res, '.');
+		g_string_append_len (res, p, g_utf8_next_char (p) - p);
+	}
+
+	g_free (word);
+	// FALSE makes g_regex_replace_eval() go on with any further matches
+	return FALSE;
+}
+
+/// Writer thread: feeds the cleaned-up words of our range to espeak's stdin.
+static gpointer
+worker_writer (WorkerData *data)
+{
+	GError *error = NULL;
+	GMatchInfo *match_info;
+	while (stardict_iterator_get_offset (data->iterator) != data->end_entry)
+	{
+		g_mutex_lock (data->dict_mutex);
+		const gchar *word = stardict_iterator_get_word (data->iterator);
+		g_mutex_unlock (data->dict_mutex);
+
+		word += strspn (word, LINE_SPLITTING_CHARS " \t");
+		gchar *x = g_strdup (word);
+
+		// Cut the word at the first stop sequence, if need be
+		if (g_regex_match_full (data->re_stop,
+			x, -1, 0, 0, &match_info, &error))
+		{
+			gint start_pos;
+			g_match_info_fetch_pos (match_info, 0, &start_pos, NULL);
+			x[start_pos] = 0;
+		}
+		g_match_info_free (match_info);
+
+		// Change acronyms so that they're not pronounced as words
+		if (!error && !data->ignore_acronyms)
+		{
+			char *tmp = g_regex_replace_eval (data->re_acronym,
+				x, -1, 0, 0, writer_acronym_cb, NULL, &error);
+			g_free (x);
+			x = tmp;
+		}
+
+		if (error)
+		{
+			g_printerr ("Notice: error processing '%s': %s\n",
+				word, error->message);
+			g_clear_error (&error);
+			g_free (x);
+			x = NULL;
+		}
+
+		// x is NULL after an error, or we may have cut off everything
+		if (!x || !*x)
+		{
+			g_free (x);
+			x = g_strdup (VOID_ENTRY);
+		}
+
+		stardict_iterator_next (data->iterator);
+		if (fprintf (data->child_stdin, "%s\n", x) < 0)
+			fatal ("write to eSpeak failed: %s\n", g_strerror (errno));
+
+		g_free (x);
+	}
+
+	g_object_unref (data->iterator);
+	return GINT_TO_POINTER (fclose (data->child_stdin));
+}
+
+/// Runs cmdline to completion and hands back its stdout, dying on any failure.
+static gchar *
+get_void_entry (gchar *cmdline[])
+{
+	GError *error = NULL;
+	gchar *captured = NULL;
+	gint status = 0;
+
+	const GSpawnFlags flags = G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL;
+	gboolean spawned = g_spawn_sync (NULL, cmdline, NULL, flags, NULL, NULL,
+		&captured, NULL, &status, &error);
+	if (!spawned)
+		fatal ("Error: couldn't spawn espeak: %s\n", error->message);
+	if (status)
+		fatal ("Error: espeak returned %d\n", status);
+
+	return captured;
+}
+
+/// Reader thread: spawns eSpeak and a writer, collects one line per entry.
+static gpointer
+worker (WorkerData *data)
+{
+	// Spawn eSpeak
+	GError *error = NULL;
+	gint child_in, child_out;
+	if (!g_spawn_async_with_pipes (NULL, data->cmdline, NULL,
+		G_SPAWN_SEARCH_PATH, NULL, NULL,
+		NULL, &child_in, &child_out, NULL, &error))
+		fatal ("g_spawn: %s\n", error->message);
+
+	data->child_stdin = fdopen (child_in, "wb");
+	if (!data->child_stdin)
+		fatal ("%s: %s\n", "fdopen", g_strerror (errno));
+
+	FILE *child_stdout = fdopen (child_out, "rb");
+	if (!child_stdout)
+		fatal ("%s: %s\n", "fdopen", g_strerror (errno));
+
+	// Spawn a writer thread
+	g_mutex_lock (data->dict_mutex);
+	data->iterator = stardict_iterator_new (data->dict, data->start_entry);
+	g_mutex_unlock (data->dict_mutex);
+
+	GThread *writer = g_thread_new ("write worker",
+		(GThreadFunc) worker_writer, data);
+
+	// Read the output; every line is stored after a zeroed next-chunk link
+	g_mutex_lock (data->remaining_mutex);
+	guint32 remaining = data->remaining;
+	g_mutex_unlock (data->remaining_mutex);
+
+	data->output = NULL;
+	gpointer *output_end = &data->output;
+	while (remaining)
+	{
+		static gchar next[sizeof (gpointer)];
+		GString *s = g_string_new (NULL);
+		g_string_append_len (s, next, sizeof next);
+
+		gint c;
+		while ((c = fgetc (child_stdout)) != EOF && c != '\n')
+			g_string_append_c (s, c);
+		if (c == EOF)
+			fatal ("eSpeak process died too soon\n");
+
+		gchar *translation = g_string_free (s, FALSE);
+		*output_end = translation;
+		output_end = (gpointer *) translation;
+
+		// We limit progress reporting so that
+		// the mutex doesn't spin like crazy
+		if ((--remaining & 255) != 0)
+			continue;
+
+		g_mutex_lock (data->remaining_mutex);
+		data->remaining = remaining;
+		g_cond_broadcast (data->remaining_cond);
+		g_mutex_unlock (data->remaining_mutex);
+	}
+
+	if (fgetc (child_stdout) != EOF)
+		fatal ("Error: eSpeak has written more lines than it should. "
+			"The output would be corrupt, aborting.\n");
+
+	fclose (child_stdout);
+	return g_thread_join (writer);
+}
+
+// --- Main --------------------------------------------------------------------
+
+/// Adds eSpeak pronunciation to all entries of argv[1], writing into argv[2].
+int
+main (int argc, char *argv[])
+{
+	gint n_processes = 1;
+	gchar *voice = NULL;
+	gboolean ignore_acronyms = FALSE;
+
+	GOptionEntry entries[] =
+	{
+		{ "processes", 'N', G_OPTION_FLAG_IN_MAIN,
+		  G_OPTION_ARG_INT, &n_processes,
+		  "The number of espeak processes run in parallel", "PROCESSES" },
+		{ "voice", 'v', G_OPTION_FLAG_IN_MAIN,
+		  G_OPTION_ARG_STRING, &voice,
+		  "The voice to be used by eSpeak to pronounce the words", "VOICE" },
+		{ "ignore-acronyms", 0, G_OPTION_FLAG_IN_MAIN,
+		  G_OPTION_ARG_NONE, &ignore_acronyms,
+		  "Don't spell out words composed of big letters only", NULL },
+		{ NULL }
+	};
+
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+	if (glib_check_version (2, 36, 0))
+		g_type_init ();
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+	GError *error = NULL;
+	GOptionContext *ctx = g_option_context_new
+		("input.ifo output-basename - add pronunciation to dictionaries");
+	g_option_context_add_main_entries (ctx, entries, NULL);
+	if (!g_option_context_parse (ctx, &argc, &argv, &error))
+		fatal ("Error: option parsing failed: %s\n", error->message);
+
+	if (argc != 3)
+		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
+
+	g_option_context_free (ctx);
+
+	// See if we can run espeak
+	static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL, NULL, NULL };
+
+	if (voice)
+	{
+		cmdline[3] = "-v";
+		cmdline[4] = voice;
+	}
+
+	gchar *void_entry = g_strstrip (get_void_entry (cmdline));
+
+	// Load the dictionary
+	printf ("Loading the original dictionary...\n");
+	StardictDict *dict = stardict_dict_new (argv[1], &error);
+	if (!dict)
+		fatal ("Error: opening the dictionary failed: %s\n", error->message);
+
+	gsize n_words = stardict_info_get_word_count
+		(stardict_dict_get_info (dict));
+
+	if (n_processes <= 0)
+		fatal ("Error: there must be at least one process\n");
+
+	// Every process should get at least 1024 words, a lone one is always fine
+	if (n_processes > 1 && (gsize) n_processes > n_words / 1024)
+	{
+		n_processes = MAX (n_words / 1024, 1);
+		g_printerr ("Warning: too many processes, reducing to %d\n",
+			n_processes);
+	}
+
+	// Spawn worker threads to generate pronunciation data
+	static GMutex dict_mutex;
+
+	static GMutex remaining_mutex;
+	static GCond remaining_cond;
+
+	WorkerData *data = g_alloca (sizeof *data * n_processes);
+
+	GRegex *re_stop = g_regex_new ("[" LINE_SPLITTING_CHARS "][ ?]"
+		"|\\.\\.\\.|[" OTHER_STOP_CHARS "]", G_REGEX_OPTIMIZE, 0, &error);
+	g_assert (re_stop != NULL);
+
+	GRegex *re_acronym = g_regex_new ("(^|\\pZ)(\\p{Lu}+)(?=\\pZ|$)",
+		G_REGEX_OPTIMIZE, 0, &error);
+	g_assert (re_acronym != NULL);
+
+	gint i;
+	for (i = 0; i < n_processes; i++)
+	{
+		data[i].start_entry = n_words *  i      / n_processes;
+		data[i].end_entry   = n_words * (i + 1) / n_processes;
+
+		data[i].total = data[i].remaining =
+			data[i].end_entry - data[i].start_entry;
+		data[i].remaining_mutex = &remaining_mutex;
+		data[i].remaining_cond = &remaining_cond;
+
+		data[i].dict = dict;
+		data[i].dict_mutex = &dict_mutex;
+
+		data[i].re_stop = re_stop;
+		data[i].re_acronym = re_acronym;
+
+		data[i].cmdline = cmdline;
+		data[i].ignore_acronyms = ignore_acronyms;
+		data[i].main_thread =
+			g_thread_new ("worker", (GThreadFunc) worker, &data[i]);
+	}
+
+	// Loop while the threads still have some work to do and report status
+	g_mutex_lock (&remaining_mutex);
+	for (;;)
+	{
+		gboolean all_finished = TRUE;
+		printf ("\rRetrieving pronunciation... ");
+		for (i = 0; i < n_processes; i++)
+		{
+			// An empty dictionary results in an empty range, avoid dividing by it
+			printf ("%3u%% ",
+				100 - data[i].remaining * 100 / MAX (data[i].total, 1));
+			if (data[i].remaining)
+				all_finished = FALSE;
+		}
+
+		if (all_finished)
+			break;
+		g_cond_wait (&remaining_cond, &remaining_mutex);
+	}
+	g_mutex_unlock (&remaining_mutex);
+
+	putchar ('\n');
+	for (i = 0; i < n_processes; i++)
+		g_thread_join (data[i].main_thread);
+
+	g_regex_unref (re_stop);
+	g_regex_unref (re_acronym);
+
+	// Put extended entries into a new dictionary
+	Generator *generator = generator_new (argv[2], &error);
+	if (!generator)
+		fatal ("Error: failed to create the output dictionary: %s\n",
+			error->message);
+
+	StardictInfo *info = generator->info;
+	stardict_info_copy (info, stardict_dict_get_info (dict));
+
+	// This gets incremented each time an entry is finished
+	info->word_count = 0;
+
+	if (info->same_type_sequence)
+	{
+		gchar *new_sts = g_strconcat ("t", info->same_type_sequence, NULL);
+		g_free (info->same_type_sequence);
+		info->same_type_sequence = new_sts;
+	}
+
+	// Write out all the entries together with the pronunciation
+	for (i = 0; i < n_processes; i++)
+	{
+		StardictIterator *iterator =
+			stardict_iterator_new (dict, data[i].start_entry);
+
+		gpointer *output = data[i].output;
+		while (stardict_iterator_get_offset (iterator) != data[i].end_entry)
+		{
+			printf ("\rCreating a new dictionary... %3lu%%",
+				(gulong) stardict_iterator_get_offset (iterator) * 100
+				/ stardict_dict_get_info (dict)->word_count);
+
+			g_assert (output != NULL);
+
+			// The text follows the link pointer that leads each chunk
+			gchar *pronunciation = g_strstrip ((gchar *) (output + 1));
+			StardictEntry *entry = stardict_iterator_get_entry (iterator);
+
+			generator_begin_entry (generator);
+
+			if (!strcmp (pronunciation, void_entry))
+				*pronunciation = 0;
+
+			// For the sake of simplicity we fake a new start;
+			// write_fields() only iterates the list in one direction.
+			StardictEntryField field;
+			field.type = 't';
+			field.data = pronunciation;
+
+			GList start_link;
+			start_link.next = entry->fields;
+			start_link.data = &field;
+
+			if (!generator_write_fields (generator, &start_link, &error)
+			 || !generator_finish_entry (generator,
+					stardict_iterator_get_word (iterator), &error))
+				fatal ("Error: write failed: %s\n", error->message);
+
+			g_object_unref (entry);
+
+			gpointer *tmp = output;
+			output = *output;
+			g_free (tmp);
+
+			stardict_iterator_next (iterator);
+		}
+
+		g_assert (output == NULL);
+		g_object_unref (iterator);
+	}
+
+	putchar ('\n');
+	if (!generator_finish (generator, &error))
+		fatal ("Error: failed to write the dictionary: %s\n", error->message);
+
+	generator_free (generator);
+	g_object_unref (dict);
+	g_free (void_entry);
+	return 0;
+}
diff --git a/src/tdv-query-tool.c b/src/tdv-query-tool.c
new file mode 100644
index 0000000..6cfdc66
--- /dev/null
+++ b/src/tdv-query-tool.c
@@ -0,0 +1,313 @@
+/*
+ * A tool to query multiple dictionaries for the specified word
+ *
+ * Intended for use in IRC bots and similar silly things---words go in,
+ * one per each line, and entries come out, one dictionary at a time,
+ * finalised with an empty line.  Newlines are escaped with `\n',
+ * backslashes with `\\'.
+ *
+ * So far only the `m', `g`, and `x` fields are supported, as in tdv.
+ *
+ * Copyright (c) 2013 - 2021, Přemysl Eric Janouch <p@janouch.name>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+#include <pango/pango.h>
+
+#include "stardict.h"
+#include "stardict-private.h"
+#include "generator.h"
+#include "utils.h"
+
+
+// --- Output formatting -------------------------------------------------------
+
+/// Turns Pango attributes (NULL: reset) into in-line sequences (non-reentrant)
+typedef const gchar *(*FormatterFunc) (PangoAttrIterator *);
+
+static const gchar *
+pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator)
+{
+	return "";
+}
+
+static const gchar *
+pango_attrs_to_irc (PangoAttrIterator *iterator)
+{
+	// A reset code, up to three attribute codes, and the terminator
+	static gchar buf[5];
+	gsize len = 0;
+	buf[len++] = 0x0f;
+
+	PangoAttribute *weight = NULL, *under = NULL, *style = NULL;
+	if (iterator)
+	{
+		weight = pango_attr_iterator_get (iterator, PANGO_ATTR_WEIGHT);
+		under = pango_attr_iterator_get (iterator, PANGO_ATTR_UNDERLINE);
+		style = pango_attr_iterator_get (iterator, PANGO_ATTR_STYLE);
+	}
+
+	if (weight && ((PangoAttrInt *) weight)->value >= PANGO_WEIGHT_BOLD)
+		buf[len++] = 0x02;
+	if (under && ((PangoAttrInt *) under)->value == PANGO_UNDERLINE_SINGLE)
+		buf[len++] = 0x1f;
+	if (style && ((PangoAttrInt *) style)->value == PANGO_STYLE_ITALIC)
+		buf[len++] = 0x1d;
+
+	buf[len] = 0;
+	return buf;
+}
+
+static const gchar *
+pango_attrs_to_ansi (PangoAttrIterator *iterator)
+{
+	PangoAttribute *weight = NULL, *under = NULL, *style = NULL;
+	if (iterator)
+	{
+		weight = pango_attr_iterator_get (iterator, PANGO_ATTR_WEIGHT);
+		under = pango_attr_iterator_get (iterator, PANGO_ATTR_UNDERLINE);
+		style = pango_attr_iterator_get (iterator, PANGO_ATTR_STYLE);
+	}
+
+	// Always start with a reset, then append whichever attributes are active
+	static gchar buf[16];
+	g_strlcpy (buf, "\x1b[0", sizeof buf);
+	if (weight && ((PangoAttrInt *) weight)->value >= PANGO_WEIGHT_BOLD)
+		g_strlcat (buf, ";1", sizeof buf);
+	if (under && ((PangoAttrInt *) under)->value == PANGO_UNDERLINE_SINGLE)
+		g_strlcat (buf, ";4", sizeof buf);
+	if (style && ((PangoAttrInt *) style)->value == PANGO_STYLE_ITALIC)
+		g_strlcat (buf, ";3", sizeof buf);
+	g_strlcat (buf, "m", sizeof buf);
+	return buf;
+}
+
+/// Renders Pango markup as text with the formatter's codes leading each run.
+static gchar *
+pango_to_output_text (const gchar *markup, FormatterFunc formatter)
+{
+	// This function skips leading whitespace, but it's the canonical one
+	gchar *plain = NULL;
+	PangoAttrList *list = NULL;
+	if (!pango_parse_markup (markup, -1, 0, &list, &plain, NULL, NULL))
+		return g_strdup_printf ("<%s>", ("error in entry"));
+
+	GString *out = g_string_new ("");
+	PangoAttrIterator *run = pango_attr_list_get_iterator (list);
+	for (gboolean more = TRUE; more; more = pango_attr_iterator_next (run))
+	{
+		gint from = 0, to = 0;
+		pango_attr_iterator_range (run, &from, &to);
+		if (to == G_MAXINT)
+			to = strlen (plain);
+
+		g_string_append (out, formatter (run));
+		g_string_append_len (out, plain + from, to - from);
+	}
+	g_string_append (out, formatter (NULL));
+
+	pango_attr_iterator_destroy (run);
+	pango_attr_list_unref (list);
+	g_free (plain);
+	return g_string_free (out, FALSE);
+}
+
+static gchar *
+field_to_output_text (const StardictEntryField *field, FormatterFunc formatter)
+{
+	// Plain text is copied, markup is rendered, anything else yields NULL
+	gchar *text = NULL;
+	if (field->type == STARDICT_FIELD_MEANING)
+		text = g_strdup (field->data);
+	else if (field->type == STARDICT_FIELD_PANGO)
+		text = pango_to_output_text (field->data, formatter);
+	else if (field->type == STARDICT_FIELD_XDXF)
+	{
+		gchar *pango = xdxf_to_pango_markup_with_reduced_effort (field->data);
+		text = pango_to_output_text (pango, formatter);
+		g_free (pango);
+	}
+	return text;
+}
+
+// --- Main --------------------------------------------------------------------
+
+static guint
+count_equal_chars (const gchar *a, const gchar *b)
+{
+	// Bytes are compared pairwise, up to the end of the shorter string
+	guint equal = 0;
+	for (; *a && *b; a++, b++)
+		equal += *a == *b;
+	return equal;
+}
+
+/// Prints the best-matching entry for word from dict, one line per field.
+static void
+do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter)
+{
+	gboolean found;
+	StardictIterator *iter = stardict_dict_search (dict, word, &found);
+	if (!found)
+	{
+		g_object_unref (iter);
+		return;
+	}
+
+	// Default Stardict ordering is ASCII case-insensitive,
+	// which may be further exacerbated by our own collation feature.
+	// Try to find a better matching entry among the equal-ranking ones:
+	gint64 winner = stardict_iterator_get_offset (iter);
+	guint winner_score = count_equal_chars
+		(stardict_iterator_get_word (iter), word);
+
+	for (stardict_iterator_next (iter); stardict_iterator_is_valid (iter);
+		stardict_iterator_next (iter))
+	{
+		const gchar *candidate = stardict_iterator_get_word (iter);
+		if (g_ascii_strcasecmp (candidate, word))
+			break;
+
+		guint score = count_equal_chars (candidate, word);
+		if (score <= winner_score)
+			continue;
+
+		winner = stardict_iterator_get_offset (iter);
+		winner_score = score;
+	}
+
+	stardict_iterator_set_offset (iter, winner, FALSE);
+
+	// Each supported field becomes a line prefixed with the book name
+	StardictEntry *entry = stardict_iterator_get_entry (iter);
+	StardictInfo *info = stardict_dict_get_info (dict);
+	const GList *node = stardict_entry_get_fields (entry);
+	for (; node; node = node->next)
+	{
+		gchar *text = field_to_output_text (node->data, formatter);
+		if (!text)
+			continue;
+
+		// Backslashes and newlines are escaped to keep that line whole
+		printf ("%s\t", info->book_name);
+		for (const gchar *p = text; *p; p++)
+		{
+			switch (*p)
+			{
+			case '\\': printf ("\\\\"); break;
+			case '\n': printf ("\\n");  break;
+			default:   putchar (*p);
+			}
+		}
+		putchar ('\n');
+		g_free (text);
+	}
+	g_object_unref (entry);
+	g_object_unref (iter);
+}
+
+/// Parses the command line; returns the chosen formatter, exits on misuse.
+static FormatterFunc
+parse_options (int *argc, char ***argv)
+{
+	gboolean ansi = FALSE, irc = FALSE;
+	GOptionEntry entries[] =
+	{
+		{ "ansi", 'a', 0, G_OPTION_ARG_NONE, &ansi,
+		  "Format with ANSI sequences", NULL },
+		{ "irc", 'i', 0, G_OPTION_ARG_NONE, &irc,
+		  "Format with IRC codes", NULL },
+		{ }
+	};
+
+	GError *error = NULL;
+	GOptionContext *ctx = g_option_context_new
+		("DICTIONARY.ifo... - query multiple dictionaries");
+	g_option_context_add_main_entries (ctx, entries, NULL);
+
+	if (!g_option_context_parse (ctx, argc, argv, &error))
+	{
+		g_printerr ("Error: option parsing failed: %s\n", error->message);
+		exit (EXIT_FAILURE);
+	}
+
+	// At least one dictionary needs to be given
+	if (*argc < 2)
+	{
+		g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL));
+		exit (EXIT_FAILURE);
+	}
+	g_option_context_free (ctx);
+
+	// ANSI takes precedence over IRC when both have been requested
+	return ansi ? pango_attrs_to_ansi
+		: irc ? pango_attrs_to_irc
+		: pango_attrs_ignore;
+}
+
+/// Reads words from stdin, one per line, and queries every dictionary.
+int
+main (int argc, char *argv[])
+{
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+	if (glib_check_version (2, 36, 0))
+		g_type_init ();
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+	FormatterFunc formatter = parse_options (&argc, &argv);
+
+	// Whatever remains on the command line are paths to dictionaries
+	guint n_dicts = argc - 1;
+	StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts);
+	for (guint i = 0; i < n_dicts; i++)
+	{
+		const gchar *path = argv[i + 1];
+		GError *error = NULL;
+		dicts[i] = stardict_dict_new (path, &error);
+		if (!error)
+			continue;
+
+		g_printerr ("Error: opening dictionary `%s' failed: %s\n",
+			path, error->message);
+		exit (EXIT_FAILURE);
+	}
+
+	// Answer each input line, terminating every answer with an empty line
+	GString *line = g_string_new (NULL);
+	for (gint c = 0; c != EOF; )
+	{
+		g_string_truncate (line, 0);
+		while ((c = getchar ()) != EOF && c != '\n')
+			if (c != '\r')
+				g_string_append_c (line, c);
+
+		for (guint i = 0; line->len && i < n_dicts; i++)
+			do_dictionary (dicts[i], line->str, formatter);
+
+		printf ("\n");
+		fflush (NULL);
+	}
+	g_string_free (line, TRUE);
+
+	for (guint i = 0; i < n_dicts; i++)
+		g_object_unref (dicts[i]);
+
+	return 0;
+}
diff --git a/src/tdv-tabfile.c b/src/tdv-tabfile.c
new file mode 100644
index 0000000..fab0ef2
--- /dev/null
+++ b/src/tdv-tabfile.c
@@ -0,0 +1,223 @@
+/*
+ * A clean reimplementation of StarDict's tabfile
+ *
+ * Copyright (c) 2020 - 2021, Přemysl Eric Janouch <p@janouch.name>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+#include <pango/pango.h>
+
+#include <unicode/ucol.h>
+
+#include "config.h"
+#include "stardict.h"
+#include "stardict-private.h"
+#include "generator.h"
+#include "utils.h"
+
+/// Sets error to G_IO_ERROR_INVALID_DATA with the message, and returns FALSE.
+static gboolean
+set_data_error (GError **error, const gchar *message)
+{
+	g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA, message);
+	return FALSE;
+}
+
+static const gchar escapes[256] = { ['n'] = '\n', ['t'] = '\t', ['\\'] = '\\' };
+
+/// Decodes \n, \t and \\ escapes within line in place; FALSE on bad escapes.
+static gboolean
+inplace_unescape (gchar *line, GError **error)
+{
+	// The output never outgrows the input, so we may overwrite it as we go
+	gchar *out = line;
+	for (const gchar *in = line; *in; in++)
+	{
+		if (*in != '\\')
+		{
+			*out++ = *in;
+			continue;
+		}
+
+		// A backslash must be followed by one of the known escape characters
+		if (!*++in)
+			return set_data_error (error, "trailing escape character");
+		if (!(*out++ = escapes[(guchar) *in]))
+			return set_data_error (error, "unsupported escape");
+	}
+
+	*out = 0;
+	return TRUE;
+}
+
+/// Imports one "keyword<TAB>definition" line; empty lines are skipped.
+static gboolean
+import_line (Generator *generator, gchar *line, gsize len, GError **error)
+{
+	if (!len)
+		return TRUE;
+	if (!g_utf8_validate_len (line, len, NULL))
+		return set_data_error (error, "not valid UTF-8");
+	gchar *tab = strchr (line, '\t');
+	if (!tab)
+		return set_data_error (error, "keyword separator not found");
+
+	gchar *keyword = line, *definition = tab + 1;
+	*tab = 0;
+	// The index wouldn't be sorted correctly with our method
+	if (strchr (keyword, '\\'))
+		return set_data_error (error, "escapes not allowed in keywords");
+
+	gchar *eol = strpbrk (definition, "\r\n");
+	if (eol)
+		*eol = 0;
+	if (!inplace_unescape (keyword, error)
+	 || !inplace_unescape (definition, error))
+		return FALSE;
+
+	const gchar *sts = generator->info->same_type_sequence;
+	if (sts && *sts == STARDICT_FIELD_PANGO
+	 && !pango_parse_markup (definition, -1, 0, NULL, NULL, NULL, error))
+		return FALSE;
+
+	generator_begin_entry (generator);
+	return generator_write_string (generator, definition, TRUE, error)
+		&& generator_finish_entry (generator, keyword, error);
+}
+
+/// Imports fsorted line by line; import errors get a "line N: " prefix.
+static gboolean
+transform (FILE *fsorted, Generator *generator, GError **error)
+{
+	gchar *line = NULL;
+	gsize size = 0, ln = 1;
+	// feof() alone can't detect a failed import of an unterminated last line
+	gboolean ok = TRUE;
+	for (ssize_t read; (read = getline (&line, &size, fsorted)) >= 0; ln++)
+		if (!(ok = import_line (generator, line, read, error)))
+			break;
+
+	free (line);
+	if (!ok)
+		// You'll only get good line number output with presorted input!
+		g_prefix_error (error, "line %zu: ", ln);
+	else if (ferror (fsorted))
+	{
+		g_set_error_literal (error, G_IO_ERROR,
+			g_io_error_from_errno (errno), g_strerror (errno));
+		ok = FALSE;
+	}
+	return ok;
+}
+
+static void
+validate_collation_locale (const gchar *locale)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UCollator *probe = ucol_open (locale, &status);
+	if (!probe)
+		fatal ("failed to create a collator for %s: %s\n",
+			locale, u_errorName (status));
+	ucol_close (probe);  // We have only been checking that it can be opened
+}
+
+/// Converts tab-separated lines on stdin into the dictionary named by argv[1].
+int
+main (int argc, char *argv[])
+{
+	// The GLib help includes an ellipsis character, for some reason
+	(void) setlocale (LC_ALL, "");
+
+	GError *error = NULL;
+	GOptionContext *ctx = g_option_context_new ("output-basename < input");
+	g_option_context_set_summary (ctx,
+		"Create a StarDict dictionary from plaintext.");
+
+	gboolean pango_markup = FALSE;
+	StardictInfo template = {};
+	GOptionEntry entries[] =
+	{
+		{ "pango",       'p', 0, G_OPTION_ARG_NONE,   &pango_markup,
+		  "Entries use Pango markup", NULL },
+		{ "book-name",   'b', 0, G_OPTION_ARG_STRING, &template.book_name,
+		  "Set the book name field", "TEXT" },
+		{ "author",      'a', 0, G_OPTION_ARG_STRING, &template.author,
+		  "Set the author field", "NAME" },
+		{ "e-mail",      'e', 0, G_OPTION_ARG_STRING, &template.email,
+		  "Set the e-mail field", "ADDRESS" },
+		{ "website",     'w', 0, G_OPTION_ARG_STRING, &template.website,
+		  "Set the website field", "LINK" },
+		{ "description", 'd', 0, G_OPTION_ARG_STRING, &template.description,
+		  "Set the description field (newlines supported)", "TEXT" },
+		{ "date",        'D', 0, G_OPTION_ARG_STRING, &template.date,
+		  "Set the date field", "DATE" },
+		{ "collation",   'c', 0, G_OPTION_ARG_STRING, &template.collation,
+		  "Set the collation field (for ICU)", "LOCALE" },
+		{ }
+	};
+
+	g_option_context_add_main_entries (ctx, entries, GETTEXT_PACKAGE);
+	if (!g_option_context_parse (ctx, &argc, &argv, &error))
+		fatal ("Error: option parsing failed: %s\n", error->message);
+	if (argc != 2)
+		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
+	g_option_context_free (ctx);
+
+	template.version = SD_VERSION_3_0_0;
+	template.same_type_sequence = pango_markup
+		? (char[]) { STARDICT_FIELD_PANGO, 0 }
+		: (char[]) { STARDICT_FIELD_MEANING, 0 };
+
+	if (!template.book_name)
+		template.book_name = argv[1];
+	if (template.description)
+	{
+		gchar **lines = g_strsplit (template.description, "\n", -1);
+		g_free (template.description);
+		gchar *in_one_line = g_strjoinv ("<br>", lines);
+		g_strfreev (lines);
+		template.description = in_one_line;
+	}
+	if (template.collation)
+		validate_collation_locale (template.collation);
+
+	// This actually implements stardict_strcmp(), POSIX-compatibly.
+	// Your sort(1) is not expected to be stable by default, like bsdsort is.
+	FILE *fsorted = popen ("LC_ALL=C sort -t'\t' -k1f,1", "r");
+	if (!fsorted)
+		fatal ("%s: %s\n", "popen", g_strerror (errno));
+
+	Generator *generator = generator_new (argv[1], &error);
+	if (!generator)
+		fatal ("Error: failed to create the output dictionary: %s\n",
+			error->message);
+
+	stardict_info_copy (generator->info, &template);
+	if (!transform (fsorted, generator, &error)
+	 || !generator_finish (generator, &error))
+		fatal ("Error: failed to write the dictionary: %s\n", error->message);
+
+	generator_free (generator);
+	if (pclose (fsorted))
+		fatal ("Error: sorting the input has failed\n");
+	return 0;
+}
diff --git a/src/tdv-transform.c b/src/tdv-transform.c
new file mode 100644
index 0000000..7520eb8
--- /dev/null
+++ b/src/tdv-transform.c
@@ -0,0 +1,226 @@
+/*
+ * A tool to transform dictionaries by an external filter
+ *
+ * The external filter needs to process NUL-separated textual entries.
+ *
+ * Example: tdv-transform input.ifo output -- perl -p0e s/bullshit/soykaf/g
+ *
+ * Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <glib/gstdio.h>
+#include <glib-unix.h>
+#include <gio/gio.h>
+
+#include "stardict.h"
+#include "stardict-private.h"
+#include "generator.h"
+#include "utils.h"
+
+enum { PIPE_READ, PIPE_WRITE };
+
+
+// --- Main --------------------------------------------------------------------
+
+// Redraw the percentage in place, though only when its value has changed
+static inline void
+print_progress (gulong *last_percent, StardictIterator *iterator, gsize total)
+{
+	gulong offset = stardict_iterator_get_offset (iterator);
+	gulong percent = total ? offset * 100 / total : 100;  // no division by 0
+	if (percent == *last_percent)
+		return;
+	printf ("\r  Writing entries... %3lu%%", (*last_percent = percent));
+	fflush (stdout);  // line-buffered output would wait for a newline
+}
+
+// Pipe all lowercase-typed fields of all entries to fd, retrying short writes
+static gboolean
+write_to_filter (StardictDict *dict, gint fd, GError **error)
+{
+	StardictInfo *info = stardict_dict_get_info (dict);
+	gsize n_words = stardict_info_get_word_count (info);
+	StardictIterator *iterator = stardict_iterator_new (dict, 0);
+	gulong last_percent = -1;
+	while (stardict_iterator_is_valid (iterator))
+	{
+		print_progress (&last_percent, iterator, n_words);
+		StardictEntry *entry = stardict_iterator_get_entry (iterator);
+		for (const GList *fields = stardict_entry_get_fields (entry);
+			fields; fields = fields->next)
+		{
+			StardictEntryField *field = fields->data;
+			const gchar *p = field->data, *end = p + field->data_size;
+			while (g_ascii_islower (field->type) && p < end)
+			{
+				ssize_t n = write (fd, p, end - p);
+				if (n < 0 && errno != EINTR)  // errno is only valid here
+				{
+					g_set_error (error, G_IO_ERROR,
+						g_io_error_from_errno (errno), "%s", g_strerror (errno));
+					return FALSE;
+				}
+				p += MAX (n, 0);  // nothing has been consumed on EINTR
+			}
+		}
+		g_object_unref (entry);
+		stardict_iterator_next (iterator);
+	}
+	printf ("\n");
+	return TRUE;
+}
+
+// Regenerate all entries, replacing lowercase-typed fields with successive
+// NUL-terminated chunks of the filter output; returns FALSE on failure
+static gboolean
+update_from_filter (StardictDict *dict, Generator *generator,
+	GMappedFile *filtered_file, GError **error)
+{
+	gchar *cursor = g_mapped_file_get_contents (filtered_file);
+	gchar *limit = cursor + g_mapped_file_get_length (filtered_file);
+	gsize total = stardict_info_get_word_count (stardict_dict_get_info (dict));
+
+	gulong shown_percent = -1;
+	StardictIterator *iterator = stardict_iterator_new (dict, 0);
+	for (; stardict_iterator_is_valid (iterator);
+		stardict_iterator_next (iterator))
+	{
+		print_progress (&shown_percent, iterator, total);
+		StardictEntry *entry = stardict_iterator_get_entry (iterator);
+		generator_begin_entry (generator);
+		for (GList *link = entry->fields; link; link = link->next)
+		{
+			StardictEntryField *field = link->data;
+			if (g_ascii_islower (field->type))
+			{
+				// Chunks are delimited by their terminating NUL bytes
+				gchar *nul = memchr (cursor, 0, limit - cursor);
+				if (!nul)
+				{
+					g_set_error (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
+						"filter seems to have ended too early");
+					return FALSE;
+				}
+				// The stored size counts the terminator as well
+				gsize size = nul - cursor + 1;
+				g_free (field->data);
+				field->data = g_strdup (cursor);
+				field->data_size = size;
+				cursor += size;
+			}
+		}
+
+		if (!generator_write_fields (generator, entry->fields, error))
+			return FALSE;
+		if (!generator_finish_entry (generator,
+			stardict_iterator_get_word (iterator), error))
+			return FALSE;
+		g_object_unref (entry);
+	}
+	printf ("\n");
+	return TRUE;
+}
+
+// Run all textual fields of a dictionary through an external filter program
+int
+main (int argc, char *argv[])
+{
+	// The GLib help includes an ellipsis character, for some reason
+	(void) setlocale (LC_ALL, "");
+
+	GError *error = NULL;
+	GOptionContext *ctx = g_option_context_new
+		("input.ifo output-basename -- FILTER [ARG...]");
+	g_option_context_set_summary
+		(ctx, "Transform dictionaries using a filter program.");
+	if (!g_option_context_parse (ctx, &argc, &argv, &error))
+		fatal ("Error: option parsing failed: %s\n", error->message);
+
+	// GLib may leave the "--" separator in, getopt_long() always removes it
+	gint program_argv_start = 3;
+	if (argc > 3 && !strcmp (argv[3], "--"))
+		program_argv_start++;
+	if (argc <= program_argv_start)  // the filter command is mandatory
+		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
+	g_option_context_free (ctx);
+
+	printf ("Loading the original dictionary...\n");
+	StardictDict *dict = stardict_dict_new (argv[1], &error);
+	if (!dict)
+		fatal ("Error: opening the dictionary failed: %s\n", error->message);
+
+	printf ("Filtering entries...\n");
+	gint child_in[2];
+	if (!g_unix_open_pipe (child_in, 0, &error))
+		fatal ("g_unix_open_pipe: %s\n", error->message);
+
+	FILE *child_out = tmpfile ();
+	if (!child_out)
+		fatal ("tmpfile: %s\n", g_strerror (errno));
+
+	GPid pid = -1;
+	if (!g_spawn_async_with_fds (NULL /* working_directory */,
+		argv + program_argv_start /* forward a part of ours */, NULL /* envp */,
+		G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD,
+		NULL /* child_setup */, NULL /* user_data */,
+		&pid, child_in[PIPE_READ], fileno (child_out), STDERR_FILENO, &error))
+		fatal ("g_spawn: %s\n", error->message);
+	// With the read end kept open, writing could hang once the filter dies
+	if (!g_close (child_in[PIPE_READ], &error))
+		fatal ("g_close: %s\n", error->message);
+	if (!write_to_filter (dict, child_in[PIPE_WRITE], &error))
+		fatal ("write_to_filter: %s\n", error->message);
+	if (!g_close (child_in[PIPE_WRITE], &error))
+		fatal ("g_close: %s\n", error->message);
+
+	printf ("Waiting for the filter to finish...\n");
+	int wstatus = errno = 0;
+	if (waitpid (pid, &wstatus, 0) < 1
+	 || !WIFEXITED (wstatus) || WEXITSTATUS (wstatus) > 0)
+		fatal ("Filter failed (%s, status %d)\n", g_strerror (errno), wstatus);
+
+	GMappedFile *filtered = g_mapped_file_new_from_fd (fileno (child_out),
+		FALSE /* writable */, &error);
+	if (!filtered)
+		fatal ("g_mapped_file_new_from_fd: %s\n", error->message);
+
+	printf ("Writing the new dictionary...\n");
+	Generator *generator = generator_new (argv[2], &error);
+	if (!generator)
+		fatal ("Error: failed to create the output dictionary: %s\n",
+			error->message);
+
+	StardictInfo *info = generator->info;
+	stardict_info_copy (info, stardict_dict_get_info (dict));
+	// This gets incremented each time an entry is finished
+	info->word_count = 0;
+
+	if (!update_from_filter (dict, generator, filtered, &error)
+	 || !generator_finish (generator, &error))
+		fatal ("Error: failed to write the dictionary: %s\n", error->message);
+
+	g_mapped_file_unref (filtered);
+	fclose (child_out);
+	generator_free (generator);
+	g_object_unref (dict);
+	return 0;
+}
diff --git a/src/transform.c b/src/transform.c
deleted file mode 100644
index ba33dee..0000000
--- a/src/transform.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * A tool to transform dictionaries dictionaries by an external filter
- *
- * The external filter needs to process NUL-separated textual entries.
- *
- * Example: transform input.ifo output -- perl -p0e s/bullshit/soykaf/g
- *
- * Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <locale.h>
-
-#include <glib.h>
-#include <glib/gstdio.h>
-#include <glib-unix.h>
-#include <gio/gio.h>
-
-#include "stardict.h"
-#include "stardict-private.h"
-#include "generator.h"
-#include "utils.h"
-
-enum { PIPE_READ, PIPE_WRITE };
-
-
-// --- Main --------------------------------------------------------------------
-
-static inline void
-print_progress (gulong *last_percent, StardictIterator *iterator, gsize total)
-{
-	gulong percent =
-		(gulong) stardict_iterator_get_offset (iterator) * 100 / total;
-	if (percent != *last_percent)
-	{
-		printf ("\r  Writing entries... %3lu%%", percent);
-		*last_percent = percent;
-	}
-}
-
-static gboolean
-write_to_filter (StardictDict *dict, gint fd, GError **error)
-{
-	StardictInfo *info = stardict_dict_get_info (dict);
-	gsize n_words = stardict_info_get_word_count (info);
-
-	StardictIterator *iterator = stardict_iterator_new (dict, 0);
-	gulong last_percent = -1;
-	while (stardict_iterator_is_valid (iterator))
-	{
-		print_progress (&last_percent, iterator, n_words);
-
-		StardictEntry *entry = stardict_iterator_get_entry (iterator);
-		for (const GList *fields = stardict_entry_get_fields (entry);
-			fields; fields = fields->next)
-		{
-			StardictEntryField *field = fields->data;
-			if (!g_ascii_islower (field->type))
-				continue;
-
-			if (write (fd, field->data, field->data_size)
-				!= (ssize_t) field->data_size)
-			{
-				g_set_error (error, G_IO_ERROR, g_io_error_from_errno (errno),
-					"%s", g_strerror (errno));
-				return FALSE;
-			}
-		}
-
-		g_object_unref (entry);
-		stardict_iterator_next (iterator);
-	}
-	printf ("\n");
-	return TRUE;
-}
-
-static gboolean
-update_from_filter (StardictDict *dict, Generator *generator,
-	GMappedFile *filtered_file, GError **error)
-{
-	gchar *filtered = g_mapped_file_get_contents (filtered_file);
-	gchar *filtered_end = filtered + g_mapped_file_get_length (filtered_file);
-
-	StardictInfo *info = stardict_dict_get_info (dict);
-	gsize n_words = stardict_info_get_word_count (info);
-
-	StardictIterator *iterator = stardict_iterator_new (dict, 0);
-	gulong last_percent = -1;
-	while (stardict_iterator_is_valid (iterator))
-	{
-		print_progress (&last_percent, iterator, n_words);
-
-		StardictEntry *entry = stardict_iterator_get_entry (iterator);
-		generator_begin_entry (generator);
-
-		for (GList *fields = entry->fields; fields; fields = fields->next)
-		{
-			StardictEntryField *field = fields->data;
-			if (!g_ascii_islower (field->type))
-				continue;
-
-			gchar *end = memchr (filtered, 0, filtered_end - filtered);
-			if (!end)
-			{
-				g_set_error (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
-					"filter seems to have ended too early");
-				return FALSE;
-			}
-
-			g_free (field->data);
-			field->data = g_strdup (filtered);
-			field->data_size = end - filtered + 1;
-			filtered = end + 1;
-		}
-
-		if (!generator_write_fields (generator, entry->fields, error)
-		 || !generator_finish_entry (generator,
-				stardict_iterator_get_word (iterator), error))
-			return FALSE;
-
-		g_object_unref (entry);
-		stardict_iterator_next (iterator);
-	}
-	printf ("\n");
-	return TRUE;
-}
-
-int
-main (int argc, char *argv[])
-{
-	// The GLib help includes an ellipsis character, for some reason
-	(void) setlocale (LC_ALL, "");
-
-	GError *error = NULL;
-	GOptionContext *ctx = g_option_context_new
-		("input.ifo output-basename -- FILTER [ARG...]");
-	g_option_context_set_summary
-		(ctx, "Transform dictionaries using a filter program.");
-	if (!g_option_context_parse (ctx, &argc, &argv, &error))
-		fatal ("Error: option parsing failed: %s\n", error->message);
-
-	if (argc < 3)
-		fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL));
-
-	// GLib is bullshit, getopt_long() always correctly removes this
-	gint program_argv_start = 3;
-	if (!strcmp (argv[program_argv_start], "--"))
-		program_argv_start++;
-
-	g_option_context_free (ctx);
-
-	printf ("Loading the original dictionary...\n");
-	StardictDict *dict = stardict_dict_new (argv[1], &error);
-	if (!dict)
-		fatal ("Error: opening the dictionary failed: %s\n", error->message);
-
-	printf ("Filtering entries...\n");
-	gint child_in[2];
-	if (!g_unix_open_pipe (child_in, 0, &error))
-		fatal ("g_unix_open_pipe: %s\n", error->message);
-
-	FILE *child_out = tmpfile ();
-	if (!child_out)
-		fatal ("tmpfile: %s\n", g_strerror (errno));
-
-	GPid pid = -1;
-	if (!g_spawn_async_with_fds (NULL /* working_directory */,
-		argv + program_argv_start /* forward a part of ours */, NULL /* envp */,
-		G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD,
-		NULL /* child_setup */, NULL /* user_data */,
-		&pid, child_in[PIPE_READ], fileno (child_out), STDERR_FILENO, &error))
-		fatal ("g_spawn: %s\n", error->message);
-	if (!write_to_filter (dict, child_in[PIPE_WRITE], &error))
-		fatal ("write_to_filter: %s\n", error->message);
-	if (!g_close (child_in[PIPE_READ], &error)
-	 || !g_close (child_in[PIPE_WRITE], &error))
-		fatal ("g_close: %s\n", error->message);
-
-	printf ("Waiting for the filter to finish...\n");
-	int wstatus = errno = 0;
-	if (waitpid (pid, &wstatus, 0) < 1
-	 || !WIFEXITED (wstatus) || WEXITSTATUS (wstatus) > 0)
-		fatal ("Filter failed (%s, status %d)\n", g_strerror (errno), wstatus);
-
-	GMappedFile *filtered = g_mapped_file_new_from_fd (fileno (child_out),
-		FALSE /* writable */, &error);
-	if (!filtered)
-		fatal ("g_mapped_file_new_from_fd: %s\n", error->message);
-
-	printf ("Writing the new dictionary...\n");
-	Generator *generator = generator_new (argv[2], &error);
-	if (!generator)
-		fatal ("Error: failed to create the output dictionary: %s\n",
-			error->message);
-
-	StardictInfo *info = generator->info;
-	stardict_info_copy (info, stardict_dict_get_info (dict));
-
-	// This gets incremented each time an entry is finished
-	info->word_count = 0;
-
-	if (!update_from_filter (dict, generator, filtered, &error)
-	 || !generator_finish (generator, &error))
-		fatal ("Error: failed to write the dictionary: %s\n", error->message);
-
-	g_mapped_file_unref (filtered);
-	fclose (child_out);
-	generator_free (generator);
-	g_object_unref (dict);
-	return 0;
-}
-- 
cgit v1.3.1