summaryrefslogtreecommitdiff
path: root/add-pronunciation.c
diff options
context:
space:
mode:
Diffstat (limited to 'add-pronunciation.c')
-rw-r--r--add-pronunciation.c262
1 files changed, 0 insertions, 262 deletions
diff --git a/add-pronunciation.c b/add-pronunciation.c
deleted file mode 100644
index 45eae61..0000000
--- a/add-pronunciation.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * A tool to add eSpeak-generated pronunciation to dictionaries
- *
- * Here I use the `espeak' process rather than libespeak because of the GPL.
- *
- * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
- * All rights reserved.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <glib.h>
-#include <gio/gio.h>
-
-#include "stardict.h"
-
-
-// --- Pronunciation generator -------------------------------------------------
-
-typedef struct worker_data WorkerData;
-
-struct worker_data
-{
- guint32 start_entry; //! The first entry to be processed
- guint32 end_entry; //! Past the last entry to be processed
-
- /* Reader, writer */
- GMutex *dict_mutex; //! Locks the dictionary object
-
- /* Reader */
- GThread *main_thread; //! A handle to the reader thread
- StardictDict *dict; //! The dictionary object
- gpointer output; //! Linked-list of pronunciation data
-
- GMutex *remaining_mutex; //! Locks the progress stats
- GCond *remaining_cond; //! Signals a change in progress
- guint32 remaining; //! How many entries remain
-
- /* Writer */
- StardictIterator *iterator; //! Iterates over the dictionary
- FILE *child_stdin; //! Standard input of eSpeak
-};
-
-/** Writes to espeak's stdin. */
-static gpointer
-worker_writer (WorkerData *data)
-{
- while (stardict_iterator_get_offset (data->iterator) != data->end_entry)
- {
- g_mutex_lock (data->dict_mutex);
- const gchar *word = stardict_iterator_get_word (data->iterator);
- g_mutex_unlock (data->dict_mutex);
-
- stardict_iterator_next (data->iterator);
- if (fprintf (data->child_stdin, "%s\n", word) < 0)
- g_error ("write to eSpeak failed: %s", strerror (errno));
- }
-
- g_object_unref (data->iterator);
- return GINT_TO_POINTER (fclose (data->child_stdin));
-}
-
-/** Reads from espeak's stdout. */
-static gpointer
-worker (WorkerData *data)
-{
- /* Spawn eSpeak */
- static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL };
- gint child_in, child_out;
-
- GError *error;
- if (!g_spawn_async_with_pipes (NULL, cmdline, NULL,
- G_SPAWN_SEARCH_PATH, NULL, NULL,
- NULL, &child_in, &child_out, NULL, &error))
- g_error ("g_spawn() failed: %s", error->message);
-
- data->child_stdin = fdopen (child_in, "wb");
- if (!data->child_stdin)
- perror ("fdopen");
-
- FILE *child_stdout = fdopen (child_out, "rb");
- if (!child_stdout)
- perror ("fdopen");
-
- /* Spawn a writer thread */
- g_mutex_lock (data->dict_mutex);
- data->iterator = stardict_iterator_new (data->dict, data->start_entry);
- g_mutex_unlock (data->dict_mutex);
-
- GThread *writer = g_thread_new ("write worker",
- (GThreadFunc) worker_writer, data);
-
- /* Read the output */
- g_mutex_lock (data->remaining_mutex);
- guint32 remaining = data->remaining;
- g_mutex_unlock (data->remaining_mutex);
-
- data->output = NULL;
- gpointer *output_end = &data->output;
- while (remaining)
- {
- static gchar next[sizeof (gpointer)];
- GString *s = g_string_new (NULL);
- g_string_append_len (s, next, sizeof next);
-
- gint c;
- while ((c = fgetc (child_stdout)) != EOF && c != '\n')
- g_string_append_c (s, c);
- if (c == EOF)
- g_error ("eSpeak process died too soon");
-
- gchar *translation = g_string_free (s, FALSE);
- *output_end = translation;
- output_end = (gpointer *) translation;
-
- /* We limit progress reporting so that
- * the mutex doesn't spin like crazy */
- if ((--remaining & 1023) != 0)
- continue;
-
- g_mutex_lock (data->remaining_mutex);
- data->remaining = remaining;
- g_cond_broadcast (data->remaining_cond);
- g_mutex_unlock (data->remaining_mutex);
- }
-
- fclose (child_stdout);
- return g_thread_join (writer);
-}
-
-// --- Main --------------------------------------------------------------------
-
-int
-main (int argc, char *argv[])
-{
- gint n_processes = 1;
-
- GOptionEntry entries[] =
- {
- { "processes", 'N', G_OPTION_FLAG_IN_MAIN,
- G_OPTION_ARG_INT, &n_processes,
- "the number of espeak processes run in parallel", "PROCESSES" },
- { NULL }
- };
-
- GError *error = NULL;
- GOptionContext *ctx = g_option_context_new
- ("input.ifo output.ifo - add pronunciation to dictionaries");
- g_option_context_add_main_entries (ctx, entries, NULL);
- if (!g_option_context_parse (ctx, &argc, &argv, &error))
- {
- g_print ("option parsing failed: %s\n", error->message);
- exit (EXIT_FAILURE);
- }
-
- if (argc != 3)
- {
- gchar *help = g_option_context_get_help (ctx, TRUE, FALSE);
- g_print ("%s", help);
- g_free (help);
- exit (EXIT_FAILURE);
- }
-
- StardictDict *dict = stardict_dict_new (argv[1], &error);
- if (!dict)
- {
- g_printerr ("opening the dictionary failed: %s\n", error->message);
- exit (EXIT_FAILURE);
- }
-
- gsize n_words = stardict_info_get_word_count
- (stardict_dict_get_info (dict));
-
- if (n_processes <= 0)
- {
- g_printerr ("Error: there must be at least one process\n");
- exit (EXIT_FAILURE);
- }
-
- if ((gsize) n_processes > n_words * 1024)
- {
- n_processes = n_words / 1024;
- if (!n_processes)
- n_processes = 1;
- g_printerr ("Warning: too many processes, reducing to %d\n",
- n_processes);
- }
-
- /* Spawn worker threads to generate pronunciations */
- static GMutex dict_mutex;
-
- static GMutex remaining_mutex;
- static GCond remaining_cond;
-
- WorkerData *data = g_alloca (sizeof *data * n_processes);
-
- gint i;
- for (i = 0; i < n_processes; i++)
- {
- data[i].start_entry = (n_words - 1) * i / n_processes;
- data[i].end_entry = (n_words - 1) * (i + 1) / n_processes;
-
- data[i].remaining = data[i].end_entry - data[i].start_entry;
- data[i].remaining_mutex = &remaining_mutex;
- data[i].remaining_cond = &remaining_cond;
-
- data[i].dict = dict;
- data[i].dict_mutex = &dict_mutex;
-
- data->main_thread = g_thread_new ("worker", (GThreadFunc) worker, data);
- }
-
- /* Loop while the threads still have some work to do and report status */
- g_mutex_lock (&remaining_mutex);
- for (;;)
- {
- gboolean all_finished = TRUE;
- printf ("\rRetrieving pronunciation... ");
- for (i = 0; i < n_processes; i++)
- {
- printf ("%3u%% ", data[i].remaining * 100
- / (data[i].end_entry - data[i].start_entry));
- if (data[i].remaining)
- all_finished = FALSE;
- }
-
- if (all_finished)
- break;
- g_cond_wait (&remaining_cond, &remaining_mutex);
- }
- g_mutex_unlock (&remaining_mutex);
-
- for (i = 0; i < n_processes; i++)
- g_thread_join (data[i].main_thread);
-
- // TODO after all processing is done, the program will go through the whole
- // dictionary and put extended data entries into a new one.
- StardictIterator *iterator = stardict_iterator_new (dict, 0);
- while (stardict_iterator_is_valid (iterator))
- {
- // ...
- stardict_iterator_next (iterator);
- }
-
- return 0;
-}