From febff16ed4736c9fcb4dc65c04f0260dc2fe2b33 Mon Sep 17 00:00:00 2001 From: Přemysl Janouch Date: Wed, 8 May 2013 20:54:06 +0200 Subject: Move sources into their own directory --- .gitignore | 2 + Makefile | 8 +- add-pronunciation.c | 262 ------------ sdcli.c | 274 ------------ src/add-pronunciation.c | 262 ++++++++++++ src/sdcli.c | 274 ++++++++++++ src/stardict.c | 1070 +++++++++++++++++++++++++++++++++++++++++++++++ src/stardict.h | 215 ++++++++++ src/test-stardict.c | 429 +++++++++++++++++++ stardict.c | 1070 ----------------------------------------------- stardict.h | 215 ---------- test-stardict.c | 429 ------------------- 12 files changed, 2256 insertions(+), 2254 deletions(-) delete mode 100644 add-pronunciation.c delete mode 100644 sdcli.c create mode 100644 src/add-pronunciation.c create mode 100644 src/sdcli.c create mode 100644 src/stardict.c create mode 100644 src/stardict.h create mode 100644 src/test-stardict.c delete mode 100644 stardict.c delete mode 100644 stardict.h delete mode 100644 test-stardict.c diff --git a/.gitignore b/.gitignore index fee94d8..f4d000c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ *.*~ # Compile output /sdcli +/add-pronunciation +/test-stardict *.o # IDE project files /sdcli.creator* diff --git a/Makefile b/Makefile index 41e69c1..ce20ed3 100644 --- a/Makefile +++ b/Makefile @@ -14,15 +14,15 @@ LDFLAGS = `pkg-config --libs $(pkgs)` all: $(targets) clean: - rm -f $(targets) *.o + rm -f $(targets) src/*.o -sdcli: sdcli.o stardict.o +sdcli: src/sdcli.o src/stardict.o $(CC) $^ -o $@ $(LDFLAGS) -add-pronunciation: add-pronunciation.o stardict.o +add-pronunciation: src/add-pronunciation.o src/stardict.o $(CC) $^ -o $@ $(LDFLAGS) -test-stardict: test-stardict.o stardict.o +test-stardict: src/test-stardict.o src/stardict.o $(CC) $^ -o $@ $(LDFLAGS) test: $(tests) diff --git a/add-pronunciation.c b/add-pronunciation.c deleted file mode 100644 index 45eae61..0000000 --- a/add-pronunciation.c +++ /dev/null @@ -1,262 +0,0 
@@ -/* - * A tool to add eSpeak-generated pronunciation to dictionaries - * - * Here I use the `espeak' process rather than libespeak because of the GPL. - * - * Copyright (c) 2013, Přemysl Janouch - * All rights reserved. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#include -#include -#include -#include - -#include -#include - -#include "stardict.h" - - -// --- Pronunciation generator ------------------------------------------------- - -typedef struct worker_data WorkerData; - -struct worker_data -{ - guint32 start_entry; //! The first entry to be processed - guint32 end_entry; //! Past the last entry to be processed - - /* Reader, writer */ - GMutex *dict_mutex; //! Locks the dictionary object - - /* Reader */ - GThread *main_thread; //! A handle to the reader thread - StardictDict *dict; //! The dictionary object - gpointer output; //! Linked-list of pronunciation data - - GMutex *remaining_mutex; //! Locks the progress stats - GCond *remaining_cond; //! Signals a change in progress - guint32 remaining; //! How many entries remain - - /* Writer */ - StardictIterator *iterator; //! Iterates over the dictionary - FILE *child_stdin; //! Standard input of eSpeak -}; - -/** Writes to espeak's stdin. 
*/ -static gpointer -worker_writer (WorkerData *data) -{ - while (stardict_iterator_get_offset (data->iterator) != data->end_entry) - { - g_mutex_lock (data->dict_mutex); - const gchar *word = stardict_iterator_get_word (data->iterator); - g_mutex_unlock (data->dict_mutex); - - stardict_iterator_next (data->iterator); - if (fprintf (data->child_stdin, "%s\n", word) < 0) - g_error ("write to eSpeak failed: %s", strerror (errno)); - } - - g_object_unref (data->iterator); - return GINT_TO_POINTER (fclose (data->child_stdin)); -} - -/** Reads from espeak's stdout. */ -static gpointer -worker (WorkerData *data) -{ - /* Spawn eSpeak */ - static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL }; - gint child_in, child_out; - - GError *error; - if (!g_spawn_async_with_pipes (NULL, cmdline, NULL, - G_SPAWN_SEARCH_PATH, NULL, NULL, - NULL, &child_in, &child_out, NULL, &error)) - g_error ("g_spawn() failed: %s", error->message); - - data->child_stdin = fdopen (child_in, "wb"); - if (!data->child_stdin) - perror ("fdopen"); - - FILE *child_stdout = fdopen (child_out, "rb"); - if (!child_stdout) - perror ("fdopen"); - - /* Spawn a writer thread */ - g_mutex_lock (data->dict_mutex); - data->iterator = stardict_iterator_new (data->dict, data->start_entry); - g_mutex_unlock (data->dict_mutex); - - GThread *writer = g_thread_new ("write worker", - (GThreadFunc) worker_writer, data); - - /* Read the output */ - g_mutex_lock (data->remaining_mutex); - guint32 remaining = data->remaining; - g_mutex_unlock (data->remaining_mutex); - - data->output = NULL; - gpointer *output_end = &data->output; - while (remaining) - { - static gchar next[sizeof (gpointer)]; - GString *s = g_string_new (NULL); - g_string_append_len (s, next, sizeof next); - - gint c; - while ((c = fgetc (child_stdout)) != EOF && c != '\n') - g_string_append_c (s, c); - if (c == EOF) - g_error ("eSpeak process died too soon"); - - gchar *translation = g_string_free (s, FALSE); - *output_end = translation; - output_end 
= (gpointer *) translation; - - /* We limit progress reporting so that - * the mutex doesn't spin like crazy */ - if ((--remaining & 1023) != 0) - continue; - - g_mutex_lock (data->remaining_mutex); - data->remaining = remaining; - g_cond_broadcast (data->remaining_cond); - g_mutex_unlock (data->remaining_mutex); - } - - fclose (child_stdout); - return g_thread_join (writer); -} - -// --- Main -------------------------------------------------------------------- - -int -main (int argc, char *argv[]) -{ - gint n_processes = 1; - - GOptionEntry entries[] = - { - { "processes", 'N', G_OPTION_FLAG_IN_MAIN, - G_OPTION_ARG_INT, &n_processes, - "the number of espeak processes run in parallel", "PROCESSES" }, - { NULL } - }; - - GError *error = NULL; - GOptionContext *ctx = g_option_context_new - ("input.ifo output.ifo - add pronunciation to dictionaries"); - g_option_context_add_main_entries (ctx, entries, NULL); - if (!g_option_context_parse (ctx, &argc, &argv, &error)) - { - g_print ("option parsing failed: %s\n", error->message); - exit (EXIT_FAILURE); - } - - if (argc != 3) - { - gchar *help = g_option_context_get_help (ctx, TRUE, FALSE); - g_print ("%s", help); - g_free (help); - exit (EXIT_FAILURE); - } - - StardictDict *dict = stardict_dict_new (argv[1], &error); - if (!dict) - { - g_printerr ("opening the dictionary failed: %s\n", error->message); - exit (EXIT_FAILURE); - } - - gsize n_words = stardict_info_get_word_count - (stardict_dict_get_info (dict)); - - if (n_processes <= 0) - { - g_printerr ("Error: there must be at least one process\n"); - exit (EXIT_FAILURE); - } - - if ((gsize) n_processes > n_words * 1024) - { - n_processes = n_words / 1024; - if (!n_processes) - n_processes = 1; - g_printerr ("Warning: too many processes, reducing to %d\n", - n_processes); - } - - /* Spawn worker threads to generate pronunciations */ - static GMutex dict_mutex; - - static GMutex remaining_mutex; - static GCond remaining_cond; - - WorkerData *data = g_alloca (sizeof 
*data * n_processes); - - gint i; - for (i = 0; i < n_processes; i++) - { - data[i].start_entry = (n_words - 1) * i / n_processes; - data[i].end_entry = (n_words - 1) * (i + 1) / n_processes; - - data[i].remaining = data[i].end_entry - data[i].start_entry; - data[i].remaining_mutex = &remaining_mutex; - data[i].remaining_cond = &remaining_cond; - - data[i].dict = dict; - data[i].dict_mutex = &dict_mutex; - - data->main_thread = g_thread_new ("worker", (GThreadFunc) worker, data); - } - - /* Loop while the threads still have some work to do and report status */ - g_mutex_lock (&remaining_mutex); - for (;;) - { - gboolean all_finished = TRUE; - printf ("\rRetrieving pronunciation... "); - for (i = 0; i < n_processes; i++) - { - printf ("%3u%% ", data[i].remaining * 100 - / (data[i].end_entry - data[i].start_entry)); - if (data[i].remaining) - all_finished = FALSE; - } - - if (all_finished) - break; - g_cond_wait (&remaining_cond, &remaining_mutex); - } - g_mutex_unlock (&remaining_mutex); - - for (i = 0; i < n_processes; i++) - g_thread_join (data[i].main_thread); - - // TODO after all processing is done, the program will go through the whole - // dictionary and put extended data entries into a new one. - StardictIterator *iterator = stardict_iterator_new (dict, 0); - while (stardict_iterator_is_valid (iterator)) - { - // ... - stardict_iterator_next (iterator); - } - - return 0; -} diff --git a/sdcli.c b/sdcli.c deleted file mode 100644 index 8a8f4ca..0000000 --- a/sdcli.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * StarDict console UI - * - * Copyright (c) 2013, Přemysl Janouch - * All rights reserved. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#define _XOPEN_SOURCE_EXTENDED /**< Yes, we want ncursesw. */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include "stardict.h" - - -#define KEY_ESCAPE 27 /**< Curses doesn't define this. */ - -// --- Utilities --------------------------------------------------------------- - -static void -display (const gchar *format, ...) -{ - va_list ap; - - va_start (ap, format); - vw_printw (stdscr, format, ap); - va_end (ap); - refresh (); -} - -static gchar * -wchar_to_mb (wchar_t ch) -{ - /* Convert the character back to a multi-byte sequence. */ - static gchar buffer[MB_LEN_MAX + 1]; - size_t len = wcrtomb (buffer, ch, NULL); - - /* This shouldn't happen. It would mean that the user has - * somehow managed to enter something inexpressable in the - * current locale. */ - if (len == (size_t) -1) - abort (); - - /* Here I hope the buffer doesn't overflow. Who uses - * shift states nowadays, anyway? 
*/ - if (wcrtomb (buffer + len, L'\0', NULL) == (size_t) -1) - abort (); - - return buffer; -} - -static const gchar * -wchar_to_mb_escaped (wchar_t ch) -{ - switch (ch) - { - case L'\r': return "\\r"; - case L'\n': return "\\n"; - case L'\t': return "\\t"; - default: return wchar_to_mb (ch); - } -} - -static int -poll_restart (struct pollfd *fds, nfds_t nfds, int timeout) -{ - int ret; - do - ret = poll (fds, nfds, timeout); - while (ret == -1 && errno == EINTR); - return ret; -} - -// --- SIGWINCH ---------------------------------------------------------------- - -static int g_winch_pipe[2]; /**< SIGWINCH signalling pipe. */ -static void (*g_old_winch_handler) (int); - -static void -winch_handler (int signum) -{ - /* Call the ncurses handler. */ - if (g_old_winch_handler) - g_old_winch_handler (signum); - - /* And wake up the poll() call. */ - write (g_winch_pipe[1], "x", 1); -} - -static void -install_winch_handler (void) -{ - struct sigaction act, oldact; - - act.sa_handler = winch_handler; - act.sa_flags = SA_RESTART; - sigemptyset (&act.sa_mask); - sigaction (SIGWINCH, &act, &oldact); - - /* Save the ncurses handler. 
*/ - if (oldact.sa_handler != SIG_DFL - && oldact.sa_handler != SIG_IGN) - g_old_winch_handler = oldact.sa_handler; -} - -// --- Event handlers ---------------------------------------------------------- - -typedef struct -{ - wint_t code; - guint is_char : 1; - MEVENT mouse; -} -CursesEvent; - -static gboolean -process_curses_event (CursesEvent *event) -{ - if (!event->is_char) - { - switch (event->code) - { - case KEY_RESIZE: - display ("Screen has been resized to %u x %u\n", - COLS, LINES); - break; - case KEY_MOUSE: - display ("Mouse event at (%d, %d), state %#lx\n", - event->mouse.x, event->mouse.y, event->mouse.bstate); - break; - default: - display ("Keyboard event: non-character: %u\n", - event->code); - } - return TRUE; - } - - display ("Keyboard event: character: '%s'\n", - wchar_to_mb_escaped (event->code)); - - if (event->code == L'q' || event->code == KEY_ESCAPE) - { - display ("Quitting...\n"); - return FALSE; - } - - return TRUE; -} - -static gboolean -process_stdin_input (void) -{ - CursesEvent event; - int sta; - - while ((sta = get_wch (&event.code)) != ERR) - { - event.is_char = (sta == OK); - if (sta == KEY_CODE_YES && event.code == KEY_MOUSE - && getmouse (&event.mouse) == ERR) - abort (); - if (!process_curses_event (&event)) - return FALSE; - } - - return TRUE; -} - -static gboolean -process_winch_input (int fd) -{ - char c; - - read (fd, &c, 1); - return process_stdin_input (); -} - -// --- Main -------------------------------------------------------------------- - -int -main (int argc, char *argv[]) -{ - static GOptionEntry entries[] = - { - { NULL } - }; - - if (!setlocale (LC_ALL, "")) - abort (); - - GError *error = NULL; - GOptionContext *ctx = g_option_context_new ("- StarDict console UI"); - g_option_context_add_main_entries (ctx, entries, NULL); - if (!g_option_context_parse (ctx, &argc, &argv, &error)) - { - g_print ("option parsing failed: %s\n", error->message); - exit (EXIT_FAILURE); - } - - if (!initscr () - || cbreak () == ERR - 
|| noecho () == ERR) - abort (); - - keypad (stdscr, TRUE); /* Enable character processing. */ - nodelay (stdscr, TRUE); /* Don't block on get_wch(). */ - - mousemask (ALL_MOUSE_EVENTS, NULL); - - display ("Press Q, Escape or ^C to quit\n"); - - if (pipe (g_winch_pipe) == -1) - abort (); - - install_winch_handler (); - -// --- Message loop ------------------------------------------------------------ - - struct pollfd pollfd[2]; - - pollfd[0].fd = fileno (stdin); - pollfd[0].events = POLLIN; - pollfd[1].fd = g_winch_pipe[0]; - pollfd[1].events = POLLIN; - - while (TRUE) - { - if (poll_restart (pollfd, 3, -1) == -1) - abort (); - - if ((pollfd[0].revents & POLLIN) - && !process_stdin_input ()) - break; - if ((pollfd[1].revents & POLLIN) - && !process_winch_input (pollfd[2].fd)) - break; - } - -// --- Cleanup ----------------------------------------------------------------- - - endwin (); - - if (close (g_winch_pipe[0]) == -1 - || close (g_winch_pipe[1]) == -1) - abort (); - - return 0; -} - diff --git a/src/add-pronunciation.c b/src/add-pronunciation.c new file mode 100644 index 0000000..45eae61 --- /dev/null +++ b/src/add-pronunciation.c @@ -0,0 +1,262 @@ +/* + * A tool to add eSpeak-generated pronunciation to dictionaries + * + * Here I use the `espeak' process rather than libespeak because of the GPL. + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include +#include +#include +#include + +#include +#include + +#include "stardict.h" + + +// --- Pronunciation generator ------------------------------------------------- + +typedef struct worker_data WorkerData; + +struct worker_data +{ + guint32 start_entry; //! The first entry to be processed + guint32 end_entry; //! Past the last entry to be processed + + /* Reader, writer */ + GMutex *dict_mutex; //! Locks the dictionary object + + /* Reader */ + GThread *main_thread; //! A handle to the reader thread + StardictDict *dict; //! The dictionary object + gpointer output; //! Linked-list of pronunciation data + + GMutex *remaining_mutex; //! Locks the progress stats + GCond *remaining_cond; //! Signals a change in progress + guint32 remaining; //! How many entries remain + + /* Writer */ + StardictIterator *iterator; //! Iterates over the dictionary + FILE *child_stdin; //! Standard input of eSpeak +}; + +/** Writes to espeak's stdin. */ +static gpointer +worker_writer (WorkerData *data) +{ + while (stardict_iterator_get_offset (data->iterator) != data->end_entry) + { + g_mutex_lock (data->dict_mutex); + const gchar *word = stardict_iterator_get_word (data->iterator); + g_mutex_unlock (data->dict_mutex); + + stardict_iterator_next (data->iterator); + if (fprintf (data->child_stdin, "%s\n", word) < 0) + g_error ("write to eSpeak failed: %s", strerror (errno)); + } + + g_object_unref (data->iterator); + return GINT_TO_POINTER (fclose (data->child_stdin)); +} + +/** Reads from espeak's stdout. 
*/ +static gpointer +worker (WorkerData *data) +{ + /* Spawn eSpeak */ + static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL }; + gint child_in, child_out; + + GError *error; + if (!g_spawn_async_with_pipes (NULL, cmdline, NULL, + G_SPAWN_SEARCH_PATH, NULL, NULL, + NULL, &child_in, &child_out, NULL, &error)) + g_error ("g_spawn() failed: %s", error->message); + + data->child_stdin = fdopen (child_in, "wb"); + if (!data->child_stdin) + perror ("fdopen"); + + FILE *child_stdout = fdopen (child_out, "rb"); + if (!child_stdout) + perror ("fdopen"); + + /* Spawn a writer thread */ + g_mutex_lock (data->dict_mutex); + data->iterator = stardict_iterator_new (data->dict, data->start_entry); + g_mutex_unlock (data->dict_mutex); + + GThread *writer = g_thread_new ("write worker", + (GThreadFunc) worker_writer, data); + + /* Read the output */ + g_mutex_lock (data->remaining_mutex); + guint32 remaining = data->remaining; + g_mutex_unlock (data->remaining_mutex); + + data->output = NULL; + gpointer *output_end = &data->output; + while (remaining) + { + static gchar next[sizeof (gpointer)]; + GString *s = g_string_new (NULL); + g_string_append_len (s, next, sizeof next); + + gint c; + while ((c = fgetc (child_stdout)) != EOF && c != '\n') + g_string_append_c (s, c); + if (c == EOF) + g_error ("eSpeak process died too soon"); + + gchar *translation = g_string_free (s, FALSE); + *output_end = translation; + output_end = (gpointer *) translation; + + /* We limit progress reporting so that + * the mutex doesn't spin like crazy */ + if ((--remaining & 1023) != 0) + continue; + + g_mutex_lock (data->remaining_mutex); + data->remaining = remaining; + g_cond_broadcast (data->remaining_cond); + g_mutex_unlock (data->remaining_mutex); + } + + fclose (child_stdout); + return g_thread_join (writer); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + gint n_processes = 1; + + GOptionEntry entries[] = + { + 
{ "processes", 'N', G_OPTION_FLAG_IN_MAIN, + G_OPTION_ARG_INT, &n_processes, + "the number of espeak processes run in parallel", "PROCESSES" }, + { NULL } + }; + + GError *error = NULL; + GOptionContext *ctx = g_option_context_new + ("input.ifo output.ifo - add pronunciation to dictionaries"); + g_option_context_add_main_entries (ctx, entries, NULL); + if (!g_option_context_parse (ctx, &argc, &argv, &error)) + { + g_print ("option parsing failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + if (argc != 3) + { + gchar *help = g_option_context_get_help (ctx, TRUE, FALSE); + g_print ("%s", help); + g_free (help); + exit (EXIT_FAILURE); + } + + StardictDict *dict = stardict_dict_new (argv[1], &error); + if (!dict) + { + g_printerr ("opening the dictionary failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + gsize n_words = stardict_info_get_word_count + (stardict_dict_get_info (dict)); + + if (n_processes <= 0) + { + g_printerr ("Error: there must be at least one process\n"); + exit (EXIT_FAILURE); + } + + if ((gsize) n_processes > n_words * 1024) + { + n_processes = n_words / 1024; + if (!n_processes) + n_processes = 1; + g_printerr ("Warning: too many processes, reducing to %d\n", + n_processes); + } + + /* Spawn worker threads to generate pronunciations */ + static GMutex dict_mutex; + + static GMutex remaining_mutex; + static GCond remaining_cond; + + WorkerData *data = g_alloca (sizeof *data * n_processes); + + gint i; + for (i = 0; i < n_processes; i++) + { + data[i].start_entry = (n_words - 1) * i / n_processes; + data[i].end_entry = (n_words - 1) * (i + 1) / n_processes; + + data[i].remaining = data[i].end_entry - data[i].start_entry; + data[i].remaining_mutex = &remaining_mutex; + data[i].remaining_cond = &remaining_cond; + + data[i].dict = dict; + data[i].dict_mutex = &dict_mutex; + + data->main_thread = g_thread_new ("worker", (GThreadFunc) worker, data); + } + + /* Loop while the threads still have some work to do and report status */ + 
g_mutex_lock (&remaining_mutex); + for (;;) + { + gboolean all_finished = TRUE; + printf ("\rRetrieving pronunciation... "); + for (i = 0; i < n_processes; i++) + { + printf ("%3u%% ", data[i].remaining * 100 + / (data[i].end_entry - data[i].start_entry)); + if (data[i].remaining) + all_finished = FALSE; + } + + if (all_finished) + break; + g_cond_wait (&remaining_cond, &remaining_mutex); + } + g_mutex_unlock (&remaining_mutex); + + for (i = 0; i < n_processes; i++) + g_thread_join (data[i].main_thread); + + // TODO after all processing is done, the program will go through the whole + // dictionary and put extended data entries into a new one. + StardictIterator *iterator = stardict_iterator_new (dict, 0); + while (stardict_iterator_is_valid (iterator)) + { + // ... + stardict_iterator_next (iterator); + } + + return 0; +} diff --git a/src/sdcli.c b/src/sdcli.c new file mode 100644 index 0000000..8a8f4ca --- /dev/null +++ b/src/sdcli.c @@ -0,0 +1,274 @@ +/* + * StarDict console UI + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#define _XOPEN_SOURCE_EXTENDED /**< Yes, we want ncursesw. 
*/ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "stardict.h" + + +#define KEY_ESCAPE 27 /**< Curses doesn't define this. */ + +// --- Utilities --------------------------------------------------------------- + +static void +display (const gchar *format, ...) +{ + va_list ap; + + va_start (ap, format); + vw_printw (stdscr, format, ap); + va_end (ap); + refresh (); +} + +static gchar * +wchar_to_mb (wchar_t ch) +{ + /* Convert the character back to a multi-byte sequence. */ + static gchar buffer[MB_LEN_MAX + 1]; + size_t len = wcrtomb (buffer, ch, NULL); + + /* This shouldn't happen. It would mean that the user has + * somehow managed to enter something inexpressable in the + * current locale. */ + if (len == (size_t) -1) + abort (); + + /* Here I hope the buffer doesn't overflow. Who uses + * shift states nowadays, anyway? */ + if (wcrtomb (buffer + len, L'\0', NULL) == (size_t) -1) + abort (); + + return buffer; +} + +static const gchar * +wchar_to_mb_escaped (wchar_t ch) +{ + switch (ch) + { + case L'\r': return "\\r"; + case L'\n': return "\\n"; + case L'\t': return "\\t"; + default: return wchar_to_mb (ch); + } +} + +static int +poll_restart (struct pollfd *fds, nfds_t nfds, int timeout) +{ + int ret; + do + ret = poll (fds, nfds, timeout); + while (ret == -1 && errno == EINTR); + return ret; +} + +// --- SIGWINCH ---------------------------------------------------------------- + +static int g_winch_pipe[2]; /**< SIGWINCH signalling pipe. */ +static void (*g_old_winch_handler) (int); + +static void +winch_handler (int signum) +{ + /* Call the ncurses handler. */ + if (g_old_winch_handler) + g_old_winch_handler (signum); + + /* And wake up the poll() call. 
*/ + write (g_winch_pipe[1], "x", 1); +} + +static void +install_winch_handler (void) +{ + struct sigaction act, oldact; + + act.sa_handler = winch_handler; + act.sa_flags = SA_RESTART; + sigemptyset (&act.sa_mask); + sigaction (SIGWINCH, &act, &oldact); + + /* Save the ncurses handler. */ + if (oldact.sa_handler != SIG_DFL + && oldact.sa_handler != SIG_IGN) + g_old_winch_handler = oldact.sa_handler; +} + +// --- Event handlers ---------------------------------------------------------- + +typedef struct +{ + wint_t code; + guint is_char : 1; + MEVENT mouse; +} +CursesEvent; + +static gboolean +process_curses_event (CursesEvent *event) +{ + if (!event->is_char) + { + switch (event->code) + { + case KEY_RESIZE: + display ("Screen has been resized to %u x %u\n", + COLS, LINES); + break; + case KEY_MOUSE: + display ("Mouse event at (%d, %d), state %#lx\n", + event->mouse.x, event->mouse.y, event->mouse.bstate); + break; + default: + display ("Keyboard event: non-character: %u\n", + event->code); + } + return TRUE; + } + + display ("Keyboard event: character: '%s'\n", + wchar_to_mb_escaped (event->code)); + + if (event->code == L'q' || event->code == KEY_ESCAPE) + { + display ("Quitting...\n"); + return FALSE; + } + + return TRUE; +} + +static gboolean +process_stdin_input (void) +{ + CursesEvent event; + int sta; + + while ((sta = get_wch (&event.code)) != ERR) + { + event.is_char = (sta == OK); + if (sta == KEY_CODE_YES && event.code == KEY_MOUSE + && getmouse (&event.mouse) == ERR) + abort (); + if (!process_curses_event (&event)) + return FALSE; + } + + return TRUE; +} + +static gboolean +process_winch_input (int fd) +{ + char c; + + read (fd, &c, 1); + return process_stdin_input (); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + static GOptionEntry entries[] = + { + { NULL } + }; + + if (!setlocale (LC_ALL, "")) + abort (); + + GError *error = NULL; + GOptionContext *ctx = 
g_option_context_new ("- StarDict console UI"); + g_option_context_add_main_entries (ctx, entries, NULL); + if (!g_option_context_parse (ctx, &argc, &argv, &error)) + { + g_print ("option parsing failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + if (!initscr () + || cbreak () == ERR + || noecho () == ERR) + abort (); + + keypad (stdscr, TRUE); /* Enable character processing. */ + nodelay (stdscr, TRUE); /* Don't block on get_wch(). */ + + mousemask (ALL_MOUSE_EVENTS, NULL); + + display ("Press Q, Escape or ^C to quit\n"); + + if (pipe (g_winch_pipe) == -1) + abort (); + + install_winch_handler (); + +// --- Message loop ------------------------------------------------------------ + + struct pollfd pollfd[2]; + + pollfd[0].fd = fileno (stdin); + pollfd[0].events = POLLIN; + pollfd[1].fd = g_winch_pipe[0]; + pollfd[1].events = POLLIN; + + while (TRUE) + { + if (poll_restart (pollfd, 3, -1) == -1) + abort (); + + if ((pollfd[0].revents & POLLIN) + && !process_stdin_input ()) + break; + if ((pollfd[1].revents & POLLIN) + && !process_winch_input (pollfd[2].fd)) + break; + } + +// --- Cleanup ----------------------------------------------------------------- + + endwin (); + + if (close (g_winch_pipe[0]) == -1 + || close (g_winch_pipe[1]) == -1) + abort (); + + return 0; +} + diff --git a/src/stardict.c b/src/stardict.c new file mode 100644 index 0000000..4439022 --- /dev/null +++ b/src/stardict.c @@ -0,0 +1,1070 @@ +/* + * stardict.c: StarDict API + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include +#include +#include +#include + +#include +#include + +#include "stardict.h" + + +/** Describes a single entry in the dictionary index. */ +typedef struct stardict_index_entry StardictIndexEntry; + +/** Describes a single entry in the synonyms index. */ +typedef struct stardict_synonym_entry StardictSynonymEntry; + +/** Helper class for reading .ifo files. */ +typedef struct ifo_reader IfoReader; + + +typedef enum stardict_version StardictVersion; +enum stardict_version { SD_VERSION_2_4_2, SD_VERSION_3_0_0 }; + +struct stardict_info +{ + gchar * path; + StardictVersion version; + + gchar * book_name; + gulong word_count; + gulong syn_word_count; + gulong idx_filesize; + gulong idx_offset_bits; + gchar * author; + gchar * email; + gchar * website; + gchar * description; + gchar * date; + gchar * same_type_sequence; +}; + +struct stardict_index_entry +{ + gchar * name; //!< The word in utf-8 + guint64 data_offset; //!< Offset of the definition + guint32 data_size; //!< Size of the definition +}; + +struct stardict_synonym_entry +{ + gchar * word; //!< A synonymous word + guint32 original_word; //!< The original word's index +}; + struct ifo_reader +{ + gchar * data; //!< File data terminated with \0 + gchar * data_end; //!< Where the final \0 char. is + + gchar * start; //!< Start of the current token + + gchar * key; //!< The key (points into @a data) + gchar * value; //!< The value (points into @a data) +}; + +// --- Utilities --------------------------------------------------------------- + +/** Read the whole stream into a byte array. 
*/ +static gboolean +stream_read_all (GByteArray *ba, GInputStream *is, GError **error) +{ + guint8 buffer[1024 * 64]; + gsize bytes_read; + + while (g_input_stream_read_all (is, buffer, sizeof buffer, + &bytes_read, NULL, error)) + { + g_byte_array_append (ba, buffer, bytes_read); + if (bytes_read < sizeof buffer) + return TRUE; + } + return FALSE; +} + +/** Read a null-terminated string from a data input stream. */ +static gchar * +stream_read_string (GDataInputStream *dis, GError **error) +{ + gsize length; + gchar *s = g_data_input_stream_read_upto (dis, "", 1, &length, NULL, error); + if (!s) + return NULL; + + GError *err = NULL; + g_data_input_stream_read_byte (dis, NULL, &err); + if (err) + { + g_free (s); + g_propagate_error (error, err); + return NULL; + } + + return s; +} + +/** String compare function used for StarDict indexes. */ +static inline gint +stardict_strcmp (const gchar *s1, const gchar *s2) +{ + gint a; + a = g_ascii_strcasecmp (s1, s2); + return a ? a : strcmp (s1, s2); +} + +/** After this statement, the element has been found and its index is stored + * in the variable "imid". */ +#define BINARY_SEARCH_BEGIN(max, compare) \ + gint imin = 0, imax = max, imid; \ + while (imin <= imax) { \ + imid = imin + (imax - imin) / 2; \ + gint cmp = compare; \ + if (cmp > 0) imin = imid + 1; \ + else if (cmp < 0) imax = imid - 1; \ + else { + +/** After this statement, the binary search has failed and "imin" stores + * the position where the element can be inserted. 
*/ +#define BINARY_SEARCH_END \ + } \ + } + +// --- Errors ------------------------------------------------------------------ + +GQuark +stardict_error_quark (void) +{ + return g_quark_from_static_string ("stardict-error-quark"); +} + +// --- IFO reader -------------------------------------------------------------- + +static gboolean +ifo_reader_init (IfoReader *ir, const gchar *path, GError **error) +{ + gsize length; + gchar *contents; + if (!g_file_get_contents (path, &contents, &length, error)) + return FALSE; + + static const char first_line[] = "StarDict's dict ifo file\n"; + if (length < sizeof first_line - 1 + || strncmp (contents, first_line, sizeof first_line - 1)) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid header format", path); + return FALSE; + } + + ir->data = contents; + ir->start = contents + sizeof first_line - 1; + ir->data_end = contents + length; + return TRUE; +} + +static void +ifo_reader_free (IfoReader *ir) +{ + g_free (ir->data); +} + +static gint +ifo_reader_read (IfoReader *ir) +{ + ir->key = NULL; + ir->value = NULL; + + gchar *p; + for (p = ir->start; p < ir->data_end; p++) + { + if (*p == '\n') + { + if (!ir->key) + return -1; + + *p = 0; + ir->value = ir->start; + ir->start = p + 1; + return 1; + } + + if (*p == '=') + { + if (p == ir->start) + return -1; + + *p = 0; + ir->key = ir->start; + ir->start = p + 1; + } + } + + if (!ir->key) + { + if (p != ir->start) + return -1; + return 0; + } + + ir->value = ir->start; + ir->start = p; + return 1; +} + +// --- StardictInfo ------------------------------------------------------------ + +/** Return the filesystem path for the dictionary. */ +const gchar * +stardict_info_get_path (StardictInfo *sdi) +{ + return sdi->path; +} + +/** Return the name of the dictionary. */ +const gchar * +stardict_info_get_book_name (StardictInfo *sdi) +{ + return sdi->book_name; +} + +/** Return the word count of the dictionary. 
Note that this information comes + * from the .ifo file, while the dictionary could successfully load with + * a different count of word entries. + */ +gsize +stardict_info_get_word_count (StardictInfo *sdi) +{ + return sdi->word_count; +} + +/** Destroy the dictionary info object. */ +void +stardict_info_free (StardictInfo *sdi) +{ + g_free (sdi->path); + g_free (sdi->book_name); + g_free (sdi->author); + g_free (sdi->email); + g_free (sdi->website); + g_free (sdi->description); + g_free (sdi->date); + g_free (sdi->same_type_sequence); + g_free (sdi); +} + +#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) } + +static gboolean +load_ifo (StardictInfo *sti, const gchar *path, GError **error) +{ + IfoReader ir; + if (!ifo_reader_init (&ir, path, error)) + return FALSE; + + gboolean ret_val = FALSE; + memset (sti, 0, sizeof *sti); + + if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version")) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: version not specified", path); + goto error; + } + + if (!strcmp (ir.value, "2.4.2")) + sti->version = SD_VERSION_2_4_2; + else if (!strcmp (ir.value, "3.0.0")) + sti->version = SD_VERSION_3_0_0; + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid version: %s", path, ir.value); + goto error; + } + + static const struct + { + const gchar *name; + enum { IFO_STRING, IFO_NUMBER } type; + size_t offset; + } + ifo_keys[] = + { + DEFINE_IFO_KEY ("bookname", STRING, book_name), + DEFINE_IFO_KEY ("wordcount", NUMBER, word_count), + DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count), + DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize), + DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits), + DEFINE_IFO_KEY ("author", STRING, author), + DEFINE_IFO_KEY ("email", STRING, email), + DEFINE_IFO_KEY ("website", STRING, website), + DEFINE_IFO_KEY ("description", STRING, description), + DEFINE_IFO_KEY ("date", STRING, date), + 
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence) + }; + + gint ret; + while ((ret = ifo_reader_read (&ir)) == 1) + { + guint i; + for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) + if (!strcmp (ir.key, ifo_keys[i].name)) + break; + + if (i == G_N_ELEMENTS (ifo_keys)) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: unknown key, ignoring: %s", path, ir.key); + continue; + } + + if (ifo_keys[i].type == IFO_STRING) + { + G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset) + = g_strdup (ir.value); + continue; + } + + // Otherwise it has to be IFO_NUMBER + gchar *end; + gulong wc = strtol (ir.value, &end, 10); + if (*end) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid integer", path); + goto error; + } + + G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = wc; + } + + if (ret == -1) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: option format error", path); + goto error; + } + + ret_val = TRUE; + + // FIXME check for zeros, don't assume that 0 means for "not set" + if (!sti->book_name || !*sti->book_name) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: no book name specified\n", path); + ret_val = FALSE; + } + if (!sti->word_count) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: word count not specified\n", path); + ret_val = FALSE; + } + if (!sti->idx_filesize) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: .idx file size not specified\n", path); + ret_val = FALSE; + } + + if (!sti->idx_offset_bits) + sti->idx_offset_bits = 32; + else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: wrong index offset bits: %lu\n", path, sti->idx_offset_bits); + ret_val = FALSE; + } + +error: + if (!ret_val) + { + guint i; + for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) + if (ifo_keys[i].type == 
IFO_STRING) + g_free (G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset)); + } + else + sti->path = g_strdup (path); + + ifo_reader_free (&ir); + return ret_val; +} + +/** List all dictionary files located in a path. + * @return GList. Deallocate the list with: + * @code + * g_list_free_full ((GDestroyNotify) stardict_info_free); + * @endcode + */ +GList * +stardict_list_dictionaries (const gchar *path) +{ + GPatternSpec *ps = g_pattern_spec_new ("*.ifo"); + GDir *dir = g_dir_open (path, 0, NULL); + g_return_val_if_fail (dir != NULL, NULL); + + GList *dicts = NULL; + const gchar *name; + while ((name = g_dir_read_name (dir))) + { + if (!g_pattern_match_string (ps, name)) + continue; + + gchar *filename = g_build_filename (path, name, NULL); + StardictInfo *ifo = g_new (StardictInfo, 1); + if (load_ifo (ifo, filename, NULL)) + dicts = g_list_append (dicts, ifo); + else + g_free (ifo); + g_free (filename); + } + g_dir_close (dir); + g_pattern_spec_free (ps); + return dicts; +} + +// --- StardictDict ------------------------------------------------------------ + +G_DEFINE_TYPE (StardictDict, stardict_dict, G_TYPE_OBJECT) + +static void +stardict_dict_finalize (GObject *self) +{ + StardictDict *sd = STARDICT_DICT (self); + + stardict_info_free (sd->info); + g_array_free (sd->index, TRUE); + g_array_free (sd->synonyms, TRUE); + + if (sd->mapped_dict) + g_mapped_file_unref (sd->mapped_dict); + else + g_free (sd->dict); + + G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self); +} + +static void +stardict_dict_class_init (StardictDictClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize; +} + +static void +stardict_dict_init (G_GNUC_UNUSED StardictDict *sd) +{ +} + +/** Load a StarDict dictionary. 
+ * @param[in] filename Path to the .ifo file + */ +StardictDict * +stardict_dict_new (const gchar *filename, GError **error) +{ + StardictInfo *ifo = g_new (StardictInfo, 1); + if (!load_ifo (ifo, filename, error)) + { + g_free (ifo); + return NULL; + } + + StardictDict *sd = stardict_dict_new_from_info (ifo, error); + if (!sd) stardict_info_free (ifo); + return sd; +} + +/** Return information about a loaded dictionary. */ +StardictInfo * +stardict_dict_get_info (StardictDict *sd) +{ + g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); + return sd->info; +} + +/** Load a StarDict index from a GIO input stream. */ +static gboolean +load_idx_internal (StardictDict *sd, GInputStream *is, GError **error) +{ + GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is)); + g_data_input_stream_set_byte_order (dis, + G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + StardictIndexEntry entry; + GError *err = NULL; + // Ignoring "wordcount", just reading as long as we can + while ((entry.name = stream_read_string (dis, &err))) + { + if (sd->info->idx_offset_bits == 32) + entry.data_offset + = g_data_input_stream_read_uint32 (dis, NULL, &err); + else + entry.data_offset + = g_data_input_stream_read_uint64 (dis, NULL, &err); + if (err) + goto error; + + entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err); + if (err) + goto error; + + g_array_append_val (sd->index, entry); + } + + if (err != NULL) + goto error; + + g_object_unref (dis); + return TRUE; + +error: + g_propagate_error (error, err); + g_free (entry.name); + g_object_unref (dis); + return FALSE; +} + +/** Load a StarDict index. 
*/ +static gboolean +load_idx (StardictDict *sd, const gchar *filename, + gboolean gzipped, GError **error) +{ + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + if (gzipped) + { + GZlibDecompressor *zd + = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); + GInputStream *cis = g_converter_input_stream_new + (G_INPUT_STREAM (fis), G_CONVERTER (zd)); + + ret_val = load_idx_internal (sd, cis, error); + + g_object_unref (cis); + g_object_unref (zd); + } + else + ret_val = load_idx_internal (sd, G_INPUT_STREAM (fis), error); + + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; +} + +static gboolean +load_syn (StardictDict *sd, const gchar *filename, GError **error) +{ + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis)); + g_data_input_stream_set_byte_order (dis, + G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + StardictSynonymEntry entry; + GError *err = NULL; + // Ignoring "synwordcount", just reading as long as we can + while ((entry.word = stream_read_string (dis, &err))) + { + entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err); + if (err) + break; + + g_array_append_val (sd->synonyms, entry); + } + + if (err != NULL) + { + g_free (entry.word); + g_propagate_error (error, err); + } + else + ret_val = TRUE; + + g_object_unref (dis); + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; +} + +/** Destroy an index entry. */ +static void +index_destroy_cb (gpointer sde) +{ + StardictIndexEntry *e = sde; + g_free (e->name); +} + +/** Destroy a synonym entry. 
*/ +static void +syn_destroy_cb (gpointer sde) +{ + StardictSynonymEntry *e = sde; + g_free (e->word); +} + +/** Load StarDict dictionary data. */ +static gboolean +load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped, + GError **error) +{ + if (gzipped) + { + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + // Just read it all, as it is, into memory + GByteArray *ba = g_byte_array_new (); + GZlibDecompressor *zd + = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); + GInputStream *cis = g_converter_input_stream_new + (G_INPUT_STREAM (fis), G_CONVERTER (zd)); + + ret_val = stream_read_all (ba, cis, error); + + g_object_unref (cis); + g_object_unref (zd); + + if (ret_val) + { + sd->dict_length = ba->len; + sd->dict = g_byte_array_free (ba, FALSE); + } + else + g_byte_array_free (ba, TRUE); + + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; + } + + sd->mapped_dict = g_mapped_file_new (filename, FALSE, error); + if (!sd->mapped_dict) + return FALSE; + + sd->dict_length = g_mapped_file_get_length (sd->mapped_dict); + sd->dict = g_mapped_file_get_contents (sd->mapped_dict); + return TRUE; +} + +/** Load a StarDict dictionary. + * @param[in] sdi Parsed .ifo data. + */ +StardictDict * +stardict_dict_new_from_info (StardictInfo *sdi, GError **error) +{ + g_return_val_if_fail (sdi != NULL, NULL); + + StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL); + sd->info = sdi; + sd->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry)); + g_array_set_clear_func (sd->index, index_destroy_cb); + sd->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry)); + g_array_set_clear_func (sd->synonyms, syn_destroy_cb); + + const gchar *dot = strrchr (sdi->path, '.'); + gchar *base = dot ? 
g_strndup (sdi->path, dot - sdi->path) + : g_strdup (sdi->path); + + gchar *base_idx = g_strconcat (base, ".idx", NULL); + gboolean ret = FALSE; + if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_idx (sd, base_idx, FALSE, error); + else + { + gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL); + g_free (base_idx); + base_idx = base_idx_gz; + + if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_idx (sd, base_idx, TRUE, error); + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, + "%s: cannot find index file", sdi->path); + } + } + g_free (base_idx); + + if (!ret) + goto error; + + gchar *base_dict = g_strconcat (base, ".dict", NULL); + ret = FALSE; + if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_dict (sd, base_dict, FALSE, error); + else + { + gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL); + g_free (base_dict); + base_dict = base_dict_dz; + + if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_dict (sd, base_dict, TRUE, error); + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, + "%s: cannot find dict file", sdi->path); + } + } + g_free (base_dict); + + if (!ret) + goto error; + + gchar *base_syn = g_strconcat (base, ".syn", NULL); + if (g_file_test (base_syn, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + load_syn (sd, base_syn, NULL); + g_free (base_syn); + + g_free (base); + return sd; + +error: + g_array_free (sd->index, TRUE); + g_free (base); + g_object_unref (sd); + return NULL; +} + +/** Return words for which the argument is a synonym of or NULL + * if there are no such words. 
+ */ +gchar ** +stardict_dict_get_synonyms (StardictDict *sd, const gchar *word) +{ + BINARY_SEARCH_BEGIN (sd->synonyms->len - 1, stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, imid).word)) + + // Back off to the first matching entry + while (imid > 0 && !stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, --imid).word)); + + GPtrArray *array = g_ptr_array_new (); + + // And add all matching entries from that position on to the array + do + g_ptr_array_add (array, g_strdup (g_array_index + (sd->index, StardictIndexEntry, g_array_index + (sd->synonyms, StardictSynonymEntry, ++imid).original_word).name)); + while ((guint) imid < sd->synonyms->len - 1 && !stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, imid + 1).word)); + + return (gchar **) g_ptr_array_free (array, FALSE); + + BINARY_SEARCH_END + + return NULL; +} + +/** Search for a word. + * @param[in] word The word in utf-8 encoding + * @param[out] success TRUE if found + * @return An iterator object pointing to the word, or where it would be + */ +StardictIterator * +stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success) +{ + BINARY_SEARCH_BEGIN (sd->index->len - 1, stardict_strcmp (word, + g_array_index (sd->index, StardictIndexEntry, imid).name)) + + if (success) *success = TRUE; + return stardict_iterator_new (sd, imid); + + BINARY_SEARCH_END + + if (success) *success = FALSE; + return stardict_iterator_new (sd, imin); +} + +static void +stardict_entry_field_free (StardictEntryField *sef) +{ + g_free (sef->data); + g_slice_free1 (sizeof *sef, sef); +} + +static StardictEntryField * +read_entry (gchar type, const gchar **entry_iterator, + const gchar *end, gboolean is_final) +{ + const gchar *entry = *entry_iterator; + if (g_ascii_islower (type)) + { + GString *data = g_string_new (NULL); + + if (is_final) + { + g_string_append_len (data, entry, end - entry); + entry += end - entry; + } + else + { + gint c = 
EOF; + while (entry < end && (c = *entry++)) + g_string_append_c (data, c); + + if (c != '\0') + return (gpointer) g_string_free (data, TRUE); + } + + StardictEntryField *sef = g_slice_alloc (sizeof *sef); + sef->type = type; + sef->data_size = data->len + 1; + sef->data = g_string_free (data, FALSE); + *entry_iterator = entry; + return sef; + } + + gsize length; + if (is_final) + length = end - entry; + else + { + if (entry + sizeof (guint32) > end) + return NULL; + + length = GUINT32_FROM_BE (*(guint32 *) entry); + entry += sizeof (guint32); + + if (entry + length > end) + return NULL; + } + + StardictEntryField *sef = g_slice_alloc (sizeof *sef); + sef->type = type; + sef->data_size = length; + sef->data = memcpy (g_malloc (length), entry, length); + *entry_iterator = entry + length; + return sef; +} + +static GList * +read_entries (const gchar *entry, gsize entry_size, GError **error) +{ + const gchar *end = entry + entry_size; + GList *result = NULL; + + while (entry < end) + { + gchar type = *entry++; + StardictEntryField *sef = read_entry (type, &entry, end, FALSE); + if (!sef) + goto error; + result = g_list_append (result, sef); + } + + return result; + +error: + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "invalid data entry"); + g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); + return NULL; +} + +static GList * +read_entries_sts (const gchar *entry, gsize entry_size, + const gchar *sts, GError **error) +{ + const gchar *end = entry + entry_size; + GList *result = NULL; + + while (*sts) + { + gchar type = *sts++; + StardictEntryField *sef = read_entry (type, &entry, end, !*sts); + if (!sef) + goto error; + result = g_list_append (result, sef); + } + + return result; + +error: + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "invalid data entry"); + g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); + return NULL; +} + +/** Return the data for the specified offset in the 
index. Unsafe. */ +static StardictEntry * +stardict_dict_get_entry (StardictDict *sd, guint32 offset) +{ + // TODO cache the entries + StardictIndexEntry *sie = &g_array_index (sd->index, + StardictIndexEntry, offset); + + g_return_val_if_fail (sie->data_offset + sie->data_size + <= sd->dict_length, NULL); + + GList *entries; + if (sd->info->same_type_sequence) + entries = read_entries_sts (sd->dict + sie->data_offset, + sie->data_size, sd->info->same_type_sequence, NULL); + else + entries = read_entries (sd->dict + sie->data_offset, + sie->data_size, NULL); + + if (!entries) + return NULL; + + StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL); + se->fields = entries; + return se; +} + +// --- StardictEntry ----------------------------------------------------------- + +G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT) + +static void +stardict_entry_finalize (GObject *self) +{ + StardictEntry *sde = STARDICT_ENTRY (self); + + g_list_free_full (sde->fields, (GDestroyNotify) stardict_entry_field_free); + + G_OBJECT_CLASS (stardict_entry_parent_class)->finalize (self); +} + +static void +stardict_entry_class_init (StardictEntryClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_entry_finalize; +} + +static void +stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde) +{ +} + +/** Return the entries present within the entry. 
+ * @return GList + */ +const GList * +stardict_entry_get_fields (StardictEntry *sde) +{ + g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL); + return sde->fields; +} + +// --- StardictIterator--------------------------------------------------------- + +G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT) + +static void +stardict_iterator_finalize (GObject *self) +{ + StardictIterator *si = STARDICT_ITERATOR (self); + + g_object_unref (si->owner); + + G_OBJECT_CLASS (stardict_iterator_parent_class)->finalize (self); +} + +static void +stardict_iterator_class_init (StardictIteratorClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_iterator_finalize; +} + +static void +stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd) +{ +} + +/** Create a new iterator for the dictionary with offset @a offset. */ +StardictIterator * +stardict_iterator_new (StardictDict *sd, guint32 offset) +{ + g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); + + StardictIterator *si = g_object_new (STARDICT_TYPE_ITERATOR, NULL); + si->owner = g_object_ref (sd); + si->offset = offset; + return si; +} + +/** Return the word in the index that the iterator points at, or NULL. */ +const gchar * +stardict_iterator_get_word (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); + if (!stardict_iterator_is_valid (sdi)) + return NULL; + return g_array_index (sdi->owner->index, + StardictIndexEntry, sdi->offset).name; +} + +/** Return the dictionary entry that the iterator points at, or NULL. */ +StardictEntry * +stardict_iterator_get_entry (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); + if (!stardict_iterator_is_valid (sdi)) + return FALSE; + return stardict_dict_get_entry (sdi->owner, sdi->offset); +} + +/** Return whether the iterator points to a valid index entry. 
*/ +gboolean +stardict_iterator_is_valid (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE); + return sdi->offset >= 0 && sdi->offset < sdi->owner->index->len; +} + +/** Return the offset of the iterator within the dictionary index. */ +gint64 +stardict_iterator_get_offset (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1); + return sdi->offset; +} + +/** Set the offset of the iterator. */ +void +stardict_iterator_set_offset + (StardictIterator *sdi, gint64 offset, gboolean relative) +{ + g_return_if_fail (STARDICT_IS_ITERATOR (sdi)); + sdi->offset = relative ? sdi->offset + offset : offset; +} diff --git a/src/stardict.h b/src/stardict.h new file mode 100644 index 0000000..aef27fd --- /dev/null +++ b/src/stardict.h @@ -0,0 +1,215 @@ +/* + * stardict.h: StarDict API + * + * This module doesn't cover all the functionality available to StarDict + * dictionaries, it should however be good enough for most of them that are + * freely available on the Internet. + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef STARDICT_H +#define STARDICT_H + +/** An object intended for interacting with a dictionary. 
*/ +typedef struct stardict_dict StardictDict; +typedef struct stardict_dict_class StardictDictClass; + +/** Overall information about a particular dictionary. */ +typedef struct stardict_info StardictInfo; + +/** Handles the task of moving around the dictionary. */ +typedef struct stardict_iterator StardictIterator; +typedef struct stardict_iterator_class StardictIteratorClass; + +/** Contains the decoded data for a single word definition. */ +typedef struct stardict_entry StardictEntry; +typedef struct stardict_entry_class StardictEntryClass; + +/** A single field of a word definition. */ +typedef struct stardict_entry_field StardictEntryField; + +/* GObject boilerplate. */ +#define STARDICT_TYPE_DICT (stardict_dict_get_type ()) +#define STARDICT_DICT(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_DICT, StardictDict)) +#define STARDICT_IS_DICT(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_DICT)) +#define STARDICT_DICT_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_DICT, StardictDictClass)) +#define STARDICT_IS_DICT_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_DICT)) +#define STARDICT_DICT_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_DICT, StardictDictClass)) + +#define STARDICT_TYPE_ITERATOR (stardict_iterator_get_type ()) +#define STARDICT_ITERATOR(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_ITERATOR, StardictIterator)) +#define STARDICT_IS_ITERATOR(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_ITERATOR)) +#define STARDICT_ITERATOR_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_ITERATOR, StardictIteratorClass)) +#define STARDICT_IS_ITERATOR_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_ITERATOR)) +#define STARDICT_ITERATOR_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_ITERATOR, StardictIteratorClass)) + +#define STARDICT_TYPE_ENTRY 
(stardict_entry_get_type ()) +#define STARDICT_ENTRY(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_ENTRY, StardictEntry)) +#define STARDICT_IS_ENTRY(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_ENTRY)) +#define STARDICT_ENTRY_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_ENTRY, StardictEntryClass)) +#define STARDICT_IS_ENTRY_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_ENTRY)) +#define STARDICT_ENTRY_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_ENTRY, StardictEntryClass)) + +// --- Errors ------------------------------------------------------------------ + +/** General error type. */ +typedef enum { + STARDICT_ERROR_FILE_NOT_FOUND, //!< Some file was not found + STARDICT_ERROR_INVALID_DATA //!< Dictionary contains invalid data +} StardictError; + +#define STARDICT_ERROR (stardict_error_quark ()) + +GQuark stardict_error_quark (void); + +// --- Dictionary information -------------------------------------------------- + +const gchar *stardict_info_get_path (StardictInfo *sdi) G_GNUC_PURE; +const gchar *stardict_info_get_book_name (StardictInfo *sdi) G_GNUC_PURE; +gsize stardict_info_get_word_count (StardictInfo *sd) G_GNUC_PURE; +void stardict_info_free (StardictInfo *sdi); + +GList *stardict_list_dictionaries (const gchar *path); + +// --- Dictionaries ------------------------------------------------------------ + +struct stardict_dict +{ + GObject parent_instance; + StardictInfo * info; //!< General information about the dict + GArray * index; //!< Word index + GArray * synonyms; //!< Synonyms + gpointer dict; //!< Dictionary data + gsize dict_length; //!< Length of the dict data in bytes + GMappedFile * mapped_dict; //!< Memory map handle +}; + +struct stardict_dict_class +{ + GObjectClass parent_class; +}; + +GType stardict_dict_get_type (void); +StardictDict *stardict_dict_new (const gchar *filename, GError **error); +StardictDict 
*stardict_dict_new_from_info (StardictInfo *sdi, GError **error); +StardictInfo *stardict_dict_get_info (StardictDict *sd); +gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word); +StardictIterator *stardict_dict_search + (StardictDict *sd, const gchar *word, gboolean *success); + +// --- Dictionary iterators ---------------------------------------------------- + +struct stardict_iterator +{ + GObject parent_instance; + StardictDict * owner; //!< The related dictionary + gint64 offset; //!< Index within the dictionary +}; + +struct stardict_iterator_class +{ + GObjectClass parent_class; +}; + +GType stardict_iterator_get_type (void); +StardictIterator *stardict_iterator_new (StardictDict *sd, guint32 index); +const gchar *stardict_iterator_get_word (StardictIterator *sdi) G_GNUC_PURE; +StardictEntry *stardict_iterator_get_entry (StardictIterator *sdi); +gboolean stardict_iterator_is_valid (StardictIterator *sdi) G_GNUC_PURE; +gint64 stardict_iterator_get_offset (StardictIterator *sdi) G_GNUC_PURE; +void stardict_iterator_set_offset + (StardictIterator *sdi, gint64 offset, gboolean relative); + +/** Go to the next entry. */ +#define stardict_iterator_next(sdi) \ + (stardict_iterator_set_offset (sdi, 1, TRUE)) + +/** Go to the previous entry. 
*/ +#define stardict_iterator_prev(sdi) \ + (stardict_iterator_set_offset (sdi, -1, TRUE)) + +// --- Dictionary entries ------------------------------------------------------ + +typedef enum { + STARDICT_FIELD_MEANING = 'm', //!< Word's purely textual meaning + STARDICT_FIELD_LOCALE = 'l', //!< Locale-dependent meaning + STARDICT_FIELD_PANGO = 'g', //!< Pango text markup language + STARDICT_FIELD_PHONETIC = 't', //!< English phonetic string + STARDICT_FIELD_XDXF = 'x', //!< xdxf language + STARDICT_FIELD_YB_KANA = 'y', //!< Chinese YinBiao or Japanese KANA + STARDICT_FIELD_POWERWORD = 'k', //!< KingSoft PowerWord's data + STARDICT_FIELD_MEDIAWIKI = 'w', //!< MediaWiki markup language + STARDICT_FIELD_HTML = 'h', //!< HTML codes + STARDICT_FIELD_RESOURCE = 'r', //!< Resource file list + STARDICT_FIELD_WAV = 'W', //!< WAV file + STARDICT_FIELD_PICTURE = 'P', //!< Picture file + STARDICT_FIELD_X = 'X' //!< Reserved, experimental extensions +} StardictEntryFieldType; + +struct stardict_entry_field +{ + gchar type; //!< Type of entry (EntryFieldType) + gpointer data; //!< Raw data or null-terminated string + gsize data_size; //!< Size of data, including any \0 +}; + +struct stardict_entry +{ + GObject parent_instance; + GList * fields; //!< List of StardictEntryField's +}; + +struct stardict_entry_class +{ + GObjectClass parent_class; +}; + +GType stardict_entry_get_type (void); +const GList *stardict_entry_get_fields (StardictEntry *sde) G_GNUC_PURE; + + #endif /* ! STARDICT_H */ diff --git a/src/test-stardict.c b/src/test-stardict.c new file mode 100644 index 0000000..8f7f798 --- /dev/null +++ b/src/test-stardict.c @@ -0,0 +1,429 @@ +/* + * stardict.c: StarDict API test + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include +#include +#include + +#include +#include + +#include "stardict.h" + + +// --- Utilities --------------------------------------------------------------- + +// Adapted http://gezeiten.org/post/2009/04/Writing-Your-Own-GIO-Jobs +static gboolean remove_recursive (GFile *file, GError **error); + +static gboolean +remove_directory_contents (GFile *file, GError **error) +{ + GFileEnumerator *enumerator = + g_file_enumerate_children (file, "standard::*", + G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); + + if (!enumerator) + return FALSE; + + gboolean success = TRUE; + do + { + GError *err = NULL; + GFileInfo *child_info = + g_file_enumerator_next_file (enumerator, NULL, &err); + + if (!child_info) + { + if (err) + { + g_propagate_error (error, err); + success = FALSE; + } + break; + } + + GFile *child = g_file_resolve_relative_path + (file, g_file_info_get_name (child_info)); + success = remove_recursive (child, error); + g_object_unref (child); + g_object_unref (child_info); + } + while (success); + + g_object_unref (enumerator); + return success; +} + +static gboolean +remove_recursive (GFile *file, GError **error) +{ + g_return_val_if_fail (G_IS_FILE (file), FALSE); + + GFileInfo *info = g_file_query_info (file, "standard::*", + G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); + + if (!info) + return FALSE; + + GFileType type = g_file_info_get_file_type (info); + g_object_unref (info); + + if (type == G_FILE_TYPE_DIRECTORY && + 
!remove_directory_contents (file, error)) + return FALSE; + + return g_file_delete (file, NULL, error); +} + +static gchar * +generate_random_string (gsize length, GRand *rand) +{ + GString *s = g_string_sized_new (length); + while (length--) + g_string_append_c (s, g_rand_int_range (rand, 'a', 'z' + 1)); + return g_string_free (s, FALSE); +} + +static gpointer +generate_random_data (gsize length, GRand *rand) +{ + gchar *blob = g_malloc (length), *i = blob; + while (length--) + *i++ = g_rand_int_range (rand, 0, 256); + return blob; +} + +// --- Dictionary generation --------------------------------------------------- + +typedef struct dictionary Dictionary; +typedef struct test_entry TestEntry; + +struct dictionary +{ + GFile *tmp_dir; //!< A temporary dictionary + GFile *ifo_file; //!< The dictionary's .ifo file + GArray *data; //!< Array of TestEntry's +}; + +struct test_entry +{ + gchar *word; + gchar *meaning; + gpointer data; + gsize data_size; +}; + +static void +test_entry_free (TestEntry *te) +{ + g_free (te->word); + g_free (te->meaning); + g_free (te->data); +} + +static gint +test_entry_word_compare (gconstpointer a, gconstpointer b) +{ + return strcmp (((TestEntry *) a)->word, ((TestEntry *) b)->word); +} + +static GArray * +generate_dictionary_data (gsize length) +{ + GRand *rand = g_rand_new_with_seed (0); + + GArray *a = g_array_sized_new (FALSE, FALSE, sizeof (TestEntry), length); + g_array_set_clear_func (a, (GDestroyNotify) test_entry_free); + + while (length--) + { + TestEntry te; + + te.word = generate_random_string + (g_rand_int_range (rand, 1, 10), rand); + te.meaning = generate_random_string + (g_rand_int_range (rand, 1, 1024), rand); + + te.data_size = g_rand_int_range (rand, 0, 1048576); + te.data = generate_random_data (te.data_size, rand); + + g_array_append_val (a, te); + } + + g_rand_free (rand); + g_array_sort (a, test_entry_word_compare); + return a; +} + +static Dictionary * +dictionary_create (void) +{ + GError *error; + gchar 
*tmp_dir_path = g_dir_make_tmp ("stardict-test-XXXXXX", &error); + if (!tmp_dir_path) + g_error ("Failed to create a directory for the test dictionary: %s", + error->message); + + Dictionary *dict = g_malloc (sizeof *dict); + dict->tmp_dir = g_file_new_for_path (tmp_dir_path); + + static const gint dictionary_size = 8; + dict->data = generate_dictionary_data (dictionary_size); + GFile *dict_file = g_file_get_child (dict->tmp_dir, "test.dict"); + GFile *idx_file = g_file_get_child (dict->tmp_dir, "test.idx"); + + GFileOutputStream *dict_stream = g_file_replace (dict_file, + NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); + if (!dict_stream) + g_error ("Failed to create the .dict file: %s", error->message); + + GFileOutputStream *idx_stream = g_file_replace (idx_file, + NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); + if (!idx_stream) + g_error ("Failed to create the .idx file: %s", error->message); + + GDataOutputStream *dict_data + = g_data_output_stream_new (G_OUTPUT_STREAM (dict_stream)); + g_data_output_stream_set_byte_order + (dict_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + GDataOutputStream *idx_data + = g_data_output_stream_new (G_OUTPUT_STREAM (idx_stream)); + g_data_output_stream_set_byte_order + (idx_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + gint i; + gsize written; + for (i = 0; i < dictionary_size; i++) + { + TestEntry *te = &g_array_index (dict->data, TestEntry, i); + goffset offset = g_seekable_tell (G_SEEKABLE (dict_stream)); + + if (!g_data_output_stream_put_string (dict_data, + te->meaning, NULL, &error) + || !g_data_output_stream_put_byte (dict_data, '\0', NULL, &error) + || !g_output_stream_write_all (G_OUTPUT_STREAM (dict_stream), + te->data, te->data_size, &written, NULL, &error)) + g_error ("Write to dictionary failed: %s", error->message); + + if (!g_data_output_stream_put_string (idx_data, + te->word, NULL, &error) + || !g_data_output_stream_put_byte (idx_data, '\0', NULL, &error) + || !g_data_output_stream_put_uint32 (idx_data, 
offset, NULL, &error) + || !g_data_output_stream_put_uint32 (idx_data, + g_seekable_tell (G_SEEKABLE (dict_stream)) - offset, NULL, &error)) + g_error ("Write to index failed: %s", error->message); + } + + gint index_size = g_seekable_tell (G_SEEKABLE (idx_stream)); + + if (!g_output_stream_close (G_OUTPUT_STREAM (dict_stream), NULL, &error)) + g_error ("Failed to close the .dict file: %s", error->message); + if (!g_output_stream_close (G_OUTPUT_STREAM (idx_stream), NULL, &error)) + g_error ("Failed to close the .idx file: %s", error->message); + + g_object_unref (dict_data); + g_object_unref (idx_data); + + g_object_unref (dict_stream); + g_object_unref (idx_stream); + + gchar *ifo_contents = g_strdup_printf + ("StarDict's dict ifo file\n" + "version=3.0.0\n" + "bookname=Test Book\n" + "wordcount=%d\n" + "idxfilesize=%d\n" + "idxoffsetbits=32\n" + "author=Lyra Heartstrings\n" + "email=lyra@equestria.net\n" + "website=http://equestria.net\n" + "description=Test dictionary\n" + "date=21.12.2012\n" + "sametypesequence=mX\n", + dictionary_size, index_size); + + g_object_unref (dict_file); + g_object_unref (idx_file); + + dict->ifo_file = g_file_get_child (dict->tmp_dir, "test.ifo"); + if (!g_file_replace_contents (dict->ifo_file, + ifo_contents, strlen (ifo_contents), + NULL, FALSE, G_FILE_CREATE_NONE, NULL, NULL, &error)) + g_error ("Failed to create the .ifo file: %s", error->message); + g_free (ifo_contents); + + g_message ("Successfully created a test dictionary in %s", tmp_dir_path); + g_free (tmp_dir_path); + + return dict; +} + +static void +dictionary_destroy (Dictionary *dict) +{ + GError *error; + if (!remove_recursive (dict->tmp_dir, &error)) + g_error ("Failed to delete the temporary directory: %s", + error->message); + + g_message ("The test dictionary has been deleted"); + + g_object_unref (dict->tmp_dir); + g_object_unref (dict->ifo_file); + g_array_free (dict->data, TRUE); + g_free (dict); +} + +// --- Testing 
----------------------------------------------------------------- + +typedef struct dict_fixture DictFixture; + +struct dict_fixture +{ + StardictDict *dict; +}; + +static void +dict_setup (DictFixture *fixture, gconstpointer test_data) +{ + Dictionary *dict = (Dictionary *) test_data; + + gchar *ifo_filename = g_file_get_path (dict->ifo_file); + fixture->dict = stardict_dict_new (ifo_filename, NULL); + g_free (ifo_filename); +} + +static void +dict_teardown (DictFixture *fixture, G_GNUC_UNUSED gconstpointer test_data) +{ + g_object_unref (fixture->dict); +} + +static void +dict_test_list (gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + + gchar *tmp_path = g_file_get_path (dict->tmp_dir); + GList *dictionaries = stardict_list_dictionaries (tmp_path); + g_free (tmp_path); + + g_assert (dictionaries != NULL); + g_assert (dictionaries->next == NULL); + + StardictInfo *info = dictionaries->data; + GFile *ifo_file = g_file_new_for_path (stardict_info_get_path (info)); + g_assert (g_file_equal (ifo_file, dict->ifo_file) == TRUE); + g_object_unref (ifo_file); + + g_list_free_full (dictionaries, (GDestroyNotify) stardict_info_free); +} + +static void +dict_test_new (gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + + gchar *ifo_filename = g_file_get_path (dict->ifo_file); + StardictDict *sd = stardict_dict_new (ifo_filename, NULL); + g_free (ifo_filename); + + g_assert (sd != NULL); + g_object_unref (sd); +} + +static void +dict_test_data_entry (StardictDict *sd, TestEntry *entry) +{ + gboolean success; + StardictIterator *sdi = + stardict_dict_search (sd, entry->word, &success); + + g_assert (success == TRUE); + g_assert (sdi != NULL); + g_assert (stardict_iterator_is_valid (sdi)); + + const gchar *word = stardict_iterator_get_word (sdi); + g_assert_cmpstr (word, ==, entry->word); + + StardictEntry *sde = stardict_iterator_get_entry (sdi); + g_assert (sde != NULL); + + const GList *fields = stardict_entry_get_fields 
(sde); + const StardictEntryField *sdef; + g_assert (fields != NULL); + g_assert (fields->data != NULL); + + sdef = fields->data; + g_assert (sdef->type == 'm'); + g_assert_cmpstr (sdef->data, ==, entry->meaning); + + fields = fields->next; + g_assert (fields != NULL); + g_assert (fields->data != NULL); + + sdef = fields->data; + g_assert (sdef->type == 'X'); + g_assert_cmpuint (sdef->data_size, ==, entry->data_size); + g_assert (memcmp (sdef->data, entry->data, entry->data_size) == 0); + + fields = fields->next; + g_assert (fields == NULL); + + g_object_unref (sde); + g_object_unref (sdi); +} + +static void +dict_test_data (DictFixture *fixture, gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + GArray *data = dict->data; + StardictDict *sd = fixture->dict; + + guint i; + for (i = 0; i < data->len; i++) + { + TestEntry *entry = &g_array_index (data, TestEntry, i); + dict_test_data_entry (sd, entry); + } +} + +int +main (int argc, char *argv[]) +{ + g_test_init (&argc, &argv, NULL); + if (glib_check_version (2, 36, 0)) + g_type_init (); + + Dictionary *dict = dictionary_create (); + + g_test_add_data_func ("/dict/list", dict, dict_test_list); + g_test_add_data_func ("/dict/new", dict, dict_test_new); + + g_test_add ("/dict/data", DictFixture, dict, + dict_setup, dict_test_data, dict_teardown); + + int result = g_test_run (); + dictionary_destroy (dict); + return result; +} diff --git a/stardict.c b/stardict.c deleted file mode 100644 index 4439022..0000000 --- a/stardict.c +++ /dev/null @@ -1,1070 +0,0 @@ -/* - * stardict.c: StarDict API - * - * Copyright (c) 2013, Přemysl Janouch - * All rights reserved. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#include -#include -#include -#include - -#include -#include - -#include "stardict.h" - - -/** Describes a single entry in the dictionary index. */ -typedef struct stardict_index_entry StardictIndexEntry; - -/** Describes a single entry in the synonyms index. */ -typedef struct stardict_synonym_entry StardictSynonymEntry; - -/** Helper class for reading .ifo files. */ -typedef struct ifo_reader IfoReader; - - -typedef enum stardict_version StardictVersion; -enum stardict_version { SD_VERSION_2_4_2, SD_VERSION_3_0_0 }; - -struct stardict_info -{ - gchar * path; - StardictVersion version; - - gchar * book_name; - gulong word_count; - gulong syn_word_count; - gulong idx_filesize; - gulong idx_offset_bits; - gchar * author; - gchar * email; - gchar * website; - gchar * description; - gchar * date; - gchar * same_type_sequence; -}; - -struct stardict_index_entry -{ - gchar * name; //!< The word in utf-8 - guint64 data_offset; //!< Offset of the definition - guint32 data_size; //!< Size of the definition -}; - -struct stardict_synonym_entry -{ - gchar * word; //!< A synonymous word - guint32 original_word; //!< The original word's index -}; - struct ifo_reader -{ - gchar * data; //!< File data terminated with \0 - gchar * data_end; //!< Where the final \0 char. 
is - - gchar * start; //!< Start of the current token - - gchar * key; //!< The key (points into @a data) - gchar * value; //!< The value (points into @a data) -}; - -// --- Utilities --------------------------------------------------------------- - -/** Read the whole stream into a byte array. */ -static gboolean -stream_read_all (GByteArray *ba, GInputStream *is, GError **error) -{ - guint8 buffer[1024 * 64]; - gsize bytes_read; - - while (g_input_stream_read_all (is, buffer, sizeof buffer, - &bytes_read, NULL, error)) - { - g_byte_array_append (ba, buffer, bytes_read); - if (bytes_read < sizeof buffer) - return TRUE; - } - return FALSE; -} - -/** Read a null-terminated string from a data input stream. */ -static gchar * -stream_read_string (GDataInputStream *dis, GError **error) -{ - gsize length; - gchar *s = g_data_input_stream_read_upto (dis, "", 1, &length, NULL, error); - if (!s) - return NULL; - - GError *err = NULL; - g_data_input_stream_read_byte (dis, NULL, &err); - if (err) - { - g_free (s); - g_propagate_error (error, err); - return NULL; - } - - return s; -} - -/** String compare function used for StarDict indexes. */ -static inline gint -stardict_strcmp (const gchar *s1, const gchar *s2) -{ - gint a; - a = g_ascii_strcasecmp (s1, s2); - return a ? a : strcmp (s1, s2); -} - -/** After this statement, the element has been found and its index is stored - * in the variable "imid". */ -#define BINARY_SEARCH_BEGIN(max, compare) \ - gint imin = 0, imax = max, imid; \ - while (imin <= imax) { \ - imid = imin + (imax - imin) / 2; \ - gint cmp = compare; \ - if (cmp > 0) imin = imid + 1; \ - else if (cmp < 0) imax = imid - 1; \ - else { - -/** After this statement, the binary search has failed and "imin" stores - * the position where the element can be inserted. 
*/ -#define BINARY_SEARCH_END \ - } \ - } - -// --- Errors ------------------------------------------------------------------ - -GQuark -stardict_error_quark (void) -{ - return g_quark_from_static_string ("stardict-error-quark"); -} - -// --- IFO reader -------------------------------------------------------------- - -static gboolean -ifo_reader_init (IfoReader *ir, const gchar *path, GError **error) -{ - gsize length; - gchar *contents; - if (!g_file_get_contents (path, &contents, &length, error)) - return FALSE; - - static const char first_line[] = "StarDict's dict ifo file\n"; - if (length < sizeof first_line - 1 - || strncmp (contents, first_line, sizeof first_line - 1)) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: invalid header format", path); - return FALSE; - } - - ir->data = contents; - ir->start = contents + sizeof first_line - 1; - ir->data_end = contents + length; - return TRUE; -} - -static void -ifo_reader_free (IfoReader *ir) -{ - g_free (ir->data); -} - -static gint -ifo_reader_read (IfoReader *ir) -{ - ir->key = NULL; - ir->value = NULL; - - gchar *p; - for (p = ir->start; p < ir->data_end; p++) - { - if (*p == '\n') - { - if (!ir->key) - return -1; - - *p = 0; - ir->value = ir->start; - ir->start = p + 1; - return 1; - } - - if (*p == '=') - { - if (p == ir->start) - return -1; - - *p = 0; - ir->key = ir->start; - ir->start = p + 1; - } - } - - if (!ir->key) - { - if (p != ir->start) - return -1; - return 0; - } - - ir->value = ir->start; - ir->start = p; - return 1; -} - -// --- StardictInfo ------------------------------------------------------------ - -/** Return the filesystem path for the dictionary. */ -const gchar * -stardict_info_get_path (StardictInfo *sdi) -{ - return sdi->path; -} - -/** Return the name of the dictionary. */ -const gchar * -stardict_info_get_book_name (StardictInfo *sdi) -{ - return sdi->book_name; -} - -/** Return the word count of the dictionary. 
Note that this information comes - * from the .ifo file, while the dictionary could successfully load with - * a different count of word entries. - */ -gsize -stardict_info_get_word_count (StardictInfo *sdi) -{ - return sdi->word_count; -} - -/** Destroy the dictionary info object. */ -void -stardict_info_free (StardictInfo *sdi) -{ - g_free (sdi->path); - g_free (sdi->book_name); - g_free (sdi->author); - g_free (sdi->email); - g_free (sdi->website); - g_free (sdi->description); - g_free (sdi->date); - g_free (sdi->same_type_sequence); - g_free (sdi); -} - -#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) } - -static gboolean -load_ifo (StardictInfo *sti, const gchar *path, GError **error) -{ - IfoReader ir; - if (!ifo_reader_init (&ir, path, error)) - return FALSE; - - gboolean ret_val = FALSE; - memset (sti, 0, sizeof *sti); - - if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version")) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: version not specified", path); - goto error; - } - - if (!strcmp (ir.value, "2.4.2")) - sti->version = SD_VERSION_2_4_2; - else if (!strcmp (ir.value, "3.0.0")) - sti->version = SD_VERSION_3_0_0; - else - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: invalid version: %s", path, ir.value); - goto error; - } - - static const struct - { - const gchar *name; - enum { IFO_STRING, IFO_NUMBER } type; - size_t offset; - } - ifo_keys[] = - { - DEFINE_IFO_KEY ("bookname", STRING, book_name), - DEFINE_IFO_KEY ("wordcount", NUMBER, word_count), - DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count), - DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize), - DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits), - DEFINE_IFO_KEY ("author", STRING, author), - DEFINE_IFO_KEY ("email", STRING, email), - DEFINE_IFO_KEY ("website", STRING, website), - DEFINE_IFO_KEY ("description", STRING, description), - DEFINE_IFO_KEY ("date", STRING, date), - 
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence) - }; - - gint ret; - while ((ret = ifo_reader_read (&ir)) == 1) - { - guint i; - for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) - if (!strcmp (ir.key, ifo_keys[i].name)) - break; - - if (i == G_N_ELEMENTS (ifo_keys)) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: unknown key, ignoring: %s", path, ir.key); - continue; - } - - if (ifo_keys[i].type == IFO_STRING) - { - G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset) - = g_strdup (ir.value); - continue; - } - - // Otherwise it has to be IFO_NUMBER - gchar *end; - gulong wc = strtol (ir.value, &end, 10); - if (*end) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: invalid integer", path); - goto error; - } - - G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = wc; - } - - if (ret == -1) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: option format error", path); - goto error; - } - - ret_val = TRUE; - - // FIXME check for zeros, don't assume that 0 means for "not set" - if (!sti->book_name || !*sti->book_name) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: no book name specified\n", path); - ret_val = FALSE; - } - if (!sti->word_count) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: word count not specified\n", path); - ret_val = FALSE; - } - if (!sti->idx_filesize) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: .idx file size not specified\n", path); - ret_val = FALSE; - } - - if (!sti->idx_offset_bits) - sti->idx_offset_bits = 32; - else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64) - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "%s: wrong index offset bits: %lu\n", path, sti->idx_offset_bits); - ret_val = FALSE; - } - -error: - if (!ret_val) - { - guint i; - for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) - if (ifo_keys[i].type == 
IFO_STRING) - g_free (G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset)); - } - else - sti->path = g_strdup (path); - - ifo_reader_free (&ir); - return ret_val; -} - -/** List all dictionary files located in a path. - * @return GList. Deallocate the list with: - * @code - * g_list_free_full ((GDestroyNotify) stardict_info_free); - * @endcode - */ -GList * -stardict_list_dictionaries (const gchar *path) -{ - GPatternSpec *ps = g_pattern_spec_new ("*.ifo"); - GDir *dir = g_dir_open (path, 0, NULL); - g_return_val_if_fail (dir != NULL, NULL); - - GList *dicts = NULL; - const gchar *name; - while ((name = g_dir_read_name (dir))) - { - if (!g_pattern_match_string (ps, name)) - continue; - - gchar *filename = g_build_filename (path, name, NULL); - StardictInfo *ifo = g_new (StardictInfo, 1); - if (load_ifo (ifo, filename, NULL)) - dicts = g_list_append (dicts, ifo); - else - g_free (ifo); - g_free (filename); - } - g_dir_close (dir); - g_pattern_spec_free (ps); - return dicts; -} - -// --- StardictDict ------------------------------------------------------------ - -G_DEFINE_TYPE (StardictDict, stardict_dict, G_TYPE_OBJECT) - -static void -stardict_dict_finalize (GObject *self) -{ - StardictDict *sd = STARDICT_DICT (self); - - stardict_info_free (sd->info); - g_array_free (sd->index, TRUE); - g_array_free (sd->synonyms, TRUE); - - if (sd->mapped_dict) - g_mapped_file_unref (sd->mapped_dict); - else - g_free (sd->dict); - - G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self); -} - -static void -stardict_dict_class_init (StardictDictClass *klass) -{ - G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize; -} - -static void -stardict_dict_init (G_GNUC_UNUSED StardictDict *sd) -{ -} - -/** Load a StarDict dictionary. 
- * @param[in] filename Path to the .ifo file - */ -StardictDict * -stardict_dict_new (const gchar *filename, GError **error) -{ - StardictInfo *ifo = g_new (StardictInfo, 1); - if (!load_ifo (ifo, filename, error)) - { - g_free (ifo); - return NULL; - } - - StardictDict *sd = stardict_dict_new_from_info (ifo, error); - if (!sd) stardict_info_free (ifo); - return sd; -} - -/** Return information about a loaded dictionary. */ -StardictInfo * -stardict_dict_get_info (StardictDict *sd) -{ - g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); - return sd->info; -} - -/** Load a StarDict index from a GIO input stream. */ -static gboolean -load_idx_internal (StardictDict *sd, GInputStream *is, GError **error) -{ - GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is)); - g_data_input_stream_set_byte_order (dis, - G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); - - StardictIndexEntry entry; - GError *err = NULL; - // Ignoring "wordcount", just reading as long as we can - while ((entry.name = stream_read_string (dis, &err))) - { - if (sd->info->idx_offset_bits == 32) - entry.data_offset - = g_data_input_stream_read_uint32 (dis, NULL, &err); - else - entry.data_offset - = g_data_input_stream_read_uint64 (dis, NULL, &err); - if (err) - goto error; - - entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err); - if (err) - goto error; - - g_array_append_val (sd->index, entry); - } - - if (err != NULL) - goto error; - - g_object_unref (dis); - return TRUE; - -error: - g_propagate_error (error, err); - g_free (entry.name); - g_object_unref (dis); - return FALSE; -} - -/** Load a StarDict index. 
*/ -static gboolean -load_idx (StardictDict *sd, const gchar *filename, - gboolean gzipped, GError **error) -{ - gboolean ret_val = FALSE; - GFile *file = g_file_new_for_path (filename); - GFileInputStream *fis = g_file_read (file, NULL, error); - - if (!fis) - goto cannot_open; - - if (gzipped) - { - GZlibDecompressor *zd - = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); - GInputStream *cis = g_converter_input_stream_new - (G_INPUT_STREAM (fis), G_CONVERTER (zd)); - - ret_val = load_idx_internal (sd, cis, error); - - g_object_unref (cis); - g_object_unref (zd); - } - else - ret_val = load_idx_internal (sd, G_INPUT_STREAM (fis), error); - - g_object_unref (fis); -cannot_open: - g_object_unref (file); - return ret_val; -} - -static gboolean -load_syn (StardictDict *sd, const gchar *filename, GError **error) -{ - gboolean ret_val = FALSE; - GFile *file = g_file_new_for_path (filename); - GFileInputStream *fis = g_file_read (file, NULL, error); - - if (!fis) - goto cannot_open; - - GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis)); - g_data_input_stream_set_byte_order (dis, - G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); - - StardictSynonymEntry entry; - GError *err = NULL; - // Ignoring "synwordcount", just reading as long as we can - while ((entry.word = stream_read_string (dis, &err))) - { - entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err); - if (err) - break; - - g_array_append_val (sd->synonyms, entry); - } - - if (err != NULL) - { - g_free (entry.word); - g_propagate_error (error, err); - } - else - ret_val = TRUE; - - g_object_unref (dis); - g_object_unref (fis); -cannot_open: - g_object_unref (file); - return ret_val; -} - -/** Destroy an index entry. */ -static void -index_destroy_cb (gpointer sde) -{ - StardictIndexEntry *e = sde; - g_free (e->name); -} - -/** Destroy a synonym entry. 
*/ -static void -syn_destroy_cb (gpointer sde) -{ - StardictSynonymEntry *e = sde; - g_free (e->word); -} - -/** Load StarDict dictionary data. */ -static gboolean -load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped, - GError **error) -{ - if (gzipped) - { - gboolean ret_val = FALSE; - GFile *file = g_file_new_for_path (filename); - GFileInputStream *fis = g_file_read (file, NULL, error); - - if (!fis) - goto cannot_open; - - // Just read it all, as it is, into memory - GByteArray *ba = g_byte_array_new (); - GZlibDecompressor *zd - = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); - GInputStream *cis = g_converter_input_stream_new - (G_INPUT_STREAM (fis), G_CONVERTER (zd)); - - ret_val = stream_read_all (ba, cis, error); - - g_object_unref (cis); - g_object_unref (zd); - - if (ret_val) - { - sd->dict_length = ba->len; - sd->dict = g_byte_array_free (ba, FALSE); - } - else - g_byte_array_free (ba, TRUE); - - g_object_unref (fis); -cannot_open: - g_object_unref (file); - return ret_val; - } - - sd->mapped_dict = g_mapped_file_new (filename, FALSE, error); - if (!sd->mapped_dict) - return FALSE; - - sd->dict_length = g_mapped_file_get_length (sd->mapped_dict); - sd->dict = g_mapped_file_get_contents (sd->mapped_dict); - return TRUE; -} - -/** Load a StarDict dictionary. - * @param[in] sdi Parsed .ifo data. - */ -StardictDict * -stardict_dict_new_from_info (StardictInfo *sdi, GError **error) -{ - g_return_val_if_fail (sdi != NULL, NULL); - - StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL); - sd->info = sdi; - sd->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry)); - g_array_set_clear_func (sd->index, index_destroy_cb); - sd->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry)); - g_array_set_clear_func (sd->synonyms, syn_destroy_cb); - - const gchar *dot = strrchr (sdi->path, '.'); - gchar *base = dot ? 
g_strndup (sdi->path, dot - sdi->path) - : g_strdup (sdi->path); - - gchar *base_idx = g_strconcat (base, ".idx", NULL); - gboolean ret = FALSE; - if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) - ret = load_idx (sd, base_idx, FALSE, error); - else - { - gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL); - g_free (base_idx); - base_idx = base_idx_gz; - - if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) - ret = load_idx (sd, base_idx, TRUE, error); - else - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, - "%s: cannot find index file", sdi->path); - } - } - g_free (base_idx); - - if (!ret) - goto error; - - gchar *base_dict = g_strconcat (base, ".dict", NULL); - ret = FALSE; - if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) - ret = load_dict (sd, base_dict, FALSE, error); - else - { - gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL); - g_free (base_dict); - base_dict = base_dict_dz; - - if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) - ret = load_dict (sd, base_dict, TRUE, error); - else - { - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, - "%s: cannot find dict file", sdi->path); - } - } - g_free (base_dict); - - if (!ret) - goto error; - - gchar *base_syn = g_strconcat (base, ".syn", NULL); - if (g_file_test (base_syn, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) - load_syn (sd, base_syn, NULL); - g_free (base_syn); - - g_free (base); - return sd; - -error: - g_array_free (sd->index, TRUE); - g_free (base); - g_object_unref (sd); - return NULL; -} - -/** Return words for which the argument is a synonym of or NULL - * if there are no such words. 
- */ -gchar ** -stardict_dict_get_synonyms (StardictDict *sd, const gchar *word) -{ - BINARY_SEARCH_BEGIN (sd->synonyms->len - 1, stardict_strcmp (word, - g_array_index (sd->synonyms, StardictSynonymEntry, imid).word)) - - // Back off to the first matching entry - while (imid > 0 && !stardict_strcmp (word, - g_array_index (sd->synonyms, StardictSynonymEntry, --imid).word)); - - GPtrArray *array = g_ptr_array_new (); - - // And add all matching entries from that position on to the array - do - g_ptr_array_add (array, g_strdup (g_array_index - (sd->index, StardictIndexEntry, g_array_index - (sd->synonyms, StardictSynonymEntry, ++imid).original_word).name)); - while ((guint) imid < sd->synonyms->len - 1 && !stardict_strcmp (word, - g_array_index (sd->synonyms, StardictSynonymEntry, imid + 1).word)); - - return (gchar **) g_ptr_array_free (array, FALSE); - - BINARY_SEARCH_END - - return NULL; -} - -/** Search for a word. - * @param[in] word The word in utf-8 encoding - * @param[out] success TRUE if found - * @return An iterator object pointing to the word, or where it would be - */ -StardictIterator * -stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success) -{ - BINARY_SEARCH_BEGIN (sd->index->len - 1, stardict_strcmp (word, - g_array_index (sd->index, StardictIndexEntry, imid).name)) - - if (success) *success = TRUE; - return stardict_iterator_new (sd, imid); - - BINARY_SEARCH_END - - if (success) *success = FALSE; - return stardict_iterator_new (sd, imin); -} - -static void -stardict_entry_field_free (StardictEntryField *sef) -{ - g_free (sef->data); - g_slice_free1 (sizeof *sef, sef); -} - -static StardictEntryField * -read_entry (gchar type, const gchar **entry_iterator, - const gchar *end, gboolean is_final) -{ - const gchar *entry = *entry_iterator; - if (g_ascii_islower (type)) - { - GString *data = g_string_new (NULL); - - if (is_final) - { - g_string_append_len (data, entry, end - entry); - entry += end - entry; - } - else - { - gint c = 
EOF; - while (entry < end && (c = *entry++)) - g_string_append_c (data, c); - - if (c != '\0') - return (gpointer) g_string_free (data, TRUE); - } - - StardictEntryField *sef = g_slice_alloc (sizeof *sef); - sef->type = type; - sef->data_size = data->len + 1; - sef->data = g_string_free (data, FALSE); - *entry_iterator = entry; - return sef; - } - - gsize length; - if (is_final) - length = end - entry; - else - { - if (entry + sizeof (guint32) > end) - return NULL; - - length = GUINT32_FROM_BE (*(guint32 *) entry); - entry += sizeof (guint32); - - if (entry + length > end) - return NULL; - } - - StardictEntryField *sef = g_slice_alloc (sizeof *sef); - sef->type = type; - sef->data_size = length; - sef->data = memcpy (g_malloc (length), entry, length); - *entry_iterator = entry + length; - return sef; -} - -static GList * -read_entries (const gchar *entry, gsize entry_size, GError **error) -{ - const gchar *end = entry + entry_size; - GList *result = NULL; - - while (entry < end) - { - gchar type = *entry++; - StardictEntryField *sef = read_entry (type, &entry, end, FALSE); - if (!sef) - goto error; - result = g_list_append (result, sef); - } - - return result; - -error: - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "invalid data entry"); - g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); - return NULL; -} - -static GList * -read_entries_sts (const gchar *entry, gsize entry_size, - const gchar *sts, GError **error) -{ - const gchar *end = entry + entry_size; - GList *result = NULL; - - while (*sts) - { - gchar type = *sts++; - StardictEntryField *sef = read_entry (type, &entry, end, !*sts); - if (!sef) - goto error; - result = g_list_append (result, sef); - } - - return result; - -error: - g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, - "invalid data entry"); - g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); - return NULL; -} - -/** Return the data for the specified offset in the 
index. Unsafe. */ -static StardictEntry * -stardict_dict_get_entry (StardictDict *sd, guint32 offset) -{ - // TODO cache the entries - StardictIndexEntry *sie = &g_array_index (sd->index, - StardictIndexEntry, offset); - - g_return_val_if_fail (sie->data_offset + sie->data_size - <= sd->dict_length, NULL); - - GList *entries; - if (sd->info->same_type_sequence) - entries = read_entries_sts (sd->dict + sie->data_offset, - sie->data_size, sd->info->same_type_sequence, NULL); - else - entries = read_entries (sd->dict + sie->data_offset, - sie->data_size, NULL); - - if (!entries) - return NULL; - - StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL); - se->fields = entries; - return se; -} - -// --- StardictEntry ----------------------------------------------------------- - -G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT) - -static void -stardict_entry_finalize (GObject *self) -{ - StardictEntry *sde = STARDICT_ENTRY (self); - - g_list_free_full (sde->fields, (GDestroyNotify) stardict_entry_field_free); - - G_OBJECT_CLASS (stardict_entry_parent_class)->finalize (self); -} - -static void -stardict_entry_class_init (StardictEntryClass *klass) -{ - G_OBJECT_CLASS (klass)->finalize = stardict_entry_finalize; -} - -static void -stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde) -{ -} - -/** Return the entries present within the entry. 
- * @return GList - */ -const GList * -stardict_entry_get_fields (StardictEntry *sde) -{ - g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL); - return sde->fields; -} - -// --- StardictIterator--------------------------------------------------------- - -G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT) - -static void -stardict_iterator_finalize (GObject *self) -{ - StardictIterator *si = STARDICT_ITERATOR (self); - - g_object_unref (si->owner); - - G_OBJECT_CLASS (stardict_iterator_parent_class)->finalize (self); -} - -static void -stardict_iterator_class_init (StardictIteratorClass *klass) -{ - G_OBJECT_CLASS (klass)->finalize = stardict_iterator_finalize; -} - -static void -stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd) -{ -} - -/** Create a new iterator for the dictionary with offset @a offset. */ -StardictIterator * -stardict_iterator_new (StardictDict *sd, guint32 offset) -{ - g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); - - StardictIterator *si = g_object_new (STARDICT_TYPE_ITERATOR, NULL); - si->owner = g_object_ref (sd); - si->offset = offset; - return si; -} - -/** Return the word in the index that the iterator points at, or NULL. */ -const gchar * -stardict_iterator_get_word (StardictIterator *sdi) -{ - g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); - if (!stardict_iterator_is_valid (sdi)) - return NULL; - return g_array_index (sdi->owner->index, - StardictIndexEntry, sdi->offset).name; -} - -/** Return the dictionary entry that the iterator points at, or NULL. */ -StardictEntry * -stardict_iterator_get_entry (StardictIterator *sdi) -{ - g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); - if (!stardict_iterator_is_valid (sdi)) - return FALSE; - return stardict_dict_get_entry (sdi->owner, sdi->offset); -} - -/** Return whether the iterator points to a valid index entry. 
*/ -gboolean -stardict_iterator_is_valid (StardictIterator *sdi) -{ - g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE); - return sdi->offset >= 0 && sdi->offset < sdi->owner->index->len; -} - -/** Return the offset of the iterator within the dictionary index. */ -gint64 -stardict_iterator_get_offset (StardictIterator *sdi) -{ - g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1); - return sdi->offset; -} - -/** Set the offset of the iterator. */ -void -stardict_iterator_set_offset - (StardictIterator *sdi, gint64 offset, gboolean relative) -{ - g_return_if_fail (STARDICT_IS_ITERATOR (sdi)); - sdi->offset = relative ? sdi->offset + offset : offset; -} diff --git a/stardict.h b/stardict.h deleted file mode 100644 index aef27fd..0000000 --- a/stardict.h +++ /dev/null @@ -1,215 +0,0 @@ -/* - * stardict.h: StarDict API - * - * This module doesn't cover all the functionality available to StarDict - * dictionaries, it should however be good enough for most of them that are - * freely available on the Internet. - * - * Copyright (c) 2013, Přemysl Janouch - * All rights reserved. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#ifndef STARDICT_H -#define STARDICT_H - -/** An object intended for interacting with a dictionary. 
*/ -typedef struct stardict_dict StardictDict; -typedef struct stardict_dict_class StardictDictClass; - -/** Overall information about a particular dictionary. */ -typedef struct stardict_info StardictInfo; - -/** Handles the task of moving around the dictionary. */ -typedef struct stardict_iterator StardictIterator; -typedef struct stardict_iterator_class StardictIteratorClass; - -/** Contains the decoded data for a single word definition. */ -typedef struct stardict_entry StardictEntry; -typedef struct stardict_entry_class StardictEntryClass; - -/** A single field of a word definition. */ -typedef struct stardict_entry_field StardictEntryField; - -/* GObject boilerplate. */ -#define STARDICT_TYPE_DICT (stardict_dict_get_type ()) -#define STARDICT_DICT(obj) \ - (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ - STARDICT_TYPE_DICT, StardictDict)) -#define STARDICT_IS_DICT(obj) \ - (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ - STARDICT_TYPE_DICT)) -#define STARDICT_DICT_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST ((klass), \ - STARDICT_TYPE_DICT, StardictDictClass)) -#define STARDICT_IS_DICT_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_TYPE ((klass), \ - STARDICT_TYPE_DICT)) -#define STARDICT_DICT_GET_CLASS(obj) \ - (G_TYPE_INSTANCE_GET_CLASS ((obj), \ - STARDICT_TYPE_DICT, StardictDictClass)) - -#define STARDICT_TYPE_ITERATOR (stardict_iterator_get_type ()) -#define STARDICT_ITERATOR(obj) \ - (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ - STARDICT_TYPE_ITERATOR, StardictIterator)) -#define STARDICT_IS_ITERATOR(obj) \ - (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ - STARDICT_TYPE_ITERATOR)) -#define STARDICT_ITERATOR_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST ((klass), \ - STARDICT_TYPE_ITERATOR, StardictIteratorClass)) -#define STARDICT_IS_ITERATOR_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_TYPE ((klass), \ - STARDICT_TYPE_ITERATOR)) -#define STARDICT_ITERATOR_GET_CLASS(obj) \ - (G_TYPE_INSTANCE_GET_CLASS ((obj), \ - STARDICT_TYPE_ITERATOR, StardictIteratorClass)) - -#define STARDICT_TYPE_ENTRY 
(stardict_entry_get_type ()) -#define STARDICT_ENTRY(obj) \ - (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ - STARDICT_TYPE_ENTRY, StardictEntry)) -#define STARDICT_IS_ENTRY(obj) \ - (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ - STARDICT_TYPE_ENTRY)) -#define STARDICT_ENTRY_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST ((klass), \ - STARDICT_TYPE_ENTRY, StardictEntryClass)) -#define STARDICT_IS_ENTRY_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_TYPE ((klass), \ - STARDICT_TYPE_ENTRY)) -#define STARDICT_ENTRY_GET_CLASS(obj) \ - (G_TYPE_INSTANCE_GET_CLASS ((obj), \ - STARDICT_TYPE_ENTRY, StardictEntryClass)) - -// --- Errors ------------------------------------------------------------------ - -/** General error type. */ -typedef enum { - STARDICT_ERROR_FILE_NOT_FOUND, //!< Some file was not found - STARDICT_ERROR_INVALID_DATA //!< Dictionary contains invalid data -} StardictError; - -#define STARDICT_ERROR (stardict_error_quark ()) - -GQuark stardict_error_quark (void); - -// --- Dictionary information -------------------------------------------------- - -const gchar *stardict_info_get_path (StardictInfo *sdi) G_GNUC_PURE; -const gchar *stardict_info_get_book_name (StardictInfo *sdi) G_GNUC_PURE; -gsize stardict_info_get_word_count (StardictInfo *sd) G_GNUC_PURE; -void stardict_info_free (StardictInfo *sdi); - -GList *stardict_list_dictionaries (const gchar *path); - -// --- Dictionaries ------------------------------------------------------------ - -struct stardict_dict -{ - GObject parent_instance; - StardictInfo * info; //!< General information about the dict - GArray * index; //!< Word index - GArray * synonyms; //!< Synonyms - gpointer dict; //!< Dictionary data - gsize dict_length; //!< Length of the dict data in bytes - GMappedFile * mapped_dict; //!< Memory map handle -}; - -struct stardict_dict_class -{ - GObjectClass parent_class; -}; - -GType stardict_dict_get_type (void); -StardictDict *stardict_dict_new (const gchar *filename, GError **error); -StardictDict 
*stardict_dict_new_from_info (StardictInfo *sdi, GError **error); -StardictInfo *stardict_dict_get_info (StardictDict *sd); -gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word); -StardictIterator *stardict_dict_search - (StardictDict *sd, const gchar *word, gboolean *success); - -// --- Dictionary iterators ---------------------------------------------------- - -struct stardict_iterator -{ - GObject parent_instance; - StardictDict * owner; //!< The related dictionary - gint64 offset; //!< Index within the dictionary -}; - -struct stardict_iterator_class -{ - GObjectClass parent_class; -}; - -GType stardict_iterator_get_type (void); -StardictIterator *stardict_iterator_new (StardictDict *sd, guint32 index); -const gchar *stardict_iterator_get_word (StardictIterator *sdi) G_GNUC_PURE; -StardictEntry *stardict_iterator_get_entry (StardictIterator *sdi); -gboolean stardict_iterator_is_valid (StardictIterator *sdi) G_GNUC_PURE; -gint64 stardict_iterator_get_offset (StardictIterator *sdi) G_GNUC_PURE; -void stardict_iterator_set_offset - (StardictIterator *sdi, gint64 offset, gboolean relative); - -/** Go to the next entry. */ -#define stardict_iterator_next(sdi) \ - (stardict_iterator_set_offset (sdi, 1, TRUE)) - -/** Go to the previous entry. 
*/ -#define stardict_iterator_prev(sdi) \ - (stardict_iterator_set_offset (sdi, -1, TRUE)) - -// --- Dictionary entries ------------------------------------------------------ - -typedef enum { - STARDICT_FIELD_MEANING = 'm', //!< Word's purely textual meaning - STARDICT_FIELD_LOCALE = 'l', //!< Locale-dependent meaning - STARDICT_FIELD_PANGO = 'g', //!< Pango text markup language - STARDICT_FIELD_PHONETIC = 't', //!< English phonetic string - STARDICT_FIELD_XDXF = 'x', //!< xdxf language - STARDICT_FIELD_YB_KANA = 'y', //!< Chinese YinBiao or Japanese KANA - STARDICT_FIELD_POWERWORD = 'k', //!< KingSoft PowerWord's data - STARDICT_FIELD_MEDIAWIKI = 'w', //!< MediaWiki markup language - STARDICT_FIELD_HTML = 'h', //!< HTML codes - STARDICT_FIELD_RESOURCE = 'r', //!< Resource file list - STARDICT_FIELD_WAV = 'W', //!< WAV file - STARDICT_FIELD_PICTURE = 'P', //!< Picture file - STARDICT_FIELD_X = 'X' //!< Reserved, experimental extensions -} StardictEntryFieldType; - -struct stardict_entry_field -{ - gchar type; //!< Type of entry (EntryFieldType) - gpointer data; //!< Raw data or null-terminated string - gsize data_size; //!< Size of data, includding any \0 -}; - -struct stardict_entry -{ - GObject parent_instance; - GList * fields; //!< List of StardictEntryField's -}; - -struct stardict_entry_class -{ - GObjectClass parent_class; -}; - -GType stardict_entry_get_type (void); -const GList *stardict_entry_get_fields (StardictEntry *sde) G_GNUC_PURE; - - #endif /* ! STARDICT_H */ diff --git a/test-stardict.c b/test-stardict.c deleted file mode 100644 index 8f7f798..0000000 --- a/test-stardict.c +++ /dev/null @@ -1,429 +0,0 @@ -/* - * stardict.c: StarDict API test - * - * Copyright (c) 2013, Přemysl Janouch - * All rights reserved. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#include -#include -#include - -#include -#include - -#include "stardict.h" - - -// --- Utilities --------------------------------------------------------------- - -// Adapted http://gezeiten.org/post/2009/04/Writing-Your-Own-GIO-Jobs -static gboolean remove_recursive (GFile *file, GError **error); - -static gboolean -remove_directory_contents (GFile *file, GError **error) -{ - GFileEnumerator *enumerator = - g_file_enumerate_children (file, "standard::*", - G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); - - if (!enumerator) - return FALSE; - - gboolean success = TRUE; - do - { - GError *err = NULL; - GFileInfo *child_info = - g_file_enumerator_next_file (enumerator, NULL, &err); - - if (!child_info) - { - if (err) - { - g_propagate_error (error, err); - success = FALSE; - } - break; - } - - GFile *child = g_file_resolve_relative_path - (file, g_file_info_get_name (child_info)); - success = remove_recursive (child, error); - g_object_unref (child); - g_object_unref (child_info); - } - while (success); - - g_object_unref (enumerator); - return success; -} - -static gboolean -remove_recursive (GFile *file, GError **error) -{ - g_return_val_if_fail (G_IS_FILE (file), FALSE); - - GFileInfo *info = g_file_query_info (file, "standard::*", - G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); - - if (!info) - return FALSE; - - GFileType type = g_file_info_get_file_type (info); - g_object_unref (info); - - if (type == G_FILE_TYPE_DIRECTORY && - 
!remove_directory_contents (file, error)) - return FALSE; - - return g_file_delete (file, NULL, error); -} - -static gchar * -generate_random_string (gsize length, GRand *rand) -{ - GString *s = g_string_sized_new (length); - while (length--) - g_string_append_c (s, g_rand_int_range (rand, 'a', 'z' + 1)); - return g_string_free (s, FALSE); -} - -static gpointer -generate_random_data (gsize length, GRand *rand) -{ - gchar *blob = g_malloc (length), *i = blob; - while (length--) - *i++ = g_rand_int_range (rand, 0, 256); - return blob; -} - -// --- Dictionary generation --------------------------------------------------- - -typedef struct dictionary Dictionary; -typedef struct test_entry TestEntry; - -struct dictionary -{ - GFile *tmp_dir; //!< A temporary dictionary - GFile *ifo_file; //!< The dictionary's .ifo file - GArray *data; //!< Array of TestEntry's -}; - -struct test_entry -{ - gchar *word; - gchar *meaning; - gpointer data; - gsize data_size; -}; - -static void -test_entry_free (TestEntry *te) -{ - g_free (te->word); - g_free (te->meaning); - g_free (te->data); -} - -static gint -test_entry_word_compare (gconstpointer a, gconstpointer b) -{ - return strcmp (((TestEntry *) a)->word, ((TestEntry *) b)->word); -} - -static GArray * -generate_dictionary_data (gsize length) -{ - GRand *rand = g_rand_new_with_seed (0); - - GArray *a = g_array_sized_new (FALSE, FALSE, sizeof (TestEntry), length); - g_array_set_clear_func (a, (GDestroyNotify) test_entry_free); - - while (length--) - { - TestEntry te; - - te.word = generate_random_string - (g_rand_int_range (rand, 1, 10), rand); - te.meaning = generate_random_string - (g_rand_int_range (rand, 1, 1024), rand); - - te.data_size = g_rand_int_range (rand, 0, 1048576); - te.data = generate_random_data (te.data_size, rand); - - g_array_append_val (a, te); - } - - g_rand_free (rand); - g_array_sort (a, test_entry_word_compare); - return a; -} - -static Dictionary * -dictionary_create (void) -{ - GError *error; - gchar 
*tmp_dir_path = g_dir_make_tmp ("stardict-test-XXXXXX", &error); - if (!tmp_dir_path) - g_error ("Failed to create a directory for the test dictionary: %s", - error->message); - - Dictionary *dict = g_malloc (sizeof *dict); - dict->tmp_dir = g_file_new_for_path (tmp_dir_path); - - static const gint dictionary_size = 8; - dict->data = generate_dictionary_data (dictionary_size); - GFile *dict_file = g_file_get_child (dict->tmp_dir, "test.dict"); - GFile *idx_file = g_file_get_child (dict->tmp_dir, "test.idx"); - - GFileOutputStream *dict_stream = g_file_replace (dict_file, - NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); - if (!dict_stream) - g_error ("Failed to create the .dict file: %s", error->message); - - GFileOutputStream *idx_stream = g_file_replace (idx_file, - NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); - if (!idx_stream) - g_error ("Failed to create the .idx file: %s", error->message); - - GDataOutputStream *dict_data - = g_data_output_stream_new (G_OUTPUT_STREAM (dict_stream)); - g_data_output_stream_set_byte_order - (dict_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); - - GDataOutputStream *idx_data - = g_data_output_stream_new (G_OUTPUT_STREAM (idx_stream)); - g_data_output_stream_set_byte_order - (idx_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); - - gint i; - gsize written; - for (i = 0; i < dictionary_size; i++) - { - TestEntry *te = &g_array_index (dict->data, TestEntry, i); - goffset offset = g_seekable_tell (G_SEEKABLE (dict_stream)); - - if (!g_data_output_stream_put_string (dict_data, - te->meaning, NULL, &error) - || !g_data_output_stream_put_byte (dict_data, '\0', NULL, &error) - || !g_output_stream_write_all (G_OUTPUT_STREAM (dict_stream), - te->data, te->data_size, &written, NULL, &error)) - g_error ("Write to dictionary failed: %s", error->message); - - if (!g_data_output_stream_put_string (idx_data, - te->word, NULL, &error) - || !g_data_output_stream_put_byte (idx_data, '\0', NULL, &error) - || !g_data_output_stream_put_uint32 (idx_data, 
offset, NULL, &error) - || !g_data_output_stream_put_uint32 (idx_data, - g_seekable_tell (G_SEEKABLE (dict_stream)) - offset, NULL, &error)) - g_error ("Write to index failed: %s", error->message); - } - - gint index_size = g_seekable_tell (G_SEEKABLE (idx_stream)); - - if (!g_output_stream_close (G_OUTPUT_STREAM (dict_stream), NULL, &error)) - g_error ("Failed to close the .dict file: %s", error->message); - if (!g_output_stream_close (G_OUTPUT_STREAM (idx_stream), NULL, &error)) - g_error ("Failed to close the .idx file: %s", error->message); - - g_object_unref (dict_data); - g_object_unref (idx_data); - - g_object_unref (dict_stream); - g_object_unref (idx_stream); - - gchar *ifo_contents = g_strdup_printf - ("StarDict's dict ifo file\n" - "version=3.0.0\n" - "bookname=Test Book\n" - "wordcount=%d\n" - "idxfilesize=%d\n" - "idxoffsetbits=32\n" - "author=Lyra Heartstrings\n" - "email=lyra@equestria.net\n" - "website=http://equestria.net\n" - "description=Test dictionary\n" - "date=21.12.2012\n" - "sametypesequence=mX\n", - dictionary_size, index_size); - - g_object_unref (dict_file); - g_object_unref (idx_file); - - dict->ifo_file = g_file_get_child (dict->tmp_dir, "test.ifo"); - if (!g_file_replace_contents (dict->ifo_file, - ifo_contents, strlen (ifo_contents), - NULL, FALSE, G_FILE_CREATE_NONE, NULL, NULL, &error)) - g_error ("Failed to create the .ifo file: %s", error->message); - g_free (ifo_contents); - - g_message ("Successfully created a test dictionary in %s", tmp_dir_path); - g_free (tmp_dir_path); - - return dict; -} - -static void -dictionary_destroy (Dictionary *dict) -{ - GError *error; - if (!remove_recursive (dict->tmp_dir, &error)) - g_error ("Failed to delete the temporary directory: %s", - error->message); - - g_message ("The test dictionary has been deleted"); - - g_object_unref (dict->tmp_dir); - g_object_unref (dict->ifo_file); - g_array_free (dict->data, TRUE); - g_free (dict); -} - -// --- Testing 
----------------------------------------------------------------- - -typedef struct dict_fixture DictFixture; - -struct dict_fixture -{ - StardictDict *dict; -}; - -static void -dict_setup (DictFixture *fixture, gconstpointer test_data) -{ - Dictionary *dict = (Dictionary *) test_data; - - gchar *ifo_filename = g_file_get_path (dict->ifo_file); - fixture->dict = stardict_dict_new (ifo_filename, NULL); - g_free (ifo_filename); -} - -static void -dict_teardown (DictFixture *fixture, G_GNUC_UNUSED gconstpointer test_data) -{ - g_object_unref (fixture->dict); -} - -static void -dict_test_list (gconstpointer user_data) -{ - Dictionary *dict = (Dictionary *) user_data; - - gchar *tmp_path = g_file_get_path (dict->tmp_dir); - GList *dictionaries = stardict_list_dictionaries (tmp_path); - g_free (tmp_path); - - g_assert (dictionaries != NULL); - g_assert (dictionaries->next == NULL); - - StardictInfo *info = dictionaries->data; - GFile *ifo_file = g_file_new_for_path (stardict_info_get_path (info)); - g_assert (g_file_equal (ifo_file, dict->ifo_file) == TRUE); - g_object_unref (ifo_file); - - g_list_free_full (dictionaries, (GDestroyNotify) stardict_info_free); -} - -static void -dict_test_new (gconstpointer user_data) -{ - Dictionary *dict = (Dictionary *) user_data; - - gchar *ifo_filename = g_file_get_path (dict->ifo_file); - StardictDict *sd = stardict_dict_new (ifo_filename, NULL); - g_free (ifo_filename); - - g_assert (sd != NULL); - g_object_unref (sd); -} - -static void -dict_test_data_entry (StardictDict *sd, TestEntry *entry) -{ - gboolean success; - StardictIterator *sdi = - stardict_dict_search (sd, entry->word, &success); - - g_assert (success == TRUE); - g_assert (sdi != NULL); - g_assert (stardict_iterator_is_valid (sdi)); - - const gchar *word = stardict_iterator_get_word (sdi); - g_assert_cmpstr (word, ==, entry->word); - - StardictEntry *sde = stardict_iterator_get_entry (sdi); - g_assert (sde != NULL); - - const GList *fields = stardict_entry_get_fields 
(sde); - const StardictEntryField *sdef; - g_assert (fields != NULL); - g_assert (fields->data != NULL); - - sdef = fields->data; - g_assert (sdef->type == 'm'); - g_assert_cmpstr (sdef->data, ==, entry->meaning); - - fields = fields->next; - g_assert (fields != NULL); - g_assert (fields->data != NULL); - - sdef = fields->data; - g_assert (sdef->type == 'X'); - g_assert_cmpuint (sdef->data_size, ==, entry->data_size); - g_assert (memcmp (sdef->data, entry->data, entry->data_size) == 0); - - fields = fields->next; - g_assert (fields == NULL); - - g_object_unref (sde); - g_object_unref (sdi); -} - -static void -dict_test_data (DictFixture *fixture, gconstpointer user_data) -{ - Dictionary *dict = (Dictionary *) user_data; - GArray *data = dict->data; - StardictDict *sd = fixture->dict; - - guint i; - for (i = 0; i < data->len; i++) - { - TestEntry *entry = &g_array_index (data, TestEntry, i); - dict_test_data_entry (sd, entry); - } -} - -int -main (int argc, char *argv[]) -{ - g_test_init (&argc, &argv, NULL); - if (glib_check_version (2, 36, 0)) - g_type_init (); - - Dictionary *dict = dictionary_create (); - - g_test_add_data_func ("/dict/list", dict, dict_test_list); - g_test_add_data_func ("/dict/new", dict, dict_test_new); - - g_test_add ("/dict/data", DictFixture, dict, - dict_setup, dict_test_data, dict_teardown); - - int result = g_test_run (); - dictionary_destroy (dict); - return result; -} -- cgit v1.2.3-70-g09d2