diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/add-pronunciation.c | 262 | ||||
-rw-r--r-- | src/sdcli.c | 274 | ||||
-rw-r--r-- | src/stardict.c | 1070 | ||||
-rw-r--r-- | src/stardict.h | 215 | ||||
-rw-r--r-- | src/test-stardict.c | 429 |
5 files changed, 2250 insertions, 0 deletions
diff --git a/src/add-pronunciation.c b/src/add-pronunciation.c new file mode 100644 index 0000000..45eae61 --- /dev/null +++ b/src/add-pronunciation.c @@ -0,0 +1,262 @@ +/* + * A tool to add eSpeak-generated pronunciation to dictionaries + * + * Here I use the `espeak' process rather than libespeak because of the GPL. + * + * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <glib.h> +#include <gio/gio.h> + +#include "stardict.h" + + +// --- Pronunciation generator ------------------------------------------------- + +typedef struct worker_data WorkerData; + +struct worker_data +{ + guint32 start_entry; //! The first entry to be processed + guint32 end_entry; //! Past the last entry to be processed + + /* Reader, writer */ + GMutex *dict_mutex; //! Locks the dictionary object + + /* Reader */ + GThread *main_thread; //! A handle to the reader thread + StardictDict *dict; //! The dictionary object + gpointer output; //! Linked-list of pronunciation data + + GMutex *remaining_mutex; //! Locks the progress stats + GCond *remaining_cond; //! Signals a change in progress + guint32 remaining; //! 
How many entries remain + + /* Writer */ + StardictIterator *iterator; //! Iterates over the dictionary + FILE *child_stdin; //! Standard input of eSpeak +}; + +/** Writes to espeak's stdin. */ +static gpointer +worker_writer (WorkerData *data) +{ + while (stardict_iterator_get_offset (data->iterator) != data->end_entry) + { + g_mutex_lock (data->dict_mutex); + const gchar *word = stardict_iterator_get_word (data->iterator); + g_mutex_unlock (data->dict_mutex); + + stardict_iterator_next (data->iterator); + if (fprintf (data->child_stdin, "%s\n", word) < 0) + g_error ("write to eSpeak failed: %s", strerror (errno)); + } + + g_object_unref (data->iterator); + return GINT_TO_POINTER (fclose (data->child_stdin)); +} + +/** Reads from espeak's stdout. */ +static gpointer +worker (WorkerData *data) +{ + /* Spawn eSpeak */ + static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL }; + gint child_in, child_out; + + GError *error; + if (!g_spawn_async_with_pipes (NULL, cmdline, NULL, + G_SPAWN_SEARCH_PATH, NULL, NULL, + NULL, &child_in, &child_out, NULL, &error)) + g_error ("g_spawn() failed: %s", error->message); + + data->child_stdin = fdopen (child_in, "wb"); + if (!data->child_stdin) + perror ("fdopen"); + + FILE *child_stdout = fdopen (child_out, "rb"); + if (!child_stdout) + perror ("fdopen"); + + /* Spawn a writer thread */ + g_mutex_lock (data->dict_mutex); + data->iterator = stardict_iterator_new (data->dict, data->start_entry); + g_mutex_unlock (data->dict_mutex); + + GThread *writer = g_thread_new ("write worker", + (GThreadFunc) worker_writer, data); + + /* Read the output */ + g_mutex_lock (data->remaining_mutex); + guint32 remaining = data->remaining; + g_mutex_unlock (data->remaining_mutex); + + data->output = NULL; + gpointer *output_end = &data->output; + while (remaining) + { + static gchar next[sizeof (gpointer)]; + GString *s = g_string_new (NULL); + g_string_append_len (s, next, sizeof next); + + gint c; + while ((c = fgetc (child_stdout)) != EOF && 
c != '\n') + g_string_append_c (s, c); + if (c == EOF) + g_error ("eSpeak process died too soon"); + + gchar *translation = g_string_free (s, FALSE); + *output_end = translation; + output_end = (gpointer *) translation; + + /* We limit progress reporting so that + * the mutex doesn't spin like crazy */ + if ((--remaining & 1023) != 0) + continue; + + g_mutex_lock (data->remaining_mutex); + data->remaining = remaining; + g_cond_broadcast (data->remaining_cond); + g_mutex_unlock (data->remaining_mutex); + } + + fclose (child_stdout); + return g_thread_join (writer); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + gint n_processes = 1; + + GOptionEntry entries[] = + { + { "processes", 'N', G_OPTION_FLAG_IN_MAIN, + G_OPTION_ARG_INT, &n_processes, + "the number of espeak processes run in parallel", "PROCESSES" }, + { NULL } + }; + + GError *error = NULL; + GOptionContext *ctx = g_option_context_new + ("input.ifo output.ifo - add pronunciation to dictionaries"); + g_option_context_add_main_entries (ctx, entries, NULL); + if (!g_option_context_parse (ctx, &argc, &argv, &error)) + { + g_print ("option parsing failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + if (argc != 3) + { + gchar *help = g_option_context_get_help (ctx, TRUE, FALSE); + g_print ("%s", help); + g_free (help); + exit (EXIT_FAILURE); + } + + StardictDict *dict = stardict_dict_new (argv[1], &error); + if (!dict) + { + g_printerr ("opening the dictionary failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + gsize n_words = stardict_info_get_word_count + (stardict_dict_get_info (dict)); + + if (n_processes <= 0) + { + g_printerr ("Error: there must be at least one process\n"); + exit (EXIT_FAILURE); + } + + if ((gsize) n_processes > n_words * 1024) + { + n_processes = n_words / 1024; + if (!n_processes) + n_processes = 1; + g_printerr ("Warning: too many processes, reducing to %d\n", + n_processes); + } + + 
/* Spawn worker threads to generate pronunciations */ + static GMutex dict_mutex; + + static GMutex remaining_mutex; + static GCond remaining_cond; + + WorkerData *data = g_alloca (sizeof *data * n_processes); + + gint i; + for (i = 0; i < n_processes; i++) + { + data[i].start_entry = (n_words - 1) * i / n_processes; + data[i].end_entry = (n_words - 1) * (i + 1) / n_processes; + + data[i].remaining = data[i].end_entry - data[i].start_entry; + data[i].remaining_mutex = &remaining_mutex; + data[i].remaining_cond = &remaining_cond; + + data[i].dict = dict; + data[i].dict_mutex = &dict_mutex; + + data->main_thread = g_thread_new ("worker", (GThreadFunc) worker, data); + } + + /* Loop while the threads still have some work to do and report status */ + g_mutex_lock (&remaining_mutex); + for (;;) + { + gboolean all_finished = TRUE; + printf ("\rRetrieving pronunciation... "); + for (i = 0; i < n_processes; i++) + { + printf ("%3u%% ", data[i].remaining * 100 + / (data[i].end_entry - data[i].start_entry)); + if (data[i].remaining) + all_finished = FALSE; + } + + if (all_finished) + break; + g_cond_wait (&remaining_cond, &remaining_mutex); + } + g_mutex_unlock (&remaining_mutex); + + for (i = 0; i < n_processes; i++) + g_thread_join (data[i].main_thread); + + // TODO after all processing is done, the program will go through the whole + // dictionary and put extended data entries into a new one. + StardictIterator *iterator = stardict_iterator_new (dict, 0); + while (stardict_iterator_is_valid (iterator)) + { + // ... + stardict_iterator_next (iterator); + } + + return 0; +} diff --git a/src/sdcli.c b/src/sdcli.c new file mode 100644 index 0000000..8a8f4ca --- /dev/null +++ b/src/sdcli.c @@ -0,0 +1,274 @@ +/* + * StarDict console UI + * + * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> + * All rights reserved. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#define _XOPEN_SOURCE_EXTENDED /**< Yes, we want ncursesw. */ + +#include <stdio.h> +#include <stdlib.h> +#include <locale.h> +#include <stdarg.h> +#include <limits.h> + +#include <glib.h> +#include <gio/gio.h> +#include <ncurses.h> + +#include <unistd.h> +#include <poll.h> +#include <errno.h> +#include <signal.h> + +#include "stardict.h" + + +#define KEY_ESCAPE 27 /**< Curses doesn't define this. */ + +// --- Utilities --------------------------------------------------------------- + +static void +display (const gchar *format, ...) +{ + va_list ap; + + va_start (ap, format); + vw_printw (stdscr, format, ap); + va_end (ap); + refresh (); +} + +static gchar * +wchar_to_mb (wchar_t ch) +{ + /* Convert the character back to a multi-byte sequence. */ + static gchar buffer[MB_LEN_MAX + 1]; + size_t len = wcrtomb (buffer, ch, NULL); + + /* This shouldn't happen. It would mean that the user has + * somehow managed to enter something inexpressable in the + * current locale. */ + if (len == (size_t) -1) + abort (); + + /* Here I hope the buffer doesn't overflow. Who uses + * shift states nowadays, anyway? 
*/ + if (wcrtomb (buffer + len, L'\0', NULL) == (size_t) -1) + abort (); + + return buffer; +} + +static const gchar * +wchar_to_mb_escaped (wchar_t ch) +{ + switch (ch) + { + case L'\r': return "\\r"; + case L'\n': return "\\n"; + case L'\t': return "\\t"; + default: return wchar_to_mb (ch); + } +} + +static int +poll_restart (struct pollfd *fds, nfds_t nfds, int timeout) +{ + int ret; + do + ret = poll (fds, nfds, timeout); + while (ret == -1 && errno == EINTR); + return ret; +} + +// --- SIGWINCH ---------------------------------------------------------------- + +static int g_winch_pipe[2]; /**< SIGWINCH signalling pipe. */ +static void (*g_old_winch_handler) (int); + +static void +winch_handler (int signum) +{ + /* Call the ncurses handler. */ + if (g_old_winch_handler) + g_old_winch_handler (signum); + + /* And wake up the poll() call. */ + write (g_winch_pipe[1], "x", 1); +} + +static void +install_winch_handler (void) +{ + struct sigaction act, oldact; + + act.sa_handler = winch_handler; + act.sa_flags = SA_RESTART; + sigemptyset (&act.sa_mask); + sigaction (SIGWINCH, &act, &oldact); + + /* Save the ncurses handler. 
*/ + if (oldact.sa_handler != SIG_DFL + && oldact.sa_handler != SIG_IGN) + g_old_winch_handler = oldact.sa_handler; +} + +// --- Event handlers ---------------------------------------------------------- + +typedef struct +{ + wint_t code; + guint is_char : 1; + MEVENT mouse; +} +CursesEvent; + +static gboolean +process_curses_event (CursesEvent *event) +{ + if (!event->is_char) + { + switch (event->code) + { + case KEY_RESIZE: + display ("Screen has been resized to %u x %u\n", + COLS, LINES); + break; + case KEY_MOUSE: + display ("Mouse event at (%d, %d), state %#lx\n", + event->mouse.x, event->mouse.y, event->mouse.bstate); + break; + default: + display ("Keyboard event: non-character: %u\n", + event->code); + } + return TRUE; + } + + display ("Keyboard event: character: '%s'\n", + wchar_to_mb_escaped (event->code)); + + if (event->code == L'q' || event->code == KEY_ESCAPE) + { + display ("Quitting...\n"); + return FALSE; + } + + return TRUE; +} + +static gboolean +process_stdin_input (void) +{ + CursesEvent event; + int sta; + + while ((sta = get_wch (&event.code)) != ERR) + { + event.is_char = (sta == OK); + if (sta == KEY_CODE_YES && event.code == KEY_MOUSE + && getmouse (&event.mouse) == ERR) + abort (); + if (!process_curses_event (&event)) + return FALSE; + } + + return TRUE; +} + +static gboolean +process_winch_input (int fd) +{ + char c; + + read (fd, &c, 1); + return process_stdin_input (); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + static GOptionEntry entries[] = + { + { NULL } + }; + + if (!setlocale (LC_ALL, "")) + abort (); + + GError *error = NULL; + GOptionContext *ctx = g_option_context_new ("- StarDict console UI"); + g_option_context_add_main_entries (ctx, entries, NULL); + if (!g_option_context_parse (ctx, &argc, &argv, &error)) + { + g_print ("option parsing failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + + if (!initscr () + || cbreak () == ERR + 
|| noecho () == ERR) + abort (); + + keypad (stdscr, TRUE); /* Enable character processing. */ + nodelay (stdscr, TRUE); /* Don't block on get_wch(). */ + + mousemask (ALL_MOUSE_EVENTS, NULL); + + display ("Press Q, Escape or ^C to quit\n"); + + if (pipe (g_winch_pipe) == -1) + abort (); + + install_winch_handler (); + +// --- Message loop ------------------------------------------------------------ + + struct pollfd pollfd[2]; + + pollfd[0].fd = fileno (stdin); + pollfd[0].events = POLLIN; + pollfd[1].fd = g_winch_pipe[0]; + pollfd[1].events = POLLIN; + + while (TRUE) + { + if (poll_restart (pollfd, 3, -1) == -1) + abort (); + + if ((pollfd[0].revents & POLLIN) + && !process_stdin_input ()) + break; + if ((pollfd[1].revents & POLLIN) + && !process_winch_input (pollfd[2].fd)) + break; + } + +// --- Cleanup ----------------------------------------------------------------- + + endwin (); + + if (close (g_winch_pipe[0]) == -1 + || close (g_winch_pipe[1]) == -1) + abort (); + + return 0; +} + diff --git a/src/stardict.c b/src/stardict.c new file mode 100644 index 0000000..4439022 --- /dev/null +++ b/src/stardict.c @@ -0,0 +1,1070 @@ +/* + * stardict.c: StarDict API + * + * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> + +#include <glib.h> +#include <gio/gio.h> + +#include "stardict.h" + + +/** Describes a single entry in the dictionary index. */ +typedef struct stardict_index_entry StardictIndexEntry; + +/** Describes a single entry in the synonyms index. */ +typedef struct stardict_synonym_entry StardictSynonymEntry; + +/** Helper class for reading .ifo files. */ +typedef struct ifo_reader IfoReader; + + +typedef enum stardict_version StardictVersion; +enum stardict_version { SD_VERSION_2_4_2, SD_VERSION_3_0_0 }; + +struct stardict_info +{ + gchar * path; + StardictVersion version; + + gchar * book_name; + gulong word_count; + gulong syn_word_count; + gulong idx_filesize; + gulong idx_offset_bits; + gchar * author; + gchar * email; + gchar * website; + gchar * description; + gchar * date; + gchar * same_type_sequence; +}; + +struct stardict_index_entry +{ + gchar * name; //!< The word in utf-8 + guint64 data_offset; //!< Offset of the definition + guint32 data_size; //!< Size of the definition +}; + +struct stardict_synonym_entry +{ + gchar * word; //!< A synonymous word + guint32 original_word; //!< The original word's index +}; + struct ifo_reader +{ + gchar * data; //!< File data terminated with \0 + gchar * data_end; //!< Where the final \0 char. is + + gchar * start; //!< Start of the current token + + gchar * key; //!< The key (points into @a data) + gchar * value; //!< The value (points into @a data) +}; + +// --- Utilities --------------------------------------------------------------- + +/** Read the whole stream into a byte array. 
*/ +static gboolean +stream_read_all (GByteArray *ba, GInputStream *is, GError **error) +{ + guint8 buffer[1024 * 64]; + gsize bytes_read; + + while (g_input_stream_read_all (is, buffer, sizeof buffer, + &bytes_read, NULL, error)) + { + g_byte_array_append (ba, buffer, bytes_read); + if (bytes_read < sizeof buffer) + return TRUE; + } + return FALSE; +} + +/** Read a null-terminated string from a data input stream. */ +static gchar * +stream_read_string (GDataInputStream *dis, GError **error) +{ + gsize length; + gchar *s = g_data_input_stream_read_upto (dis, "", 1, &length, NULL, error); + if (!s) + return NULL; + + GError *err = NULL; + g_data_input_stream_read_byte (dis, NULL, &err); + if (err) + { + g_free (s); + g_propagate_error (error, err); + return NULL; + } + + return s; +} + +/** String compare function used for StarDict indexes. */ +static inline gint +stardict_strcmp (const gchar *s1, const gchar *s2) +{ + gint a; + a = g_ascii_strcasecmp (s1, s2); + return a ? a : strcmp (s1, s2); +} + +/** After this statement, the element has been found and its index is stored + * in the variable "imid". */ +#define BINARY_SEARCH_BEGIN(max, compare) \ + gint imin = 0, imax = max, imid; \ + while (imin <= imax) { \ + imid = imin + (imax - imin) / 2; \ + gint cmp = compare; \ + if (cmp > 0) imin = imid + 1; \ + else if (cmp < 0) imax = imid - 1; \ + else { + +/** After this statement, the binary search has failed and "imin" stores + * the position where the element can be inserted. 
*/ +#define BINARY_SEARCH_END \ + } \ + } + +// --- Errors ------------------------------------------------------------------ + +GQuark +stardict_error_quark (void) +{ + return g_quark_from_static_string ("stardict-error-quark"); +} + +// --- IFO reader -------------------------------------------------------------- + +static gboolean +ifo_reader_init (IfoReader *ir, const gchar *path, GError **error) +{ + gsize length; + gchar *contents; + if (!g_file_get_contents (path, &contents, &length, error)) + return FALSE; + + static const char first_line[] = "StarDict's dict ifo file\n"; + if (length < sizeof first_line - 1 + || strncmp (contents, first_line, sizeof first_line - 1)) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid header format", path); + return FALSE; + } + + ir->data = contents; + ir->start = contents + sizeof first_line - 1; + ir->data_end = contents + length; + return TRUE; +} + +static void +ifo_reader_free (IfoReader *ir) +{ + g_free (ir->data); +} + +static gint +ifo_reader_read (IfoReader *ir) +{ + ir->key = NULL; + ir->value = NULL; + + gchar *p; + for (p = ir->start; p < ir->data_end; p++) + { + if (*p == '\n') + { + if (!ir->key) + return -1; + + *p = 0; + ir->value = ir->start; + ir->start = p + 1; + return 1; + } + + if (*p == '=') + { + if (p == ir->start) + return -1; + + *p = 0; + ir->key = ir->start; + ir->start = p + 1; + } + } + + if (!ir->key) + { + if (p != ir->start) + return -1; + return 0; + } + + ir->value = ir->start; + ir->start = p; + return 1; +} + +// --- StardictInfo ------------------------------------------------------------ + +/** Return the filesystem path for the dictionary. */ +const gchar * +stardict_info_get_path (StardictInfo *sdi) +{ + return sdi->path; +} + +/** Return the name of the dictionary. */ +const gchar * +stardict_info_get_book_name (StardictInfo *sdi) +{ + return sdi->book_name; +} + +/** Return the word count of the dictionary. 
Note that this information comes + * from the .ifo file, while the dictionary could successfully load with + * a different count of word entries. + */ +gsize +stardict_info_get_word_count (StardictInfo *sdi) +{ + return sdi->word_count; +} + +/** Destroy the dictionary info object. */ +void +stardict_info_free (StardictInfo *sdi) +{ + g_free (sdi->path); + g_free (sdi->book_name); + g_free (sdi->author); + g_free (sdi->email); + g_free (sdi->website); + g_free (sdi->description); + g_free (sdi->date); + g_free (sdi->same_type_sequence); + g_free (sdi); +} + +#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) } + +static gboolean +load_ifo (StardictInfo *sti, const gchar *path, GError **error) +{ + IfoReader ir; + if (!ifo_reader_init (&ir, path, error)) + return FALSE; + + gboolean ret_val = FALSE; + memset (sti, 0, sizeof *sti); + + if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version")) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: version not specified", path); + goto error; + } + + if (!strcmp (ir.value, "2.4.2")) + sti->version = SD_VERSION_2_4_2; + else if (!strcmp (ir.value, "3.0.0")) + sti->version = SD_VERSION_3_0_0; + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid version: %s", path, ir.value); + goto error; + } + + static const struct + { + const gchar *name; + enum { IFO_STRING, IFO_NUMBER } type; + size_t offset; + } + ifo_keys[] = + { + DEFINE_IFO_KEY ("bookname", STRING, book_name), + DEFINE_IFO_KEY ("wordcount", NUMBER, word_count), + DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count), + DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize), + DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits), + DEFINE_IFO_KEY ("author", STRING, author), + DEFINE_IFO_KEY ("email", STRING, email), + DEFINE_IFO_KEY ("website", STRING, website), + DEFINE_IFO_KEY ("description", STRING, description), + DEFINE_IFO_KEY ("date", STRING, date), + 
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence) + }; + + gint ret; + while ((ret = ifo_reader_read (&ir)) == 1) + { + guint i; + for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) + if (!strcmp (ir.key, ifo_keys[i].name)) + break; + + if (i == G_N_ELEMENTS (ifo_keys)) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: unknown key, ignoring: %s", path, ir.key); + continue; + } + + if (ifo_keys[i].type == IFO_STRING) + { + G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset) + = g_strdup (ir.value); + continue; + } + + // Otherwise it has to be IFO_NUMBER + gchar *end; + gulong wc = strtol (ir.value, &end, 10); + if (*end) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: invalid integer", path); + goto error; + } + + G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = wc; + } + + if (ret == -1) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: option format error", path); + goto error; + } + + ret_val = TRUE; + + // FIXME check for zeros, don't assume that 0 means for "not set" + if (!sti->book_name || !*sti->book_name) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: no book name specified\n", path); + ret_val = FALSE; + } + if (!sti->word_count) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: word count not specified\n", path); + ret_val = FALSE; + } + if (!sti->idx_filesize) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: .idx file size not specified\n", path); + ret_val = FALSE; + } + + if (!sti->idx_offset_bits) + sti->idx_offset_bits = 32; + else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64) + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "%s: wrong index offset bits: %lu\n", path, sti->idx_offset_bits); + ret_val = FALSE; + } + +error: + if (!ret_val) + { + guint i; + for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) + if (ifo_keys[i].type == 
IFO_STRING) + g_free (G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset)); + } + else + sti->path = g_strdup (path); + + ifo_reader_free (&ir); + return ret_val; +} + +/** List all dictionary files located in a path. + * @return GList<StardictInfo *>. Deallocate the list with: + * @code + * g_list_free_full ((GDestroyNotify) stardict_info_free); + * @endcode + */ +GList * +stardict_list_dictionaries (const gchar *path) +{ + GPatternSpec *ps = g_pattern_spec_new ("*.ifo"); + GDir *dir = g_dir_open (path, 0, NULL); + g_return_val_if_fail (dir != NULL, NULL); + + GList *dicts = NULL; + const gchar *name; + while ((name = g_dir_read_name (dir))) + { + if (!g_pattern_match_string (ps, name)) + continue; + + gchar *filename = g_build_filename (path, name, NULL); + StardictInfo *ifo = g_new (StardictInfo, 1); + if (load_ifo (ifo, filename, NULL)) + dicts = g_list_append (dicts, ifo); + else + g_free (ifo); + g_free (filename); + } + g_dir_close (dir); + g_pattern_spec_free (ps); + return dicts; +} + +// --- StardictDict ------------------------------------------------------------ + +G_DEFINE_TYPE (StardictDict, stardict_dict, G_TYPE_OBJECT) + +static void +stardict_dict_finalize (GObject *self) +{ + StardictDict *sd = STARDICT_DICT (self); + + stardict_info_free (sd->info); + g_array_free (sd->index, TRUE); + g_array_free (sd->synonyms, TRUE); + + if (sd->mapped_dict) + g_mapped_file_unref (sd->mapped_dict); + else + g_free (sd->dict); + + G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self); +} + +static void +stardict_dict_class_init (StardictDictClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize; +} + +static void +stardict_dict_init (G_GNUC_UNUSED StardictDict *sd) +{ +} + +/** Load a StarDict dictionary. 
+ * @param[in] filename Path to the .ifo file + */ +StardictDict * +stardict_dict_new (const gchar *filename, GError **error) +{ + StardictInfo *ifo = g_new (StardictInfo, 1); + if (!load_ifo (ifo, filename, error)) + { + g_free (ifo); + return NULL; + } + + StardictDict *sd = stardict_dict_new_from_info (ifo, error); + if (!sd) stardict_info_free (ifo); + return sd; +} + +/** Return information about a loaded dictionary. */ +StardictInfo * +stardict_dict_get_info (StardictDict *sd) +{ + g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); + return sd->info; +} + +/** Load a StarDict index from a GIO input stream. */ +static gboolean +load_idx_internal (StardictDict *sd, GInputStream *is, GError **error) +{ + GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is)); + g_data_input_stream_set_byte_order (dis, + G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + StardictIndexEntry entry; + GError *err = NULL; + // Ignoring "wordcount", just reading as long as we can + while ((entry.name = stream_read_string (dis, &err))) + { + if (sd->info->idx_offset_bits == 32) + entry.data_offset + = g_data_input_stream_read_uint32 (dis, NULL, &err); + else + entry.data_offset + = g_data_input_stream_read_uint64 (dis, NULL, &err); + if (err) + goto error; + + entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err); + if (err) + goto error; + + g_array_append_val (sd->index, entry); + } + + if (err != NULL) + goto error; + + g_object_unref (dis); + return TRUE; + +error: + g_propagate_error (error, err); + g_free (entry.name); + g_object_unref (dis); + return FALSE; +} + +/** Load a StarDict index. 
*/ +static gboolean +load_idx (StardictDict *sd, const gchar *filename, + gboolean gzipped, GError **error) +{ + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + if (gzipped) + { + GZlibDecompressor *zd + = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); + GInputStream *cis = g_converter_input_stream_new + (G_INPUT_STREAM (fis), G_CONVERTER (zd)); + + ret_val = load_idx_internal (sd, cis, error); + + g_object_unref (cis); + g_object_unref (zd); + } + else + ret_val = load_idx_internal (sd, G_INPUT_STREAM (fis), error); + + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; +} + +static gboolean +load_syn (StardictDict *sd, const gchar *filename, GError **error) +{ + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis)); + g_data_input_stream_set_byte_order (dis, + G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + StardictSynonymEntry entry; + GError *err = NULL; + // Ignoring "synwordcount", just reading as long as we can + while ((entry.word = stream_read_string (dis, &err))) + { + entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err); + if (err) + break; + + g_array_append_val (sd->synonyms, entry); + } + + if (err != NULL) + { + g_free (entry.word); + g_propagate_error (error, err); + } + else + ret_val = TRUE; + + g_object_unref (dis); + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; +} + +/** Destroy an index entry. */ +static void +index_destroy_cb (gpointer sde) +{ + StardictIndexEntry *e = sde; + g_free (e->name); +} + +/** Destroy a synonym entry. 
*/ +static void +syn_destroy_cb (gpointer sde) +{ + StardictSynonymEntry *e = sde; + g_free (e->word); +} + +/** Load StarDict dictionary data. */ +static gboolean +load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped, + GError **error) +{ + if (gzipped) + { + gboolean ret_val = FALSE; + GFile *file = g_file_new_for_path (filename); + GFileInputStream *fis = g_file_read (file, NULL, error); + + if (!fis) + goto cannot_open; + + // Just read it all, as it is, into memory + GByteArray *ba = g_byte_array_new (); + GZlibDecompressor *zd + = g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); + GInputStream *cis = g_converter_input_stream_new + (G_INPUT_STREAM (fis), G_CONVERTER (zd)); + + ret_val = stream_read_all (ba, cis, error); + + g_object_unref (cis); + g_object_unref (zd); + + if (ret_val) + { + sd->dict_length = ba->len; + sd->dict = g_byte_array_free (ba, FALSE); + } + else + g_byte_array_free (ba, TRUE); + + g_object_unref (fis); +cannot_open: + g_object_unref (file); + return ret_val; + } + + sd->mapped_dict = g_mapped_file_new (filename, FALSE, error); + if (!sd->mapped_dict) + return FALSE; + + sd->dict_length = g_mapped_file_get_length (sd->mapped_dict); + sd->dict = g_mapped_file_get_contents (sd->mapped_dict); + return TRUE; +} + +/** Load a StarDict dictionary. + * @param[in] sdi Parsed .ifo data. + */ +StardictDict * +stardict_dict_new_from_info (StardictInfo *sdi, GError **error) +{ + g_return_val_if_fail (sdi != NULL, NULL); + + StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL); + sd->info = sdi; + sd->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry)); + g_array_set_clear_func (sd->index, index_destroy_cb); + sd->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry)); + g_array_set_clear_func (sd->synonyms, syn_destroy_cb); + + const gchar *dot = strrchr (sdi->path, '.'); + gchar *base = dot ? 
g_strndup (sdi->path, dot - sdi->path) + : g_strdup (sdi->path); + + gchar *base_idx = g_strconcat (base, ".idx", NULL); + gboolean ret = FALSE; + if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_idx (sd, base_idx, FALSE, error); + else + { + gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL); + g_free (base_idx); + base_idx = base_idx_gz; + + if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_idx (sd, base_idx, TRUE, error); + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, + "%s: cannot find index file", sdi->path); + } + } + g_free (base_idx); + + if (!ret) + goto error; + + gchar *base_dict = g_strconcat (base, ".dict", NULL); + ret = FALSE; + if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_dict (sd, base_dict, FALSE, error); + else + { + gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL); + g_free (base_dict); + base_dict = base_dict_dz; + + if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + ret = load_dict (sd, base_dict, TRUE, error); + else + { + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, + "%s: cannot find dict file", sdi->path); + } + } + g_free (base_dict); + + if (!ret) + goto error; + + gchar *base_syn = g_strconcat (base, ".syn", NULL); + if (g_file_test (base_syn, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) + load_syn (sd, base_syn, NULL); + g_free (base_syn); + + g_free (base); + return sd; + +error: + g_array_free (sd->index, TRUE); + g_free (base); + g_object_unref (sd); + return NULL; +} + +/** Return words for which the argument is a synonym of or NULL + * if there are no such words. 
+ */ +gchar ** +stardict_dict_get_synonyms (StardictDict *sd, const gchar *word) +{ + BINARY_SEARCH_BEGIN (sd->synonyms->len - 1, stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, imid).word)) + + // Back off to the first matching entry + while (imid > 0 && !stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, --imid).word)); + + GPtrArray *array = g_ptr_array_new (); + + // And add all matching entries from that position on to the array + do + g_ptr_array_add (array, g_strdup (g_array_index + (sd->index, StardictIndexEntry, g_array_index + (sd->synonyms, StardictSynonymEntry, ++imid).original_word).name)); + while ((guint) imid < sd->synonyms->len - 1 && !stardict_strcmp (word, + g_array_index (sd->synonyms, StardictSynonymEntry, imid + 1).word)); + + return (gchar **) g_ptr_array_free (array, FALSE); + + BINARY_SEARCH_END + + return NULL; +} + +/** Search for a word. + * @param[in] word The word in utf-8 encoding + * @param[out] success TRUE if found + * @return An iterator object pointing to the word, or where it would be + */ +StardictIterator * +stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success) +{ + BINARY_SEARCH_BEGIN (sd->index->len - 1, stardict_strcmp (word, + g_array_index (sd->index, StardictIndexEntry, imid).name)) + + if (success) *success = TRUE; + return stardict_iterator_new (sd, imid); + + BINARY_SEARCH_END + + if (success) *success = FALSE; + return stardict_iterator_new (sd, imin); +} + +static void +stardict_entry_field_free (StardictEntryField *sef) +{ + g_free (sef->data); + g_slice_free1 (sizeof *sef, sef); +} + +static StardictEntryField * +read_entry (gchar type, const gchar **entry_iterator, + const gchar *end, gboolean is_final) +{ + const gchar *entry = *entry_iterator; + if (g_ascii_islower (type)) + { + GString *data = g_string_new (NULL); + + if (is_final) + { + g_string_append_len (data, entry, end - entry); + entry += end - entry; + } + else + { + gint c = 
EOF; + while (entry < end && (c = *entry++)) + g_string_append_c (data, c); + + if (c != '\0') + return (gpointer) g_string_free (data, TRUE); + } + + StardictEntryField *sef = g_slice_alloc (sizeof *sef); + sef->type = type; + sef->data_size = data->len + 1; + sef->data = g_string_free (data, FALSE); + *entry_iterator = entry; + return sef; + } + + gsize length; + if (is_final) + length = end - entry; + else + { + if (entry + sizeof (guint32) > end) + return NULL; + + length = GUINT32_FROM_BE (*(guint32 *) entry); + entry += sizeof (guint32); + + if (entry + length > end) + return NULL; + } + + StardictEntryField *sef = g_slice_alloc (sizeof *sef); + sef->type = type; + sef->data_size = length; + sef->data = memcpy (g_malloc (length), entry, length); + *entry_iterator = entry + length; + return sef; +} + +static GList * +read_entries (const gchar *entry, gsize entry_size, GError **error) +{ + const gchar *end = entry + entry_size; + GList *result = NULL; + + while (entry < end) + { + gchar type = *entry++; + StardictEntryField *sef = read_entry (type, &entry, end, FALSE); + if (!sef) + goto error; + result = g_list_append (result, sef); + } + + return result; + +error: + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "invalid data entry"); + g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); + return NULL; +} + +static GList * +read_entries_sts (const gchar *entry, gsize entry_size, + const gchar *sts, GError **error) +{ + const gchar *end = entry + entry_size; + GList *result = NULL; + + while (*sts) + { + gchar type = *sts++; + StardictEntryField *sef = read_entry (type, &entry, end, !*sts); + if (!sef) + goto error; + result = g_list_append (result, sef); + } + + return result; + +error: + g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, + "invalid data entry"); + g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); + return NULL; +} + +/** Return the data for the specified offset in the 
index. Unsafe. */ +static StardictEntry * +stardict_dict_get_entry (StardictDict *sd, guint32 offset) +{ + // TODO cache the entries + StardictIndexEntry *sie = &g_array_index (sd->index, + StardictIndexEntry, offset); + + g_return_val_if_fail (sie->data_offset + sie->data_size + <= sd->dict_length, NULL); + + GList *entries; + if (sd->info->same_type_sequence) + entries = read_entries_sts (sd->dict + sie->data_offset, + sie->data_size, sd->info->same_type_sequence, NULL); + else + entries = read_entries (sd->dict + sie->data_offset, + sie->data_size, NULL); + + if (!entries) + return NULL; + + StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL); + se->fields = entries; + return se; +} + +// --- StardictEntry ----------------------------------------------------------- + +G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT) + +static void +stardict_entry_finalize (GObject *self) +{ + StardictEntry *sde = STARDICT_ENTRY (self); + + g_list_free_full (sde->fields, (GDestroyNotify) stardict_entry_field_free); + + G_OBJECT_CLASS (stardict_entry_parent_class)->finalize (self); +} + +static void +stardict_entry_class_init (StardictEntryClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_entry_finalize; +} + +static void +stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde) +{ +} + +/** Return the entries present within the entry. 
+ * @return GList<StardictEntryField *> + */ +const GList * +stardict_entry_get_fields (StardictEntry *sde) +{ + g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL); + return sde->fields; +} + +// --- StardictIterator--------------------------------------------------------- + +G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT) + +static void +stardict_iterator_finalize (GObject *self) +{ + StardictIterator *si = STARDICT_ITERATOR (self); + + g_object_unref (si->owner); + + G_OBJECT_CLASS (stardict_iterator_parent_class)->finalize (self); +} + +static void +stardict_iterator_class_init (StardictIteratorClass *klass) +{ + G_OBJECT_CLASS (klass)->finalize = stardict_iterator_finalize; +} + +static void +stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd) +{ +} + +/** Create a new iterator for the dictionary with offset @a offset. */ +StardictIterator * +stardict_iterator_new (StardictDict *sd, guint32 offset) +{ + g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); + + StardictIterator *si = g_object_new (STARDICT_TYPE_ITERATOR, NULL); + si->owner = g_object_ref (sd); + si->offset = offset; + return si; +} + +/** Return the word in the index that the iterator points at, or NULL. */ +const gchar * +stardict_iterator_get_word (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); + if (!stardict_iterator_is_valid (sdi)) + return NULL; + return g_array_index (sdi->owner->index, + StardictIndexEntry, sdi->offset).name; +} + +/** Return the dictionary entry that the iterator points at, or NULL. */ +StardictEntry * +stardict_iterator_get_entry (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); + if (!stardict_iterator_is_valid (sdi)) + return FALSE; + return stardict_dict_get_entry (sdi->owner, sdi->offset); +} + +/** Return whether the iterator points to a valid index entry. 
*/ +gboolean +stardict_iterator_is_valid (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE); + return sdi->offset >= 0 && sdi->offset < sdi->owner->index->len; +} + +/** Return the offset of the iterator within the dictionary index. */ +gint64 +stardict_iterator_get_offset (StardictIterator *sdi) +{ + g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1); + return sdi->offset; +} + +/** Set the offset of the iterator. */ +void +stardict_iterator_set_offset + (StardictIterator *sdi, gint64 offset, gboolean relative) +{ + g_return_if_fail (STARDICT_IS_ITERATOR (sdi)); + sdi->offset = relative ? sdi->offset + offset : offset; +} diff --git a/src/stardict.h b/src/stardict.h new file mode 100644 index 0000000..aef27fd --- /dev/null +++ b/src/stardict.h @@ -0,0 +1,215 @@ +/* + * stardict.h: StarDict API + * + * This module doesn't cover all the functionality available to StarDict + * dictionaries, it should however be good enough for most of them that are + * freely available on the Internet. + * + * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef STARDICT_H +#define STARDICT_H + +/** An object intended for interacting with a dictionary. 
*/ +typedef struct stardict_dict StardictDict; +typedef struct stardict_dict_class StardictDictClass; + +/** Overall information about a particular dictionary. */ +typedef struct stardict_info StardictInfo; + +/** Handles the task of moving around the dictionary. */ +typedef struct stardict_iterator StardictIterator; +typedef struct stardict_iterator_class StardictIteratorClass; + +/** Contains the decoded data for a single word definition. */ +typedef struct stardict_entry StardictEntry; +typedef struct stardict_entry_class StardictEntryClass; + +/** A single field of a word definition. */ +typedef struct stardict_entry_field StardictEntryField; + +/* GObject boilerplate. */ +#define STARDICT_TYPE_DICT (stardict_dict_get_type ()) +#define STARDICT_DICT(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_DICT, StardictDict)) +#define STARDICT_IS_DICT(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_DICT)) +#define STARDICT_DICT_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_DICT, StardictDictClass)) +#define STARDICT_IS_DICT_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_DICT)) +#define STARDICT_DICT_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_DICT, StardictDictClass)) + +#define STARDICT_TYPE_ITERATOR (stardict_iterator_get_type ()) +#define STARDICT_ITERATOR(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_ITERATOR, StardictIterator)) +#define STARDICT_IS_ITERATOR(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_ITERATOR)) +#define STARDICT_ITERATOR_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_ITERATOR, StardictIteratorClass)) +#define STARDICT_IS_ITERATOR_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_ITERATOR)) +#define STARDICT_ITERATOR_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_ITERATOR, StardictIteratorClass)) + +#define STARDICT_TYPE_ENTRY 
(stardict_entry_get_type ()) +#define STARDICT_ENTRY(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST ((obj), \ + STARDICT_TYPE_ENTRY, StardictEntry)) +#define STARDICT_IS_ENTRY(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE ((obj), \ + STARDICT_TYPE_ENTRY)) +#define STARDICT_ENTRY_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST ((klass), \ + STARDICT_TYPE_ENTRY, StardictEntryClass)) +#define STARDICT_IS_ENTRY_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE ((klass), \ + STARDICT_TYPE_ENTRY)) +#define STARDICT_ENTRY_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS ((obj), \ + STARDICT_TYPE_ENTRY, StardictEntryClass)) + +// --- Errors ------------------------------------------------------------------ + +/** General error type. */ +typedef enum { + STARDICT_ERROR_FILE_NOT_FOUND, //!< Some file was not found + STARDICT_ERROR_INVALID_DATA //!< Dictionary contains invalid data +} StardictError; + +#define STARDICT_ERROR (stardict_error_quark ()) + +GQuark stardict_error_quark (void); + +// --- Dictionary information -------------------------------------------------- + +const gchar *stardict_info_get_path (StardictInfo *sdi) G_GNUC_PURE; +const gchar *stardict_info_get_book_name (StardictInfo *sdi) G_GNUC_PURE; +gsize stardict_info_get_word_count (StardictInfo *sd) G_GNUC_PURE; +void stardict_info_free (StardictInfo *sdi); + +GList *stardict_list_dictionaries (const gchar *path); + +// --- Dictionaries ------------------------------------------------------------ + +struct stardict_dict +{ + GObject parent_instance; + StardictInfo * info; //!< General information about the dict + GArray * index; //!< Word index + GArray * synonyms; //!< Synonyms + gpointer dict; //!< Dictionary data + gsize dict_length; //!< Length of the dict data in bytes + GMappedFile * mapped_dict; //!< Memory map handle +}; + +struct stardict_dict_class +{ + GObjectClass parent_class; +}; + +GType stardict_dict_get_type (void); +StardictDict *stardict_dict_new (const gchar *filename, GError **error); +StardictDict 
*stardict_dict_new_from_info (StardictInfo *sdi, GError **error); +StardictInfo *stardict_dict_get_info (StardictDict *sd); +gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word); +StardictIterator *stardict_dict_search + (StardictDict *sd, const gchar *word, gboolean *success); + +// --- Dictionary iterators ---------------------------------------------------- + +struct stardict_iterator +{ + GObject parent_instance; + StardictDict * owner; //!< The related dictionary + gint64 offset; //!< Index within the dictionary +}; + +struct stardict_iterator_class +{ + GObjectClass parent_class; +}; + +GType stardict_iterator_get_type (void); +StardictIterator *stardict_iterator_new (StardictDict *sd, guint32 index); +const gchar *stardict_iterator_get_word (StardictIterator *sdi) G_GNUC_PURE; +StardictEntry *stardict_iterator_get_entry (StardictIterator *sdi); +gboolean stardict_iterator_is_valid (StardictIterator *sdi) G_GNUC_PURE; +gint64 stardict_iterator_get_offset (StardictIterator *sdi) G_GNUC_PURE; +void stardict_iterator_set_offset + (StardictIterator *sdi, gint64 offset, gboolean relative); + +/** Go to the next entry. */ +#define stardict_iterator_next(sdi) \ + (stardict_iterator_set_offset (sdi, 1, TRUE)) + +/** Go to the previous entry. 
*/ +#define stardict_iterator_prev(sdi) \ + (stardict_iterator_set_offset (sdi, -1, TRUE)) + +// --- Dictionary entries ------------------------------------------------------ + +typedef enum { + STARDICT_FIELD_MEANING = 'm', //!< Word's purely textual meaning + STARDICT_FIELD_LOCALE = 'l', //!< Locale-dependent meaning + STARDICT_FIELD_PANGO = 'g', //!< Pango text markup language + STARDICT_FIELD_PHONETIC = 't', //!< English phonetic string + STARDICT_FIELD_XDXF = 'x', //!< xdxf language + STARDICT_FIELD_YB_KANA = 'y', //!< Chinese YinBiao or Japanese KANA + STARDICT_FIELD_POWERWORD = 'k', //!< KingSoft PowerWord's data + STARDICT_FIELD_MEDIAWIKI = 'w', //!< MediaWiki markup language + STARDICT_FIELD_HTML = 'h', //!< HTML codes + STARDICT_FIELD_RESOURCE = 'r', //!< Resource file list + STARDICT_FIELD_WAV = 'W', //!< WAV file + STARDICT_FIELD_PICTURE = 'P', //!< Picture file + STARDICT_FIELD_X = 'X' //!< Reserved, experimental extensions +} StardictEntryFieldType; + +struct stardict_entry_field +{ + gchar type; //!< Type of entry (StardictEntryFieldType) + gpointer data; //!< Raw data or null-terminated string + gsize data_size; //!< Size of data, including any \0 +}; + +struct stardict_entry +{ + GObject parent_instance; + GList * fields; //!< List of StardictEntryField's +}; + +struct stardict_entry_class +{ + GObjectClass parent_class; +}; + +GType stardict_entry_get_type (void); +const GList *stardict_entry_get_fields (StardictEntry *sde) G_GNUC_PURE; + + #endif /* ! STARDICT_H */ diff --git a/src/test-stardict.c b/src/test-stardict.c new file mode 100644 index 0000000..8f7f798 --- /dev/null +++ b/src/test-stardict.c @@ -0,0 +1,429 @@ +/* + * test-stardict.c: StarDict API test + * + * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> + * All rights reserved. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <glib.h> +#include <gio/gio.h> + +#include "stardict.h" + + +// --- Utilities --------------------------------------------------------------- + +// Adapted http://gezeiten.org/post/2009/04/Writing-Your-Own-GIO-Jobs +static gboolean remove_recursive (GFile *file, GError **error); + +static gboolean +remove_directory_contents (GFile *file, GError **error) +{ + GFileEnumerator *enumerator = + g_file_enumerate_children (file, "standard::*", + G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); + + if (!enumerator) + return FALSE; + + gboolean success = TRUE; + do + { + GError *err = NULL; + GFileInfo *child_info = + g_file_enumerator_next_file (enumerator, NULL, &err); + + if (!child_info) + { + if (err) + { + g_propagate_error (error, err); + success = FALSE; + } + break; + } + + GFile *child = g_file_resolve_relative_path + (file, g_file_info_get_name (child_info)); + success = remove_recursive (child, error); + g_object_unref (child); + g_object_unref (child_info); + } + while (success); + + g_object_unref (enumerator); + return success; +} + +static gboolean +remove_recursive (GFile *file, GError **error) +{ + g_return_val_if_fail (G_IS_FILE (file), 
FALSE); + + GFileInfo *info = g_file_query_info (file, "standard::*", + G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, error); + + if (!info) + return FALSE; + + GFileType type = g_file_info_get_file_type (info); + g_object_unref (info); + + if (type == G_FILE_TYPE_DIRECTORY && + !remove_directory_contents (file, error)) + return FALSE; + + return g_file_delete (file, NULL, error); +} + +static gchar * +generate_random_string (gsize length, GRand *rand) +{ + GString *s = g_string_sized_new (length); + while (length--) + g_string_append_c (s, g_rand_int_range (rand, 'a', 'z' + 1)); + return g_string_free (s, FALSE); +} + +static gpointer +generate_random_data (gsize length, GRand *rand) +{ + gchar *blob = g_malloc (length), *i = blob; + while (length--) + *i++ = g_rand_int_range (rand, 0, 256); + return blob; +} + +// --- Dictionary generation --------------------------------------------------- + +typedef struct dictionary Dictionary; +typedef struct test_entry TestEntry; + +struct dictionary +{ + GFile *tmp_dir; //!< A temporary dictionary + GFile *ifo_file; //!< The dictionary's .ifo file + GArray *data; //!< Array of TestEntry's +}; + +struct test_entry +{ + gchar *word; + gchar *meaning; + gpointer data; + gsize data_size; +}; + +static void +test_entry_free (TestEntry *te) +{ + g_free (te->word); + g_free (te->meaning); + g_free (te->data); +} + +static gint +test_entry_word_compare (gconstpointer a, gconstpointer b) +{ + return strcmp (((TestEntry *) a)->word, ((TestEntry *) b)->word); +} + +static GArray * +generate_dictionary_data (gsize length) +{ + GRand *rand = g_rand_new_with_seed (0); + + GArray *a = g_array_sized_new (FALSE, FALSE, sizeof (TestEntry), length); + g_array_set_clear_func (a, (GDestroyNotify) test_entry_free); + + while (length--) + { + TestEntry te; + + te.word = generate_random_string + (g_rand_int_range (rand, 1, 10), rand); + te.meaning = generate_random_string + (g_rand_int_range (rand, 1, 1024), rand); + + te.data_size = g_rand_int_range 
(rand, 0, 1048576); + te.data = generate_random_data (te.data_size, rand); + + g_array_append_val (a, te); + } + + g_rand_free (rand); + g_array_sort (a, test_entry_word_compare); + return a; +} + +static Dictionary * +dictionary_create (void) +{ + GError *error; + gchar *tmp_dir_path = g_dir_make_tmp ("stardict-test-XXXXXX", &error); + if (!tmp_dir_path) + g_error ("Failed to create a directory for the test dictionary: %s", + error->message); + + Dictionary *dict = g_malloc (sizeof *dict); + dict->tmp_dir = g_file_new_for_path (tmp_dir_path); + + static const gint dictionary_size = 8; + dict->data = generate_dictionary_data (dictionary_size); + GFile *dict_file = g_file_get_child (dict->tmp_dir, "test.dict"); + GFile *idx_file = g_file_get_child (dict->tmp_dir, "test.idx"); + + GFileOutputStream *dict_stream = g_file_replace (dict_file, + NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); + if (!dict_stream) + g_error ("Failed to create the .dict file: %s", error->message); + + GFileOutputStream *idx_stream = g_file_replace (idx_file, + NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); + if (!idx_stream) + g_error ("Failed to create the .idx file: %s", error->message); + + GDataOutputStream *dict_data + = g_data_output_stream_new (G_OUTPUT_STREAM (dict_stream)); + g_data_output_stream_set_byte_order + (dict_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + GDataOutputStream *idx_data + = g_data_output_stream_new (G_OUTPUT_STREAM (idx_stream)); + g_data_output_stream_set_byte_order + (idx_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); + + gint i; + gsize written; + for (i = 0; i < dictionary_size; i++) + { + TestEntry *te = &g_array_index (dict->data, TestEntry, i); + goffset offset = g_seekable_tell (G_SEEKABLE (dict_stream)); + + if (!g_data_output_stream_put_string (dict_data, + te->meaning, NULL, &error) + || !g_data_output_stream_put_byte (dict_data, '\0', NULL, &error) + || !g_output_stream_write_all (G_OUTPUT_STREAM (dict_stream), + te->data, te->data_size, &written, 
NULL, &error)) + g_error ("Write to dictionary failed: %s", error->message); + + if (!g_data_output_stream_put_string (idx_data, + te->word, NULL, &error) + || !g_data_output_stream_put_byte (idx_data, '\0', NULL, &error) + || !g_data_output_stream_put_uint32 (idx_data, offset, NULL, &error) + || !g_data_output_stream_put_uint32 (idx_data, + g_seekable_tell (G_SEEKABLE (dict_stream)) - offset, NULL, &error)) + g_error ("Write to index failed: %s", error->message); + } + + gint index_size = g_seekable_tell (G_SEEKABLE (idx_stream)); + + if (!g_output_stream_close (G_OUTPUT_STREAM (dict_stream), NULL, &error)) + g_error ("Failed to close the .dict file: %s", error->message); + if (!g_output_stream_close (G_OUTPUT_STREAM (idx_stream), NULL, &error)) + g_error ("Failed to close the .idx file: %s", error->message); + + g_object_unref (dict_data); + g_object_unref (idx_data); + + g_object_unref (dict_stream); + g_object_unref (idx_stream); + + gchar *ifo_contents = g_strdup_printf + ("StarDict's dict ifo file\n" + "version=3.0.0\n" + "bookname=Test Book\n" + "wordcount=%d\n" + "idxfilesize=%d\n" + "idxoffsetbits=32\n" + "author=Lyra Heartstrings\n" + "email=lyra@equestria.net\n" + "website=http://equestria.net\n" + "description=Test dictionary\n" + "date=21.12.2012\n" + "sametypesequence=mX\n", + dictionary_size, index_size); + + g_object_unref (dict_file); + g_object_unref (idx_file); + + dict->ifo_file = g_file_get_child (dict->tmp_dir, "test.ifo"); + if (!g_file_replace_contents (dict->ifo_file, + ifo_contents, strlen (ifo_contents), + NULL, FALSE, G_FILE_CREATE_NONE, NULL, NULL, &error)) + g_error ("Failed to create the .ifo file: %s", error->message); + g_free (ifo_contents); + + g_message ("Successfully created a test dictionary in %s", tmp_dir_path); + g_free (tmp_dir_path); + + return dict; +} + +static void +dictionary_destroy (Dictionary *dict) +{ + GError *error; + if (!remove_recursive (dict->tmp_dir, &error)) + g_error ("Failed to delete the temporary 
directory: %s", + error->message); + + g_message ("The test dictionary has been deleted"); + + g_object_unref (dict->tmp_dir); + g_object_unref (dict->ifo_file); + g_array_free (dict->data, TRUE); + g_free (dict); +} + +// --- Testing ----------------------------------------------------------------- + +typedef struct dict_fixture DictFixture; + +struct dict_fixture +{ + StardictDict *dict; +}; + +static void +dict_setup (DictFixture *fixture, gconstpointer test_data) +{ + Dictionary *dict = (Dictionary *) test_data; + + gchar *ifo_filename = g_file_get_path (dict->ifo_file); + fixture->dict = stardict_dict_new (ifo_filename, NULL); + g_free (ifo_filename); +} + +static void +dict_teardown (DictFixture *fixture, G_GNUC_UNUSED gconstpointer test_data) +{ + g_object_unref (fixture->dict); +} + +static void +dict_test_list (gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + + gchar *tmp_path = g_file_get_path (dict->tmp_dir); + GList *dictionaries = stardict_list_dictionaries (tmp_path); + g_free (tmp_path); + + g_assert (dictionaries != NULL); + g_assert (dictionaries->next == NULL); + + StardictInfo *info = dictionaries->data; + GFile *ifo_file = g_file_new_for_path (stardict_info_get_path (info)); + g_assert (g_file_equal (ifo_file, dict->ifo_file) == TRUE); + g_object_unref (ifo_file); + + g_list_free_full (dictionaries, (GDestroyNotify) stardict_info_free); +} + +static void +dict_test_new (gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + + gchar *ifo_filename = g_file_get_path (dict->ifo_file); + StardictDict *sd = stardict_dict_new (ifo_filename, NULL); + g_free (ifo_filename); + + g_assert (sd != NULL); + g_object_unref (sd); +} + +static void +dict_test_data_entry (StardictDict *sd, TestEntry *entry) +{ + gboolean success; + StardictIterator *sdi = + stardict_dict_search (sd, entry->word, &success); + + g_assert (success == TRUE); + g_assert (sdi != NULL); + g_assert (stardict_iterator_is_valid (sdi)); + + 
const gchar *word = stardict_iterator_get_word (sdi); + g_assert_cmpstr (word, ==, entry->word); + + StardictEntry *sde = stardict_iterator_get_entry (sdi); + g_assert (sde != NULL); + + const GList *fields = stardict_entry_get_fields (sde); + const StardictEntryField *sdef; + g_assert (fields != NULL); + g_assert (fields->data != NULL); + + sdef = fields->data; + g_assert (sdef->type == 'm'); + g_assert_cmpstr (sdef->data, ==, entry->meaning); + + fields = fields->next; + g_assert (fields != NULL); + g_assert (fields->data != NULL); + + sdef = fields->data; + g_assert (sdef->type == 'X'); + g_assert_cmpuint (sdef->data_size, ==, entry->data_size); + g_assert (memcmp (sdef->data, entry->data, entry->data_size) == 0); + + fields = fields->next; + g_assert (fields == NULL); + + g_object_unref (sde); + g_object_unref (sdi); +} + +static void +dict_test_data (DictFixture *fixture, gconstpointer user_data) +{ + Dictionary *dict = (Dictionary *) user_data; + GArray *data = dict->data; + StardictDict *sd = fixture->dict; + + guint i; + for (i = 0; i < data->len; i++) + { + TestEntry *entry = &g_array_index (data, TestEntry, i); + dict_test_data_entry (sd, entry); + } +} + +int +main (int argc, char *argv[]) +{ + g_test_init (&argc, &argv, NULL); + if (glib_check_version (2, 36, 0)) + g_type_init (); + + Dictionary *dict = dictionary_create (); + + g_test_add_data_func ("/dict/list", dict, dict_test_list); + g_test_add_data_func ("/dict/new", dict, dict_test_new); + + g_test_add ("/dict/data", DictFixture, dict, + dict_setup, dict_test_data, dict_teardown); + + int result = g_test_run (); + dictionary_destroy (dict); + return result; +} |