From 4f4a86529adc896ed4d3bc922ef88aec89e3fd53 Mon Sep 17 00:00:00 2001 From: Přemysl Janouch Date: Sat, 4 May 2013 16:14:25 +0200 Subject: Initial commit --- .gitignore | 13 + Makefile | 22 ++ sdcli.c | 274 +++++++++++++++ stardict.c | 1081 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ stardict.h | 214 ++++++++++++ 5 files changed, 1604 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 sdcli.c create mode 100644 stardict.c create mode 100644 stardict.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fee94d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Backup files +*.*~ +# Compile output +/sdcli +*.o +# IDE project files +/sdcli.creator* +/sdcli.includes +/sdcli.files +/sdcli.config +# Blah +/GNUmakefile +/.clang_complete diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e354dff --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +SHELL = /bin/sh + +pkgs = ncursesw glib-2.0 gio-2.0 +targets = sdcli + +CC = clang +CFLAGS = -ggdb -std=gnu99 -Wall -Wextra -Wno-missing-field-initializers \ + `pkg-config --cflags $(pkgs)` +LDFLAGS = `pkg-config --libs $(pkgs)` + +.PHONY: all clean + +all: $(targets) + +clean: + rm -f $(targets) *.o + +sdcli: sdcli.o stardict.o + $(CC) $^ -o $@ $(LDFLAGS) + +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ diff --git a/sdcli.c b/sdcli.c new file mode 100644 index 0000000..8a8f4ca --- /dev/null +++ b/sdcli.c @@ -0,0 +1,274 @@ +/* + * StarDict console UI + * + * Copyright (c) 2013, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#define _XOPEN_SOURCE_EXTENDED /**< Yes, we want ncursesw. */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "stardict.h"
+
+
+#define KEY_ESCAPE 27 /**< Curses doesn't define this. */
+
+// --- Utilities ---------------------------------------------------------------
+
+/** Print a printf-style formatted message on the standard screen
+ *  and refresh it right away.
+ *  @param[in] format  printf-style format string, followed by its arguments
+ */
+static void
+display (const gchar *format, ...)
+{
+	va_list ap;
+
+	va_start (ap, format);
+	vw_printw (stdscr, format, ap);
+	va_end (ap);
+	refresh ();
+}
+
+/** Convert a wide character to a NUL-terminated multi-byte string.
+ *  The process is aborted when the character cannot be converted.
+ *  @param[in] ch  The wide character to convert
+ *  @return Pointer to a static buffer, overwritten by the next call
+ */
+static gchar *
+wchar_to_mb (wchar_t ch)
+{
+	/* Convert the character back to a multi-byte sequence.  Each of the
+	 * two wcrtomb() calls below may store up to MB_LEN_MAX bytes: the
+	 * second one emits the terminating NUL, possibly preceded by
+	 * a sequence returning a stateful encoding to its initial shift
+	 * state.  The buffer has to have room for both. */
+	static gchar buffer[2 * MB_LEN_MAX];
+	size_t len = wcrtomb (buffer, ch, NULL);
+
+	/* This shouldn't happen.  It would mean that the user has
+	 * somehow managed to enter something inexpressable in the
+	 * current locale. */
+	if (len == (size_t) -1)
+		abort ();
+
+	if (wcrtomb (buffer + len, L'\0', NULL) == (size_t) -1)
+		abort ();
+
+	return buffer;
+}
+
+/** Like wchar_to_mb() but CR, LF and TAB are returned as C-style escape
+ *  sequences, so that they can be displayed on a single line.
+ *  @param[in] ch  The wide character to convert
+ *  @return A string literal or the static buffer of wchar_to_mb()
+ */
+static const gchar *
+wchar_to_mb_escaped (wchar_t ch)
+{
+	switch (ch)
+	{
+	case L'\r': return "\\r";
+	case L'\n': return "\\n";
+	case L'\t': return "\\t";
+	default: return wchar_to_mb (ch);
+	}
+}
+
+/** Call poll() and transparently restart it whenever it gets interrupted
+ *  by a signal.
+ *  @return The result of the first poll() call that didn't fail with EINTR
+ */
+static int
+poll_restart (struct pollfd *fds, nfds_t nfds, int timeout)
+{
+	int ret;
+	do
+		ret = poll (fds, nfds, timeout);
+	while (ret == -1 && errno == EINTR);
+	return ret;
+}
+
+// --- SIGWINCH ----------------------------------------------------------------
+
+static int g_winch_pipe[2]; /**< SIGWINCH signalling pipe. 
*/
+static void (*g_old_winch_handler) (int);
+
+/** SIGWINCH handler: chain to the previously installed handler and wake up
+ *  the main loop through the signalling pipe.
+ */
+static void
+winch_handler (int signum)
+{
+	/* A signal handler may interrupt code that is about to inspect errno,
+	 * and write() below is free to change it. */
+	int saved_errno = errno;
+
+	/* Call the ncurses handler. */
+	if (g_old_winch_handler)
+		g_old_winch_handler (signum);
+
+	/* And wake up the poll() call.  There is nothing sensible that could
+	 * be done about a failure from within a signal handler. */
+	ssize_t written = write (g_winch_pipe[1], "x", 1);
+	(void) written;
+
+	errno = saved_errno;
+}
+
+/** Install winch_handler() for SIGWINCH, remembering the handler that has
+ *  been active so far in order for it to be chained to.
+ */
+static void
+install_winch_handler (void)
+{
+	struct sigaction act, oldact;
+
+	act.sa_handler = winch_handler;
+	act.sa_flags = SA_RESTART;
+	sigemptyset (&act.sa_mask);
+
+	/* On failure "oldact" would be left uninitialized. */
+	if (sigaction (SIGWINCH, &act, &oldact) == -1)
+		abort ();
+
+	/* Save the ncurses handler. */
+	if (oldact.sa_handler != SIG_DFL
+	 && oldact.sa_handler != SIG_IGN)
+		g_old_winch_handler = oldact.sa_handler;
+}
+
+// --- Event handlers ----------------------------------------------------------
+
+/** A single input event as read by get_wch(). */
+typedef struct
+{
+	wint_t code;            //!< Character or KEY_* code
+	guint is_char : 1;      //!< Whether @a code is a character
+	MEVENT mouse;           //!< Only filled in when @a code is KEY_MOUSE
+}
+CursesEvent;
+
+/** Describe the event on the screen.
+ *  @return FALSE if the user has asked the program to quit
+ */
+static gboolean
+process_curses_event (CursesEvent *event)
+{
+	if (!event->is_char)
+	{
+		switch (event->code)
+		{
+		case KEY_RESIZE:
+			/* COLS and LINES are plain ints. */
+			display ("Screen has been resized to %d x %d\n",
+				COLS, LINES);
+			break;
+		case KEY_MOUSE:
+			/* The width of mmask_t depends on the ncurses build. */
+			display ("Mouse event at (%d, %d), state %#lx\n",
+				event->mouse.x, event->mouse.y,
+				(unsigned long) event->mouse.bstate);
+			break;
+		default:
+			display ("Keyboard event: non-character: %u\n",
+				(unsigned) event->code);
+		}
+		return TRUE;
+	}
+
+	display ("Keyboard event: character: '%s'\n",
+		wchar_to_mb_escaped (event->code));
+
+	if (event->code == L'q' || event->code == KEY_ESCAPE)
+	{
+		display ("Quitting...\n");
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/** Process all input that curses has got available at the moment.
+ *  @return FALSE if the user has asked the program to quit
+ */
+static gboolean
+process_stdin_input (void)
+{
+	CursesEvent event;
+	int sta;
+
+	while ((sta = get_wch (&event.code)) != ERR)
+	{
+		event.is_char = (sta == OK);
+		if (sta == KEY_CODE_YES && event.code == KEY_MOUSE
+		 && getmouse (&event.mouse) == ERR)
+			abort ();
+		if (!process_curses_event (&event))
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
+/** Consume a wake-up byte from the SIGWINCH pipe and process the input
+ *  that curses has queued in the meantime.
+ *  @param[in] fd  The reading end of the signalling pipe
+ *  @return FALSE if the user has asked the program to quit
+ */
+static gboolean
+process_winch_input (int fd)
+{
+	char c;
+
+	/* The byte itself carries no information, and a failure to read it
+	 * is deliberately ignored. */
+	ssize_t unused = read (fd, &c, 1);
+	(void) unused;
+
+	return process_stdin_input ();
+}
+
+// --- Main 
--------------------------------------------------------------------
+
+/** Program entry point: parse the command line, set up curses and
+ *  the SIGWINCH pipe, and report input events until the user quits.
+ *  @return 0 on a regular exit; most failures abort the process
+ */
+int
+main (int argc, char *argv[])
+{
+	static GOptionEntry entries[] =
+	{
+		{ NULL }
+	};
+
+	if (!setlocale (LC_ALL, ""))
+		abort ();
+
+	GError *error = NULL;
+	GOptionContext *ctx = g_option_context_new ("- StarDict console UI");
+	g_option_context_add_main_entries (ctx, entries, NULL);
+	if (!g_option_context_parse (ctx, &argc, &argv, &error))
+	{
+		g_print ("option parsing failed: %s\n", error->message);
+		exit (EXIT_FAILURE);
+	}
+	g_option_context_free (ctx);
+
+	if (!initscr ()
+	 || cbreak () == ERR
+	 || noecho () == ERR)
+		abort ();
+
+	keypad (stdscr, TRUE); /* Enable character processing. */
+	nodelay (stdscr, TRUE); /* Don't block on get_wch(). */
+
+	mousemask (ALL_MOUSE_EVENTS, NULL);
+
+	display ("Press Q, Escape or ^C to quit\n");
+
+	if (pipe (g_winch_pipe) == -1)
+		abort ();
+
+	install_winch_handler ();
+
+// --- Message loop ------------------------------------------------------------
+
+	struct pollfd pollfd[2];
+
+	pollfd[0].fd = fileno (stdin);
+	pollfd[0].events = POLLIN;
+	pollfd[1].fd = g_winch_pipe[0];
+	pollfd[1].events = POLLIN;
+
+	while (TRUE)
+	{
+		/* The count has to match the array, or poll() would read
+		 * descriptors from beyond its end. */
+		if (poll_restart (pollfd, G_N_ELEMENTS (pollfd), -1) == -1)
+			abort ();
+
+		if ((pollfd[0].revents & POLLIN)
+		 && !process_stdin_input ())
+			break;
+		/* pollfd[1] is the reading end of the SIGWINCH pipe. */
+		if ((pollfd[1].revents & POLLIN)
+		 && !process_winch_input (pollfd[1].fd))
+			break;
+	}
+
+// --- Cleanup -----------------------------------------------------------------
+
+	endwin ();
+
+	if (close (g_winch_pipe[0]) == -1
+	 || close (g_winch_pipe[1]) == -1)
+		abort ();
+
+	return 0;
+}
+
diff --git a/stardict.c b/stardict.c
new file mode 100644
index 0000000..a4be941
--- /dev/null
+++ b/stardict.c
@@ -0,0 +1,1081 @@
+/*
+ * stardict.c: StarDict API
+ *
+ * Copyright (c) 2013, Přemysl Janouch
+ * All rights reserved.
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include +#include +#include +#include + +#include +#include + +#include "stardict.h" + + +/** Describes a single entry in the dictionary index. */ +typedef struct stardict_index_entry StardictIndexEntry; + +/** Describes a single entry in the synonyms index. */ +typedef struct stardict_synonym_entry StardictSynonymEntry; + +/** Helper class for reading .ifo files. 
*/
+typedef struct ifo_reader IfoReader;
+
+
+typedef enum stardict_version StardictVersion;
+enum stardict_version { SD_VERSION_2_4_2, SD_VERSION_3_0_0 };
+
+/* Apart from the path and the version, the fields mirror the keys
+ * of the .ifo file. */
+struct stardict_info
+{
+	gchar * path;                   //!< Filesystem path of the dictionary
+	StardictVersion version;        //!< The "version" key
+
+	gchar * book_name;              //!< The "bookname" key
+	gulong word_count;              //!< The "wordcount" key
+	gulong syn_word_count;          //!< The "synwordcount" key
+	gulong idx_filesize;            //!< The "idxfilesize" key
+	gulong idx_offset_bits;         //!< The "idxoffsetbits" key
+	gchar * author;                 //!< The "author" key
+	gchar * email;                  //!< The "email" key
+	gchar * website;                //!< The "website" key
+	gchar * description;            //!< The "description" key
+	gchar * date;                   //!< The "date" key
+	gchar * same_type_sequence;     //!< The "sametypesequence" key
+};
+
+struct stardict_index_entry
+{
+	gchar * name; //!< The word in utf-8
+	guint64 data_offset; //!< Offset of the definition
+	guint32 data_size; //!< Size of the definition
+};
+
+struct stardict_synonym_entry
+{
+	gchar * word; //!< A synonymous word
+	guint32 original_word; //!< The original word's index
+};
+
+struct ifo_reader
+{
+	gchar * data; //!< File data terminated with \0
+	gchar * data_end; //!< Where the final \0 char. is
+
+	gchar * start; //!< Start of the current token
+
+	gchar * key; //!< The key (points into @a data)
+	gchar * value; //!< The value (points into @a data)
+};
+
+// --- Utilities ---------------------------------------------------------------
+
+/** Append everything that is left in the stream to a byte array.
+ *  @param[in,out] ba     The array to append the data to
+ *  @param[in]     is     The stream to drain
+ *  @param[out]    error  Location for an error, or NULL
+ *  @return TRUE once a short read has been seen, FALSE on a read failure
+ */
+static gboolean
+stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
+{
+	guint8 chunk[1024 * 64];
+	gsize chunk_len;
+
+	for (;;)
+	{
+		if (!g_input_stream_read_all (is, chunk, sizeof chunk,
+			&chunk_len, NULL, error))
+			return FALSE;
+
+		g_byte_array_append (ba, chunk, chunk_len);
+
+		// A chunk that isn't completely full is the last one
+		if (chunk_len < sizeof chunk)
+			return TRUE;
+	}
+}
+
+/** Read a null-terminated string from a data input stream. 
*/
+static gchar *
+stream_read_string (GDataInputStream *dis, GError **error)
+{
+	// An empty string with a length of one makes NUL the stop character
+	gsize length;
+	gchar *s = g_data_input_stream_read_upto (dis, "", 1, &length, NULL, error);
+	if (!s)
+		return NULL;
+
+	// The stop character itself is left in the stream, consume it
+	GError *err = NULL;
+	g_data_input_stream_read_byte (dis, NULL, &err);
+	if (err)
+	{
+		g_free (s);
+		g_propagate_error (error, err);
+		return NULL;
+	}
+
+	return s;
+}
+
+/** String compare function used for StarDict indexes: a case-insensitive
+ *  ASCII comparison first, with strcmp() deciding between strings that
+ *  differ in nothing but case.
+ */
+static inline gint
+stardict_strcmp (const gchar *s1, const gchar *s2)
+{
+	gint a;
+	a = g_ascii_strcasecmp (s1, s2);
+	return a ? a : strcmp (s1, s2);
+}
+
+/** After this statement, the element has been found and its index is stored
+ * in the variable "imid".
+ *
+ * @a max is the index of the last element, i.e. an inclusive bound, which
+ * means that "imid" may legitimately become equal to "imax".
+ * @a compare is an expression comparing the key against the element
+ * at index "imid" in the manner of strcmp(). */
+#define BINARY_SEARCH_BEGIN(max, compare) \
+	gint imin = 0, imax = (max), imid; \
+	while (imin <= imax) { \
+		imid = imin + (imax - imin) / 2; \
+		g_assert (imid >= imin && imid <= imax); \
+		gint cmp = (compare); \
+		if (cmp > 0) imin = imid + 1; \
+		else if (cmp < 0) imax = imid - 1; \
+		else {
+
+/** After this statement, the binary search has failed and "imin" stores
+ * the position where the element can be inserted. 
*/
+#define BINARY_SEARCH_END \
+		} \
+	}
+
+// --- Errors ------------------------------------------------------------------
+
+/** Return the error domain used by this module. */
+GQuark
+stardict_error_quark (void)
+{
+	return g_quark_from_static_string ("stardict-error-quark");
+}
+
+// --- IFO reader --------------------------------------------------------------
+
+/** Load an .ifo file into memory and check its first line.
+ *  @param[out] ir     The reader to initialize
+ *  @param[in]  path   Path to the .ifo file
+ *  @param[out] error  Location for an error, or NULL
+ *  @return TRUE on success, in which case the reader has to be destroyed
+ *          with ifo_reader_free(); FALSE otherwise
+ */
+static gboolean
+ifo_reader_init (IfoReader *ir, const gchar *path, GError **error)
+{
+	gsize length;
+	gchar *contents;
+	if (!g_file_get_contents (path, &contents, &length, error))
+		return FALSE;
+
+	static const char first_line[] = "StarDict's dict ifo file\n";
+	const gsize first_line_len = sizeof first_line - 1;
+
+	// The data to check is in "contents", the reader is still uninitialized
+	if (length < first_line_len
+	 || strncmp (contents, first_line, first_line_len))
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: invalid header format", path);
+		g_free (contents);
+		return FALSE;
+	}
+
+	// "data" has to stay the start of the allocation since that is what
+	// ifo_reader_free() releases; parsing resumes after the first line
+	ir->data = contents;
+	ir->data_end = contents + length;
+	ir->start = contents + first_line_len;
+	ir->key = NULL;
+	ir->value = NULL;
+	return TRUE;
+}
+
+/** Release the file data held by the reader. */
+static void
+ifo_reader_free (IfoReader *ir)
+{
+	g_free (ir->data);
+}
+
+/** Read the next "key=value" line, storing pointers to the key and
+ *  the value within the reader.  The file data is modified in place.
+ *  @return 1 if a pair has been read, 0 at the end of the data,
+ *          -1 if the data is malformed
+ */
+static gint
+ifo_reader_read (IfoReader *ir)
+{
+	ir->key = NULL;
+	ir->value = NULL;
+
+	// Blank lines carry no information
+	while (ir->start < ir->data_end && *ir->start == '\n')
+		ir->start++;
+
+	gchar *p;
+	for (p = ir->start; p < ir->data_end; p++)
+	{
+		if (*p == '\n')
+		{
+			if (!ir->key)
+				return -1;
+
+			*p = 0;
+			ir->value = ir->start;
+			ir->start = p + 1;
+			return 1;
+		}
+
+		// Only the first '=' is a separator, any further ones
+		// are a part of the value
+		if (*p == '=' && !ir->key)
+		{
+			if (p == ir->start)
+				return -1;
+
+			*p = 0;
+			ir->key = ir->start;
+			ir->start = p + 1;
+		}
+	}
+
+	if (!ir->key)
+	{
+		if (p != ir->start)
+			return -1;
+		return 0;
+	}
+
+	// The last line lacks a newline; it is terminated by the \0
+	// that "data_end" points at
+	ir->value = ir->start;
+	ir->start = p;
+	return 1;
+}
+
+// --- StardictInfo ------------------------------------------------------------
+
+/** Return the filesystem path for the dictionary. */
+const gchar *
+stardict_info_get_path (StardictInfo *sdi)
+{
+	return sdi->path;
+}
+
+/** Return the name of the dictionary. */
+const gchar *
+stardict_info_get_book_name (StardictInfo *sdi)
+{
+	return sdi->book_name;
+}
+
+/** Return the word count of the dictionary. 
Note that this information comes
+ * from the .ifo file, while the dictionary could successfully load with
+ * a different count of word entries.
+ */
+gsize
+stardict_info_get_word_count (StardictInfo *sdi)
+{
+	return sdi->word_count;
+}
+
+/** Destroy the dictionary info object.  NULL is silently ignored. */
+void
+stardict_info_free (StardictInfo *sdi)
+{
+	if (!sdi)
+		return;
+
+	g_free (sdi->path);
+	g_free (sdi->book_name);
+	g_free (sdi->author);
+	g_free (sdi->email);
+	g_free (sdi->website);
+	g_free (sdi->description);
+	g_free (sdi->date);
+	g_free (sdi->same_type_sequence);
+	g_free (sdi);
+}
+
+#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) }
+
+/** Parse an .ifo file.
+ *  @param[out] sti    The structure to fill in; its previous contents are
+ *                     disregarded, and it is all zeros after a failure
+ *  @param[in]  path   Path to the .ifo file
+ *  @param[out] error  Location for an error, or NULL
+ *  @return TRUE on success, FALSE otherwise
+ */
+static gboolean
+load_ifo (StardictInfo *sti, const gchar *path, GError **error)
+{
+	static const struct
+	{
+		const gchar *name;
+		enum { IFO_STRING, IFO_NUMBER } type;
+		size_t offset;
+	}
+	ifo_keys[] =
+	{
+		DEFINE_IFO_KEY ("bookname", STRING, book_name),
+		DEFINE_IFO_KEY ("wordcount", NUMBER, word_count),
+		DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count),
+		DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize),
+		DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits),
+		DEFINE_IFO_KEY ("author", STRING, author),
+		DEFINE_IFO_KEY ("email", STRING, email),
+		DEFINE_IFO_KEY ("website", STRING, website),
+		DEFINE_IFO_KEY ("description", STRING, description),
+		DEFINE_IFO_KEY ("date", STRING, date),
+		DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence)
+	};
+
+	// The callers hand over uninitialized memory, whereas the code below
+	// relies on unset fields being NULL or zero
+	memset (sti, 0, sizeof *sti);
+
+	IfoReader ir;
+	if (!ifo_reader_init (&ir, path, error))
+		return FALSE;
+
+	gboolean ret_val = FALSE;
+
+	if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version"))
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: version not specified", path);
+		goto error;
+	}
+
+	if (!strcmp (ir.value, "2.4.2"))
+		sti->version = SD_VERSION_2_4_2;
+	else if (!strcmp (ir.value, "3.0.0"))
+		sti->version = SD_VERSION_3_0_0;
+	else
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: invalid version: %s", path, ir.value);
+		goto error;
+	}
+
+	gint ret;
+	while ((ret = ifo_reader_read (&ir)) == 1)
+	{
+		guint i;
+		for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++)
+			if (!strcmp (ir.key, ifo_keys[i].name))
+				break;
+
+		if (i == G_N_ELEMENTS (ifo_keys))
+		{
+			// This isn't a failure, so the GError must stay untouched
+			g_debug ("%s: unknown key, ignoring: %s", path, ir.key);
+			continue;
+		}
+
+		if (ifo_keys[i].type == IFO_STRING)
+		{
+			// Should the key repeat, the last occurence wins
+			gchar **member
+				= &G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset);
+			g_free (*member);
+			*member = g_strdup (ir.value);
+			continue;
+		}
+
+		// Otherwise it has to be IFO_NUMBER: a non-empty string
+		// of decimal digits that fits the field
+		gchar *end;
+		guint64 number = g_ascii_strtoull (ir.value, &end, 10);
+		if (!g_ascii_isdigit (*ir.value) || *end || number > G_MAXULONG)
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+				"%s: invalid integer", path);
+			goto error;
+		}
+
+		G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = number;
+	}
+
+	if (ret == -1)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: option format error", path);
+		goto error;
+	}
+
+	// Only the first problem is reported as a GError can only be set once
+	// FIXME check for zeros, don't assume that 0 means for "not set"
+	if (!sti->book_name || !*sti->book_name)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: no book name specified", path);
+		goto error;
+	}
+	if (!sti->word_count)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: word count not specified", path);
+		goto error;
+	}
+	if (!sti->idx_filesize)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: .idx file size not specified", path);
+		goto error;
+	}
+
+	if (!sti->idx_offset_bits)
+		sti->idx_offset_bits = 32;
+	else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: wrong index offset bits: %lu", path, sti->idx_offset_bits);
+		goto error;
+	}
+
+	// The remaining dictionary files are located using this path
+	sti->path = g_strdup (path);
+	ret_val = TRUE;
+
+error:
+	if (!ret_val)
+	{
+		guint i;
+		for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++)
+			if (ifo_keys[i].type == IFO_STRING)
+				g_free (G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset));
+
+		// Don't leave any dangling pointers behind
+		memset (sti, 0, sizeof *sti);
+	}
+
+	ifo_reader_free (&ir);
+	return ret_val;
+}
+
+/** List all dictionary files located in a path.
+ * @return GList of StardictInfo, or NULL if the directory cannot be opened
+ * or contains no loadable dictionaries. Deallocate the list with:
+ * @code
+ * g_list_free_full (list, (GDestroyNotify) stardict_info_free);
+ * @endcode
+ */
+GList *
+stardict_list_dictionaries (const gchar *path)
+{
+	// An unreadable directory is a regular runtime condition;
+	// checking it first also means that there's nothing to clean up
+	GDir *dir = g_dir_open (path, 0, NULL);
+	if (!dir)
+		return NULL;
+
+	GPatternSpec *ps = g_pattern_spec_new ("*.ifo");
+	GList *dicts = NULL;
+	const gchar *name;
+	while ((name = g_dir_read_name (dir)))
+	{
+		if (!g_pattern_match_string (ps, name))
+			continue;
+
+		gchar *filename = g_build_filename (path, name, NULL);
+		StardictInfo *ifo = g_new0 (StardictInfo, 1);
+		if (load_ifo (ifo, filename, NULL))
+			dicts = g_list_prepend (dicts, ifo);
+		else
+			g_free (ifo);
+		g_free (filename);
+	}
+	g_dir_close (dir);
+	g_pattern_spec_free (ps);
+
+	// Prepending and reversing avoids walking the list for each item
+	return g_list_reverse (dicts);
+}
+
+// --- StardictDict ------------------------------------------------------------
+
+G_DEFINE_TYPE (StardictDict, stardict_dict, G_TYPE_OBJECT)
+
+/** Release everything owned by the dictionary object.  Any of the members
+ *  may still be unset if the object hasn't been fully constructed.
+ */
+static void
+stardict_dict_finalize (GObject *self)
+{
+	StardictDict *sd = STARDICT_DICT (self);
+
+	stardict_info_free (sd->info);
+	if (sd->index)
+		g_array_free (sd->index, TRUE);
+	if (sd->synonyms)
+		g_array_free (sd->synonyms, TRUE);
+	g_free (sd->dict);
+
+	G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self);
+}
+
+static void
+stardict_dict_class_init (StardictDictClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize;
+}
+
+static void
+stardict_dict_init (G_GNUC_UNUSED StardictDict *sd)
+{
+}
+
+/** Load a StarDict dictionary. 
+ * @param[in] filename Path to the .ifo file
+ * @param[out] error Location for an error, or NULL
+ * @return A new dictionary object, or NULL on failure
+ */
+StardictDict *
+stardict_dict_new (const gchar *filename, GError **error)
+{
+	// Zeroed, so that no field is ever read while uninitialized
+	StardictInfo *ifo = g_new0 (StardictInfo, 1);
+	if (!load_ifo (ifo, filename, error))
+	{
+		g_free (ifo);
+		return NULL;
+	}
+
+	StardictDict *sd = stardict_dict_new_from_info (ifo, error);
+	if (!sd) stardict_info_free (ifo);
+	return sd;
+}
+
+/** Load a StarDict index from a GIO input stream.
+ *  @param[in,out] sd     The dictionary; entries are appended to its index
+ *  @param[in]     is     The stream with uncompressed index data
+ *  @param[out]    error  Location for an error, or NULL
+ *  @return TRUE if the whole stream has been read, FALSE on failure
+ */
+static gboolean
+load_idx_internal (StardictDict *sd, GInputStream *is, GError **error)
+{
+	GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is));
+	g_data_input_stream_set_byte_order (dis,
+		G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
+
+	StardictIndexEntry entry;
+	GError *err = NULL;
+	// Ignoring "wordcount", just reading as long as we can
+	while ((entry.name = stream_read_string (dis, &err)))
+	{
+		if (sd->info->idx_offset_bits == 32)
+			entry.data_offset
+				= g_data_input_stream_read_uint32 (dis, NULL, &err);
+		else
+			entry.data_offset
+				= g_data_input_stream_read_uint64 (dis, NULL, &err);
+		if (err)
+			goto error;
+
+		entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err);
+		if (err)
+			goto error;
+
+		g_array_append_val (sd->index, entry);
+	}
+
+	// The loop ends both when there's nothing more to read and when
+	// reading fails; only the latter leaves an error behind
+	if (err)
+		goto error;
+
+	g_object_unref (dis);
+	return TRUE;
+
+error:
+	g_propagate_error (error, err);
+	g_free (entry.name);
+	g_object_unref (dis);
+	return FALSE;
+}
+
+/** Load a StarDict index. 
*/
+static gboolean
+load_idx (StardictDict *sd, const gchar *filename,
+	gboolean gzipped, GError **error)
+{
+	gboolean ret_val = FALSE;
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+
+	if (!fis)
+		goto cannot_open;
+
+	if (gzipped)
+	{
+		// Decompress the data on the fly
+		GZlibDecompressor *zd
+			= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP);
+		GInputStream *cis = g_converter_input_stream_new
+			(G_INPUT_STREAM (fis), G_CONVERTER (zd));
+
+		ret_val = load_idx_internal (sd, cis, error);
+
+		g_object_unref (cis);
+		g_object_unref (zd);
+	}
+	else
+		ret_val = load_idx_internal (sd, G_INPUT_STREAM (fis), error);
+
+	g_object_unref (fis);
+cannot_open:
+	g_object_unref (file);
+	return ret_val;
+}
+
+/** Load a StarDict synonyms file.
+ *  @param[in,out] sd        The dictionary; entries are appended
+ *                           to its list of synonyms
+ *  @param[in]     filename  Path to the .syn file
+ *  @param[out]    error     Location for an error, or NULL
+ *  @return TRUE if the whole file has been read, FALSE on failure
+ */
+static gboolean
+load_syn (StardictDict *sd, const gchar *filename, GError **error)
+{
+	gboolean ret_val = FALSE;
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+
+	if (!fis)
+		goto cannot_open;
+
+	GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis));
+	g_data_input_stream_set_byte_order (dis,
+		G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
+
+	StardictSynonymEntry entry;
+	GError *err = NULL;
+	// Ignoring "synwordcount", just reading as long as we can
+	while ((entry.word = stream_read_string (dis, &err)))
+	{
+		entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err);
+		if (err)
+			break;
+
+		g_array_append_val (sd->synonyms, entry);
+	}
+
+	// The error decides about the outcome, not the last word: the loop
+	// also ends with no word when reading the string itself fails
+	if (err)
+	{
+		g_free (entry.word);
+		g_propagate_error (error, err);
+	}
+	else
+		ret_val = TRUE;
+
+	g_object_unref (dis);
+	g_object_unref (fis);
+cannot_open:
+	g_object_unref (file);
+	return ret_val;
+}
+
+/** Destroy an index entry. */
+static void
+index_destroy_cb (gpointer sde)
+{
+	StardictIndexEntry *e = sde;
+	g_free (e->name);
+}
+
+/** Destroy a synonym entry. 
*/
+static void
+syn_destroy_cb (gpointer sde)
+{
+	StardictSynonymEntry *e = sde;
+	g_free (e->word);
+}
+
+/** Load StarDict dictionary data.
+ *  @param[in,out] sd        The dictionary to store the data in
+ *  @param[in]     filename  Path to the data file
+ *  @param[in]     gzipped   Whether the file needs to be decompressed
+ *  @param[out]    error     Location for an error, or NULL
+ *  @return TRUE on success, FALSE otherwise
+ */
+static gboolean
+load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped,
+	GError **error)
+{
+	gboolean ret_val = FALSE;
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+
+	if (!fis)
+		goto cannot_open;
+
+	// Just read it all, as it is, into memory
+	GByteArray *ba = g_byte_array_new ();
+	if (gzipped)
+	{
+		GZlibDecompressor *zd
+			= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP);
+		GInputStream *cis = g_converter_input_stream_new
+			(G_INPUT_STREAM (fis), G_CONVERTER (zd));
+
+		ret_val = stream_read_all (ba, cis, error);
+
+		g_object_unref (cis);
+		g_object_unref (zd);
+	}
+	else
+		ret_val = stream_read_all (ba, G_INPUT_STREAM (fis), error);
+
+	if (!ret_val)
+	{
+		g_byte_array_free (ba, TRUE);
+		goto reading_failed;
+	}
+
+	// The length has to be taken before the array gets disposed of
+	sd->dict_length = ba->len;
+	sd->dict = g_byte_array_free (ba, FALSE);
+
+reading_failed:
+	g_object_unref (fis);
+cannot_open:
+	g_object_unref (file);
+	return ret_val;
+}
+
+/** Load a StarDict dictionary.
+ * @param[in] sdi Parsed .ifo data. The dictionary takes ownership of it
+ * on success; on failure it stays with the caller.
+ * @param[out] error Location for an error, or NULL
+ * @return A new dictionary object, or NULL on failure
+ */
+StardictDict *
+stardict_dict_new_from_info (StardictInfo *sdi, GError **error)
+{
+	g_return_val_if_fail (sdi != NULL, NULL);
+	g_return_val_if_fail (sdi->path != NULL, NULL);
+
+	StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL);
+	sd->info = sdi;
+	sd->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry));
+	g_array_set_clear_func (sd->index, index_destroy_cb);
+	sd->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry));
+	g_array_set_clear_func (sd->synonyms, syn_destroy_cb);
+
+	// All the files of a dictionary only differ in their extension
+	const gchar *dot = strrchr (sdi->path, '.');
+	gchar *base = dot ? g_strndup (sdi->path, dot - sdi->path)
+		: g_strdup (sdi->path);
+
+	// g_file_test() succeeds when any of the tests passed to it does,
+	// so G_FILE_TEST_EXISTS must not be added: it would let directories in
+	gchar *base_idx = g_strconcat (base, ".idx", NULL);
+	gboolean ret = FALSE;
+	if (g_file_test (base_idx, G_FILE_TEST_IS_REGULAR))
+		ret = load_idx (sd, base_idx, FALSE, error);
+	else
+	{
+		gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL);
+		g_free (base_idx);
+		base_idx = base_idx_gz;
+
+		if (g_file_test (base_idx, G_FILE_TEST_IS_REGULAR))
+			ret = load_idx (sd, base_idx, TRUE, error);
+		else
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND,
+				"%s: cannot find index file", sdi->path);
+		}
+	}
+	g_free (base_idx);
+
+	if (!ret)
+		goto error;
+
+	gchar *base_dict = g_strconcat (base, ".dict", NULL);
+	ret = FALSE;
+	if (g_file_test (base_dict, G_FILE_TEST_IS_REGULAR))
+		ret = load_dict (sd, base_dict, FALSE, error);
+	else
+	{
+		gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL);
+		g_free (base_dict);
+		base_dict = base_dict_dz;
+
+		if (g_file_test (base_dict, G_FILE_TEST_IS_REGULAR))
+			ret = load_dict (sd, base_dict, TRUE, error);
+		else
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND,
+				"%s: cannot find dict file", sdi->path);
+		}
+	}
+	g_free (base_dict);
+
+	if (!ret)
+		goto error;
+
+	// Synonyms are optional, and so is being able to load them
+	gchar *base_syn = g_strconcat (base, ".syn", NULL);
+	if (g_file_test (base_syn, G_FILE_TEST_IS_REGULAR))
+		load_syn (sd, base_syn, NULL);
+	g_free (base_syn);
+
+	g_free (base);
+	return sd;
+
+error:
+	g_free (base);
+
+	// The finalizer destroys the arrays as well as the attached info.
+	// The former thus mustn't be freed here, and the latter needs to be
+	// replaced with an empty placeholder, as @a sdi stays with the caller.
+	sd->info = g_new0 (StardictInfo, 1);
+	g_object_unref (sd);
+	return NULL;
+}
+
+/** Return words for which the argument is a synonym of or NULL
+ * if there are no such words. 
+ * The result is a NULL-terminated array of strings, to be freed
+ * with g_strfreev().
+ */
+gchar **
+stardict_dict_get_synonyms (StardictDict *sd, const gchar *word)
+{
+	// The upper bound of the search would underflow otherwise
+	if (!sd->synonyms->len)
+		return NULL;
+
+	BINARY_SEARCH_BEGIN (sd->synonyms->len - 1, stardict_strcmp (word,
+		g_array_index (sd->synonyms, StardictSynonymEntry, imid).word))
+
+	// Back off to the first matching entry
+	while (imid > 0 && !stardict_strcmp (word,
+		g_array_index (sd->synonyms, StardictSynonymEntry, imid - 1).word))
+		imid--;
+
+	GPtrArray *array = g_ptr_array_new ();
+
+	// And add all matching entries from that position on to the array
+	do
+	{
+		guint32 original = g_array_index
+			(sd->synonyms, StardictSynonymEntry, imid).original_word;
+
+		// The value comes from a file and doesn't have to be sane
+		if (original < sd->index->len)
+			g_ptr_array_add (array, g_strdup (g_array_index
+				(sd->index, StardictIndexEntry, original).name));
+	}
+	while ((guint) ++imid < sd->synonyms->len && !stardict_strcmp (word,
+		g_array_index (sd->synonyms, StardictSynonymEntry, imid).word));
+
+	// There is no other way for the caller to tell where the array ends
+	g_ptr_array_add (array, NULL);
+	return (gchar **) g_ptr_array_free (array, FALSE);
+
+	BINARY_SEARCH_END
+
+	return NULL;
+}
+
+/** Search for a word.
+ * @param[in] word The word in utf-8 encoding
+ * @param[out] success TRUE if found
+ * @return An iterator object pointing to the word, or where it would be
+ */
+StardictIterator *
+stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success)
+{
+	// The upper bound of the search would underflow otherwise
+	if (!sd->index->len)
+	{
+		if (success) *success = FALSE;
+		return stardict_iterator_new (sd, 0);
+	}
+
+	BINARY_SEARCH_BEGIN (sd->index->len - 1, stardict_strcmp (word,
+		g_array_index (sd->index, StardictIndexEntry, imid).name))
+
+	if (success) *success = TRUE;
+	return stardict_iterator_new (sd, imid);
+
+	BINARY_SEARCH_END
+
+	if (success) *success = FALSE;
+	return stardict_iterator_new (sd, imin);
+}
+
+/** Destroy an entry field together with its data. */
+static void
+stardict_entry_field_free (StardictEntryField *sef)
+{
+	g_free (sef->data);
+	g_slice_free1 (sizeof *sef, sef);
+}
+
+/** Create an entry field that takes ownership of @a data. */
+static StardictEntryField *
+entry_field_new (gchar type, gpointer data, gsize data_size)
+{
+	StardictEntryField *sef = g_slice_alloc (sizeof *sef);
+	sef->type = type;
+	sef->data_size = data_size;
+	sef->data = data;
+	return sef;
+}
+
+/** Copy @a len bytes of @a data and terminate the copy with a NUL. */
+static gchar *
+entry_dup_text (const gchar *data, gsize len)
+{
+	gchar *copy = g_malloc (len + 1);
+	if (len)
+		memcpy (copy, data, len);
+	copy[len] = '\0';
+	return copy;
+}
+
+/** Copy @a len bytes of @a data; the result is NULL for no data. */
+static gpointer
+entry_dup_binary (const gchar *data, gsize len)
+{
+	gpointer copy = g_malloc (len);
+	if (len)
+		memcpy (copy, data, len);
+	return copy;
+}
+
+/** Find out the length of the NUL-terminated string at @a entry.
+ *  @return FALSE if there's no terminator before @a end
+ */
+static gboolean
+entry_text_length (const gchar *entry, const gchar *end, gsize *length)
+{
+	const gchar *nul = memchr (entry, '\0', end - entry);
+	if (!nul)
+		return FALSE;
+
+	*length = nul - entry;
+	return TRUE;
+}
+
+/** Read a big-endian 32-bit length prefix and move @a entry past it.
+ *  The data isn't necessarily aligned, hence it has to be copied out.
+ *  @return FALSE if either the prefix or the data that it describes
+ *          doesn't fit before @a end
+ */
+static gboolean
+entry_binary_length (const gchar **entry, const gchar *end, gsize *length)
+{
+	guint32 length_be;
+	if ((gsize) (end - *entry) < sizeof length_be)
+		return FALSE;
+
+	memcpy (&length_be, *entry, sizeof length_be);
+	*entry += sizeof length_be;
+
+	*length = GUINT32_FROM_BE (length_be);
+	return *length <= (gsize) (end - *entry);
+}
+
+/** Decode the fields of a definition in which each field is preceded
+ *  by a character denoting its type.
+ *  @param[in]  entry       The definition data
+ *  @param[in]  entry_size  Length of the definition data
+ *  @param[out] error       Location for an error, or NULL
+ *  @return GList of StardictEntryField, or NULL on invalid or empty data
+ */
+static GList *
+read_entries (const gchar *entry, gsize entry_size, GError **error)
+{
+	const gchar *end = entry + entry_size;
+	GList *result = NULL;
+
+	while (entry < end)
+	{
+		gchar type = *entry++;
+		gsize length;
+		if (g_ascii_islower (type))
+		{
+			// Lowercase types are NUL-terminated text
+			if (!entry_text_length (entry, end, &length))
+				goto error;
+
+			result = g_list_prepend (result, entry_field_new
+				(type, entry_dup_text (entry, length), length + 1));
+			entry += length + 1;
+		}
+		else
+		{
+			// Anything else is prefixed with its length
+			if (!entry_binary_length (&entry, end, &length))
+				goto error;
+
+			result = g_list_prepend (result, entry_field_new
+				(type, entry_dup_binary (entry, length), length));
+			entry += length;
+		}
+	}
+
+	return g_list_reverse (result);
+
+error:
+	g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+		"invalid data entry");
+	g_list_free_full (result,
+		(GDestroyNotify) stardict_entry_field_free);
+	return NULL;
+}
+
+/** Decode the fields of a definition whose field types are given
+ *  by the "sametypesequence" option rather than stored within the data.
+ *  The last field takes up all the remaining data, having neither
+ *  a terminator nor a length prefix.
+ *  @param[in]  entry       The definition data
+ *  @param[in]  entry_size  Length of the definition data
+ *  @param[in]  sts         The sequence of field types
+ *  @param[out] error       Location for an error, or NULL
+ *  @return GList of StardictEntryField, or NULL on invalid data
+ *          or an empty sequence
+ */
+static GList *
+read_entries_sts (const gchar *entry, gsize entry_size,
+	const gchar *sts, GError **error)
+{
+	const gchar *end = entry + entry_size;
+	GList *result = NULL;
+
+	while (*sts)
+	{
+		gchar type = *sts++;
+		gboolean is_final = !*sts;
+		gsize length;
+		if (g_ascii_islower (type))
+		{
+			if (is_final)
+				length = end - entry;
+			else if (!entry_text_length (entry, end, &length))
+				goto error;
+
+			result = g_list_prepend (result, entry_field_new
+				(type, entry_dup_text (entry, length), length + 1));
+
+			// Also skip the terminator where there is one
+			entry += length + !is_final;
+		}
+		else
+		{
+			if (is_final)
+				length = end - entry;
+			else if (!entry_binary_length (&entry, end, &length))
+				goto error;
+
+			result = g_list_prepend (result, entry_field_new
+				(type, entry_dup_binary (entry, length), length));
+			entry += length;
+		}
+	}
+
+	return g_list_reverse (result);
+
+error:
+	g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+		"invalid data entry");
+	g_list_free_full (result,
+		(GDestroyNotify) stardict_entry_field_free);
+	return NULL;
+}
+
+/** Return the data for the specified offset in the index. Unsafe:
+ *  @a offset isn't checked against the size of the index. */
+static StardictEntry *
+stardict_dict_get_entry (StardictDict *sd, guint32 offset)
+{
+	// TODO cache the entries
+	StardictIndexEntry *sie = &g_array_index (sd->index,
+		StardictIndexEntry, offset);
+
+	// Written in a way that cannot overflow
+	g_return_val_if_fail (sie->data_offset <= sd->dict_length
+		&& sie->data_size <= sd->dict_length - sie->data_offset, NULL);
+
+	// The type sequence can only be followed when there is one
+	const gchar *sts = sd->info->same_type_sequence;
+	GList *entries;
+	if (sts)
+		entries = read_entries_sts (sd->dict + sie->data_offset,
+			sie->data_size, sts, NULL);
+	else
+		entries = read_entries (sd->dict + sie->data_offset,
+			sie->data_size, NULL);
+
+	if (!entries)
+		return NULL;
+
+	StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL);
+	se->fields = entries;
+	return se;
+}
+
+// --- StardictEntry -----------------------------------------------------------
+
+G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT)
+
+/** Destroy the fields that the entry consists of. */
+static void
+stardict_entry_finalize (GObject *self)
+{
+	StardictEntry *sde = STARDICT_ENTRY (self);
+
+	g_list_free_full (sde->fields, (GDestroyNotify) stardict_entry_field_free);
+
+	G_OBJECT_CLASS (stardict_entry_parent_class)->finalize (self);
+}
+
+static void
+stardict_entry_class_init (StardictEntryClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = stardict_entry_finalize;
+}
+
+static void
+stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde)
+{
+}
+
+/** Return the entries present within the entry. 
+ * @return GList
+ */
+const GList *
+stardict_entry_get_fields (StardictEntry *sde)
+{
+	g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL);
+	return sde->fields;
+}
+
+// --- StardictIterator---------------------------------------------------------
+
+G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT)
+
+/** Drop the reference to the dictionary that the iterator belongs to. */
+static void
+stardict_iterator_finalize (GObject *self)
+{
+	StardictIterator *si = STARDICT_ITERATOR (self);
+
+	// An object that hasn't been created by stardict_iterator_new()
+	// has no owner set
+	if (si->owner)
+		g_object_unref (si->owner);
+
+	G_OBJECT_CLASS (stardict_iterator_parent_class)->finalize (self);
+}
+
+static void
+stardict_iterator_class_init (StardictIteratorClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = stardict_iterator_finalize;
+}
+
+static void
+stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd)
+{
+}
+
+/** Create a new iterator for the dictionary with offset @a offset.
+ *  The iterator holds a reference to the dictionary. */
+StardictIterator *
+stardict_iterator_new (StardictDict *sd, guint32 offset)
+{
+	g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL);
+
+	StardictIterator *si = g_object_new (STARDICT_TYPE_ITERATOR, NULL);
+	si->owner = g_object_ref (sd);
+	si->offset = offset;
+	return si;
+}
+
+/** Return the word in the index that the iterator points at, or NULL. */
+const gchar *
+stardict_iterator_get_word (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
+	if (!stardict_iterator_is_valid (sdi))
+		return NULL;
+	return g_array_index (sdi->owner->index,
+		StardictIndexEntry, sdi->offset).name;
+}
+
+/** Return the dictionary entry that the iterator points at, or NULL. */
+StardictEntry *
+stardict_iterator_get_entry (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
+	if (!stardict_iterator_is_valid (sdi))
+		return NULL;
+	return stardict_dict_get_entry (sdi->owner, sdi->offset);
+}
+
+/** Return whether the iterator points to a valid index entry. 
*/
+gboolean
+stardict_iterator_is_valid (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE);
+
+	/* The offset is signed, so rule out negative values first. */
+	if (sdi->offset < 0)
+		return FALSE;
+	return sdi->offset < sdi->owner->index->len;
+}
+
+/** Tell where within the dictionary index the iterator currently is.
+ *  Yields -1 when @a sdi is not an iterator.
+ */
+gint64
+stardict_iterator_get_offset (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1);
+
+	return sdi->offset;
+}
+
+/** Move the iterator: either to the absolute position @a offset, or,
+ *  when @a relative is set, by @a offset entries from where it is now.
+ *  The resulting position is not range-checked here.
+ */
+void
+stardict_iterator_set_offset
+	(StardictIterator *sdi, gint64 offset, gboolean relative)
+{
+	g_return_if_fail (STARDICT_IS_ITERATOR (sdi));
+
+	if (relative)
+		sdi->offset += offset;
+	else
+		sdi->offset = offset;
+}
diff --git a/stardict.h b/stardict.h
new file mode 100644
index 0000000..1086ac6
--- /dev/null
+++ b/stardict.h
@@ -0,0 +1,214 @@
+/*
+ * stardict.h: StarDict API
+ *
+ * This module doesn't cover all the functionality available to StarDict
+ * dictionaries, it should however be good enough for most of them that are
+ * freely available on the Internet.
+ *
+ * Copyright (c) 2013, Přemysl Janouch
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef STARDICT_H
+#define STARDICT_H
+
+/** An object intended for interacting with a dictionary.
*/
+typedef struct stardict_dict StardictDict;
+typedef struct stardict_dict_class StardictDictClass;
+
+/** Overall information about a particular dictionary. */
+typedef struct stardict_info StardictInfo;
+
+/** Handles the task of moving around the dictionary. */
+typedef struct stardict_iterator StardictIterator;
+typedef struct stardict_iterator_class StardictIteratorClass;
+
+/** Contains the decoded data for a single word definition. */
+typedef struct stardict_entry StardictEntry;
+typedef struct stardict_entry_class StardictEntryClass;
+
+/** A single field of a word definition. */
+typedef struct stardict_entry_field StardictEntryField;
+
+/* GObject boilerplate: for each of the three classes there is a macro
+ * yielding the type ID, followed by the instance cast, instance check,
+ * class cast, class check and get-class macros built on top of it. */
+
+/* StardictDict */
+#define STARDICT_TYPE_DICT (stardict_dict_get_type ())
+#define STARDICT_DICT(obj) \
+	(G_TYPE_CHECK_INSTANCE_CAST ((obj), \
+	STARDICT_TYPE_DICT, StardictDict))
+#define STARDICT_IS_DICT(obj) \
+	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
+	STARDICT_TYPE_DICT))
+#define STARDICT_DICT_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_CAST ((klass), \
+	STARDICT_TYPE_DICT, StardictDictClass))
+#define STARDICT_IS_DICT_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_TYPE ((klass), \
+	STARDICT_TYPE_DICT))
+#define STARDICT_DICT_GET_CLASS(obj) \
+	(G_TYPE_INSTANCE_GET_CLASS ((obj), \
+	STARDICT_TYPE_DICT, StardictDictClass))
+
+/* StardictIterator */
+#define STARDICT_TYPE_ITERATOR (stardict_iterator_get_type ())
+#define STARDICT_ITERATOR(obj) \
+	(G_TYPE_CHECK_INSTANCE_CAST ((obj), \
+	STARDICT_TYPE_ITERATOR, StardictIterator))
+#define STARDICT_IS_ITERATOR(obj) \
+	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
+	STARDICT_TYPE_ITERATOR))
+#define STARDICT_ITERATOR_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_CAST ((klass), \
+	STARDICT_TYPE_ITERATOR, StardictIteratorClass))
+#define STARDICT_IS_ITERATOR_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_TYPE ((klass), \
+	STARDICT_TYPE_ITERATOR))
+#define STARDICT_ITERATOR_GET_CLASS(obj) \
+	(G_TYPE_INSTANCE_GET_CLASS ((obj), \
+	STARDICT_TYPE_ITERATOR, StardictIteratorClass))
+
+/* StardictEntry */
+#define STARDICT_TYPE_ENTRY
(stardict_entry_get_type ())
+#define STARDICT_ENTRY(obj) \
+	(G_TYPE_CHECK_INSTANCE_CAST ((obj), \
+	STARDICT_TYPE_ENTRY, StardictEntry))
+#define STARDICT_IS_ENTRY(obj) \
+	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
+	STARDICT_TYPE_ENTRY))
+#define STARDICT_ENTRY_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_CAST ((klass), \
+	STARDICT_TYPE_ENTRY, StardictEntryClass))
+#define STARDICT_IS_ENTRY_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_TYPE ((klass), \
+	STARDICT_TYPE_ENTRY))
+#define STARDICT_ENTRY_GET_CLASS(obj) \
+	(G_TYPE_INSTANCE_GET_CLASS ((obj), \
+	STARDICT_TYPE_ENTRY, StardictEntryClass))
+
+// --- Errors ------------------------------------------------------------------
+
+/** General error type. */
+typedef enum {
+	STARDICT_ERROR_FILE_NOT_FOUND, //!< Some file was not found
+	STARDICT_ERROR_INVALID_DATA //!< Dictionary contains invalid data
+} StardictError;
+
+/** Error domain to be used with the StardictError codes in a GError. */
+#define STARDICT_ERROR (stardict_error_quark ())
+
+GQuark stardict_error_quark (void);
+
+// --- Dictionary information --------------------------------------------------
+
+/* StardictInfo is only declared in this header, not defined, so these
+ * accessors are the only way for users of the API to look inside. */
+const gchar *stardict_info_get_path (StardictInfo *sdi) G_GNUC_PURE;
+const gchar *stardict_info_get_book_name (StardictInfo *sdi) G_GNUC_PURE;
+gsize stardict_info_get_word_count (StardictInfo *sd) G_GNUC_PURE;
+void stardict_info_free (StardictInfo *sdi);
+
+/* NOTE(review): presumably returns a list of StardictInfo for the
+ * dictionaries found under @a path -- confirm against the implementation. */
+GList *stardict_list_dictionaries (const gchar *path);
+
+// --- Dictionaries ------------------------------------------------------------
+
+/** Instance data of StardictDict.  The iterator and entry-loading code
+ *  accesses @a index, @a dict and @a dict_length directly. */
+struct stardict_dict
+{
+	GObject parent_instance;
+	StardictInfo * info; //!< General information about the dict
+	GArray * index; //!< Word index
+	GArray * synonyms; //!< Synonyms
+	gpointer dict; //!< Dictionary data
+	gsize dict_length; //!< Length of the dict data in bytes
+};
+
+struct stardict_dict_class
+{
+	GObjectClass parent_class;
+};
+
+GType stardict_dict_get_type (void);
+/* Both constructors report failure through @a error; see StardictError
+ * for the codes defined by this module. */
+StardictDict *stardict_dict_new (const gchar *filename, GError **error);
+StardictDict *stardict_dict_new_from_info (StardictInfo *sdi, GError **error);
+StardictInfo *stardict_dict_get_info (StardictDict *sd);
+gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word);
+/* NOTE(review): @a success is an output parameter; presumably it tells
+ * whether @a word was matched exactly -- confirm against the implementation. */
+StardictIterator *stardict_dict_search
+	(StardictDict *sd, const gchar *word, gboolean *success);
+
+// --- Dictionary iterators ----------------------------------------------------
+
+/** Instance data of StardictIterator.  @a offset is signed, so relative
+ *  moves can take it below zero; stardict_iterator_is_valid() tells
+ *  whether it currently lies within the index. */
+struct stardict_iterator
+{
+	GObject parent_instance;
+	StardictDict * owner; //!< The related dictionary
+	gint64 offset; //!< Index within the dictionary
+};
+
+struct stardict_iterator_class
+{
+	GObjectClass parent_class;
+};
+
+GType stardict_iterator_get_type (void);
+/* @a index becomes the initial offset of the new iterator. */
+StardictIterator *stardict_iterator_new (StardictDict *sd, guint32 index);
+/* The two getters below return NULL when the iterator is not valid. */
+const gchar *stardict_iterator_get_word (StardictIterator *sdi) G_GNUC_PURE;
+StardictEntry *stardict_iterator_get_entry (StardictIterator *sdi);
+gboolean stardict_iterator_is_valid (StardictIterator *sdi) G_GNUC_PURE;
+gint64 stardict_iterator_get_offset (StardictIterator *sdi) G_GNUC_PURE;
+/* With @a relative set, @a offset is added to the current position,
+ * otherwise it replaces it. */
+void stardict_iterator_set_offset
+	(StardictIterator *sdi, gint64 offset, gboolean relative);
+
+/** Go to the next entry. */
+#define stardict_iterator_next(sdi) \
+	(stardict_iterator_set_offset (sdi, 1, TRUE))
+
+/** Go to the previous entry.
*/
+#define stardict_iterator_prev(sdi) \
+	(stardict_iterator_set_offset (sdi, -1, TRUE))
+
+// --- Dictionary entries ------------------------------------------------------
+
+/** Types of entry fields.  Each constant is the character that stands for
+ *  the field type, the same value that is stored in
+ *  StardictEntryField::type. */
+typedef enum {
+	STARDICT_FIELD_MEANING = 'm', //!< Word's purely textual meaning
+	STARDICT_FIELD_LOCALE = 'l', //!< Locale-dependent meaning
+	STARDICT_FIELD_PANGO = 'g', //!< Pango text markup language
+	STARDICT_FIELD_PHONETIC = 't', //!< English phonetic string
+	STARDICT_FIELD_XDXF = 'x', //!< xdxf language
+	STARDICT_FIELD_YB_KANA = 'y', //!< Chinese YinBiao or Japanese KANA
+	STARDICT_FIELD_POWERWORD = 'k', //!< KingSoft PowerWord's data
+	STARDICT_FIELD_MEDIAWIKI = 'w', //!< MediaWiki markup language
+	STARDICT_FIELD_HTML = 'h', //!< HTML codes
+	STARDICT_FIELD_RESOURCE = 'r', //!< Resource file list
+	STARDICT_FIELD_WAV = 'W', //!< WAV file
+	STARDICT_FIELD_PICTURE = 'P', //!< Picture file
+	STARDICT_FIELD_X = 'X' //!< Reserved, experimental extensions
+} StardictEntryFieldType;
+
+/** One decoded field of an entry. */
+struct stardict_entry_field
+{
+	gchar type; //!< Type of entry (StardictEntryFieldType)
+	gpointer data; //!< Raw data or null-terminated string
+	gsize data_size; //!< Size of data, including any \0
+};
+
+/** Instance data of StardictEntry; the fields are freed together with
+ *  the object. */
+struct stardict_entry
+{
+	GObject parent_instance;
+	GList * fields; //!< List of StardictEntryField's
+};
+
+struct stardict_entry_class
+{
+	GObjectClass parent_class;
+};
+
+GType stardict_entry_get_type (void);
+/* The returned list is the entry's own and must not be modified or freed. */
+const GList *stardict_entry_get_fields (StardictEntry *sde) G_GNUC_PURE;
+
+
#endif /* ! STARDICT_H */
-- 
cgit v1.2.3-70-g09d2