From 4f4a86529adc896ed4d3bc922ef88aec89e3fd53 Mon Sep 17 00:00:00 2001
From: Přemysl Janouch <p.janouch@gmail.com>
Date: Sat, 4 May 2013 16:14:25 +0200
Subject: Initial commit

---
 stardict.c | 1081 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1081 insertions(+)
 create mode 100644 stardict.c

(limited to 'stardict.c')

diff --git a/stardict.c b/stardict.c
new file mode 100644
index 0000000..a4be941
--- /dev/null
+++ b/stardict.c
@@ -0,0 +1,1081 @@
+/*
+ * stardict.c: StarDict API
+ *
+ * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <errno.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include "stardict.h"
+
+
+/** Describes a single entry in the dictionary index. */
+typedef struct stardict_index_entry    StardictIndexEntry;
+
+/** Describes a single entry in the synonyms index. */
+typedef struct stardict_synonym_entry  StardictSynonymEntry;
+
+/** Helper class for reading .ifo files. */
+typedef struct ifo_reader              IfoReader;
+
+
+/** Format version, as given by the "version" key of the .ifo file. */
+typedef enum stardict_version StardictVersion;
+enum stardict_version { SD_VERSION_2_4_2, SD_VERSION_3_0_0 };
+
+/** Contents of an .ifo file.  The members other than @a path and @a version
+ *  are filled in from the like-named .ifo keys by load_ifo(). */
+struct stardict_info
+{
+	gchar           * path;             //!< Filesystem path of the dictionary
+	StardictVersion   version;          //!< Value of the "version" key
+
+	gchar           * book_name;        //!< "bookname"
+	gulong            word_count;       //!< "wordcount"
+	gulong            syn_word_count;   //!< "synwordcount"
+	gulong            idx_filesize;     //!< "idxfilesize"
+	gulong            idx_offset_bits;  //!< "idxoffsetbits", 32 or 64
+	gchar           * author;           //!< "author"
+	gchar           * email;            //!< "email"
+	gchar           * website;          //!< "website"
+	gchar           * description;      //!< "description"
+	gchar           * date;             //!< "date"
+	gchar           * same_type_sequence; //!< "sametypesequence"
+};
+
+/** One record of the .idx file; @a data_offset and @a data_size delimit
+ *  the word's definition within the loaded dictionary data. */
+struct stardict_index_entry
+{
+	gchar           * name;             //!< The word in utf-8
+	guint64           data_offset;      //!< Offset of the definition
+	guint32           data_size;        //!< Size of the definition
+};
+
+/** One record of the .syn file, mapping a synonym to a position
+ *  in the dictionary index. */
+struct stardict_synonym_entry
+{
+	gchar           * word;             //!< A synonymous word
+	guint32           original_word;    //!< The original word's index
+};
+
+/** State of the in-place tokenizer that splits .ifo data into key=value
+ *  pairs; @a key and @a value are only valid until the buffer is freed. */
+struct ifo_reader
+{
+	gchar           * data;             //!< File data terminated with \0
+	gchar           * data_end;         //!< Where the final \0 char. is
+
+	gchar           * start;            //!< Start of the current token
+
+	gchar           * key;              //!< The key (points into @a data)
+	gchar           * value;            //!< The value (points into @a data)
+};
+
+// --- Utilities ---------------------------------------------------------------
+
+/** Append everything that can be read from @a is to @a ba.
+ *  @return TRUE once a read comes back short, FALSE if a read fails
+ *          (@a error is then set by GIO)
+ */
+static gboolean
+stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
+{
+	guint8 chunk[1024 * 64];
+	gsize got;
+
+	for (;;)
+	{
+		if (!g_input_stream_read_all (is, chunk, sizeof chunk,
+			&got, NULL, error))
+			return FALSE;
+
+		g_byte_array_append (ba, chunk, got);
+
+		// A read that did not fill the whole chunk ends the loop
+		if (got < sizeof chunk)
+			return TRUE;
+	}
+}
+
+/** Read a string up to a \0 byte from a data input stream and consume
+ *  the byte that follows it.
+ *  @return A newly allocated string, or NULL when nothing was returned
+ *          by the stream or when reading the following byte failed
+ */
+static gchar *
+stream_read_string (GDataInputStream *dis, GError **error)
+{
+	gsize length;
+	gchar *text = g_data_input_stream_read_upto
+		(dis, "", 1, &length, NULL, error);
+	if (text == NULL)
+		return NULL;
+
+	// The stop character itself stays in the stream, take it out
+	GError *byte_error = NULL;
+	g_data_input_stream_read_byte (dis, NULL, &byte_error);
+	if (byte_error == NULL)
+		return text;
+
+	g_free (text);
+	g_propagate_error (error, byte_error);
+	return NULL;
+}
+
+/** Ordering used for StarDict indexes: ASCII case-insensitive first,
+ *  with a plain strcmp() deciding between otherwise equal strings. */
+static inline gint
+stardict_strcmp (const gchar *s1, const gchar *s2)
+{
+	gint folded = g_ascii_strcasecmp (s1, s2);
+	if (folded != 0)
+		return folded;
+	return strcmp (s1, s2);
+}
+
+/** Begin a binary search over the sorted positions 0 .. @a max inclusive.
+ *  @a compare is an expression referring to "imid"; it has to be positive
+ *  when the sought key sorts after the element at "imid", negative when it
+ *  sorts before it, and zero on a match.  A @a max of -1 (or an unsigned
+ *  zero length minus one) denotes an empty range.
+ *
+ *  After this statement, the element has been found and its index is stored
+ *  in the variable "imid".  The code that follows runs inside the loop body,
+ *  so it should leave the function or the loop. */
+#define BINARY_SEARCH_BEGIN(max, compare)                                     \
+	gint imin = 0, imax = (gint) (max), imid = 0;                             \
+	while (imin <= imax) {                                                    \
+		imid = imin + (imax - imin) / 2;                                      \
+		gint cmp = (compare);                                                 \
+		if      (cmp > 0) imin = imid + 1;                                    \
+		else if (cmp < 0) imax = imid - 1;                                    \
+		else {
+
+/** After this statement, the binary search has failed and "imin" stores
+ *  the position where the element can be inserted. */
+#define BINARY_SEARCH_END                                                     \
+		}                                                                     \
+	}
+
+// --- Errors ------------------------------------------------------------------
+
+GQuark
+stardict_error_quark (void)
+{
+	return g_quark_from_static_string ("stardict-error-quark");
+}
+
+// --- IFO reader --------------------------------------------------------------
+
+/** Load the .ifo file at @a path and position @a ir right after its
+ *  mandatory first line, ready for ifo_reader_read().
+ *  @return TRUE on success; the reader then owns the file contents and has to
+ *          be released with ifo_reader_free().  FALSE with @a error set
+ *          if the file cannot be read or does not start with the magic line;
+ *          @a ir is left untouched in that case.
+ */
+static gboolean
+ifo_reader_init (IfoReader *ir, const gchar *path, GError **error)
+{
+	gsize length;
+	gchar *contents;
+	if (!g_file_get_contents (path, &contents, &length, error))
+		return FALSE;
+
+	static const char first_line[] = "StarDict's dict ifo file\n";
+	const gsize header_length = sizeof first_line - 1;
+	if (length < header_length
+	 || strncmp (contents, first_line, header_length))
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: invalid header format", path);
+		g_free (contents);
+		return FALSE;
+	}
+
+	// Keep the pointer that came from the allocator in "data", as that is
+	// what ifo_reader_free() releases; parsing begins past the header
+	ir->data = contents;
+	ir->data_end = contents + length;
+	ir->start = contents + header_length;
+	ir->key = NULL;
+	ir->value = NULL;
+	return TRUE;
+}
+
+/** Release the file contents held by the reader. */
+static void
+ifo_reader_free (IfoReader *ir)
+{
+	gchar *buffer = ir->data;
+	g_free (buffer);
+}
+
+/** Read the next "key=value" line, splitting it in place.
+ *  On success @a ir->key and @a ir->value point into the reader's buffer.
+ *  Only the first '=' on a line separates the key from the value, so that
+ *  values may contain the character themselves.
+ *  @return 1 if a pair has been read, 0 at the end of data,
+ *          -1 on a line without a key
+ */
+static gint
+ifo_reader_read (IfoReader *ir)
+{
+	ir->key = NULL;
+	ir->value = NULL;
+
+	gchar *p;
+	for (p = ir->start; p < ir->data_end; p++)
+	{
+		if (*p == '\n')
+		{
+			if (!ir->key)
+				return -1;
+
+			*p = 0;
+			ir->value = ir->start;
+			ir->start = p + 1;
+			return 1;
+		}
+
+		// Once the key is known, further '=' characters belong to the value
+		if (*p == '=' && !ir->key)
+		{
+			if (p == ir->start)
+				return -1;
+
+			*p = 0;
+			ir->key = ir->start;
+			ir->start = p + 1;
+		}
+	}
+
+	if (!ir->key)
+	{
+		// Leftover characters that do not form a pair are an error
+		if (p != ir->start)
+			return -1;
+		return 0;
+	}
+
+	// The last line lacks a newline, the buffer's final \0 terminates it
+	ir->value = ir->start;
+	ir->start = p;
+	return 1;
+}
+
+// --- StardictInfo ------------------------------------------------------------
+
+/** Return the filesystem path stored in the dictionary info. */
+const gchar *
+stardict_info_get_path (StardictInfo *sdi)
+{
+	const gchar *path = sdi->path;
+	return path;
+}
+
+/** Return the book name stored in the dictionary info. */
+const gchar *
+stardict_info_get_book_name (StardictInfo *sdi)
+{
+	const gchar *book_name = sdi->book_name;
+	return book_name;
+}
+
+/** Return the word count of the dictionary.  The number is the one stated
+ *  in the .ifo file; a dictionary could load successfully with a different
+ *  count of word entries.
+ */
+gsize
+stardict_info_get_word_count (StardictInfo *sdi)
+{
+	gsize count = sdi->word_count;
+	return count;
+}
+
+/** Free the dictionary info object along with all the strings it holds. */
+void
+stardict_info_free (StardictInfo *sdi)
+{
+	gchar *strings[] =
+	{
+		sdi->path, sdi->book_name, sdi->author, sdi->email,
+		sdi->website, sdi->description, sdi->date, sdi->same_type_sequence
+	};
+
+	guint i;
+	for (i = 0; i < G_N_ELEMENTS (strings); i++)
+		g_free (strings[i]);
+
+	g_free (sdi);
+}
+
+#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) }
+
+/** Parse the .ifo file at @a path into @a sti.
+ *  @param[out] sti  Storage for the result; any previous contents are
+ *                   overwritten without being freed
+ *  @param[in] path  Path to the .ifo file, also remembered in the result
+ *  @return TRUE on success.  On failure FALSE is returned, @a error is set
+ *          exactly once and @a sti holds no allocated strings, so that
+ *          the caller only needs to release the structure itself.
+ */
+static gboolean
+load_ifo (StardictInfo *sti, const gchar *path, GError **error)
+{
+	static const struct
+	{
+		const gchar *name;
+		enum { IFO_STRING, IFO_NUMBER } type;
+		size_t offset;
+	}
+	ifo_keys[] =
+	{
+		DEFINE_IFO_KEY ("bookname",         STRING, book_name),
+		DEFINE_IFO_KEY ("wordcount",        NUMBER, word_count),
+		DEFINE_IFO_KEY ("synwordcount",     NUMBER, syn_word_count),
+		DEFINE_IFO_KEY ("idxfilesize",      NUMBER, idx_filesize),
+		DEFINE_IFO_KEY ("idxoffsetbits",    NUMBER, idx_offset_bits),
+		DEFINE_IFO_KEY ("author",           STRING, author),
+		DEFINE_IFO_KEY ("email",            STRING, email),
+		DEFINE_IFO_KEY ("website",          STRING, website),
+		DEFINE_IFO_KEY ("description",      STRING, description),
+		DEFINE_IFO_KEY ("date",             STRING, date),
+		DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence)
+	};
+
+	// Do not rely on the caller having cleared the structure: the presence
+	// checks and the cleanup below read members that may never get assigned
+	memset (sti, 0, sizeof *sti);
+
+	IfoReader ir;
+	if (!ifo_reader_init (&ir, path, error))
+		return FALSE;
+
+	gboolean ret_val = FALSE;
+	gint ret;
+
+	if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version"))
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: version not specified", path);
+		goto error;
+	}
+
+	if (!strcmp (ir.value, "2.4.2"))
+		sti->version = SD_VERSION_2_4_2;
+	else if (!strcmp (ir.value, "3.0.0"))
+		sti->version = SD_VERSION_3_0_0;
+	else
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: invalid version: %s", path, ir.value);
+		goto error;
+	}
+
+	while ((ret = ifo_reader_read (&ir)) == 1)
+	{
+		guint i;
+		for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++)
+			if (!strcmp (ir.key, ifo_keys[i].name))
+				break;
+
+		if (i == G_N_ELEMENTS (ifo_keys))
+		{
+			// Not fatal, hence logged rather than stored in @a error:
+			// a GError must not be set when the function then carries on
+			g_debug ("%s: unknown key, ignoring: %s", path, ir.key);
+			continue;
+		}
+
+		if (ifo_keys[i].type == IFO_STRING)
+		{
+			// A repeated key replaces the earlier value
+			gchar **field = &G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset);
+			g_free (*field);
+			*field = g_strdup (ir.value);
+			continue;
+		}
+
+		// Otherwise it has to be IFO_NUMBER.  Requiring a leading digit
+		// rules out empty values as well as the signs and whitespace
+		// that strtoul() would accept.
+		gchar *end = ir.value;
+		gulong number = 0;
+		errno = 0;
+		if (g_ascii_isdigit (*ir.value))
+			number = strtoul (ir.value, &end, 10);
+		if (end == ir.value || *end || errno == ERANGE)
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+				"%s: invalid integer", path);
+			goto error;
+		}
+
+		G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = number;
+	}
+
+	if (ret == -1)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: option format error", path);
+		goto error;
+	}
+
+	// Only the first problem is reported, a GError cannot hold several
+	// FIXME check for zeros, don't assume that 0 means for "not set"
+	if (!sti->book_name || !*sti->book_name)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: no book name specified\n", path);
+		goto error;
+	}
+	if (!sti->word_count)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: word count not specified\n", path);
+		goto error;
+	}
+	if (!sti->idx_filesize)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: .idx file size not specified\n", path);
+		goto error;
+	}
+
+	if (!sti->idx_offset_bits)
+		sti->idx_offset_bits = 32;
+	else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64)
+	{
+		g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+			"%s: wrong index offset bits: %lu\n", path, sti->idx_offset_bits);
+		goto error;
+	}
+
+	// The data files are later looked up relative to this path
+	sti->path = g_strdup (path);
+	ret_val = TRUE;
+
+error:
+	if (!ret_val)
+	{
+		guint i;
+		for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++)
+		{
+			if (ifo_keys[i].type != IFO_STRING)
+				continue;
+
+			// Leave no dangling pointers behind in the caller's structure
+			gchar **field = &G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset);
+			g_free (*field);
+			*field = NULL;
+		}
+	}
+
+	ifo_reader_free (&ir);
+	return ret_val;
+}
+
+/** List all dictionary files located in a path.  Files matching "*.ifo"
+ *  that fail to load are silently left out.
+ *  @param[in] path  The directory to look into
+ *  @return GList<StardictInfo *>, in the order the directory was read,
+ *  or NULL if the directory cannot be opened or contains no loadable
+ *  dictionary.  Deallocate the list with:
+ *  @code
+ *    g_list_free_full (list, (GDestroyNotify) stardict_info_free);
+ *  @endcode
+ */
+GList *
+stardict_list_dictionaries (const gchar *path)
+{
+	g_return_val_if_fail (path != NULL, NULL);
+
+	// An unreadable directory is an ordinary runtime condition,
+	// not a programming error worth a critical warning
+	GDir *dir = g_dir_open (path, 0, NULL);
+	if (!dir)
+		return NULL;
+
+	GPatternSpec *ps = g_pattern_spec_new ("*.ifo");
+	GList *dicts = NULL;
+	const gchar *name;
+	while ((name = g_dir_read_name (dir)))
+	{
+		if (!g_pattern_match_string (ps, name))
+			continue;
+
+		gchar *filename = g_build_filename (path, name, NULL);
+		StardictInfo *ifo = g_new0 (StardictInfo, 1);
+		if (load_ifo (ifo, filename, NULL))
+			dicts = g_list_prepend (dicts, ifo);
+		else
+			g_free (ifo);
+		g_free (filename);
+	}
+	g_dir_close (dir);
+	g_pattern_spec_free (ps);
+
+	// Prepending and reversing once avoids walking the list on each insert
+	return g_list_reverse (dicts);
+}
+
+// --- StardictDict ------------------------------------------------------------
+
+G_DEFINE_TYPE (StardictDict, stardict_dict, G_TYPE_OBJECT)
+
+/** GObject finalizer: release everything the dictionary owns, including
+ *  the StardictInfo that it was constructed from. */
+static void
+stardict_dict_finalize (GObject *self)
+{
+	StardictDict *sd = STARDICT_DICT (self);
+
+	// stardict_dict_init() assigns nothing, so an instance that never went
+	// through the loading code still has these members unset
+	if (sd->info)
+		stardict_info_free (sd->info);
+	if (sd->index)
+		g_array_free (sd->index, TRUE);
+	if (sd->synonyms)
+		g_array_free (sd->synonyms, TRUE);
+	g_free (sd->dict);
+
+	G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self);
+}
+
+/** Class initializer: install the finalizer. */
+static void
+stardict_dict_class_init (StardictDictClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize;
+}
+
+/** Instance initializer; the members get assigned
+ *  in stardict_dict_new_from_info(). */
+static void
+stardict_dict_init (G_GNUC_UNUSED StardictDict *sd)
+{
+}
+
+/** Load a StarDict dictionary.
+ *  @param[in] filename  Path to the .ifo file
+ *  @param[out] error  Location for an error, or NULL
+ *  @return A new dictionary object, or NULL on failure
+ */
+StardictDict *
+stardict_dict_new (const gchar *filename, GError **error)
+{
+	StardictInfo *ifo = g_new0 (StardictInfo, 1);
+	if (!load_ifo (ifo, filename, error))
+	{
+		g_free (ifo);
+		return NULL;
+	}
+
+	// stardict_dict_new_from_info() stores the info in the new object, whose
+	// finalizer frees it, and on failure it drops that object again.
+	// The info is thus gone either way and must not be freed once more here.
+	return stardict_dict_new_from_info (ifo, error);
+}
+
+/** Load a StarDict index from a GIO input stream, appending one entry
+ *  to @a sd->index per record.
+ *  @return TRUE when the stream has been read to its end, FALSE with
+ *          @a error set on a read failure or a truncated record.  Entries
+ *          read before the failure stay in the index.
+ */
+static gboolean
+load_idx_internal (StardictDict *sd, GInputStream *is, GError **error)
+{
+	GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is));
+	g_data_input_stream_set_byte_order (dis,
+		G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
+
+	StardictIndexEntry entry;
+	GError *err = NULL;
+	// Ignoring "wordcount", just reading as long as we can
+	while ((entry.name = stream_read_string (dis, &err)))
+	{
+		if (sd->info->idx_offset_bits == 32)
+			entry.data_offset
+				= g_data_input_stream_read_uint32 (dis, NULL, &err);
+		else
+			entry.data_offset
+				= g_data_input_stream_read_uint64 (dis, NULL, &err);
+		if (err)
+			goto error;
+
+		entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err);
+		if (err)
+			goto error;
+
+		g_array_append_val (sd->index, entry);
+	}
+
+	// No string and no error is taken to be the end of the index;
+	// no string with an error set is a failure that has to be reported
+	if (err)
+		goto error;
+
+	g_object_unref (dis);
+	return TRUE;
+
+error:
+	g_propagate_error (error, err);
+	g_free (entry.name);
+	g_object_unref (dis);
+	return FALSE;
+}
+
+/** Open @a filename and load the index from it, going through a gzip
+ *  decompressor if @a gzipped is set.
+ *  @return TRUE on success, FALSE with @a error set otherwise
+ */
+static gboolean
+load_idx (StardictDict *sd, const gchar *filename,
+	gboolean gzipped, GError **error)
+{
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+	gboolean loaded = FALSE;
+
+	if (fis != NULL)
+	{
+		if (!gzipped)
+			loaded = load_idx_internal (sd, G_INPUT_STREAM (fis), error);
+		else
+		{
+			GZlibDecompressor *inflater
+				= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP);
+			GInputStream *inflated = g_converter_input_stream_new
+				(G_INPUT_STREAM (fis), G_CONVERTER (inflater));
+
+			loaded = load_idx_internal (sd, inflated, error);
+
+			g_object_unref (inflated);
+			g_object_unref (inflater);
+		}
+
+		g_object_unref (fis);
+	}
+
+	g_object_unref (file);
+	return loaded;
+}
+
+/** Load the synonyms file, appending one entry to @a sd->synonyms
+ *  per record.
+ *  @return TRUE when the file has been read to its end, FALSE with @a error
+ *          set if it cannot be opened, on a read failure or on a truncated
+ *          record.  Entries read before the failure stay in the array.
+ */
+static gboolean
+load_syn (StardictDict *sd, const gchar *filename, GError **error)
+{
+	gboolean ret_val = FALSE;
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+
+	if (!fis)
+		goto cannot_open;
+
+	GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis));
+	g_data_input_stream_set_byte_order (dis,
+		G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
+
+	StardictSynonymEntry entry;
+	GError *err = NULL;
+	// Ignoring "synwordcount", just reading as long as we can
+	while ((entry.word = stream_read_string (dis, &err)))
+	{
+		entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err);
+		if (err)
+			break;
+
+		g_array_append_val (sd->synonyms, entry);
+	}
+
+	if (err)
+	{
+		// The word is only set if it was the index number that failed
+		g_free (entry.word);
+		g_propagate_error (error, err);
+	}
+	else
+		ret_val = TRUE;
+
+	g_object_unref (dis);
+	g_object_unref (fis);
+cannot_open:
+	g_object_unref (file);
+	return ret_val;
+}
+
+/** GArray clear function for index entries: free the word. */
+static void
+index_destroy_cb (gpointer sde)
+{
+	g_free (((StardictIndexEntry *) sde)->name);
+}
+
+/** GArray clear function for synonym entries: free the word. */
+static void
+syn_destroy_cb (gpointer sde)
+{
+	g_free (((StardictSynonymEntry *) sde)->word);
+}
+
+/** Read the whole dictionary data file into memory, going through a gzip
+ *  decompressor if @a gzipped is set.  On success the data and its length
+ *  are stored in @a sd->dict and @a sd->dict_length.
+ *  @return TRUE on success, FALSE with @a error set otherwise
+ */
+static gboolean
+load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped,
+	GError **error)
+{
+	GFile *file = g_file_new_for_path (filename);
+	GFileInputStream *fis = g_file_read (file, NULL, error);
+	gboolean loaded = FALSE;
+
+	if (fis != NULL)
+	{
+		// Just read it all, as it is, into memory
+		GByteArray *bytes = g_byte_array_new ();
+		if (!gzipped)
+			loaded = stream_read_all (bytes, G_INPUT_STREAM (fis), error);
+		else
+		{
+			GZlibDecompressor *inflater
+				= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP);
+			GInputStream *inflated = g_converter_input_stream_new
+				(G_INPUT_STREAM (fis), G_CONVERTER (inflater));
+
+			loaded = stream_read_all (bytes, inflated, error);
+
+			g_object_unref (inflated);
+			g_object_unref (inflater);
+		}
+
+		if (loaded)
+		{
+			// The length has to be taken before the wrapper is let go
+			sd->dict_length = bytes->len;
+			sd->dict = g_byte_array_free (bytes, FALSE);
+		}
+		else
+			g_byte_array_free (bytes, TRUE);
+
+		g_object_unref (fis);
+	}
+
+	g_object_unref (file);
+	return loaded;
+}
+
+/** Load a StarDict dictionary.
+ *  The index (.idx or .idx.gz) and the data (.dict or .dict.dz) are looked
+ *  up next to the .ifo file and are required; a .syn file is loaded when
+ *  present and problems with it are ignored.
+ *  @param[in] sdi  Parsed .ifo data.  The pointer is stored in the new
+ *                  object, whose finalizer frees it.  As the object is also
+ *                  finalized when loading fails, the caller must not use
+ *                  or free @a sdi after this call in either case.
+ *  @param[out] error  Location for an error, or NULL
+ *  @return A new dictionary object, or NULL on failure
+ */
+StardictDict *
+stardict_dict_new_from_info (StardictInfo *sdi, GError **error)
+{
+	g_return_val_if_fail (sdi != NULL, NULL);
+
+	StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL);
+	sd->info = sdi;
+	sd->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry));
+	g_array_set_clear_func (sd->index, index_destroy_cb);
+	sd->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry));
+	g_array_set_clear_func (sd->synonyms, syn_destroy_cb);
+
+	const gchar *dot = strrchr (sdi->path, '.');
+	gchar *base = dot ? g_strndup (sdi->path, dot - sdi->path)
+		: g_strdup (sdi->path);
+
+	// g_file_test() succeeds if any of the given tests does, so only
+	// G_FILE_TEST_IS_REGULAR is used to keep directories from passing
+	gchar *base_idx = g_strconcat (base, ".idx", NULL);
+	gboolean ret = FALSE;
+	if (g_file_test (base_idx, G_FILE_TEST_IS_REGULAR))
+		ret = load_idx (sd, base_idx, FALSE, error);
+	else
+	{
+		gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL);
+		g_free (base_idx);
+		base_idx = base_idx_gz;
+
+		if (g_file_test (base_idx, G_FILE_TEST_IS_REGULAR))
+			ret = load_idx (sd, base_idx, TRUE, error);
+		else
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND,
+				"%s: cannot find index file", sdi->path);
+		}
+	}
+	g_free (base_idx);
+
+	if (!ret)
+		goto error;
+
+	gchar *base_dict = g_strconcat (base, ".dict", NULL);
+	ret = FALSE;
+	if (g_file_test (base_dict, G_FILE_TEST_IS_REGULAR))
+		ret = load_dict (sd, base_dict, FALSE, error);
+	else
+	{
+		gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL);
+		g_free (base_dict);
+		base_dict = base_dict_dz;
+
+		if (g_file_test (base_dict, G_FILE_TEST_IS_REGULAR))
+			ret = load_dict (sd, base_dict, TRUE, error);
+		else
+		{
+			g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND,
+				"%s: cannot find dict file", sdi->path);
+		}
+	}
+	g_free (base_dict);
+
+	if (!ret)
+		goto error;
+
+	// Synonyms are optional, a failure to load them is not reported
+	gchar *base_syn = g_strconcat (base, ".syn", NULL);
+	if (g_file_test (base_syn, G_FILE_TEST_IS_REGULAR))
+		load_syn (sd, base_syn, NULL);
+	g_free (base_syn);
+
+	g_free (base);
+	return sd;
+
+error:
+	// Dropping the last reference runs the finalizer, which releases
+	// the index, the synonyms and the info; nothing may be freed by hand
+	g_free (base);
+	g_object_unref (sd);
+	return NULL;
+}
+
+/** Return words for which the argument is a synonym of or NULL
+ *  if there are no such words.
+ *  @param[in] word  The synonym to look up
+ *  @return A newly allocated, NULL-terminated array of newly allocated
+ *          strings (free with g_strfreev()), or NULL
+ */
+gchar **
+stardict_dict_get_synonyms (StardictDict *sd, const gchar *word)
+{
+	BINARY_SEARCH_BEGIN (sd->synonyms->len - 1, stardict_strcmp (word,
+		g_array_index (sd->synonyms, StardictSynonymEntry, imid).word))
+
+	// Back off to the first matching entry
+	guint first = imid;
+	while (first > 0 && !stardict_strcmp (word,
+		g_array_index (sd->synonyms, StardictSynonymEntry, first - 1).word))
+		first--;
+
+	GPtrArray *array = g_ptr_array_new ();
+
+	// And add all matching entries from that position on to the array
+	for (guint i = first; i < sd->synonyms->len; i++)
+	{
+		StardictSynonymEntry *syn
+			= &g_array_index (sd->synonyms, StardictSynonymEntry, i);
+		if (stardict_strcmp (word, syn->word))
+			break;
+
+		// The number comes straight from the file, do not trust it
+		if (syn->original_word >= sd->index->len)
+			continue;
+
+		g_ptr_array_add (array, g_strdup (g_array_index
+			(sd->index, StardictIndexEntry, syn->original_word).name));
+	}
+
+	if (!array->len)
+	{
+		g_ptr_array_free (array, TRUE);
+		return NULL;
+	}
+
+	// Without the terminator the caller could not tell where the array ends
+	g_ptr_array_add (array, NULL);
+	return (gchar **) g_ptr_array_free (array, FALSE);
+
+	BINARY_SEARCH_END
+
+	return NULL;
+}
+
+/** Search for a word in the dictionary index by binary search, comparing
+ *  with stardict_strcmp().
+ *  @param[in] word  The word in utf-8 encoding
+ *  @param[out] success  TRUE if found; may be NULL
+ *  @return An iterator object pointing to the word, or where it would be
+ */
+StardictIterator *
+stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success)
+{
+	BINARY_SEARCH_BEGIN (sd->index->len - 1, stardict_strcmp (word,
+		g_array_index (sd->index, StardictIndexEntry, imid).name))
+
+	// This part only runs on an exact match, "imid" being its position
+	if (success) *success = TRUE;
+	return stardict_iterator_new (sd, imid);
+
+	BINARY_SEARCH_END
+
+	// Not found, "imin" is the position where the word would be inserted
+	if (success) *success = FALSE;
+	return stardict_iterator_new (sd, imin);
+}
+
+/** Free an entry field together with its data. */
+static void
+stardict_entry_field_free (StardictEntryField *sef)
+{
+	gsize field_size = sizeof *sef;
+
+	g_free (sef->data);
+	g_slice_free1 (field_size, sef);
+}
+
+/** Split a dictionary entry into its fields, each of which is introduced
+ *  by a type character.  Fields with a lowercase type hold text terminated
+ *  with \0, the others are prefixed with their size as a 32-bit big endian
+ *  number.
+ *  @param[in] entry  The entry data
+ *  @param[in] entry_size  Length of the entry data in bytes
+ *  @return GList<StardictEntryField *>, or NULL with @a error set if
+ *          a field does not fit within the entry.  An empty entry yields
+ *          NULL without any error.
+ */
+static GList *
+read_entries (const gchar *entry, gsize entry_size, GError **error)
+{
+	const gchar *end = entry + entry_size;
+	GList *result = NULL;
+
+	while (entry < end)
+	{
+		gchar type = *entry++;
+		StardictEntryField *sef;
+		if (g_ascii_islower (type))
+		{
+			// The terminator has to be present within the entry
+			const gchar *nul = memchr (entry, '\0', end - entry);
+			if (!nul)
+				goto error;
+
+			sef = g_slice_alloc (sizeof *sef);
+			sef->type = type;
+			sef->data_size = nul - entry + 1;
+			sef->data = g_strndup (entry, nul - entry);
+			entry = nul + 1;
+		}
+		else
+		{
+			// Copy the number out, the data need not be aligned for it
+			guint32 length_be;
+			if ((gsize) (end - entry) < sizeof length_be)
+				goto error;
+
+			memcpy (&length_be, entry, sizeof length_be);
+			entry += sizeof length_be;
+
+			// Compare against what is left rather than computing a pointer
+			// that could end up far beyond the buffer
+			gsize length = GUINT32_FROM_BE (length_be);
+			if (length > (gsize) (end - entry))
+				goto error;
+
+			sef = g_slice_alloc (sizeof *sef);
+			sef->type = type;
+			sef->data_size = length;
+			sef->data = g_malloc (length);
+			if (length)
+				memcpy (sef->data, entry, length);
+
+			// Move on to the type character of the next field
+			entry += length;
+		}
+		result = g_list_prepend (result, sef);
+	}
+
+	return g_list_reverse (result);
+
+error:
+	g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+		"invalid data entry");
+	g_list_free_full (result,
+		(GDestroyNotify) stardict_entry_field_free);
+	return NULL;
+}
+
+/** Split a dictionary entry into its fields, taking the field types from
+ *  the "same type sequence" instead of from the data.  The last field
+ *  in the sequence has neither a terminator nor a size and extends
+ *  to the end of the entry.
+ *  @param[in] entry  The entry data
+ *  @param[in] entry_size  Length of the entry data in bytes
+ *  @param[in] sts  The type characters, one per field
+ *  @return GList<StardictEntryField *>, or NULL with @a error set if
+ *          a field does not fit within the entry.  An empty sequence yields
+ *          NULL without any error.
+ */
+static GList *
+read_entries_sts (const gchar *entry, gsize entry_size,
+	const gchar *sts, GError **error)
+{
+	const gchar *end = entry + entry_size;
+	GList *result = NULL;
+
+	while (*sts)
+	{
+		gchar type = *sts++;
+		gboolean is_final = !*sts;
+		StardictEntryField *sef;
+		if (g_ascii_islower (type))
+		{
+			gsize length;
+			if (is_final)
+				length = end - entry;
+			else
+			{
+				// The terminator has to be present within the entry
+				const gchar *nul = memchr (entry, '\0', end - entry);
+				if (!nul)
+					goto error;
+				length = nul - entry;
+			}
+
+			// Text fields always get a terminator of their own
+			gchar *text = g_malloc (length + 1);
+			memcpy (text, entry, length);
+			text[length] = '\0';
+
+			sef = g_slice_alloc (sizeof *sef);
+			sef->type = type;
+			sef->data_size = length + 1;
+			sef->data = text;
+
+			// Also step over the terminator if there was one
+			entry += is_final ? length : length + 1;
+		}
+		else
+		{
+			gsize length;
+			if (is_final)
+				length = end - entry;
+			else
+			{
+				// Copy the number out, the data need not be aligned for it
+				guint32 length_be;
+				if ((gsize) (end - entry) < sizeof length_be)
+					goto error;
+
+				memcpy (&length_be, entry, sizeof length_be);
+				entry += sizeof length_be;
+
+				length = GUINT32_FROM_BE (length_be);
+				if (length > (gsize) (end - entry))
+					goto error;
+			}
+
+			sef = g_slice_alloc (sizeof *sef);
+			sef->type = type;
+			sef->data_size = length;
+			sef->data = g_malloc (length);
+			if (length)
+				memcpy (sef->data, entry, length);
+
+			// Move on to the data of the next field
+			entry += length;
+		}
+		result = g_list_prepend (result, sef);
+	}
+
+	return g_list_reverse (result);
+
+error:
+	g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
+		"invalid data entry");
+	g_list_free_full (result,
+		(GDestroyNotify) stardict_entry_field_free);
+	return NULL;
+}
+
+/** Return the data for the specified offset in the index.  Unsafe, in that
+ *  @a offset is not checked against the length of the index.
+ *  @return A new entry object, or NULL if the index entry points outside
+ *          the dictionary data or the data cannot be parsed
+ */
+static StardictEntry *
+stardict_dict_get_entry (StardictDict *sd, guint32 offset)
+{
+	// TODO cache the entries
+	StardictIndexEntry *sie = &g_array_index (sd->index,
+		StardictIndexEntry, offset);
+
+	// Written without adding the two numbers, so that nothing can wrap
+	g_return_val_if_fail (sie->data_offset <= sd->dict_length
+		&& sie->data_size <= sd->dict_length - sie->data_offset, NULL);
+
+	// With a "sametypesequence" the data carries no type characters
+	// and has to be read according to the sequence
+	GList *entries;
+	if (sd->info->same_type_sequence)
+		entries = read_entries_sts (sd->dict + sie->data_offset,
+			sie->data_size, sd->info->same_type_sequence, NULL);
+	else
+		entries = read_entries (sd->dict + sie->data_offset,
+			sie->data_size, NULL);
+
+	if (!entries)
+		return NULL;
+
+	StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL);
+	se->fields = entries;
+	return se;
+}
+
+// --- StardictEntry -----------------------------------------------------------
+
+G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT)
+
+/** GObject finalizer: free the list of fields and the fields themselves. */
+static void
+stardict_entry_finalize (GObject *self)
+{
+	GList *fields = STARDICT_ENTRY (self)->fields;
+	g_list_free_full (fields, (GDestroyNotify) stardict_entry_field_free);
+
+	GObjectClass *parent_class = G_OBJECT_CLASS (stardict_entry_parent_class);
+	parent_class->finalize (self);
+}
+
+/** Class initializer: install the finalizer. */
+static void
+stardict_entry_class_init (StardictEntryClass *klass)
+{
+	GObjectClass *object_class = G_OBJECT_CLASS (klass);
+	object_class->finalize = stardict_entry_finalize;
+}
+
+/** Instance initializer, nothing to set up. */
+static void
+stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde)
+{
+}
+
+/** Return the fields present within the entry.
+ *  @return GList<StardictEntryField *>, which stays owned by the entry
+ */
+const GList *
+stardict_entry_get_fields (StardictEntry *sde)
+{
+	g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL);
+
+	const GList *fields = sde->fields;
+	return fields;
+}
+
+// --- StardictIterator---------------------------------------------------------
+
+G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT)
+
+/** GObject finalizer: drop the reference held on the dictionary. */
+static void
+stardict_iterator_finalize (GObject *self)
+{
+	StardictDict *owner = STARDICT_ITERATOR (self)->owner;
+	g_object_unref (owner);
+
+	GObjectClass *parent_class
+		= G_OBJECT_CLASS (stardict_iterator_parent_class);
+	parent_class->finalize (self);
+}
+
+/** Class initializer: install the finalizer. */
+static void
+stardict_iterator_class_init (StardictIteratorClass *klass)
+{
+	GObjectClass *object_class = G_OBJECT_CLASS (klass);
+	object_class->finalize = stardict_iterator_finalize;
+}
+
+/** Instance initializer, nothing to set up. */
+static void
+stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd)
+{
+}
+
+/** Create a new iterator over @a sd positioned at @a offset.  The iterator
+ *  holds a reference on the dictionary for as long as it lives. */
+StardictIterator *
+stardict_iterator_new (StardictDict *sd, guint32 offset)
+{
+	g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL);
+
+	StardictIterator *iterator = g_object_new (STARDICT_TYPE_ITERATOR, NULL);
+	iterator->offset = offset;
+	iterator->owner = g_object_ref (sd);
+	return iterator;
+}
+
+/** Return the word in the index that the iterator points at, or NULL
+ *  if the iterator is not valid.  The string stays owned by the index. */
+const gchar *
+stardict_iterator_get_word (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
+
+	if (stardict_iterator_is_valid (sdi))
+	{
+		StardictIndexEntry *current = &g_array_index (sdi->owner->index,
+			StardictIndexEntry, sdi->offset);
+		return current->name;
+	}
+	return NULL;
+}
+
+/** Return the dictionary entry that the iterator points at, or NULL
+ *  if the iterator is not valid or the entry cannot be read. */
+StardictEntry *
+stardict_iterator_get_entry (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
+	if (!stardict_iterator_is_valid (sdi))
+		return NULL;
+	return stardict_dict_get_entry (sdi->owner, sdi->offset);
+}
+
+/** Return whether the iterator's offset lies within the dictionary index. */
+gboolean
+stardict_iterator_is_valid (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE);
+
+	if (!(sdi->offset >= 0))
+		return FALSE;
+	return sdi->offset < sdi->owner->index->len;
+}
+
+/** Return the iterator's offset within the dictionary index,
+ *  or -1 if @a sdi is not an iterator. */
+gint64
+stardict_iterator_get_offset (StardictIterator *sdi)
+{
+	g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1);
+
+	gint64 position = sdi->offset;
+	return position;
+}
+
+/** Move the iterator: to @a offset itself, or by @a offset from the current
+ *  position if @a relative is set.  The result is not range-checked. */
+void
+stardict_iterator_set_offset
+	(StardictIterator *sdi, gint64 offset, gboolean relative)
+{
+	g_return_if_fail (STARDICT_IS_ITERATOR (sdi));
+
+	if (relative)
+		sdi->offset = sdi->offset + offset;
+	else
+		sdi->offset = offset;
+}
-- 
cgit v1.3