5 files changed, 813 insertions, 55 deletions
diff --git a/src/dictzip-input-stream.c b/src/dictzip-input-stream.c
new file mode 100644
index 0000000..e3c0d7c
--- /dev/null
+++ b/src/dictzip-input-stream.c
@@ -0,0 +1,628 @@
+/*
+ * dictzip-input-stream.c: dictzip GIO stream reader
+ *
+ * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <zlib.h>
+
+#include "utils.h"
+#include "dictzip-input-stream.h"
+
+
+// --- Errors ------------------------------------------------------------------
+
+/** Return the quark identifying the DICTZIP_ERROR error domain. */
+GQuark
+dictzip_error_quark (void)
+{
+	static const gchar domain[] = "dictzip-error-quark";
+	return g_quark_from_static_string (domain);
+}
+
+// --- dictzip utilities -------------------------------------------------------
+
+/** Release the heap-allocated members of a gzip header ("comment", "extra"
+ *  and "name") and reset them to NULL, so that a repeated call is harmless. */
+static void
+free_gzip_header (gz_header *gzh)
+{
+	Bytef **owned[] = { &gzh->comment, &gzh->extra, &gzh->name };
+
+	guint i;
+	for (i = 0; i < G_N_ELEMENTS (owned); i++)
+	{
+		g_free (*owned[i]);
+		*owned[i] = NULL;
+	}
+}
+
+/** Parse a gzip member header from @a is into @a gzh.
+ *
+ *  The header is read in manually instead of going through the ZLIB API.
+ *  Multi-byte fields are decoded as little-endian.  When the header carries
+ *  a CRC16, the base stream is rewound to offset 0 and the header is read
+ *  again for verification, so the header has to start at the very beginning
+ *  of the stream.
+ *
+ *  Flag bits follow RFC 1952: 1 = text, 2 = header CRC, 4 = extra field,
+ *  8 = file name, 16 = comment.
+ *
+ *  @param[in]  is  Seekable base stream; it is not closed by this function
+ *  @param[out] gzh  The decoded header; on success the "extra", "name" and
+ *      "comment" members are heap-allocated, see free_gzip_header()
+ *  @param[out] first_block_offset  Offset just past the header and its CRC
+ *  @param[out] error  Set when FALSE is returned
+ *  @return TRUE on success; on failure the members of @a gzh are released
+ */
+static gboolean
+read_gzip_header (GInputStream *is, gz_header *gzh,
+	goffset *first_block_offset, GError **error)
+{
+	assert (is != NULL);
+	assert (gzh != NULL);
+
+	GDataInputStream *dis = g_data_input_stream_new (is);
+	g_data_input_stream_set_byte_order (dis,
+		G_DATA_STREAM_BYTE_ORDER_LITTLE_ENDIAN);
+	g_filter_input_stream_set_close_base_stream
+		(G_FILTER_INPUT_STREAM (dis), FALSE);
+
+	GError *err = NULL;
+	gpointer buf = NULL;    // Copy of the raw header used for the CRC check
+	memset (gzh, 0, sizeof *gzh);
+
+	// File header identification
+	if (g_data_input_stream_read_byte (dis, NULL, &err) != 31
+	 || g_data_input_stream_read_byte (dis, NULL, &err) != 139)
+	{
+		if (err)
+			g_propagate_error (error, err);
+		else
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"wrong header magic");
+		goto error_own;
+	}
+
+	// Compression method, only "deflate" is supported here
+	if (g_data_input_stream_read_byte (dis, NULL, &err) != Z_DEFLATED)
+	{
+		if (err)
+			g_propagate_error (error, err);
+		else
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"unsupported compression method");
+		goto error_own;
+	}
+
+	guint flags = g_data_input_stream_read_byte (dis, NULL, &err);
+	if (err) goto error;
+
+	gzh->text = ((flags & 1) != 0);
+	gzh->hcrc = ((flags & 2) != 0);
+
+	gzh->time = g_data_input_stream_read_uint32 (dis, NULL, &err);
+	if (err) goto error;
+
+	gzh->xflags = g_data_input_stream_read_byte (dis, NULL, &err);
+	if (err) goto error;
+
+	gzh->os = g_data_input_stream_read_byte (dis, NULL, &err);
+	if (err) goto error;
+
+	if (flags & 4)
+	{
+		gzh->extra_len = g_data_input_stream_read_uint16 (dis, NULL, &err);
+		if (err) goto error;
+		gzh->extra_max = gzh->extra_len;
+
+		// An empty field leaves "extra" unset; there is nothing to read
+		if (gzh->extra_len > 0)
+		{
+			gzh->extra = g_malloc (gzh->extra_len);
+
+			// A plain read may return less than requested without being
+			// at the end of the stream, so keep reading until satisfied
+			gsize n_read = 0;
+			if (!g_input_stream_read_all (G_INPUT_STREAM (dis),
+				gzh->extra, gzh->extra_len, &n_read, NULL, &err))
+				goto error;
+			if (n_read != gzh->extra_len)
+				goto error_eof;
+		}
+	}
+
+	if (flags & 8)
+	{
+		gzh->name = (Bytef *) stream_read_string (dis, &err);
+		if (err) goto error;
+		// NULL without an error means the stream has ended prematurely
+		if (!gzh->name) goto error_eof;
+		gzh->name_max = strlen ((char *) gzh->name) + 1;
+	}
+
+	if (flags & 16)
+	{
+		gzh->comment = (Bytef *) stream_read_string (dis, &err);
+		if (err) goto error;
+		if (!gzh->comment) goto error_eof;
+		gzh->comm_max = strlen ((char *) gzh->comment) + 1;
+	}
+
+	goffset header_size_sans_crc = g_seekable_tell (G_SEEKABLE (dis));
+
+	if (!gzh->hcrc)
+		*first_block_offset = header_size_sans_crc;
+	else
+	{
+		*first_block_offset = header_size_sans_crc + 2;
+		uLong header_crc = g_data_input_stream_read_uint16 (dis, NULL, &err);
+		if (err) goto error;
+
+		// The checksum covers the raw bytes, read them again from the start
+		if (!g_seekable_seek (G_SEEKABLE (is), 0, G_SEEK_SET, NULL, &err))
+			goto error;
+
+		buf = g_malloc (header_size_sans_crc);
+
+		gsize n_read = 0;
+		if (!g_input_stream_read_all (is, buf, header_size_sans_crc,
+			&n_read, NULL, &err))
+			goto error;
+		if (n_read != (gsize) header_size_sans_crc)
+			goto error_eof;
+
+		uLong crc = crc32 (0, NULL, 0);
+		crc = crc32 (crc, buf, header_size_sans_crc);
+
+		// Only the lower 16 bits of the CRC32 are stored in the header
+		if (header_crc != (guint16) crc)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"header checksum mismatch");
+			goto error_own;
+		}
+	}
+
+	gzh->done = 1;
+	g_free (buf);
+	g_object_unref (dis);
+	return TRUE;
+
+error_eof:
+	g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+		"unexpected end of file");
+	goto error_own;
+error:
+	g_propagate_error (error, err);
+error_own:
+	g_free (buf);
+	free_gzip_header (gzh);
+	g_object_unref (dis);
+	return FALSE;
+}
+
+/** Locate and decode the dictzip "RA" (random access) subfield within
+ *  the gzip "extra" field.
+ *
+ *  The extra field is a sequence of subfields, each consisting of two
+ *  identification bytes, a little-endian 16-bit data length and the data.
+ *  The RA data holds, as little-endian 16-bit integers: the version (which
+ *  has to be 1), the chunk length, the chunk count, and then one compressed
+ *  size per chunk.
+ *
+ *  @param[in]  gzh  Decoded gzip header
+ *  @param[out] chunk_length  Chunk length as stored in the subfield
+ *  @param[out] n_chunks  Number of entries in the returned array
+ *  @param[out] error  Set when the extra field is missing or malformed
+ *  @return Newly allocated array of compressed chunk sizes, to be freed with
+ *      g_free().  NULL is returned on error, and also, without any error
+ *      being set, when no usable RA subfield is present.
+ */
+static guint16 *
+read_random_access_field (const gz_header *gzh,
+	gsize *chunk_length, gsize *n_chunks, GError **error)
+{
+	if (!gzh->extra)
+	{
+		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+			"no 'extra' field within the header");
+		return NULL;
+	}
+
+	// Tracking the remaining length rather than comparing against an end
+	// pointer, so that no pointer outside of the buffer is ever formed
+	const guchar *extra_iterator = gzh->extra;
+	gsize remaining = gzh->extra_len;
+
+	guint16 *chunks = NULL;
+
+	while (remaining >= 4)
+	{
+		const guchar *f = extra_iterator;
+
+		gsize length = f[2] | (f[3] << 8);
+		if (length > remaining - 4)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"overflowing header subfield");
+			g_free (chunks);
+			return NULL;
+		}
+
+		extra_iterator += 4 + length;
+		remaining -= 4 + length;
+
+		if (f[0] != 'R' || f[1] != 'A')
+			continue;
+
+		if (chunks != NULL)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"multiple RA subfields present in the header");
+			g_free (chunks);
+			return NULL;
+		}
+
+		// Version, chunk length and chunk count take up six bytes of data;
+		// make sure they're all there before looking at them
+		if (length < 6)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"RA subfield overflow");
+			return NULL;
+		}
+
+		guint16 version = f[4] | (f[5] << 8);
+		if (version != 1)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"unsupported RA subfield version");
+			return NULL;
+		}
+
+		// The reader divides by this value, so it must not be zero
+		*chunk_length = f[6] | (f[7] << 8);
+		if (*chunk_length == 0)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"invalid RA chunk length");
+			return NULL;
+		}
+
+		*n_chunks = f[8] | (f[9] << 8);
+		if (length < 6 + *n_chunks * 2)
+		{
+			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+				"RA subfield overflow");
+			return NULL;
+		}
+
+		chunks = g_malloc_n (*n_chunks, sizeof *chunks);
+
+		guint i;
+		for (i = 0; i < *n_chunks; i++)
+			chunks[i] = f[10 + i * 2] | (f[10 + i * 2 + 1] << 8);
+	}
+
+	// Anything left over is too short to form a subfield header
+	if (remaining != 0)
+	{
+		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+			"invalid 'extra' field, subfield too short");
+		g_free (chunks);
+		return NULL;
+	}
+
+	return chunks;
+}
+
+// --- DictzipInputStream ------------------------------------------------------
+
+static void dictzip_input_stream_finalize (GObject *gobject);
+
+static void dictzip_input_stream_seekable_init
+	(GSeekableIface *iface, gpointer iface_data);
+static goffset dictzip_input_stream_tell (GSeekable *seekable);
+static gboolean dictzip_input_stream_seek (GSeekable *seekable, goffset offset,
+	GSeekType type, GCancellable *cancellable, GError **error);
+
+static gssize dictzip_input_stream_read (GInputStream *stream, void *buffer,
+	gsize count, GCancellable *cancellable, GError **error);
+static gssize dictzip_input_stream_skip (GInputStream *stream, gsize count,
+	GCancellable *cancellable, GError **error);
+
+/** Private instance data of DictzipInputStream: the chunk table decoded from
+ *  the gzip header, the zlib context, and a cache of inflated chunks. */
+struct dictzip_input_stream_private
+{
+	GFileInfo  * file_info;            //!< File information from gzip header
+
+	goffset      first_block_offset;   //!< Offset to the first block/chunk
+	gsize        chunk_length;         //!< Uncompressed chunk length
+	gsize        n_chunks;             //!< Number of chunks in file
+	guint16    * chunks;               //!< Chunk sizes after compression
+
+	z_stream     zs;                   //!< zlib decompression context
+	gpointer     input_buffer;         //!< Input buffer
+
+	goffset      offset;               //!< Current offset
+	gpointer   * decompressed;         //!< Array of decompressed chunks
+	gsize        last_chunk_length;    //!< Size of the last chunk
+};
+
+/* Register DictzipInputStream as a GFilterInputStream subclass that also
+ * implements the GSeekable interface. */
+G_DEFINE_TYPE_EXTENDED (DictzipInputStream, dictzip_input_stream,
+	G_TYPE_FILTER_INPUT_STREAM, 0,
+	G_IMPLEMENT_INTERFACE (G_TYPE_SEEKABLE, dictzip_input_stream_seekable_init))
+
+/** GSeekable predicate that always answers TRUE. */
+static gboolean
+seekable_true (G_GNUC_UNUSED GSeekable *x)
+{
+	return TRUE;
+}
+/** GSeekable predicate that always answers FALSE. */
+static gboolean
+seekable_false (G_GNUC_UNUSED GSeekable *x)
+{
+	return FALSE;
+}
+
+/** Fill in the GSeekable interface: the stream can be sought in and asked
+ *  for its position, but it cannot be truncated. */
+static void
+dictzip_input_stream_seekable_init
+	(GSeekableIface *iface, G_GNUC_UNUSED gpointer iface_data)
+{
+	// Capabilities
+	iface->can_seek        = seekable_true;
+	iface->can_truncate    = seekable_false;
+
+	// Operations
+	iface->seek            = dictzip_input_stream_seek;
+	iface->tell            = dictzip_input_stream_tell;
+}
+
+/** Class initialiser: reserves space for the private instance data and
+ *  installs the GObject and GInputStream method overrides. */
+static void
+dictzip_input_stream_class_init (DictzipInputStreamClass *klass)
+{
+	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+	GInputStreamClass *input_class = G_INPUT_STREAM_CLASS (klass);
+
+	g_type_class_add_private (klass, sizeof (DictzipInputStreamPrivate));
+
+	gobject_class->finalize = dictzip_input_stream_finalize;
+
+	input_class->read_fn = dictzip_input_stream_read;
+	input_class->skip    = dictzip_input_stream_skip;
+}
+
+/** Instance initialiser: caches the pointer to the private data. */
+static void
+dictzip_input_stream_init (DictzipInputStream *self)
+{
+	DictzipInputStreamPrivate *priv = G_TYPE_INSTANCE_GET_PRIVATE (self,
+		DICTZIP_TYPE_INPUT_STREAM, DictzipInputStreamPrivate);
+
+	self->priv = priv;
+}
+
+/** Release everything owned by the instance and chain up to the parent.
+ *
+ *  The constructor unreferences a partially initialised object whenever it
+ *  fails, so any member may still be unset at this point.
+ */
+static void
+dictzip_input_stream_finalize (GObject *gobject)
+{
+	DictzipInputStreamPrivate *priv = DICTZIP_INPUT_STREAM (gobject)->priv;
+
+	// g_object_unref() doesn't accept NULL, unlike g_free()
+	if (priv->file_info)
+		g_object_unref (priv->file_info);
+
+	g_free (priv->chunks);
+	g_free (priv->input_buffer);
+	inflateEnd (&priv->zs);
+
+	// The chunk count may be known even though the cache was never created
+	if (priv->decompressed)
+	{
+		guint i;
+		for (i = 0; i < priv->n_chunks; i++)
+			g_free (priv->decompressed[i]);
+		g_free (priv->decompressed);
+	}
+
+	G_OBJECT_CLASS (dictzip_input_stream_parent_class)->finalize (gobject);
+}
+
+/** GSeekable::tell implementation: report the current offset within
+ *  the uncompressed data. */
+static goffset
+dictzip_input_stream_tell (GSeekable *seekable)
+{
+	DictzipInputStream *self = DICTZIP_INPUT_STREAM (seekable);
+	return self->priv->offset;
+}
+
+/** Read one compressed chunk from the base stream and inflate it.
+ *
+ *  The position of the chunk within the file is the offset of the first
+ *  block plus the compressed sizes of all the chunks that precede it.
+ *
+ *  @param[in]  self  The stream
+ *  @param[in]  chunk_id  Index of the chunk, less than the chunk count
+ *  @param[out] inflated_length  Number of bytes that have been inflated
+ *  @param[out] error  Set when NULL is returned
+ *  @return Newly allocated buffer of the uncompressed chunk length,
+ *      to be freed with g_free(), or NULL on failure
+ */
+static gpointer
+inflate_chunk (DictzipInputStream *self,
+	guint chunk_id, gsize *inflated_length, GError **error)
+{
+	DictzipInputStreamPrivate *priv = self->priv;
+	g_return_val_if_fail (chunk_id < priv->n_chunks, NULL);
+
+	GInputStream *base_stream = G_FILTER_INPUT_STREAM (self)->base_stream;
+
+	guint i;
+	goffset offset = priv->first_block_offset;
+	for (i = 0; i < chunk_id; i++)
+		offset += priv->chunks[i];
+
+	if (!g_seekable_seek (G_SEEKABLE (base_stream),
+		offset, G_SEEK_SET, NULL, error))
+		return NULL;
+
+	// A plain read may return less than requested without being at the end
+	// of the stream, so keep reading until the whole chunk is in
+	gsize read = 0;
+	if (!g_input_stream_read_all (base_stream, priv->input_buffer,
+		priv->chunks[chunk_id], &read, NULL, error))
+		return NULL;
+
+	if (read != priv->chunks[chunk_id])
+	{
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+			"premature end of file");
+		return NULL;
+	}
+
+	gpointer chunk_data = NULL;
+	int z_err = inflateReset (&priv->zs);
+	if (z_err != Z_OK)
+		goto error_zlib;
+
+	chunk_data = g_malloc (priv->chunk_length);
+
+	priv->zs.next_in   = (Bytef *) priv->input_buffer;
+	priv->zs.avail_in  = read;
+	priv->zs.total_in  = 0;
+
+	priv->zs.next_out  = (Bytef *) chunk_data;
+	priv->zs.avail_out = priv->chunk_length;
+	priv->zs.total_out = 0;
+
+	// Z_BLOCK would make zlib stop at the first deflate block boundary,
+	// and a chunk may consist of several blocks.  Z_STREAM_END is returned
+	// when the chunk happens to contain the final block of the stream.
+	z_err = inflate (&priv->zs, Z_SYNC_FLUSH);
+	if (z_err != Z_OK && z_err != Z_STREAM_END)
+		goto error_zlib;
+
+	*inflated_length = priv->zs.total_out;
+	return chunk_data;
+
+error_zlib:
+	g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+		"failed to inflate the requested block: %s", zError (z_err));
+	g_free (chunk_data);
+	return NULL;
+}
+
+/** Return the uncompressed data of a chunk, inflating and caching it first
+ *  when it hasn't been requested before.
+ *
+ *  Inflating the last chunk records its length in "last_chunk_length";
+ *  any other chunk has to come out at least "chunk_length" bytes long.
+ *
+ *  @return The cached chunk, owned by the stream, or NULL with @a error set
+ */
+static gpointer
+get_chunk (DictzipInputStream *self, guint chunk_id, GError **error)
+{
+	DictzipInputStreamPrivate *priv = self->priv;
+
+	gpointer cached = priv->decompressed[chunk_id];
+	if (cached)
+		return cached;
+
+	/* Just inflating the file piece by piece as needed. */
+	gsize inflated_size;
+	gpointer fresh = inflate_chunk (self, chunk_id, &inflated_size, error);
+	if (!fresh)
+		return NULL;
+
+	gboolean is_last = (chunk_id + 1 == priv->n_chunks);
+	if (is_last)
+		priv->last_chunk_length = inflated_size;
+	else if (inflated_size < priv->chunk_length)
+	{
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+			"inflated dictzip chunk is too short");
+		g_free (fresh);
+		return NULL;
+	}
+
+	priv->decompressed[chunk_id] = fresh;
+	return fresh;
+}
+
+/** GSeekable::seek implementation.
+ *
+ *  Only the stored offset is updated; nothing is read or inflated here.
+ *  Seeking relative to the end isn't supported and resulting offsets below
+ *  zero are refused.
+ *
+ *  @return TRUE on success, FALSE with @a error set otherwise
+ */
+static gboolean
+dictzip_input_stream_seek (GSeekable *seekable, goffset offset,
+	GSeekType type, GCancellable *cancellable, GError **error)
+{
+	if (g_cancellable_set_error_if_cancelled (cancellable, error))
+		return FALSE;
+
+	if (type == G_SEEK_END)
+	{
+		/* This could be implemented by retrieving the last chunk
+		 * and deducing the filesize, should the functionality be needed. */
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
+			"I don't know where the stream ends, cannot seek there");
+		return FALSE;
+	}
+
+	DictzipInputStreamPrivate *priv = DICTZIP_INPUT_STREAM (seekable)->priv;
+	goffset target;
+
+	switch (type)
+	{
+	case G_SEEK_SET:
+		target = offset;
+		break;
+	case G_SEEK_CUR:
+		target = priv->offset + offset;
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+
+	if (target < 0)
+	{
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+			"cannot seek before the start of data");
+		return FALSE;
+	}
+
+	priv->offset = target;
+	return TRUE;
+}
+
+/** GInputStream::read_fn implementation.
+ *
+ *  Copies data out of the inflated chunks, starting at the current offset
+ *  and crossing chunk boundaries as needed, and advances the offset by
+ *  the number of bytes copied.
+ *
+ *  @param[in]  stream  The stream
+ *  @param[out] buffer  Where to store the data
+ *  @param[in]  count  Maximum number of bytes to read
+ *  @param[in]  cancellable  Only checked once at the start, may be NULL
+ *  @param[out] error  Set when -1 is returned
+ *  @return Number of bytes read, which is less than @a count once the last
+ *      chunk has been passed (zero at the end of data), or -1 on error
+ */
+static gssize
+dictzip_input_stream_read (GInputStream *stream, void *buffer,
+	gsize count, GCancellable *cancellable, GError **error)
+{
+	if (g_cancellable_set_error_if_cancelled (cancellable, error))
+		return -1;
+
+	DictzipInputStream *self = DICTZIP_INPUT_STREAM (stream);
+	DictzipInputStreamPrivate *priv = self->priv;
+
+	// Standard C doesn't define arithmetic on void pointers
+	guint8 *out = buffer;
+	gssize read = 0;
+
+	// Computed in 64 bits so that a far-away offset can't wrap the index
+	guint64 first_chunk = (guint64) priv->offset / priv->chunk_length;
+	if (first_chunk >= priv->n_chunks)
+		return 0;
+
+	guint chunk_id     = first_chunk;
+	gsize chunk_offset = (guint64) priv->offset % priv->chunk_length;
+
+	while (count > 0 && chunk_id < priv->n_chunks)
+	{
+		const guint8 *chunk = get_chunk (self, chunk_id, error);
+		if (!chunk)
+			return -1;
+
+		// The length of the last chunk is set by the call to get_chunk()
+		gsize chunk_size = (chunk_id + 1 == priv->n_chunks)
+			? priv->last_chunk_length
+			: priv->chunk_length;
+
+		// The offset may point past the data stored in the last chunk
+		if (chunk_offset < chunk_size)
+		{
+			gsize to_copy = MIN (count, chunk_size - chunk_offset);
+
+			memcpy (out, chunk + chunk_offset, to_copy);
+			out += to_copy;
+			priv->offset += to_copy;
+			count -= to_copy;
+			read += to_copy;
+		}
+
+		chunk_id++;
+		chunk_offset = 0;
+	}
+
+	return read;
+}
+
+/** GInputStream::skip implementation: a relative seek forward.
+ *
+ *  @return @a count when the seek succeeds, -1 with @a error set otherwise
+ */
+static gssize
+dictzip_input_stream_skip (GInputStream *stream, gsize count,
+	GCancellable *cancellable, GError **error)
+{
+	gboolean moved = dictzip_input_stream_seek (G_SEEKABLE (stream), count,
+		G_SEEK_CUR, cancellable, error);
+
+	return moved ? (gssize) count : -1;
+}
+
+/** Create an input stream for the underlying dictzip file.
+ *
+ *  The gzip header is decoded right away, while the compressed data is only
+ *  inflated later as it is being read.  The base stream doesn't get closed
+ *  together with the new stream.
+ *
+ *  @param[in]  base_stream  Seekable stream containing the dictzip file
+ *  @param[out] error  Set when NULL is returned: DICTZIP_ERROR_NOT_SEEKABLE,
+ *      DICTZIP_ERROR_INVALID_HEADER, or an error from G_IO_ERROR
+ *  @return A new stream to be released with g_object_unref(), or NULL
+ */
+DictzipInputStream *
+dictzip_input_stream_new (GInputStream *base_stream, GError **error)
+{
+	g_return_val_if_fail (G_IS_INPUT_STREAM (base_stream), NULL);
+
+	if (!G_IS_SEEKABLE (base_stream)
+	 || !g_seekable_can_seek (G_SEEKABLE (base_stream)))
+	{
+		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_NOT_SEEKABLE,
+			"the underlying stream isn't seekable");
+		return NULL;
+	}
+
+	GError *err = NULL;
+	DictzipInputStream *self = g_object_new (DICTZIP_TYPE_INPUT_STREAM,
+		"base-stream", base_stream, "close-base-stream", FALSE, NULL);
+	DictzipInputStreamPrivate *priv = self->priv;
+
+	/* Created up front so that it exists even when the object gets
+	 * unreferenced on one of the failure paths below. */
+	priv->file_info = g_file_info_new ();
+
+	/* Decode the header. */
+	gz_header gzh;
+	if (!read_gzip_header (G_INPUT_STREAM (base_stream),
+		&gzh, &priv->first_block_offset, &err))
+	{
+		g_propagate_error (error, err);
+		goto error;
+	}
+
+	priv->chunks = read_random_access_field (&gzh,
+		&priv->chunk_length, &priv->n_chunks, &err);
+	if (err)
+	{
+		g_propagate_error (error, err);
+		goto error;
+	}
+
+	if (!priv->chunks)
+	{
+		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
+			"not a dictzip file");
+		goto error;
+	}
+
+	/* Store file information. */
+	if (gzh.time != 0)
+	{
+		GTimeVal m_time = { gzh.time, 0 };
+		g_file_info_set_modification_time (priv->file_info, &m_time);
+	}
+
+	if (gzh.name && *gzh.name)
+		g_file_info_set_name (priv->file_info, (gchar *) gzh.name);
+
+	/* Initialise zlib; negative window bits select a raw deflate stream. */
+	int z_err;
+	z_err = inflateInit2 (&priv->zs, -15);
+	if (z_err != Z_OK)
+	{
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+			"zlib initialisation failed: %s", zError (z_err));
+		goto error;
+	}
+
+	/* Compressed chunk sizes are 16-bit, so any chunk fits in here. */
+	priv->input_buffer = g_malloc (65536);
+	priv->decompressed = g_new0 (gpointer, priv->n_chunks);
+	priv->last_chunk_length = -1; // We don't know yet.
+
+	free_gzip_header (&gzh);
+	return self;
+
+error:
+	/* The chunk cache hasn't been allocated on any path leading here,
+	 * make sure that nothing attempts to walk through it on destruction. */
+	priv->n_chunks = 0;
+
+	free_gzip_header (&gzh);
+	g_object_unref (self);
+	return NULL;
+}
+
+/** Return file information for the compressed file.
+ *
+ *  The returned object stays owned by the stream; no reference is added.
+ */
+GFileInfo *
+dictzip_input_stream_get_file_info (DictzipInputStream *self)
+{
+	g_return_val_if_fail (DICTZIP_IS_INPUT_STREAM (self), NULL);
+	return self->priv->file_info;
+}
diff --git a/src/dictzip-input-stream.h b/src/dictzip-input-stream.h
new file mode 100644
index 0000000..b9d039c
--- /dev/null
+++ b/src/dictzip-input-stream.h
@@ -0,0 +1,77 @@
+/*
+ * dictzip-input-stream.h: dictzip GIO stream reader
+ *
+ * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef DICTZIP_INPUT_STREAM_H
+#define DICTZIP_INPUT_STREAM_H
+
+/** Random-access dictzip reader.
+ *
+ *  The instance and class structures are defined further below; the private
+ *  structure is left opaque to the users of this header. */
+typedef struct dictzip_input_stream          DictzipInputStream;
+typedef struct dictzip_input_stream_class    DictzipInputStreamClass;
+typedef struct dictzip_input_stream_private  DictzipInputStreamPrivate;
+
+/* GObject boilerplate: the type identifier, followed by checked casts
+ * and type checks for both instances and classes. */
+#define DICTZIP_TYPE_INPUT_STREAM  (dictzip_input_stream_get_type ())
+#define DICTZIP_INPUT_STREAM(obj) \
+	(G_TYPE_CHECK_INSTANCE_CAST ((obj), \
+	DICTZIP_TYPE_INPUT_STREAM, DictzipInputStream))
+#define DICTZIP_IS_INPUT_STREAM(obj) \
+	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
+	DICTZIP_TYPE_INPUT_STREAM))
+#define DICTZIP_INPUT_STREAM_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_CAST ((klass), \
+	DICTZIP_TYPE_INPUT_STREAM, DictzipInputStreamClass))
+#define DICTZIP_IS_INPUT_STREAM_CLASS(klass) \
+	(G_TYPE_CHECK_CLASS_TYPE ((klass), \
+	DICTZIP_TYPE_INPUT_STREAM))
+#define DICTZIP_INPUT_STREAM_GET_CLASS(obj) \
+	(G_TYPE_INSTANCE_GET_CLASS ((obj), \
+	DICTZIP_TYPE_INPUT_STREAM, DictzipInputStreamClass))
+
+// --- Errors ------------------------------------------------------------------
+
+/** Error codes used within the DICTZIP_ERROR domain. */
+typedef enum {
+	DICTZIP_ERROR_NOT_SEEKABLE,        //!< Underlying stream isn't seekable
+	DICTZIP_ERROR_INVALID_HEADER       //!< Error occurred while parsing header
+} DictzipError;
+
+#define DICTZIP_ERROR  (dictzip_error_quark ())
+
+GQuark dictzip_error_quark (void);
+
+// --- DictzipInputStream ------------------------------------------------------
+
+/** Instance structure; derives from GFilterInputStream. */
+struct dictzip_input_stream
+{
+	GFilterInputStream parent_instance;
+	DictzipInputStreamPrivate *priv;   //!< Opaque private data
+};
+
+/** Class structure; adds nothing to the parent class. */
+struct dictzip_input_stream_class
+{
+	GFilterInputStreamClass parent_class;
+};
+
+/** Return the GType of DictzipInputStream. */
+GType dictzip_input_stream_get_type (void);
+/** Create a dictzip reader on top of @a base_stream; takes a GError
+ *  location for reporting failures. */
+DictzipInputStream *dictzip_input_stream_new
+	(GInputStream *base_stream, GError **error);
+/** Return the file information object associated with the stream. */
+GFileInfo *dictzip_input_stream_get_file_info (DictzipInputStream *self);
+
+
+#endif /* ! DICTZIP_INPUT_STREAM_H */
diff --git a/src/stardict.c b/src/stardict.c
index 4e3f5bd..9a25b3e 100644
--- a/src/stardict.c
+++ b/src/stardict.c
@@ -29,48 +29,11 @@
 
 #include "stardict.h"
 #include "stardict-private.h"
+#include "utils.h"
 
 
 // --- Utilities ---------------------------------------------------------------
 
-/** Read the whole stream into a byte array. */
-static gboolean
-stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
-{
-	guint8 buffer[1024 * 64];
-	gsize bytes_read;
-
-	while (g_input_stream_read_all (is, buffer, sizeof buffer,
-		&bytes_read, NULL, error))
-	{
-		g_byte_array_append (ba, buffer, bytes_read);
-		if (bytes_read < sizeof buffer)
-			return TRUE;
-	}
-	return FALSE;
-}
-
-/** Read a null-terminated string from a data input stream. */
-static gchar *
-stream_read_string (GDataInputStream *dis, GError **error)
-{
-	gsize length;
-	gchar *s = g_data_input_stream_read_upto (dis, "", 1, &length, NULL, error);
-	if (!s)
-		return NULL;
-
-	GError *err = NULL;
-	g_data_input_stream_read_byte (dis, NULL, &err);
-	if (err)
-	{
-		g_free (s);
-		g_propagate_error (error, err);
-		return NULL;
-	}
-
-	return s;
-}
-
 /** String compare function used for StarDict indexes. */
 static inline gint
 stardict_strcmp (const gchar *s1, const gchar *s2)
@@ -79,23 +42,6 @@ stardict_strcmp (const gchar *s1, const gchar *s2)
 	return a ? a : strcmp (s1, s2);
 }
 
-/** After this statement, the element has been found and its index is stored
- *  in the variable "imid". */
-#define BINARY_SEARCH_BEGIN(max, compare)                                     \
-	gint imin = 0, imax = max, imid;                                          \
-	while (imin <= imax) {                                                    \
-		imid = imin + (imax - imin) / 2;                                      \
-		gint cmp = compare;                                                   \
-		if      (cmp > 0) imin = imid + 1;                                    \
-		else if (cmp < 0) imax = imid - 1;                                    \
-		else {
-
-/** After this statement, the binary search has failed and "imin" stores
- *  the position where the element can be inserted. */
-#define BINARY_SEARCH_END                                                     \
-		}                                                                     \
-	}
-
 // --- Errors ------------------------------------------------------------------
 
 GQuark
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..8636778
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,63 @@
+/*
+ * utils.c: miscellaneous utilities
+ *
+ * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include "utils.h"
+
+
+/** Read the whole stream into a byte array.
+ *
+ *  Data is appended to @a ba in pieces of up to 64 KiB; a piece that comes
+ *  out shorter than requested marks the end of the stream.
+ *
+ *  @return TRUE once the end has been reached, FALSE with @a error set when
+ *      a read fails
+ */
+gboolean
+stream_read_all (GByteArray *ba, GInputStream *is, GError **error)
+{
+	guint8 piece[1024 * 64];
+
+	for (;;)
+	{
+		gsize n_read;
+		if (!g_input_stream_read_all (is, piece, sizeof piece,
+			&n_read, NULL, error))
+			return FALSE;
+
+		g_byte_array_append (ba, piece, n_read);
+		if (n_read < sizeof piece)
+			return TRUE;
+	}
+}
+
+/** Read a null-terminated string from a data input stream.
+ *
+ *  The text is read up to the terminating zero byte, which is then consumed
+ *  as well.
+ *
+ *  @return Newly allocated string to be freed with g_free(), or NULL when
+ *      either the text or its terminator could not be read
+ */
+gchar *
+stream_read_string (GDataInputStream *dis, GError **error)
+{
+	gsize text_length;
+	gchar *text = g_data_input_stream_read_upto (dis, "", 1,
+		&text_length, NULL, error);
+	if (text == NULL)
+		return NULL;
+
+	// Get rid of the terminator that has been left in the stream
+	GError *terminator_error = NULL;
+	g_data_input_stream_read_byte (dis, NULL, &terminator_error);
+	if (terminator_error == NULL)
+		return text;
+
+	g_free (text);
+	g_propagate_error (error, terminator_error);
+	return NULL;
+}
diff --git a/src/utils.h b/src/utils.h
new file mode 100644
index 0000000..61c108e
--- /dev/null
+++ b/src/utils.h
@@ -0,0 +1,44 @@
+/*
+ * utils.h: miscellaneous utilities
+ *
+ * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef UTILS_H
+#define UTILS_H
+
+/** Open a binary search over the indexes 0 to "max", both inclusive.
+ *
+ *  "compare" is an expression that may refer to "imid", the index currently
+ *  being examined.  A positive result continues the search above "imid",
+ *  a negative one below it, and zero means a match.
+ *
+ *  Code placed after this statement runs once the element has been found,
+ *  with its index stored in the variable "imid".  That code has to leave
+ *  the loop on its own, as the search bounds are not changed on a match. */
+#define BINARY_SEARCH_BEGIN(max, compare)                                     \
+	gint imin = 0, imax = max, imid;                                          \
+	while (imin <= imax) {                                                    \
+		imid = imin + (imax - imin) / 2;                                      \
+		gint cmp = compare;                                                   \
+		if      (cmp > 0) imin = imid + 1;                                    \
+		else if (cmp < 0) imax = imid - 1;                                    \
+		else {
+
+/** Close the block opened by BINARY_SEARCH_BEGIN.
+ *
+ *  After this statement, the binary search has failed and "imin" stores
+ *  the position where the element can be inserted. */
+#define BINARY_SEARCH_END                                                     \
+		}                                                                     \
+	}
+
+/** Read the whole stream into a byte array. */
+gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error);
+/** Read a null-terminated string from a data input stream. */
+gchar *stream_read_string (GDataInputStream *dis, GError **error);
+
+#endif /* ! UTILS_H */