diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2023-06-11 17:45:38 +0200 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2023-06-11 18:08:03 +0200 |
commit | c77d994dc44a9ef8f87dd36661201f499877fc34 (patch) | |
tree | 0ff850d9807f53b9acfe4e9ea95e3346b214ef37 /src/tabfile.c | |
parent | 238e7a2bb961eb448dee1542e03cbdb84dea027d (diff) | |
download | tdv-c77d994dc44a9ef8f87dd36661201f499877fc34.tar.gz tdv-c77d994dc44a9ef8f87dd36661201f499877fc34.tar.xz tdv-c77d994dc44a9ef8f87dd36661201f499877fc34.zip |
Rename tools, make them installable
Diffstat (limited to 'src/tabfile.c')
-rw-r--r-- | src/tabfile.c | 223 |
1 files changed, 0 insertions, 223 deletions
diff --git a/src/tabfile.c b/src/tabfile.c deleted file mode 100644 index fab0ef2..0000000 --- a/src/tabfile.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * A clean reimplementation of StarDict's tabfile - * - * Copyright (c) 2020 - 2021, Přemysl Eric Janouch <p@janouch.name> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <locale.h> - -#include <glib.h> -#include <gio/gio.h> -#include <pango/pango.h> - -#include <unicode/ucol.h> - -#include "config.h" -#include "stardict.h" -#include "stardict-private.h" -#include "generator.h" -#include "utils.h" - - -static gboolean -set_data_error (GError **error, const gchar *message) -{ - g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA, message); - return FALSE; -} - -static const gchar escapes[256] = { ['n'] = '\n', ['t'] = '\t', ['\\'] = '\\' }; - -static gboolean -inplace_unescape (gchar *line, GError **error) -{ - gboolean escape = FALSE; - gchar *dest = line; - for (gchar *src = line; *src; src++) - { - if (escape) - { - escape = FALSE; - if (!(*dest++ = escapes[(guchar) *src])) - return set_data_error (error, "unsupported escape"); - } - else if (*src == '\\') - escape = TRUE; - else - *dest++ = *src; - } - if (escape) - return set_data_error (error, "trailing escape character"); - - *dest = 0; - return TRUE; -} - -static gboolean -import_line (Generator *generator, gchar *line, gsize len, GError **error) -{ - if (!len) - return TRUE; - if (!g_utf8_validate_len (line, len, NULL)) - return set_data_error (error, "not valid UTF-8"); - - gchar *separator = strchr (line, '\t'); - if (!separator) - return set_data_error (error, "keyword separator not found"); - - *separator++ = 0; - if (strchr (line, '\\')) - // The index wouldn't be sorted correctly with our method - return set_data_error (error, "escapes not allowed in keywords"); - - gchar *newline = strpbrk (separator, "\r\n"); - if (newline) - *newline = 0; - - if (!inplace_unescape (line, error) - || !inplace_unescape (separator, error)) - return FALSE; - - if (generator->info->same_type_sequence - && *generator->info->same_type_sequence == STARDICT_FIELD_PANGO - && !pango_parse_markup (separator, -1, 0, NULL, NULL, NULL, error)) - return FALSE; - - generator_begin_entry (generator); - return generator_write_string (generator, separator, TRUE, error) - && generator_finish_entry (generator, line, error); -} - -static gboolean -transform (FILE *fsorted, Generator *generator, GError **error) -{ - gchar *line = NULL; - gsize size = 0, ln = 1; - for (ssize_t read; (read = getline (&line, &size, fsorted)) >= 0; ln++) - if (!import_line (generator, line, read, error)) - break; - - free (line); - if (ferror (fsorted)) - { - g_set_error_literal (error, G_IO_ERROR, - g_io_error_from_errno (errno), g_strerror (errno)); - return FALSE; - } - if (!feof (fsorted)) - { - // You'll only get good line number output with presorted input! - g_prefix_error (error, "line %zu: ", ln); - return FALSE; - } - return TRUE; -} - -static void -validate_collation_locale (const gchar *locale) -{ - UErrorCode error = U_ZERO_ERROR; - UCollator *collator = ucol_open (locale, &error); - if (!collator) - fatal ("failed to create a collator for %s: %s\n", - locale, u_errorName (error)); - ucol_close (collator); -} - -int -main (int argc, char *argv[]) -{ - // The GLib help includes an ellipsis character, for some reason - (void) setlocale (LC_ALL, ""); - - GError *error = NULL; - GOptionContext *ctx = g_option_context_new ("output-basename < input"); - g_option_context_set_summary (ctx, - "Create a StarDict dictionary from plaintext."); - - gboolean pango_markup = FALSE; - StardictInfo template = {}; - GOptionEntry entries[] = - { - { "pango", 'p', 0, G_OPTION_ARG_NONE, &pango_markup, - "Entries use Pango markup", NULL }, - - { "book-name", 'b', 0, G_OPTION_ARG_STRING, &template.book_name, - "Set the book name field", "TEXT" }, - { "author", 'a', 0, G_OPTION_ARG_STRING, &template.author, - "Set the author field ", "NAME" }, - { "e-mail", 'e', 0, G_OPTION_ARG_STRING, &template.email, - "Set the e-mail field", "ADDRESS" }, - { "website", 'w', 0, G_OPTION_ARG_STRING, &template.website, - "Set the website field", "LINK" }, - { "description", 'd', 0, G_OPTION_ARG_STRING, &template.description, - "Set the description field (newlines supported)", "TEXT" }, - { "date", 'D', 0, G_OPTION_ARG_STRING, &template.date, - "Set the date field", "DATE" }, - { "collation", 'c', 0, G_OPTION_ARG_STRING, &template.collation, - "Set the collation field (for ICU)", "LOCALE" }, - { } - }; - - g_option_context_add_main_entries (ctx, entries, GETTEXT_PACKAGE); - if (!g_option_context_parse (ctx, &argc, &argv, &error)) - fatal ("Error: option parsing failed: %s\n", error->message); - if (argc != 2) - fatal ("%s", g_option_context_get_help (ctx, TRUE, NULL)); - g_option_context_free (ctx); - - template.version = SD_VERSION_3_0_0; - template.same_type_sequence = pango_markup - ? (char[]) { STARDICT_FIELD_PANGO, 0 } - : (char[]) { STARDICT_FIELD_MEANING, 0 }; - - if (!template.book_name) - template.book_name = argv[1]; - if (template.description) - { - gchar **lines = g_strsplit (template.description, "\n", -1); - g_free (template.description); - gchar *in_one_line = g_strjoinv ("<br>", lines); - g_strfreev (lines); - template.description = in_one_line; - } - if (template.collation) - validate_collation_locale (template.collation); - - // This actually implements stardict_strcmp(), POSIX-compatibly. - // Your sort(1) is not expected to be stable by default, like bsdsort is. - FILE *fsorted = popen ("LC_ALL=C sort -t'\t' -k1f,1", "r"); - if (!fsorted) - fatal ("%s: %s\n", "popen", g_strerror (errno)); - - Generator *generator = generator_new (argv[1], &error); - if (!generator) - fatal ("Error: failed to create the output dictionary: %s\n", - error->message); - - StardictInfo *info = generator->info; - stardict_info_copy (info, &template); - if (!transform (fsorted, generator, &error) - || !generator_finish (generator, &error)) - fatal ("Error: failed to write the dictionary: %s\n", error->message); - - generator_free (generator); - pclose (fsorted); - return 0; -} |