From 8d19acd91af9592d862ef2a7aa8e95eea4160152 Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch
Date: Thu, 3 Sep 2020 23:17:17 +0200 Subject: Add a tool to transform dictionaries --- CMakeLists.txt | 17 ++- README.adoc | 5 + src/add-pronunciation.c | 29 +----- src/generator.c | 30 +++++- src/generator.h | 7 +- src/transform.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 318 insertions(+), 40 deletions(-) create mode 100644 src/transform.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6edd410..3bb97aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,15 +180,14 @@ add_executable (${PROJECT_NAME} target_link_libraries (${PROJECT_NAME} ${project_common_libraries}) # Tools -add_executable (query-tool EXCLUDE_FROM_ALL - src/query-tool.c ${project_common_sources}) -target_link_libraries (query-tool ${project_common_libraries}) - -add_executable (add-pronunciation EXCLUDE_FROM_ALL - src/add-pronunciation.c ${project_common_sources}) -target_link_libraries (add-pronunciation ${project_common_libraries}) - -add_custom_target (tools DEPENDS add-pronunciation query-tool) +set (tools add-pronunciation query-tool transform) +foreach (tool ${tools}) + add_executable (${tool} EXCLUDE_FROM_ALL + src/${tool}.c ${project_common_sources}) + target_link_libraries (${tool} ${project_common_libraries}) +endforeach (tool) + +add_custom_target (tools DEPENDS ${tools}) # The files to be installed include (GNUInstallDirs) diff --git a/README.adoc b/README.adoc index cfad569..fb89f18 100644 --- a/README.adoc +++ b/README.adoc @@ -100,6 +100,11 @@ Dictionaries Unfortunately this application only really works with specific dictionaries. Word definitions have to be in plain text, separated by newlines. +You may use the included transform tool to transform existing dictionaries that +are almost useful as they are, e.g. after stripping XML tags. You might want to +fix up the `sametypesequence` of the resulting '.ifo' file afterwards, and run +dictzip on the resulting '.dict' file. 
+ https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[ CZ <--> { EN, DE, PL, RU } dictionaries] diff --git a/src/add-pronunciation.c b/src/add-pronunciation.c index 6ca5ad3..26261f9 100644 --- a/src/add-pronunciation.c +++ b/src/add-pronunciation.c @@ -282,32 +282,6 @@ stardict_info_copy (StardictInfo *dest, const StardictInfo *src) } } -/// Write a list of data fields back to a dictionary. -static gboolean -write_fields (Generator *generator, GList *fields, gboolean sts, GError **error) -{ - while (fields) - { - StardictEntryField *field = fields->data; - if (!sts && !generator_write_type (generator, field->type, error)) - return FALSE; - - gboolean mark_end = !sts || fields->next != NULL; - if (g_ascii_islower (field->type)) - { - if (!generator_write_string (generator, - field->data, mark_end, error)) - return FALSE; - } - else if (!generator_write_raw (generator, - field->data, field->data_size, mark_end, error)) - return FALSE; - - fields = fields->next; - } - return TRUE; -} - int main (int argc, char *argv[]) { @@ -516,8 +490,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS start_link.next = entry->fields; start_link.data = &field; - if (!write_fields (generator, &start_link, - info->same_type_sequence != NULL, &error) + if (!generator_write_fields (generator, &start_link, &error) || !generator_finish_entry (generator, stardict_iterator_get_word (iterator), &error)) { diff --git a/src/generator.c b/src/generator.c index 9f6be9b..25c8e43 100644 --- a/src/generator.c +++ b/src/generator.c @@ -1,7 +1,7 @@ /* * generator.c: dictionary generator * - * Copyright (c) 2013, Přemysl Eric Janouch
+ * Copyright (c) 2013 - 2020, Přemysl Eric Janouch
* * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. @@ -170,6 +170,34 @@ generator_write_string (Generator *self, return TRUE; } +/// Write a list of data fields back to a dictionary. The list has to be +/// acceptable for the generated dictionary's sametypesequence (or lack thereof). +gboolean +generator_write_fields (Generator *self, const GList *fields, GError **error) +{ + gboolean sts = self->info->same_type_sequence != NULL; + while (fields) + { + StardictEntryField *field = fields->data; + if (!sts && !generator_write_type (self, field->type, error)) + return FALSE; + + gboolean mark_end = !sts || fields->next != NULL; + if (g_ascii_islower (field->type)) + { + if (!generator_write_string (self, + field->data, mark_end, error)) + return FALSE; + } + else if (!generator_write_raw (self, + field->data, field->data_size, mark_end, error)) + return FALSE; + + fields = fields->next; + } + return TRUE; +} + /// Finishes the current entry and writes it into the index. gboolean generator_finish_entry (Generator *self, const gchar *word, GError **error) diff --git a/src/generator.h b/src/generator.h index 554e7ed..ba19d58 100644 --- a/src/generator.h +++ b/src/generator.h @@ -4,7 +4,7 @@ * Nothing fancy. Just something moved out off the `stardict' test to be * conveniently reused by the included tools. * - * Copyright (c) 2013, Přemysl Eric Janouch
+ * Copyright (c) 2013 - 2020, Přemysl Eric Janouch
* * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. @@ -42,12 +42,15 @@ Generator *generator_new (const gchar *base, GError **error); gboolean generator_finish (Generator *self, GError **error); void generator_free (Generator *self); -void generator_begin_entry (Generator *self); gboolean generator_write_type (Generator *self, gchar type, GError **error); gboolean generator_write_raw (Generator *self, gpointer data, gsize data_size, gboolean mark_end, GError **error); gboolean generator_write_string (Generator *self, const gchar *s, gboolean mark_end, GError **error); + +void generator_begin_entry (Generator *self); +gboolean generator_write_fields (Generator *self, + const GList *fields, GError **error); gboolean generator_finish_entry (Generator *self, const gchar *word, GError **error); diff --git a/src/transform.c b/src/transform.c new file mode 100644 index 0000000..2d5c2f2 --- /dev/null +++ b/src/transform.c @@ -0,0 +1,270 @@ +/* + * A tool to transform dictionaries by an external filter + * + * The external filter needs to process NUL-separated textual entries. + * + * Example: transform input.info output -- perl -p0e s/bullshit/soykaf/g + * + * Copyright (c) 2020, Přemysl Eric Janouch
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include