diff options
| -rw-r--r-- | CMakeLists.txt | 16 | ||||
| -rw-r--r-- | README.adoc | 5 | ||||
| -rwxr-xr-x | dicts/gnu-fdl-en-cz.sh | 21 | ||||
| -rwxr-xr-x | dicts/slovnik-cizich-slov.sh | 8 | 
4 files changed, 47 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bd42553..9f07bfb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -172,6 +172,21 @@ endforeach ()
 
 add_custom_target (tools DEPENDS ${tools})
 
+# Example dictionaries
+file (GLOB dicts_scripts "${PROJECT_SOURCE_DIR}/dicts/*.sh")
+set (dicts_targets)
+foreach (dict_script ${dicts_scripts})
+	get_filename_component (dict_name "${dict_script}" NAME_WE)
+	list (APPEND dicts_targets "dicts-${dict_name}")
+	add_custom_target (dicts-${dict_name}
+		COMMAND sh -c "PATH=.:$PATH \"$0\"" "${dict_script}"
+		DEPENDS tabfile
+		COMMENT "Generating sample dictionary ${dict_name}"
+		VERBATIM)
+endforeach ()
+
+add_custom_target (dicts DEPENDS ${dicts_targets})
+
 # The files to be installed
 include (GNUInstallDirs)
 install (TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
@@ -212,4 +227,3 @@ set (CPACK_SOURCE_IGNORE_FILES "/\\\\.git;/build;/CMakeLists.txt.user")
 set (CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
 
 include (CPack)
-
diff --git a/README.adoc b/README.adoc
index 4f98061..724c7e8 100644
--- a/README.adoc
+++ b/README.adoc
@@ -101,13 +101,14 @@ Dictionaries
 Unfortunately this application only really works with specific dictionaries.
 Word definitions have to be in plain text, separated by newlines.
 
+The `make dicts` command will build some examples from freely available sources.
+
 You may use the included transform tool to transform existing dictionaries that
 are almost useful as they are, e.g. after stripping XML tags.  You might want to
 fix up the `sametypesequence` of the resulting '.ifo' file afterwards, and run
 dictzip on the resulting '.dict' file.
 
-https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[
-CZ <--> { EN, DE, PL, RU } dictionaries]
+https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[CZ <--> EN/DE/PL/RU dictionaries]
 
 Contributing and Support
 ------------------------
diff --git a/dicts/gnu-fdl-en-cz.sh b/dicts/gnu-fdl-en-cz.sh
new file mode 100755
index 0000000..8feeea9
--- /dev/null
+++ b/dicts/gnu-fdl-en-cz.sh
@@ -0,0 +1,21 @@
+#!/bin/sh -e
+# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
+curl -Lo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
+zcat | grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -le '
+	sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr }
+	sub w {
+		open(my $f, "|-", "tabfile gnu-fdl-$_[0]") or die $!;
+		print $f e($k) . "\t" . e(join("\n", @$v))
+			while ($k, $v) = each %{$_[1]};
+		close($f);
+	}
+	my ($en, $cz, $notes, $special, $translator) = @F;
+	if ($cz) {
+		$notes =~ s/\w+:\s?//g;          # remove word classes
+		$notes =~ s/(\w+\.)(?!])/($1)/;  # quote "pl."
+		push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz);
+		push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en);
+	} END {
+		w("en-cz", \%encz);
+		w("cz-en", \%czen);
+	}'
diff --git a/dicts/slovnik-cizich-slov.sh b/dicts/slovnik-cizich-slov.sh
new file mode 100755
index 0000000..cb99ea1
--- /dev/null
+++ b/dicts/slovnik-cizich-slov.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# Slovník cizích slov, see https://slovnik-cizich-slov.abz.cz/web.php/o-slovniku
+# TODO: Skipping the optional pronunciation field, tabfile can't handle it yet,
+# but could be made to accept a lowercase sametypesequence
+curl -Lo- https://slovnik-cizich-slov.abz.cz/export.php | \
+iconv -f latin2 -t UTF-8 | perl -CSD -F\\\| -le '
+	print "$_\t" . $F[2] =~ s/\\/\\\\/gr =~ s/; /\\n/gr for split(", ", $F[0])
+' | sort -u | tabfile slovnik-cizich-slov
