diff options
Diffstat (limited to 'dicts')
-rwxr-xr-x | dicts/gnu-fdl-en-cz.sh | 21 | ||||
-rwxr-xr-x | dicts/slovnik-cizich-slov.sh | 8 |
2 files changed, 29 insertions, 0 deletions
diff --git a/dicts/gnu-fdl-en-cz.sh b/dicts/gnu-fdl-en-cz.sh
new file mode 100755
index 0000000..8feeea9
--- /dev/null
+++ b/dicts/gnu-fdl-en-cz.sh
@@ -0,0 +1,21 @@
+#!/bin/sh -e
+# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
+curl -fLo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
+zcat | grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -le ' # @F holds the tab-separated fields of each line
+  sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr } # backslash-escape \, newline and tab
+  sub w { # w(NAME, HASHREF): pipe one "key<TAB>values" line per hash entry into "tabfile gnu-fdl-NAME"
+    open(my $f, "|-", "tabfile gnu-fdl-$_[0]") or die $!;
+    print $f e($k) . "\t" . e(join("\n", @$v))
+      while ($k, $v) = each %{$_[1]}; # values are newline-joined, then escaped so each entry stays on one line
+    close($f) or die $! ? "close: $!" : "tabfile gnu-fdl-$_[0] exited with wait status $?"; # close reaps tabfile
+  }
+  my ($en, $cz, $notes, $special, $translator) = @F; # $special and $translator are not used below
+  if ($cz) {
+    $notes =~ s/\w+:\s?//g; # remove word classes
+    $notes =~ s/(\w+\.)(?!])/($1)/; # quote "pl."
+    push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz); # English headword -> Czech translations
+    push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en); # Czech headword -> English translations
+  } END { # runs once, after all input has been read
+    w("en-cz", \%encz);
+    w("cz-en", \%czen);
+  }'
diff --git a/dicts/slovnik-cizich-slov.sh b/dicts/slovnik-cizich-slov.sh
new file mode 100755
index 0000000..cb99ea1
--- /dev/null
+++ b/dicts/slovnik-cizich-slov.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# Slovník cizích slov, see https://slovnik-cizich-slov.abz.cz/web.php/o-slovniku
+# TODO: Skipping the optional pronunciation field, tabfile can't handle it yet,
+# but could be made to accept a lowercase sametypesequence
+curl -fLo- https://slovnik-cizich-slov.abz.cz/export.php | \
+iconv -f latin2 -t UTF-8 | perl -CSD -F\\\| -le ' # @F holds the "|"-separated fields of each line
+  print "$_\t" . $F[2] =~ s/\\/\\\\/gr =~ s/; /\\n/gr for split(", ", $F[0]) # one output line per ", "-separated headword
+' | sort -u | tabfile slovnik-cizich-slov