aboutsummaryrefslogtreecommitdiff
path: root/dicts/gnu-fdl-en-cz.sh
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2021-10-07 03:37:12 +0200
committer Přemysl Eric Janouch <p@janouch.name> 2021-10-07 14:06:57 +0200
commit ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e (patch)
tree 4d864fe34d2102eb1ef4812d4613c2eca7006336 /dicts/gnu-fdl-en-cz.sh
parent 3881725904473cd9fdbd3e60cd1de2010f14d767 (diff)
download tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.tar.gz
tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.tar.xz
tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.zip
Add sample dictionary downloaders/builders
Diffstat (limited to 'dicts/gnu-fdl-en-cz.sh')
-rwxr-xr-x dicts/gnu-fdl-en-cz.sh 21
1 files changed, 21 insertions, 0 deletions
diff --git a/dicts/gnu-fdl-en-cz.sh b/dicts/gnu-fdl-en-cz.sh
new file mode 100755
index 0000000..8feeea9
--- /dev/null
+++ b/dicts/gnu-fdl-en-cz.sh
@@ -0,0 +1,21 @@
+#!/bin/sh -e
+# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
+curl -Lo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
+zcat | grep -v '^#' | sed 's/\\//g' | perl -CSD -F\\t -lane ' # -an spelled out: -F implies them only since Perl 5.20
+ sub e { my %esc = ("\\" => "\\\\", "\n" => "\\n", "\t" => "\\t"); shift =~ s/([\\\n\t])/$esc{$1}/gr } # escape \, LF and TAB in one pass
+ sub w { # w(NAME, \%DICT): pipe DICT to "tabfile gnu-fdl-NAME", one escaped "key<TAB>values" line per key
+ my ($name, $dict) = @_;
+ open(my $f, "|-", "tabfile gnu-fdl-$name") or die "cannot start tabfile: $!";
+ print {$f} e($_) . "\t" . e(join("\n", @{$dict->{$_}})) for keys %$dict; # -l appends the line terminator
+ close($f) or die($! ? "closing pipe to tabfile: $!" : "tabfile failed, wait status $?"); # $! is 0 when the child merely exited non-zero
+ }
+ my ($english, $czech, $remarks) = @F; # any further columns are not used
+ if ($czech) { # rows whose Czech column is empty or false are skipped
+ for ($remarks) { s/\w+:\s?//g; s/(\w+\.)(?!])/($1)/ } # remove word classes, then quote the first "pl."-like abbreviation
+ my $suffix = $remarks ? " $remarks" : ""; # notes follow the translation, separated by a space
+ push(@{$encz{$english}}, $czech . $suffix);
+ push(@{$czen{$czech}}, $english . $suffix);
+ } END {
+ w("en-cz", \%encz); # once all input is read, emit both directions
+ w("cz-en", \%czen);
+ }'