diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2021-10-10 06:11:28 +0200 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2021-10-10 06:13:49 +0200 |
commit | de7089d6696cdba8aa524464d5f5840e768fab65 (patch) | |
tree | 0abe1b62b55276fef99b42a283468c7681c5a085 /dicts | |
parent | 16d6eaf012da1bac5d34fce41fd30b0f53aa8f65 (diff) | |
download | tdv-de7089d6696cdba8aa524464d5f5840e768fab65.tar.gz tdv-de7089d6696cdba8aa524464d5f5840e768fab65.tar.xz tdv-de7089d6696cdba8aa524464d5f5840e768fab65.zip |
gnu-fdl-en-cz.sh: employ Pango formatting
Also add collation fields.
Diffstat (limited to 'dicts')
-rwxr-xr-x | dicts/gnu-fdl-en-cz.sh | 28 |
1 files changed, 18 insertions, 10 deletions
diff --git a/dicts/gnu-fdl-en-cz.sh b/dicts/gnu-fdl-en-cz.sh index 8feeea9..e5a575d 100755 --- a/dicts/gnu-fdl-en-cz.sh +++ b/dicts/gnu-fdl-en-cz.sh @@ -2,20 +2,28 @@ # GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/ curl -Lo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \ zcat | grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -le ' - sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr } + sub tabesc { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr } sub w { - open(my $f, "|-", "tabfile gnu-fdl-$_[0]") or die $!; - print $f e($k) . "\t" . e(join("\n", @$v)) - while ($k, $v) = each %{$_[1]}; + my ($name, $dict, $collation) = @_; + open(my $f, "|-", "tabfile", "--pango", "--collation=$collation", + "--website=https://www.svobodneslovniky.cz", + "gnu-fdl-$name") or die $!; + print $f tabesc($keyword) . "\t" . tabesc(join("\n", @$defs)) + while ($keyword, $defs) = each %{$dict}; close($f); } - my ($en, $cz, $notes, $special, $translator) = @F; - if ($cz) { + sub xmlesc { shift =~ s/&/&amp;/gr =~ s/</&lt;/gr =~ s/>/&gt;/gr } + sub entry { + my ($definition, $notes) = map {xmlesc($_)} @_; + $notes ? "$definition <i>$notes</i>" : $definition; + } + my ($en, $cs, $notes, $special, $translator) = @F; + if ($cs) { $notes =~ s/\w+:\s?//g; # remove word classes $notes =~ s/(\w+\.)(?!])/($1)/; # quote "pl." - push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz); - push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en); + push(@{$encs{$en}}, entry($cs, $notes)); + push(@{$csen{$cs}}, entry($en, $notes)); } END { - w("en-cz", \%encs); - w("cz-en", \%czen); + w("en-cz", \%encs, "en"); + w("cz-en", \%csen, "cs"); }' |