diff options
| author | Přemysl Eric Janouch <p@janouch.name> | 2021-10-13 23:57:36 +0200 | 
|---|---|---|
| committer | Přemysl Eric Janouch <p@janouch.name> | 2021-10-13 23:58:57 +0200 | 
| commit | f812fae922eec06235c9e566b78c7f0fb46a709b (patch) | |
| tree | c0ed980d90dba03aecfa782e61457f2045fcaf87 /dicts | |
| parent | 3d53b2c131914da48fe8873f4133995bd42dbdcc (diff) | |
| download | tdv-f812fae922eec06235c9e566b78c7f0fb46a709b.tar.gz tdv-f812fae922eec06235c9e566b78c7f0fb46a709b.tar.xz tdv-f812fae922eec06235c9e566b78c7f0fb46a709b.zip | |
Add the GNU/FDL German-Czech dictionary to dicts
But only build it with WANT_BAD_DICTS set to non-null.
Diffstat (limited to 'dicts')
| -rwxr-xr-x | dicts/gnu-fdl-de-cz.sh | 34 | 
1 files changed, 34 insertions, 0 deletions
| diff --git a/dicts/gnu-fdl-de-cz.sh b/dicts/gnu-fdl-de-cz.sh new file mode 100755 index 0000000..ffe56a4 --- /dev/null +++ b/dicts/gnu-fdl-de-cz.sh @@ -0,0 +1,34 @@ +#!/bin/sh -e +# GNU/FDL German-Czech dictionary, see https://gnu.nemeckoceskyslovnik.cz + +# Sometimes the domain doesn't resolve, and the contents are close to useless +[ -n "$WANT_BAD_DICTS" ] || exit + +curl -Lo- 'https://gnu.nemeckoceskyslovnik.cz/index.php?id=6&sablona=export&format=zcu' | \ +grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -le ' +	sub tabesc { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr } +	sub w { +		my ($name, $dict, $collation) = @_; +		open(my $f, "|-", "tabfile", "--pango", "--collation=$collation", +			"--website=https://gnu.nemeckoceskyslovnik.cz", +			"gnu-fdl-$name") or die $!; +		print $f tabesc($keyword) . "\t" . tabesc(join("\n", @$defs)) +			while ($keyword, $defs) = each %{$dict}; +		close($f); +	} +	sub xmlesc { shift =~ s/&/&amp;/gr =~ s/</&lt;/gr =~ s/>/&gt;/gr } +	sub entry { +		my ($definition, $notes) = map {xmlesc($_)} @_; +		$notes ? "$definition <i>$notes</i>" : $definition; +	} +	next if !$_ .. 0; +	my ($de, $cs, $notes, $special, $translator) = @F; +	if ($cs) { +		$notes =~ s/\w+:\s?//g;          # remove word classes +		$notes =~ s/(\w+\.)(?!])/($1)/;  # quote "pl." +		push(@{$decs{$de}}, entry($cs, $notes)); +		push(@{$csde{$cs}}, entry($de, $notes)); +	} END { +		w("de-cz", \%decs, "de"); +		w("cz-de", \%csde, "cs"); +	}' | 
