aboutsummaryrefslogtreecommitdiff
path: root/dicts/slovnik-cizich-slov.sh
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2021-10-07 03:37:12 +0200
committer Přemysl Eric Janouch <p@janouch.name> 2021-10-07 14:06:57 +0200
commit ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e (patch)
tree 4d864fe34d2102eb1ef4812d4613c2eca7006336 /dicts/slovnik-cizich-slov.sh
parent 3881725904473cd9fdbd3e60cd1de2010f14d767 (diff)
download tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.tar.gz
tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.tar.xz
tdv-ed8b1bcdad7c430af1eef5fbe78b6ec4eb3eb60e.zip
Add sample dictionary downloaders/builders
Diffstat (limited to 'dicts/slovnik-cizich-slov.sh')
-rwxr-xr-x dicts/slovnik-cizich-slov.sh 8
1 files changed, 8 insertions, 0 deletions
diff --git a/dicts/slovnik-cizich-slov.sh b/dicts/slovnik-cizich-slov.sh
new file mode 100755
index 0000000..cb99ea1
--- /dev/null
+++ b/dicts/slovnik-cizich-slov.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# Slovník cizích slov (dictionary of foreign words): https://slovnik-cizich-slov.abz.cz/web.php/o-slovniku
+# Per "|"-split input line, prints each ", "-separated item of field 0, a tab, then field 2 (escaped).
+# TODO: Optional pronunciation field is skipped; tabfile would need to accept a lowercase sametypesequence.
+curl -Lo- https://slovnik-cizich-slov.abz.cz/export.php | \
+iconv -f latin2 -t UTF-8 | perl -CSD -F\\\| -le '
+ print "$_\t" . $F[2] =~ s/\\/\\\\/gr =~ s/; /\\n/gr for split(", ", $F[0])
+' | sort -u | tabfile slovnik-cizich-slov