diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/asciiman.awk | 231 |
1 files changed, 231 insertions, 0 deletions
# asciiman.awk: stupid AsciiDoc to manual page converter
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This is not intended to produce great output, merely useful output.
# As such, input documents should restrict themselves as follows:
#
#  - Attributes cannot be passed on the command line.
#  - In-line formatting sequences must not overlap,
#    cannot be escaped, and cannot span lines.
#  - Heading underlines must match in byte length exactly.
#  - Only a small subset of syntax is supported overall.
#
# Also beware that the output has only been tested with GNU troff.

# Report an error twice--as a roff comment on standard output and as a plain
# message on standard error--prefixed with the current file name and line
# number, then terminate with exit status 1.
function fatal(message) {
    print ".\\\" " FILENAME ":" FNR ": fatal error: " message
    print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
    exit 1
}

# Return s with every {name} reference replaced by Attrs[name], repeating
# until no brace pair without nested braces remains.
# "attr" is a local variable, not a parameter.
function expand(s,   attr) {
    # TODO: This should not expand unknown attribute names.
    while (match(s, /[{][^{}]*[}]/)) {
        attr = substr(s, RSTART + 1, RLENGTH - 2)
        s = substr(s, 1, RSTART - 1) Attrs[attr] substr(s, RSTART + RLENGTH)
    }
    return s
}

# Return s with backslashes, hyphen-minus characters and full stops prefixed
# with a backslash, so that roff does not interpret them specially.
function escape(s) {
    # NOTE(review): awk implementations following the POSIX sub()/gsub()
    # replacement rules reduce "\\\\" to a single backslash, which would make
    # this first substitution a no-op there -- confirm with the awk in use.
    gsub(/\\/, "\\\\", s)
    gsub(/-/, "\\-", s)
    gsub(/[.]/, "\\.", s)
    return s
}

# Try to parse line as an attribute definition of the form ":name: value".
# On success, store the expanded value in Attrs[name] and return 1,
# otherwise return 0.  "attrname" and "attrvalue" are local variables.
function readattribute(line,   attrname, attrvalue) {
    if (match(line, /^:[^:]*: /)) {
        # Strip the leading colon and the trailing colon with its space.
        attrname = substr(line, RSTART + 1, RLENGTH - 3)
        attrvalue = substr(line, RSTART + RLENGTH)
        Attrs[attrname] = expand(attrvalue)
        return 1
    }
    return 0
}

# The document header: a "name(N)" title line, an underline of equal length
# made of "=" characters only, optional attribute definitions,
# and an empty line.
NR == 1 {
    nameline = $0
    if (match(nameline, /[(][[:digit:]][)]$/)) {
        name = substr(nameline, 1, RSTART - 1)
        section = substr(nameline, RSTART + 1, RLENGTH - 2)
    } else {
        fatal("invalid header line")
    }

    getline
    if (length(nameline) != length($0) || /[^=]/)
        fatal("invalid header underline")

    getline
    while (readattribute($0))
        getline
    if ($0)
        fatal("expected an empty line after the header")

    # Requesting tbl(1), even though we currently do not support tables.
    # The preprocessor hint has to read '\" t, i.e., a roff comment,
    # built the same way as the .\" comment prefix used elsewhere in here.
    print "'\\\" t"
    print ".TH \"" toupper(name) "\" \"" section "\""

    # Hyphenation is indeed rather annoying, in particular with long links.
    print ".nh"
}

# Print one line of running text, with attributes expanded, roff-sensitive
# characters escaped, and in-line formatting converted to font escapes.
# An empty line is turned into vertical space.
function inline(line) {
    if (!line) {
        print ".sp"
        return
    }

    line = escape(expand(line))

    # Enable double-spacing after the end of a sentence.
    # Full stops have already been escaped above, hence the backslash.
    gsub(/\\[.][[:space:]]+/, ".\n", line)

    # Strip empty URL descriptions, otherwise useful for demarking the end.
    while (match(line, /[^[:space:]]+\[\]/)) {
        line = substr(line, 1, RSTART + RLENGTH - 3) \
            substr(line, RSTART + RLENGTH)
    }

    # Pass-through, otherwise useful for hacks, is a lie here.
    while (match(line, /[+][+][+][^+]+[+][+][+]/)) {
        line = substr(line, 1, RSTART - 1) \
            substr(line, RSTART + 3, RLENGTH - 6) \
            substr(line, RSTART + RLENGTH)
    }

    # Italic and bold formatting doesn't respect any word boundaries.
    # The doubled markers need to be handled before the single ones.
    while (match(line, /__[^_]+__/)) {
        line = substr(line, 1, RSTART - 1) \
            "\\fI" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
            substr(line, RSTART + RLENGTH)
    }
    while (match(line, /_[^_]+_/)) {
        line = substr(line, 1, RSTART - 1) \
            "\\fI" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
            substr(line, RSTART + RLENGTH)
    }
    while (match(line, /[*][*][^*]+[*][*]/)) {
        line = substr(line, 1, RSTART - 1) \
            "\\fB" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
            substr(line, RSTART + RLENGTH)
    }
    while (match(line, /[*][^*]+[*]/)) {
        line = substr(line, 1, RSTART - 1) \
            "\\fB" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
            substr(line, RSTART + RLENGTH)
    }

    # A trailing " +" requests a line break.
    sub(/[[:space:]]+[+]$/, "\n.br", line)
    print line
}

# Process the block starting at firstline, reading any further input lines
# it needs into $0.
# Returns 1 iff the left-over $0 should be processed further.
function process(firstline) {
    if (readattribute(firstline))
        return 0
    if (getline <= 0) {
        # That was the last line of input, there is no block to look for.
        inline(firstline)
        return 0
    }

    # Section and subsection titles, recognized by their underlines.
    if (length(firstline) == length($0) && /^-+$/) {
        print ".SH \"" escape(toupper(expand(firstline))) "\""
        return 0
    }
    if (length(firstline) == length($0) && /^~+$/) {
        print ".SS \"" escape(expand(firstline)) "\""
        return 0
    }

    # Delimited listing and literal blocks, closed by a line identical
    # to the opening one.  An empty block must not output its own closing
    # delimiter, so the check precedes the first print.
    if (firstline ~ /^(-{4,}|[.]{4,})$/) {
        print ".if n .RS 4"
        print ".nf"
        print ".fam C"
        while ($0 != firstline) {
            print escape($0)
            if (getline <= 0)
                break
        }
        print ".fam"
        print ".fi"
        print ".if n .RE"
        return 0
    }

    # Delimited comment blocks are passed through as roff comments.
    if (firstline ~ /^\/{4,}$/) {
        while ($0 != firstline) {
            print ".\\\" " $0
            if (getline <= 0)
                break
        }
        return 0
    }

    # So are single-line comments.
    if (match(firstline, /^\/\//)) {
        print ".\\\" " firstline
        return 1
    }

    # We generally assume these block end with a blank line.
    if (match(firstline, /^[[:space:]]*[*][[:space:]]+/)) {
        # Bullet magic copied over from AsciiDoc/Asciidoctor generators.
        print ".RS 4"
        print ".ie n \\{\\"
        print "\\h'-04'\\(bu\\h'+03'\\c"
        print ".\\}"
        print ".el \\{\\"
        print ".sp -1"
        print ".IP \\(bu 2.3"
        print ".\\}"

        inline(substr(firstline, RSTART + RLENGTH))
        while ($0) {
            # Continuation lines lose their indentation,
            # and a lone "+" is treated like an empty line.
            sub(/^[[:space:]]+/, "")
            sub(/^[+]$/, "")
            if (!process($0) && getline <= 0)
                fatal("unexpected EOF")
            # The next list item is left for the caller to start anew.
            if (match($0, /^[[:space:]]*[*][[:space:]]+/))
                break
        }
        print ".RE"
        print ".sp"
        return !!$0
    }

    # Indented literal paragraphs: the first line's indentation width
    # is cut off all lines up to the nearest empty line.
    if (match(firstline, /^[[:space:]]+/)) {
        print ".if n .RS 4"
        print ".nf"
        print ".fam C"
        do {
            print escape(substr(firstline, RLENGTH + 1))
            firstline = $0
        } while ($0 && getline > 0)
        print ".fam"
        print ".fi"
        print ".if n .RE"
        return 1
    }

    # Labeled lists: one or more "term::" lines followed by a description.
    if (match(firstline, /::$/)) {
        inline(substr(firstline, 1, RSTART - 1))
        while (match($0, /::$/)) {
            print ".br"
            inline(substr($0, 1, RSTART - 1))
            if (getline <= 0)
                fatal("unexpected EOF")
        }

        print ".RS 4"
        while ($0) {
            sub(/^[[:space:]]+/, "")
            sub(/^[+]$/, "")
            if (!process($0) && getline <= 0)
                fatal("unexpected EOF")
            # The next term is left for the caller to start anew.
            if (match($0, /::$/))
                break
        }
        print ".RE"
        print ".sp"
        return !!$0
    }

    # Anything else is running text.
    inline(firstline)
    return 1
}

# Keep processing for as long as a block leaves an unconsumed line in $0.
{
    while (process($0)) {}
}