aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2024-12-30 23:14:10 +0100
committerPřemysl Eric Janouch <p@janouch.name>2024-12-31 20:25:51 +0100
commit09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32 (patch)
treeb8457b1871929cf84806a043e94e26da6ece93a2 /tools
parent7560e8700e2c72cd4a11cfe818907bd9da76e800 (diff)
downloadliberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.gz
liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.xz
liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.zip
Add a --help/--version to AsciiDoc convertor
liberty is now self-contained, from opt_handler to manual page.
Diffstat (limited to 'tools')
-rw-r--r--tools/help2adoc.awk234
1 files changed, 234 insertions, 0 deletions
diff --git a/tools/help2adoc.awk b/tools/help2adoc.awk
new file mode 100644
index 0000000..b36753f
--- /dev/null
+++ b/tools/help2adoc.awk
@@ -0,0 +1,234 @@
+# help2adoc.awk: convert --version/--help to AsciiDoc manual pages
+#
+# Copyright (c) 2024, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Usage: awk -f help2adoc.awk -v Target=cat
+#
+# This is not intended to produce great output, merely useful output,
+# if only because there is no real standard of what the input should look like.
+#
+# The only target that needs to work is liberty's own opt_handler.
+# The expected input format is roughly that of GNU utilities.
+
+# Report an unrecoverable error and terminate with exit status 1.
+# The message is printed twice: on standard output behind a "// " prefix
+# (presumably so that it ends up as a comment in the generated document),
+# and on standard error behind "fatal error: ".
+function fatal(message,	complaint) {
+	complaint = "fatal error: " message
+	print "// " message
+	print complaint > "/dev/stderr"
+	exit 1
+}
+
+# The input model of this script is that functions take the next line on $0,
+# read further lines as necessary, and leave the next line in $0 again.
+#
+# readline() fetches the next line of Command's output into $0.
+# A read error is fatal, while reaching the end of input simply
+# terminates the script.
+function readline(	status) {
+	status = (Command | getline)
+	if (status < 0)
+		fatal("read error")
+	if (status == 0)
+		exit
+}
+
+# Wrap everything in the line that looks like a command line option
+# in asterisks (AsciiDoc bold markup), and return the resulting line.
+#
+# Example input:
+#   -N, --newer=DATE-OR-FILE, --after-date=DATE-OR-FILE
+function emboldenoptions(line,	head, option, tail) {
+	# A short option at the very beginning of the line.
+	sub(/^-[^-=,[:space:]{[<]/, "*&*", line)
+
+	# Short options elsewhere. The first matched character is only context,
+	# and stays outside the markup. Asterisks are excluded from that context,
+	# so what has already been wrapped cannot match again.
+	while (match(line, /[^-_[:alnum:]*'+]-[^-=,[:space:]{[<]/)) {
+		head = substr(line, 1, RSTART)
+		option = substr(line, RSTART + 1, RLENGTH - 1)
+		tail = substr(line, RSTART + RLENGTH)
+		line = head "**" option "**" tail
+	}
+
+	# A long option at the very beginning of the line.
+	sub(/^--[-_[:alnum:]]+/, "*&*", line)
+
+	# Long options elsewhere, following the same scheme as short ones.
+	while (match(line, /[^-_[:alnum:]*'+]--[-_[:alnum:]]+/)) {
+		head = substr(line, 1, RSTART)
+		option = substr(line, RSTART + 1, RLENGTH - 1)
+		tail = substr(line, RSTART + RLENGTH)
+		line = head "**" option "**" tail
+	}
+	return line
+}
+
+# Apply inline formatting to a line of running text, and return the result:
+# the program name (ProgramName) is emboldened wherever it stands
+# as a separate word, after which options are emboldened as well.
+function formatinline(line,	name, edge, head, tail) {
+	# Escape regular expression metacharacters in the program name,
+	# so that it can be spliced into dynamic regular expressions verbatim.
+	name = ProgramName
+	gsub(/[][\\.^$(){}|*+?]/, "\\\\&", name)
+
+	# Any character that counts as a word boundary next to the name.
+	edge = "[^-_[:alnum:]*'+/]"
+
+	# The name at the very beginning of the line.
+	if (match(line, "^" name edge)) {
+		head = substr(line, RSTART, RLENGTH - 1)
+		tail = substr(line, RSTART + RLENGTH - 1)
+		line = "**" head "**" tail
+	}
+
+	# The name in the middle of the line: the boundary characters
+	# on both sides are part of the match, but stay outside the markup.
+	while (match(line, edge name edge)) {
+		head = substr(line, 1, RSTART)
+		tail = substr(line, RSTART + RLENGTH - 1)
+		line = head "**" substr(line, RSTART + 1, RLENGTH - 2) "**" tail
+	}
+
+	# The name at the very end of the line.
+	if (match(line, edge name "$")) {
+		head = substr(line, 1, RSTART)
+		line = head "**" substr(line, RSTART + 1, RLENGTH - 1) "**"
+	}
+	return emboldenoptions(line)
+}
+
+# Print one usage line as a paragraph of the synopsis.
+# "usage" is the line with any "Usage:" prefix and indentation already
+# removed; "description" merely serves as a local variable for callers
+# that pass a single argument.
+function printusage(usage, description,	invocation, head, word, tail) {
+	# Reduce typographic ellipses and dashes to their ASCII forms.
+	gsub(/…/, "...", usage)
+	gsub(/—|–/, "-", usage)
+
+	# --help output will more likely than not simply include argv[0],
+	# or perhaps program_invocation_short_name (not addressed here).
+	invocation = Target " "
+	if (substr(usage, 1, length(invocation)) == invocation)
+		usage = ProgramName substr(usage, length(Target) + 1)
+
+	# A lot of GNOME software includes the description here,
+	# separated from the usage by a dash surrounded by spaces.
+	if (match(usage, / +- +/) && usage !~ / - [^[:alnum:]]/) {
+		description = substr(usage, RSTART + RLENGTH)
+		usage = substr(usage, 1, RSTART - 1)
+	}
+
+	# Italicise words of two or more characters. The first matched
+	# character is only context, and stays outside the markup.
+	while (match(usage, /[^-_[:alnum:]*'+.][[:alnum:]][-_[:alnum:]]+/)) {
+		head = substr(usage, 1, RSTART)
+		word = substr(usage, RSTART + 1, RLENGTH - 1)
+		tail = substr(usage, RSTART + RLENGTH)
+		usage = head "__" word "__" tail
+	}
+
+	# The first word is the program name, which should be bold.
+	sub(/^[^[:space:]]+/, "*&*", usage)
+	print emboldenoptions(usage)
+	print ""
+
+	if (!description)
+		return
+
+	# The description is running text, so pending section headers come first.
+	flushsections()
+	print formatinline(description)
+	print ""
+}
+
+# Print a two-line header: the text itself, followed by a line in which
+# every character of the text has been replaced with "underline".
+# We're going with Setext headers, because that's what asciiman.awk supports.
+function printheader(text, underline,	rule) {
+	rule = text
+	gsub(/./, underline, rule)
+	print text
+	print rule
+}
+
+# Main program: run Target with --version and --help,
+# and convert what it prints to an AsciiDoc manual page on standard output.
+# Everything happens here, as the script does not read any regular input.
+BEGIN {
+	if (!Target)
+		fatal("missing Target")
+
+	# Put Target in single quotes for the shell,
+	# replacing any embedded single quote with '\''.
+	TargetQuoted = Target
+	gsub(/'/, "'\\''", TargetQuoted)
+	TargetQuoted = "'" TargetQuoted "'"
+
+	# Only the first line of --version output is used.
+	# Remaining --version lines could be about copyright (GNU),
+	# or something else entirely.
+	Command = TargetQuoted " --version"
+	if ((Command | getline) > 0) {
+		# GNU --version output can place the package name in parentheses.
+		# For example, "ls (GNU coreutils) 9.1" results in:
+		#   Package = "GNU coreutils 9.1"
+		#   Version = "ls 9.1"
+		#   Name    = "ls"
+		Package = $0
+		if (match($0, /[[:space:]][(][^)]*[)]/)) {
+			Package = substr($0, RSTART + 2, RLENGTH - 3) \
+				substr($0, RSTART + RLENGTH)
+			sub(/[[:space:]]+[(][^)]*[)]/, "")
+		}
+
+		# The name is the line with its last word, the version, removed.
+		Version = $0
+		sub(/[[:space:]]+[^[:space:]]+$/, "")
+		Name = $0
+	} else {
+		fatal("failed to get --version output")
+	}
+
+	# A name consisting of several words can't be the program's name,
+	# so fall back to the last path component of Target in that case.
+	# The slash within the bracket expression is escaped, because not all
+	# AWK implementations accept a bare one inside a regular expression
+	# literal.
+	if (Name !~ /[[:space:]]/)
+		ProgramName = Name
+	else if (match(Target, /[^\/]+$/))
+		ProgramName = substr(Target, RSTART, RLENGTH)
+
+	printheader(ProgramName "(1)", "=")
+	print ":doctype: manpage"
+	print ":manmanual: " Name " Manual"
+	print ":mansource: " Package
+	print ""
+	printheader("Name", "-")
+	print ProgramName " - manual page for " Version
+	print ""
+
+	# From now on, readline() reads lines of --help output.
+	close(Command)
+	Command = TargetQuoted " --help"
+	if ((Command | getline) <= 0)
+		fatal("failed to get --help output")
+
+	# Section headers are printed lazily by flushsections(),
+	# just before the first piece of content that belongs under them.
+	NextSection = "Description"
+	NextSubsection = ""
+
+	# The SYNOPSIS section is mandatory, so just put it there.
+	printheader("Synopsis", "-")
+	while (1) {
+		if (match($0, /^[Uu]sage:[[:space:]]*/)) {
+			# The usage may also start on the line after "Usage:".
+			if (($0 = substr($0, RSTART + RLENGTH)))
+				printusage($0)
+		} else if (match($0, /^[[:space:]]+/) && !/^[[:space:]]*-/) {
+			# Indented lines that don't start with a dash
+			# are taken for further usage lines.
+			if (($0 = substr($0, RSTART + RLENGTH)))
+				printusage($0)
+		} else if ($0) {
+			# The first other non-empty line ends the synopsis,
+			# and is left in $0 for the loop below.
+			break
+		}
+		readline()
+	}
+
+	# The rest of the output. This loop only ends through readline(),
+	# which exits the script once the input runs out.
+	while (1) {
+		if (match($0, /^[[:alpha:]][-[:alnum:][:space:]]+:$/)) {
+			# We don't flush sections here,
+			# so that we don't unnecessarily enforce DESCRIPTION first.
+			NextSection = substr($0, RSTART, RLENGTH - 1)
+		} else if (match($0, /^ [[:alpha:]][-[:alnum:][:space:]]+:$/)) {
+			# The same, indented by one space, makes a subsection.
+			flushsections()
+			NextSubsection = substr($0, RSTART + 1, RLENGTH - 2)
+		} else if (match($0, /^ +-/)) {
+			# parseoption() receives the line from its dash onwards,
+			# and leaves the next unprocessed line in $0,
+			# which is why readline() must be skipped.
+			flushsections()
+			parseoption(substr($0, RSTART + RLENGTH - 1))
+			continue
+		} else if ($0) {
+			flushsections()
+
+			# That will be probably interpreted as a literal block.
+			if (!/^[[:space:]]/)
+				$0 = formatinline($0)
+			print
+		} else {
+			print
+		}
+		readline()
+	}
+}
+
+# Print the headers of the pending section and subsection, if any,
+# each preceded by an empty line, and mark them as no longer pending.
+# NextSection and NextSubsection hold the pending titles.
+function flushsections() {
+	if (NextSection != "") {
+		print ""
+		printheader(NextSection, "-")
+		NextSection = ""
+	}
+	if (NextSubsection != "") {
+		print ""
+		printheader(NextSubsection, "~")
+		NextSubsection = ""
+	}
+}
+
+# Print one option as an AsciiDoc labeled list item.
+# "line" is the option's line of --help output, starting at its first dash.
+# Continuation lines of the description are read from the input,
+# and the first line that doesn't belong to the option is left in $0.
+#
+# The regular expressions avoid interval expressions such as {2,},
+# which some AWK implementations don't support.
+function parseoption(line, usage) {
+	# The option's usage ends at the first run of two or more whitespace
+	# characters, anything beyond that is the beginning of its description.
+	# Often enough you will see it separated with only one space,
+	# which will simply not work for us.
+	if (match(line, /[[:space:]][[:space:]]+/)) {
+		usage = substr(line, 1, RSTART - 1)
+		line = substr(line, RSTART + RLENGTH)
+	} else {
+		usage = line
+		line = ""
+	}
+
+	# Option names become bold, and their arguments become italic.
+	# The first character matched in the loop is only context,
+	# and stays outside the markup.
+	usage = emboldenoptions(usage)
+	while (match(usage, /[=<, ][[:alnum:]][-_[:alnum:]]*/)) {
+		usage = substr(usage, 1, RSTART) \
+			"__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
+			substr(usage, RSTART + RLENGTH)
+	}
+
+	print ""
+	print usage "::"
+	if (line)
+		print "\t" formatinline(line)
+
+	# Continuation lines are indented, and either don't start with a dash,
+	# or are indented by seven or more spaces (six literal spaces
+	# followed by " +" in the second alternative).
+	# The text is taken from the last matched character onwards.
+	readline()
+	while (match($0, /^ +[^-[:space:]]|^       +./)) {
+		print "\t" formatinline(substr($0, RSTART + RLENGTH - 1))
+		readline()
+	}
+}