# help2adoc.awk: convert --version/--help to AsciiDoc manual pages
#
# Copyright (c) 2024, Přemysl Eric Janouch
# SPDX-License-Identifier: 0BSD
#
# Usage: awk -f help2adoc.awk -v Target=cat
#
# This is not intended to produce great output, merely useful output,
# if only because there is no real standard of what the input should look like.
#
# The only target that needs to work is liberty's own opt_handler.
# The expected input format is roughly that of GNU utilities.
#
# Global state shared between the functions below:
#   Target         - the program to document, as passed with -v
#   Command        - the shell command whose output is currently being read
#   ProgramName    - the name used in headers and emboldened in running text
#   NextSection    - title of a section header that has yet to be printed
#   NextSubsection - title of a subsection header that has yet to be printed

# Report an unrecoverable error and terminate with exit status 1.
# The message goes to standard error, and is also written to standard output
# prefixed with "// ", which AsciiDoc treats as a line comment.
function fatal(message) {
	print "// " message
	print "fatal error: " message > "/dev/stderr"
	exit 1
}

# The input model of this script is that functions take the next line on $0,
# read further lines as necessary, and leave the next line in $0 again.
#
# Read the next line of Command's output into $0.
# A read error is fatal, while the end of input ends the script through
# a plain "exit".  "ok" is a local variable, not a parameter.
function readline(    ok) {
	if ((ok = (Command | getline)) < 0)
		fatal("read error")
	if (!ok)
		exit
}

# Return "line" with anything that looks like a short or a long option
# wrapped in AsciiDoc bold markup.
#
# An option at the very start of the line receives constrained markup (*x*),
# options found further within receive unconstrained markup (**x**).
# The character preceding an option must not be a word character, nor one
# of "*", "'", "+"; as a result, options that have already been wrapped
# are not matched again, and the loops terminate.
function emboldenoptions(line) {
	# -N, --newer=DATE-OR-FILE, --after-date=DATE-OR-FILE
	sub(/^-[^-=,[:space:]{[<]/, "*&*", line)
	while (match(line, /[^-_[:alnum:]*'+]-[^-=,[:space:]{[<]/)) {
		# RSTART points at the preceding character, which is kept as is.
		line = substr(line, 1, RSTART) \
			"**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
			substr(line, RSTART + RLENGTH)
	}

	sub(/^--[-_[:alnum:]]+/, "*&*", line)
	while (match(line, /[^-_[:alnum:]*'+]--[-_[:alnum:]]+/)) {
		line = substr(line, 1, RSTART) \
			"**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
			substr(line, RSTART + RLENGTH)
	}
	return line
}

# Return "line" formatted as running text: occurrences of ProgramName
# are emboldened, and so are any options (see emboldenoptions).
# "programname" is a local variable, not a parameter.
function formatinline(line,    programname) {
	# Go the extra step of emboldening the program name at word boundaries.
	# The name is used within dynamic regular expressions below,
	# so backslash-escape any regular expression metacharacters in it.
	programname = ProgramName
	gsub(/[][\\.^$(){}|*+?]/, "\\\\&", programname)

	# At the start of the line: the match includes one trailing character.
	if (match(line, "^" programname "[^-_[:alnum:]*'+/]")) {
		line = "**" substr(line, RSTART, RLENGTH - 1) "**" \
			substr(line, RSTART + RLENGTH - 1)
	}
	# In the middle: the match includes one character on either side.
	while (match(line, "[^-_[:alnum:]*'+/]" programname "[^-_[:alnum:]*'+/]")) {
		line = substr(line, 1, RSTART) \
			"**" substr(line, RSTART + 1, RLENGTH - 2) "**" \
			substr(line, RSTART + RLENGTH - 1)
	}
	# At the end of the line: the match includes one leading character.
	if (match(line, "[^-_[:alnum:]*'+/]" programname "$")) {
		line = substr(line, 1, RSTART) \
			"**" substr(line, RSTART + 1, RLENGTH - 1) "**"
	}
	return emboldenoptions(line)
}

# Print one synopsis line, followed by an empty line.
#
# "usage" is the text following any "Usage:" prefix or leading whitespace.
# When it carries a description after " - ", that description is split off
# and printed as a paragraph of its own, after any pending section headers.
# "description" is a local variable, not a parameter.
function printusage(usage,    description) {
	gsub(/…/, "...", usage)
	gsub(/—|–/, "-", usage)

	# --help output will more likely than not simply include argv[0],
	# or perhaps program_invocation_short_name (not addressed here).
	if (substr(usage, 1, length(Target) + 1) == Target " ")
		usage = ProgramName substr(usage, length(Target) + 1)

	# A lot of GNOME software includes the description here.
	if (match(usage, / +- +/) && usage !~ / - [^[:alnum:]]/) {
		description = substr(usage, RSTART + RLENGTH)
		usage = substr(usage, 1, RSTART - 1)
	}

	# Italicize words of at least two characters.  The leading word cannot
	# match, because the pattern requires a preceding character.
	while (match(usage, /[^-_[:alnum:]*'+.][[:alnum:]][-_[:alnum:]]+/)) {
		usage = substr(usage, 1, RSTART) \
			"__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
			substr(usage, RSTART + RLENGTH)
	}

	# Embolden the leading word, which is the program name.
	sub(/^[^[:space:]]+/, "*&*", usage)
	print emboldenoptions(usage)
	print ""

	if (description) {
		flushsections()
		print formatinline(description)
		print ""
	}
}

# We're going with Setext headers, because that's what asciiman.awk supports.
#
# Print "text", and then a line of the same length in characters,
# consisting of repeated "underline".
function printheader(text, underline) {
	print text
	gsub(/./, underline, text)
	print text
}

BEGIN {
	# Compare against the empty string explicitly: values assigned with -v
	# that look numeric are evaluated as numbers in a boolean context,
	# which would have made Target=0 appear to be missing.
	if (Target == "")
		fatal("missing Target")

	# Quote the target for the shell, which runs our commands.
	TargetQuoted = Target
	gsub(/'/, "'\\''", TargetQuoted)
	TargetQuoted = "'" TargetQuoted "'"

	# Remaining --version lines could be about copyright (GNU),
	# or something else entirely.
	Command = TargetQuoted " --version"
	if ((Command | getline) > 0) {
		# GNU --version output can place the package name in parentheses.
		Package = $0
		if (match($0, /[[:space:]][(][^)]*[)]/)) {
			# Take the contents of the parentheses, plus what follows them.
			Package = substr($0, RSTART + 2, RLENGTH - 3) \
				substr($0, RSTART + RLENGTH)
			sub(/[[:space:]]+[(][^)]*[)]/, "")
		}

		# Version is the whole line sans the package,
		# Name is the same with the last word removed.
		Version = $0
		sub(/[[:space:]]+[^[:space:]]+$/, "")
		Name = $0
	} else {
		fatal("failed to get --version output")
	}

	# A name made of several words does not make for a good program name,
	# use the last path component of the target instead.
	if (Name !~ /[[:space:]]/)
		ProgramName = Name
	else if (match(Target, /[^\/]+$/))
		ProgramName = substr(Target, RSTART, RLENGTH)

	printheader(ProgramName "(1)", "=")
	print ":doctype: manpage"
	print ":manmanual: " Name " Manual"
	print ":mansource: " Package
	print ""

	printheader("Name", "-")
	print ProgramName " - manual page for " Version
	print ""

	close(Command)
	Command = TargetQuoted " --help"
	if ((Command | getline) <= 0)
		fatal("failed to get --help output")

	NextSection = "Description"
	NextSubsection = ""

	# The SYNOPSIS section is mandatory, so just put it there.
	printheader("Synopsis", "-")
	while (1) {
		if (match($0, /^[Uu]sage:[[:space:]]*/)) {
			$0 = substr($0, RSTART + RLENGTH)
			if (length($0))
				printusage($0)
		} else if (match($0, /^[[:space:]]+/) && !/^[[:space:]]*-/) {
			# Indented lines that aren't options are more usage lines.
			$0 = substr($0, RSTART + RLENGTH)
			if (length($0))
				printusage($0)
		} else if (length($0)) {
			# Test the length rather than the value: a line read from
			# the input that looks like the number zero would be false.
			break
		}
		readline()
	}

	# This loop is only ever left through readline() reaching the end of input.
	while (1) {
		if (match($0, /^[[:alpha:]][-[:alnum:][:space:]]+:$/)) {
			# We don't flush sections here,
			# so that we don't unnecessarily enforce DESCRIPTION first.
			NextSection = substr($0, RSTART, RLENGTH - 1)
		} else if (match($0, /^ [[:alpha:]][-[:alnum:][:space:]]+:$/)) {
			flushsections()
			NextSubsection = substr($0, RSTART + 1, RLENGTH - 2)
		} else if (match($0, /^ +-/)) {
			flushsections()
			parseoption(substr($0, RSTART + RLENGTH - 1))
			# parseoption() has already left the next line in $0.
			continue
		} else if (length($0)) {
			# As above, do not mistake a line reading "0" for an empty one.
			flushsections()
			# That will be probably interpreted as a literal block.
			if (!/^[[:space:]]/)
				$0 = formatinline($0)
			print
		} else {
			print
		}
		readline()
	}
}

# Print any pending section and subsection headers, each preceded
# by an empty line, and mark them as no longer pending.
function flushsections() {
	if (NextSection) {
		print ""
		printheader(NextSection, "-")
		NextSection = ""
	}
	if (NextSubsection) {
		print ""
		printheader(NextSubsection, "~")
		NextSubsection = ""
	}
}

# Print one option as a labeled list item ("usage::"), with its description.
#
# "line" starts at the option's leading dash.  Following lines are consumed
# for as long as they look like a continuation of the description,
# and the first line that does not is left in $0.
# "usage" is a local variable, not a parameter.
function parseoption(line,    usage) {
	# Often enough you will see it separated with only one space,
	# which will simply not work for us.
	if (match(line, /[[:space:]]{2,}/)) {
		usage = substr(line, 1, RSTART - 1)
		line = substr(line, RSTART + RLENGTH)
	} else {
		usage = line
		line = ""
	}

	# Embolden the options themselves, and italicize their arguments.
	usage = emboldenoptions(usage)
	while (match(usage, /[=<, ][[:alnum:]][-_[:alnum:]]*/)) {
		usage = substr(usage, 1, RSTART) \
			"__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
			substr(usage, RSTART + RLENGTH)
	}

	print ""
	print usage "::"
	if (line)
		print "\t" formatinline(line)

	# Continuation lines are indented, and either don't start with a dash,
	# or are indented by at least seven spaces.
	readline()
	while (match($0, /^ +[^-[:space:]]|^ {7,}./)) {
		print "\t" formatinline(substr($0, RSTART + RLENGTH - 1))
		readline()
	}
}