diff options
author | Přemysl Eric Janouch <p@janouch.name> | 2024-12-30 23:14:10 +0100 |
---|---|---|
committer | Přemysl Eric Janouch <p@janouch.name> | 2024-12-31 20:25:51 +0100 |
commit | 09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32 (patch) | |
tree | b8457b1871929cf84806a043e94e26da6ece93a2 /tools | |
parent | 7560e8700e2c72cd4a11cfe818907bd9da76e800 (diff) | |
download | liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.gz liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.xz liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.zip |
Add a --help/--version to AsciiDoc convertor
liberty is now self-contained, from opt_handler to manual page.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/help2adoc.awk | 234 |
1 files changed, 234 insertions, 0 deletions
# help2adoc.awk: convert --version/--help to AsciiDoc manual pages
#
# Copyright (c) 2024, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Usage: awk -f help2adoc.awk -v Target=cat
#
# This is not intended to produce great output, merely useful output,
# if only because there is no real standard of what the input should look like.
#
# The only target that needs to work is liberty's own opt_handler.
# The expected input format is roughly that of GNU utilities.
#
# Everything happens in the BEGIN rule: the script runs Target twice
# (with --version, then with --help), reads the output through getline,
# and prints the AsciiDoc document on standard output.
#
# Globals shared between the functions below:
#   Target          the command to document, passed in with -v Target=...
#   TargetQuoted    Target wrapped in single quotes for the command line
#   Command         the command line whose output getline is reading
#   Name            first --version line minus its last word
#   Version         first --version line minus any parenthesised part
#   Package         what is printed after ":mansource:"
#   ProgramName     the name used in the title and emboldened in text
#   NextSection     section header waiting to be printed ("" if none)
#   NextSubsection  subsection header waiting to be printed ("" if none)
#
# NOTE(review): this listing was recovered from a rendering that had lost
# its line structure; runs of spaces may have been collapsed on the way.
# Confirm the literal spaces inside patterns such as / +- +/ and
# /^ [[:alpha:]].../ against the repository copy before relying on them.

# Report an unrecoverable error and stop.
#
# The message is printed twice: as a "// ..." line on standard output
# (which AsciiDoc treats as a comment line), and as "fatal error: ..."
# on /dev/stderr.  The script then exits with status 1.
function fatal(message) {
    print "// " message
    print "fatal error: " message > "/dev/stderr"
    exit 1
}

# The input model of this script is that functions take the next line on $0,
# read further lines as necessary, and leave the next line in $0 again.
#
# readline() replaces $0 with the next line of Command's output.
# A negative getline result is reported through fatal(); a zero result
# (end of input) ends the script with a plain "exit", which is how the
# otherwise endless loops in BEGIN terminate.
# "ok" is a local variable, not a parameter.
function readline(    ok) {
    if ((ok = (Command | getline)) < 0)
        fatal("read error")
    if (!ok)
        exit
}

# Return "line" with command-line options wrapped in bold markup.
#
# A short option is a dash followed by one character that is not a dash,
# "=", ",", whitespace, "{", "[" or "<".  A long option is two dashes
# followed by one or more dashes, underscores or alphanumerics.
# An option at the very start of the line is wrapped in single asterisks;
# one found later, after a character that is not a dash, underscore,
# alphanumeric, "*", "'" or "+", is wrapped in double asterisks.
function emboldenoptions(line) {
    # -N, --newer=DATE-OR-FILE, --after-date=DATE-OR-FILE
    sub(/^-[^-=,[:space:]{[<]/, "*&*", line)
    # The match includes one leading context character, which is kept
    # outside the markup.  Once wrapped, the option is preceded by "*",
    # which the leading class rejects, so it is not matched again.
    while (match(line, /[^-_[:alnum:]*'+]-[^-=,[:space:]{[<]/)) {
        line = substr(line, 1, RSTART) \
            "**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
            substr(line, RSTART + RLENGTH)
    }
    sub(/^--[-_[:alnum:]]+/, "*&*", line)
    # Same scheme as above, for long options.
    while (match(line, /[^-_[:alnum:]*'+]--[-_[:alnum:]]+/)) {
        line = substr(line, 1, RSTART) \
            "**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
            substr(line, RSTART + RLENGTH)
    }
    return line
}

# Return "line" formatted as running text: occurrences of ProgramName are
# wrapped in double asterisks, and the result is passed through
# emboldenoptions().
#
# "programname", "last" and "i" are local variables; "last" and "i" are
# declared but never used.
function formatinline(line,    programname, last, i) {
    # Go the extra step of emboldening the program name at word boundaries.
    programname = ProgramName
    # The name is spliced into dynamic regular expressions below,
    # so backslash-escape the characters that are special there.
    gsub(/[][\\.^$(){}|*+?]/, "\\\\&", programname)
    # At the start of the line, followed by a delimiting character.
    # The delimiter is part of the match, hence RLENGTH - 1.
    if (match(line, "^" programname "[^-_[:alnum:]*'+/]")) {
        line = "**" substr(line, RSTART, RLENGTH - 1) "**" \
            substr(line, RSTART + RLENGTH - 1)
    }
    # In the middle of the line, with a delimiter on both sides.
    # Both delimiters are part of the match, hence RLENGTH - 2.
    while (match(line, "[^-_[:alnum:]*'+/]" programname "[^-_[:alnum:]*'+/]")) {
        line = substr(line, 1, RSTART) \
            "**" substr(line, RSTART + 1, RLENGTH - 2) "**" \
            substr(line, RSTART + RLENGTH - 1)
    }
    # At the end of the line, preceded by a delimiting character.
    if (match(line, "[^-_[:alnum:]*'+/]" programname "$")) {
        line = substr(line, 1, RSTART) \
            "**" substr(line, RSTART + 1, RLENGTH - 1) "**"
    }
    return emboldenoptions(line)
}

# Print one usage line for the Synopsis, followed by an empty line.
#
# Words of two or more characters are wrapped in "__", the first word of
# the line is wrapped in "*", and options are emboldened.  If the line
# carries a description after a " - " separator, that description is
# printed as a paragraph of its own, after any pending section headers.
# "description" is a local variable.
function printusage(usage,    description) {
    # Replace typographic ellipses and dashes with ASCII equivalents.
    gsub(/…/, "...", usage)
    gsub(/—|–/, "-", usage)

    # --help output will more likely than not simply include argv[0],
    # or perhaps program_invocation_short_name (not addressed here).
    if (substr(usage, 1, length(Target) + 1) == Target " ")
        usage = ProgramName substr(usage, length(Target) + 1)

    # A lot of GNOME software includes the description here.
    # The split is skipped when any " - " in the line is followed by
    # a non-alphanumeric character.
    if (match(usage, / +- +/) && usage !~ / - [^[:alnum:]]/) {
        description = substr(usage, RSTART + RLENGTH)
        usage = substr(usage, 1, RSTART - 1)
    }

    # The pattern needs a leading context character, so the first word of
    # the line is never matched here; it is handled by the sub() below.
    # A wrapped word is preceded by "_", which the leading class rejects.
    while (match(usage, /[^-_[:alnum:]*'+.][[:alnum:]][-_[:alnum:]]+/)) {
        usage = substr(usage, 1, RSTART) \
            "__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
            substr(usage, RSTART + RLENGTH)
    }
    sub(/^[^[:space:]]+/, "*&*", usage)
    print emboldenoptions(usage)
    print ""

    if (description) {
        flushsections()
        print formatinline(description)
        print ""
    }
}

# We're going with Setext headers, because that's what asciiman.awk supports.
#
# Print "text", then a second line in which every character of "text"
# has been replaced by "underline".
function printheader(text, underline) {
    print text
    gsub(/./, underline, text)
    print text
}

BEGIN {
    if (!Target)
        fatal("missing Target")

    # Command lines given to getline go through the shell, so wrap Target
    # in single quotes and escape the single quotes it contains.
    TargetQuoted = Target
    gsub(/'/, "'\\''", TargetQuoted)
    TargetQuoted = "'" TargetQuoted "'"

    # Only the first line of --version output is read.
    # Remaining --version lines could be about copyright (GNU),
    # or something else entirely.
    Command = TargetQuoted " --version"
    if ((Command | getline) > 0) {
        # GNU --version output can place the package name in parentheses.
        # For example, "ls (GNU coreutils) 9.1" gives
        # Package "GNU coreutils 9.1", Version "ls 9.1" and Name "ls".
        Package = $0
        if (match($0, /[[:space:]][(][^)]*[)]/)) {
            # The contents of the parentheses, plus whatever follows them.
            Package = substr($0, RSTART + 2, RLENGTH - 3) \
                substr($0, RSTART + RLENGTH)
            # Drop the parenthesised part from $0 itself.
            sub(/[[:space:]]+[(][^)]*[)]/, "")
        }

        Version = $0
        # Drop the last whitespace-separated word to obtain the name.
        sub(/[[:space:]]+[^[:space:]]+$/, "")
        Name = $0
    } else {
        fatal("failed to get --version output")
    }

    # A Name without whitespace is used as the program name as is;
    # otherwise fall back to the part of Target after its last slash.
    if (Name !~ /[[:space:]]/)
        ProgramName = Name
    else if (match(Target, /[^/]+$/))
        ProgramName = substr(Target, RSTART, RLENGTH)

    printheader(ProgramName "(1)", "=")
    print ":doctype: manpage"
    print ":manmanual: " Name " Manual"
    print ":mansource: " Package
    print ""
    printheader("Name", "-")
    print ProgramName " - manual page for " Version
    print ""

    # Switch over to --help, priming $0 with its first line.
    close(Command)
    Command = TargetQuoted " --help"
    if ((Command | getline) <= 0)
        fatal("failed to get --help output")

    # Headers are queued here and printed by flushsections() once there
    # is content to put under them.
    NextSection = "Description"
    NextSubsection = ""

    # The SYNOPSIS section is mandatory, so just put it there.
    printheader("Synopsis", "-")
    # Consume usage lines: a "Usage:"/"usage:" line, and indented lines
    # that do not start with a dash.  Empty lines are skipped, and the
    # first other non-empty line ends the Synopsis and stays in $0.
    while (1) {
        if (match($0, /^[Uu]sage:[[:space:]]*/)) {
            if (($0 = substr($0, RSTART + RLENGTH)))
                printusage($0)
        } else if (match($0, /^[[:space:]]+/) && !/^[[:space:]]*-/) {
            if (($0 = substr($0, RSTART + RLENGTH)))
                printusage($0)
        } else if ($0) {
            break
        }
        readline()
    }
    # Process the rest of the output.  This loop has no exit condition
    # of its own; it ends when readline() runs out of input.
    while (1) {
        if (match($0, /^[[:alpha:]][-[:alnum:][:space:]]+:$/)) {
            # We don't flush sections here,
            # so that we don't unnecessarily enforce DESCRIPTION first.
            NextSection = substr($0, RSTART, RLENGTH - 1)
        } else if (match($0, /^ [[:alpha:]][-[:alnum:][:space:]]+:$/)) {
            # An indented "Title:" line becomes a subsection header,
            # without the leading space and the trailing colon.
            flushsections()
            NextSubsection = substr($0, RSTART + 1, RLENGTH - 2)
        } else if (match($0, /^ +-/)) {
            flushsections()
            # parseoption() leaves the line following the option in $0,
            # so skip the readline() at the bottom of the loop.
            parseoption(substr($0, RSTART + RLENGTH - 1))
            continue
        } else if ($0) {
            flushsections()

            # That will probably be interpreted as a literal block.
            # Indented lines are therefore printed unmodified.
            if (!/^[[:space:]]/)
                $0 = formatinline($0)
            print
        } else {
            print
        }
        readline()
    }
}

# Print the pending section and subsection headers, if any, each preceded
# by an empty line, and clear them so that they are printed only once.
function flushsections() {
    if (NextSection) {
        print ""
        printheader(NextSection, "-")
        NextSection = ""
    }
    if (NextSubsection) {
        print ""
        printheader(NextSubsection, "~")
        NextSubsection = ""
    }
}

# Print one option as a labelled list entry ("term::" followed by
# tab-indented text).
#
# "line" is the option line with its leading indentation removed.
# Continuation lines are read with readline(); on return, $0 holds the
# first line that does not belong to this option.
# "usage" is a local variable.
function parseoption(line,    usage) {
    # Often enough you will see it separated with only one space,
    # which will simply not work for us.
    if (match(line, /[[:space:]]{2,}/)) {
        usage = substr(line, 1, RSTART - 1)
        line = substr(line, RSTART + RLENGTH)
    } else {
        # No separator found: the whole line is the option,
        # and its description, if any, is on the following lines.
        usage = line
        line = ""
    }

    usage = emboldenoptions(usage)
    # Wrap option arguments, i.e. words following "=", "<", "," or a
    # space, in "__".  A wrapped word starts with "_", which is not
    # alphanumeric, so it is not matched again.
    while (match(usage, /[=<, ][[:alnum:]][-_[:alnum:]]*/)) {
        usage = substr(usage, 1, RSTART) \
            "__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
            substr(usage, RSTART + RLENGTH)
    }

    print ""
    print usage "::"
    if (line)
        print "\t" formatinline(line)

    readline()
    # Continuation lines are space-indented lines whose text does not
    # start with a dash, or any line indented by seven or more spaces.
    # The last matched character is the first character of the text.
    while (match($0, /^ +[^-[:space:]]|^ {7,}./)) {
        print "\t" formatinline(substr($0, RSTART + RLENGTH - 1))
        readline()
    }
}