Add a --help/--version to AsciiDoc convertor

liberty is now self-contained, from opt_handler to manual page.
author: Přemysl Eric Janouch <p@janouch.name> 2024-12-30 23:14:10 +0100
committer: Přemysl Eric Janouch <p@janouch.name> 2024-12-31 20:25:51 +0100
commit: 09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32 (patch)
tree: b8457b1871929cf84806a043e94e26da6ece93a2 /tools
parent: 7560e8700e2c72cd4a11cfe818907bd9da76e800 (diff)
download: liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.gz
liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.xz
liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.zip
1 files changed, 234 insertions, 0 deletions
diff --git a/tools/help2adoc.awk b/tools/help2adoc.awk
new file mode 100644
index 0000000..b36753f
--- /dev/null
+++ b/tools/help2adoc.awk
@@ -0,0 +1,234 @@
+# help2adoc.awk: convert --version/--help to AsciiDoc manual pages
+#
+# Copyright (c) 2024, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Usage: awk -f help2adoc.awk -v Target=cat
+#
+# This is not intended to produce great output, merely useful output,
+# if only because there is no real standard of what the input should look like.
+#
+# The only target that needs to work is liberty's own opt_handler.
+# The expected input format is roughly that of GNU utilities.
+
+function fatal(message) {	# Report message on stdout and stderr, then exit 1.
+	# The stdout copy gets a "// " prefix (presumably an AsciiDoc comment line).
+	print "// " message; print "fatal error: " message > "/dev/stderr"
+	exit 1
+}
+
+# Input model: functions take the current line in $0, read further lines
+# as necessary, and leave the next unprocessed line in $0 again.
+function readline(    status) {
+	status = (Command | getline)	# >0: got a line, 0: end of input, <0: error
+	if (status < 0)
+		fatal("read error")
+	else if (!status) exit	# end of input terminates the whole script
+}
+
+function emboldenoptions(line) {	# Returns line with option names made bold.
+	# -N, --newer=DATE-OR-FILE, --after-date=DATE-OR-FILE
+	sub(/^-[^-=,[:space:]{[<]/, "*&*", line)	# short option opening the line
+	while (match(line, /[^-_[:alnum:]*'+]-[^-=,[:space:]{[<]/)) {	# other short ones
+		line = substr(line, 1, RSTART) \
+			"**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
+			substr(line, RSTART + RLENGTH)
+	}	# the inserted "*" is excluded by the pattern, so nothing matches twice
+	sub(/^--[-_[:alnum:]]+/, "*&*", line)	# long option opening the line
+	while (match(line, /[^-_[:alnum:]*'+]--[-_[:alnum:]]+/)) {	# other long ones
+		line = substr(line, 1, RSTART) \
+			"**" substr(line, RSTART + 1, RLENGTH - 1) "**" \
+			substr(line, RSTART + RLENGTH)
+	}
+	return line	# the example above: *-N*, **--newer**=..., **--after-date**=...
+}
+
+function formatinline(line,    programname, last, i) {	# Returns marked-up text.
+	# Go the extra step of emboldening the program name at word boundaries.
+	programname = ProgramName	# NOTE(review): locals last and i are unused
+	gsub(/[][\\.^$(){}|*+?]/, "\\\\&", programname)	# backslash-escape for regex use
+	if (match(line, "^" programname "[^-_[:alnum:]*'+/]")) {	# name opens the line
+		line = "**" substr(line, RSTART, RLENGTH - 1) "**" \
+			substr(line, RSTART + RLENGTH - 1)
+	}
+	while (match(line, "[^-_[:alnum:]*'+/]" programname "[^-_[:alnum:]*'+/]")) {
+		line = substr(line, 1, RSTART) \
+			"**" substr(line, RSTART + 1, RLENGTH - 2) "**" \
+			substr(line, RSTART + RLENGTH - 1)
+	}	# ^ name inside the line; the delimiters on both sides are kept as is
+	if (match(line, "[^-_[:alnum:]*'+/]" programname "$")) {	# name ends the line
+		line = substr(line, 1, RSTART) \
+			"**" substr(line, RSTART + 1, RLENGTH - 1) "**"
+	}
+	return emboldenoptions(line)	# option names are emboldened as well
+}
+
+function printusage(usage,    description) {	# Prints one marked-up usage line.
+	gsub(/…/, "...", usage)	# replace the ellipsis and dash characters with ASCII
+	gsub(/—|–/, "-", usage)
+
+	# --help output will more likely than not simply include argv[0],
+	# or perhaps program_invocation_short_name (not addressed here).
+	if (substr(usage, 1, length(Target) + 1) == Target " ")
+		usage = ProgramName substr(usage, length(Target) + 1)
+
+	# A lot of GNOME software includes the description here.
+	if (match(usage, / +- +/) && usage !~ / - [^[:alnum:]]/) {
+		description = substr(usage, RSTART + RLENGTH)	# text after the dash
+		usage = substr(usage, 1, RSTART - 1)	# text before the dash
+	}
+	# Wrap words of 2+ characters in "__" (presumably argument placeholders).
+	while (match(usage, /[^-_[:alnum:]*'+.][[:alnum:]][-_[:alnum:]]+/)) {
+		usage = substr(usage, 1, RSTART) \
+			"__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
+			substr(usage, RSTART + RLENGTH)
+	}
+	sub(/^[^[:space:]]+/, "*&*", usage)	# the first word is made bold
+	print emboldenoptions(usage)
+	print ""
+
+	if (description) {	# only set when a description was split off above
+		flushsections()	# prints any pending section headers first
+		print formatinline(description)
+		print ""
+	}
+}
+
+# Setext-style (underlined) headers, because that's what asciiman.awk supports.
+function printheader(text, underline) {
+	# Print the title, then turn each of its characters into `underline`.
+	print text; gsub(/./, underline, text)
+	print text
+}
+
+BEGIN {	# All work happens here, pulling lines from Command with getline.
+	if (!Target)	# the program to document, passed as -v Target=...
+		fatal("missing Target")
+
+	TargetQuoted = Target	# single-quoted for the shell: ' becomes '\''
+	gsub(/'/, "'\\''", TargetQuoted)
+	TargetQuoted = "'" TargetQuoted "'"
+
+	# Remaining --version lines could be about copyright (GNU),
+	# or something else entirely.
+	Command = TargetQuoted " --version"
+	if ((Command | getline) > 0) {	# only the first line is looked at
+		# GNU --version output can place the package name in parentheses.
+		Package = $0
+		if (match($0, /[[:space:]][(][^)]*[)]/)) {	# e.g. "ls (GNU coreutils) 9.1"
+			Package = substr($0, RSTART + 2, RLENGTH - 3) \
+				substr($0, RSTART + RLENGTH)
+			sub(/[[:space:]]+[(][^)]*[)]/, "")	# $0 would now be "ls 9.1"
+		}	# ...and Package would be "GNU coreutils 9.1"
+
+		Version = $0
+		sub(/[[:space:]]+[^[:space:]]+$/, "")	# drop the last word
+		Name = $0
+	} else {
+		fatal("failed to get --version output")
+	}
+
+	if (Name !~ /[[:space:]]/)	# a single-word name is used as is,
+		ProgramName = Name
+	else if (match(Target, /[^/]+$/))	# otherwise Target's last path component
+		ProgramName = substr(Target, RSTART, RLENGTH)
+
+	printheader(ProgramName "(1)", "=")	# document title and attributes
+	print ":doctype: manpage"
+	print ":manmanual: " Name " Manual"
+	print ":mansource: " Package
+	print ""
+	printheader("Name", "-")
+	print ProgramName " - manual page for " Version
+	print ""
+
+	close(Command)	# done with the --version output
+	Command = TargetQuoted " --help"
+	if ((Command | getline) <= 0)	# the first --help line goes to $0
+		fatal("failed to get --help output")
+
+	NextSection = "Description"	# printed lazily by flushsections()
+	NextSubsection = ""
+
+	# The SYNOPSIS section is mandatory, so just put it there.
+	printheader("Synopsis", "-")
+	while (1) {	# consume usage lines; readline() exits the script at end of input
+		if (match($0, /^[Uu]sage:[[:space:]]*/)) {	# "Usage: ..." line
+			if (($0 = substr($0, RSTART + RLENGTH)))
+				printusage($0)
+		} else if (match($0, /^[[:space:]]+/) && !/^[[:space:]]*-/) {	# indented non-option
+			if (($0 = substr($0, RSTART + RLENGTH)))
+				printusage($0)
+		} else if ($0) {	# anything else that is non-empty ends the synopsis
+			break
+		}
+		readline()
+	}
+	while (1) {	# the rest: sections, subsections, options, and plain text
+		if (match($0, /^[[:alpha:]][-[:alnum:][:space:]]+:$/)) {
+			# We don't flush sections here,
+			# so that we don't unnecessarily enforce DESCRIPTION first.
+			NextSection = substr($0, RSTART, RLENGTH - 1)	# without the colon
+		} else if (match($0, /^ [[:alpha:]][-[:alnum:][:space:]]+:$/)) {
+			flushsections()
+			NextSubsection = substr($0, RSTART + 1, RLENGTH - 2)	# no space/colon
+		} else if (match($0, /^ +-/)) {	# indented line starting with a dash
+			flushsections()
+			parseoption(substr($0, RSTART + RLENGTH - 1))	# from the dash on
+			continue	# parseoption() has already left the next line in $0
+		} else if ($0) {
+			flushsections()
+
+			# That will be probably interpreted as a literal block.
+			if (!/^[[:space:]]/)
+				$0 = formatinline($0)
+			print
+		} else {	# empty lines are passed through
+			print
+		}
+		readline()
+	}
+}
+
+# Print the headers queued in NextSection and NextSubsection, in that order,
+# each preceded by an empty line, and clear both so they only appear once.
+function flushsections() {
+	if (NextSection) {
+		print ""; printheader(NextSection, "-")
+	}
+	if (NextSubsection) {
+		print ""; printheader(NextSubsection, "~")
+	}
+	# Unconditional: clearing an already empty variable changes nothing.
+	NextSection = NextSubsection = ""
+}
+
+function parseoption(line,    usage) {	# line: an option line, from its dash on
+	# Split options from the description at the first run of 2+ whitespace
+	# characters (one space won't do).  No {n,} intervals: not all awks have them.
+	if (match(line, /[[:space:]][[:space:]]+/)) {
+		usage = substr(line, 1, RSTART - 1)
+		line = substr(line, RSTART + RLENGTH)
+	} else {
+		usage = line
+		line = ""
+	}
+
+	usage = emboldenoptions(usage)
+	while (match(usage, /[=<, ][[:alnum:]][-_[:alnum:]]*/)) {	# option arguments
+		usage = substr(usage, 1, RSTART) \
+			"__" substr(usage, RSTART + 1, RLENGTH - 1) "__" \
+			substr(usage, RSTART + RLENGTH)
+	}
+
+	print ""
+	print usage "::"	# the options become the term, the description its body
+	if (line)
+		print "\t" formatinline(line)
+	# Continuation lines: indented text not starting with a dash, or 7+ spaces.
+	readline()
+	while (match($0, /^ +[^-[:space:]]|^[ ][ ][ ][ ][ ][ ][ ]+./)) {
+		print "\t" formatinline(substr($0, RSTART + RLENGTH - 1))
+		readline()
+	}	# the first line that does not match is left in $0 for the caller
+}
author	Přemysl Eric Janouch <p@janouch.name>	2024-12-30 23:14:10 +0100
committer	Přemysl Eric Janouch <p@janouch.name>	2024-12-31 20:25:51 +0100
commit	09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32 (patch)
tree	b8457b1871929cf84806a043e94e26da6ece93a2 /tools
parent	7560e8700e2c72cd4a11cfe818907bd9da76e800 (diff)
download	liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.gz liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.tar.xz liberty-09e635cf97a1cc84d2d3110d7a5e2d9d45f37e32.zip