hswg: add a static website generator

author: Přemysl Eric Janouch <p@janouch.name> 2020-08-15 03:08:02 +0200
committer: Přemysl Eric Janouch <p@janouch.name> 2020-08-15 04:26:50 +0200
commit: a049249d8163a3ea02fb1c91cceaf2d1c6700f30 (patch)
tree: fcca1e26e7bfc434770a13d545feac35095642bb
parent: d4eb9cde393066b97d91cab9f83b4430685178df (diff)
download: haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.tar.gz
haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.tar.xz
haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.zip
1 files changed, 217 insertions, 0 deletions
diff --git a/hswg/main.go b/hswg/main.go
new file mode 100644
index 0000000..595bb4a
--- /dev/null
+++ b/hswg/main.go
@@ -0,0 +1,217 @@
+// Program hswg is a static website generator employing libasciidoc with added
+// support for two-line/underlined titles, and postprocessing "wiki" InterLinks.
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+	"time"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/bytesparadise/libasciidoc"
+	"github.com/bytesparadise/libasciidoc/pkg/configuration"
+	"github.com/bytesparadise/libasciidoc/pkg/types"
+)
+
+// isTitle reports the level of the two-line (underlined) title that line1 and
+// line2 appear to form, or zero when they do not look like one.
+// Both lines may still include their trailing newlines.
+func isTitle(line1, line2 []byte) int {
+	// Deliberately simplistic: lengths are compared in runes, whereas a proper
+	// check would work on graphemes (after at least NFC normalization) and
+	// take wide characters into account.
+	if len(line2) < 2 {
+		return 0
+	}
+	if delta := utf8.RuneCount(line1) - utf8.RuneCount(line2); delta < -1 || delta > 1 {
+		return 0
+	}
+
+	// A title needs at least one letter or number, which keeps back-to-back
+	// delimited blocks from being mistaken for one. Other false positives
+	// remain possible.
+	alphanumeric := func(r rune) bool {
+		return unicode.IsLetter(r) || unicode.IsNumber(r)
+	}
+	if bytes.IndexFunc(line1, alphanumeric) == -1 {
+		return 0
+	}
+
+	// Every byte of the underline, line ending aside, must equal the first.
+	marker := line2[0]
+	underline := bytes.TrimRight(line2, "\r\n")
+	for i := range underline {
+		if underline[i] != marker {
+			return 0
+		}
+	}
+
+	// A marker that is not in the table has index -1, which yields zero.
+	return strings.IndexByte("=-~^+", marker) + 1
+}
+
+// writeLine emits cur to w and returns the line that is still pending.
+//
+// When cur and next together form a two-line title, cur is written with a
+// single-line title prefix (one '=' per level, then a space) and nil is
+// returned, so that the underline is dropped. Otherwise cur is written
+// unchanged and next is returned. Errors from w are not reported.
+func writeLine(w *io.PipeWriter, cur, next []byte) []byte {
+	level := isTitle(cur, next)
+	if level <= 0 {
+		_, _ = w.Write(cur)
+		return next
+	}
+
+	prefix := append(bytes.Repeat([]byte{'='}, level), ' ')
+	_, _ = w.Write(prefix)
+	_, _ = w.Write(cur)
+	return nil
+}
+
+// ConvertTitles copies input to w, rewriting AsciiDoc two-line (underlined)
+// titles into their single-line form.
+//
+// The input is processed with one line of lookahead: each line is only
+// written once the following one is known, since that one may turn out to be
+// its underline.
+func ConvertTitles(w *io.PipeWriter, input []byte) {
+	var pending []byte
+	lines := bytes.SplitAfter(input, []byte{'\n'})
+	for i := range lines {
+		pending = writeLine(w, pending, lines[i])
+	}
+
+	// Flush whatever is left; there is nothing that could underline it.
+	writeLine(w, pending, nil)
+}
+
+// entry holds everything known about a single page.
+type entry struct {
+	// path is the location of the source file, as matched by a glob.
+	path string
+	// mtime is the modification time of the source file.
+	mtime time.Time
+	// metadata is what the AsciiDoc conversion returned for the page.
+	metadata types.Metadata
+	// document is the converted page body with LinkWords expanded to links.
+	document []byte
+	// backlinks names the documents that link to this page.
+	backlinks []string
+}
+
+// extRE matches the extension of a path: the last dot of the final path
+// component together with everything after it.
+var extRE = regexp.MustCompile(`\.[^/.]*$`)
+
+// stripExtension returns path without its extension, or path unchanged when
+// it has none.
+func stripExtension(path string) string {
+	loc := extRE.FindStringIndex(path)
+	if loc == nil {
+		return path
+	}
+	return path[:loc[0]]
+}
+
+// resultPath returns the name of the HTML file generated for the source file
+// at path: the extension, if any, is replaced with ".html".
+func resultPath(path string) string {
+	return stripExtension(path) + ".html"
+}
+
+func makeLink(m *map[string]*entry, name string) string {
+	e := (*m)[name]
+	return fmt.Sprintf("<a href='%s'>%s</a>", resultPath(e.path), name)
+}
+
+// linkWordRE matches LinkWord candidates: an uppercase letter followed by any
+// number of letters, delimited by word boundaries.
+//
+// NOTE(review): \b is an ASCII-only word boundary in Go regular expressions,
+// so words that start with, end with, or contain non-ASCII letters are
+// probably not delimited the way this pattern intends -- confirm.
+var linkWordRE = regexp.MustCompile(`\b\p{Lu}\p{L}*\b`)
+
+// containsString reports whether list contains s.
+func containsString(list []string, s string) bool {
+	for _, item := range list {
+		if item == s {
+			return true
+		}
+	}
+	return false
+}
+
+// expand returns a copy of chunk in which every LinkWord that names an entry
+// in m has been replaced with a link to that entry's page.
+//
+// name identifies the document that chunk comes from. It is recorded in the
+// backlinks of each entry that gets linked, once per document no matter how
+// many times the LinkWord occurs, so that the list of referring pages
+// contains no duplicates.
+func expand(m *map[string]*entry, name string, chunk []byte) []byte {
+	return linkWordRE.ReplaceAllFunc(chunk, func(match []byte) []byte {
+		word := string(match)
+		target, ok := (*m)[word]
+		if !ok {
+			return match
+		}
+		if !containsString(target.backlinks, name) {
+			target.backlinks = append(target.backlinks, name)
+		}
+		return []byte(makeLink(m, word))
+	})
+}
+
+func main() {
+	if len(os.Args) < 3 {
+		log.Fatalf("usage: %s TEMPLATE GLOB...\n", os.Args[0])
+	}
+
+	// Read the common page header.
+	header, err := ioutil.ReadFile(os.Args[1])
+	if err != nil {
+		log.Fatalln(err)
+	}
+
+	// Create a map from document names to their page entries.
+	entries := map[string]*entry{}
+	for _, glob := range os.Args[2:] {
+		matches, err := filepath.Glob(glob)
+		if err != nil {
+			log.Fatalf("%s: %s\n", glob, err)
+		}
+		for _, path := range matches {
+			name := stripExtension(filepath.Base(path))
+			if conflict, ok := entries[name]; ok {
+				log.Fatalf("%s: conflicts with %s\n", name, conflict.path)
+			}
+			entries[name] = &entry{path: path}
+		}
+	}
+
+	tagRE := regexp.MustCompile(`<[^<>]+>`)
+	for name, e := range entries {
+		f, err := os.Open(e.path)
+		if err != nil {
+			log.Fatalln(err)
+		}
+
+		if i, err := f.Stat(); err != nil {
+			log.Fatalln(err)
+		} else {
+			e.mtime = i.ModTime()
+		}
+
+		input, err := ioutil.ReadAll(f)
+		if err != nil {
+			log.Fatalln(err)
+		}
+
+		pr, pw := io.Pipe()
+		go func() {
+			defer pw.Close()
+			ConvertTitles(pw, input)
+		}()
+
+		config := configuration.NewConfiguration(
+			configuration.WithHeaderFooter(false),
+			configuration.WithFilename(e.path),
+			configuration.WithLastUpdated(e.mtime),
+		)
+
+		buf := bytes.NewBuffer(nil)
+		e.metadata, err = libasciidoc.ConvertToHTML(pr, buf, config)
+		if err != nil {
+			log.Fatalln(err)
+		}
+
+		// Expand LinkWords anywhere between <tags>.
+		// We want something like the inverse of Regexp.ReplaceAllStringFunc.
+		raw, last, expanded := buf.Bytes(), 0, bytes.NewBuffer(nil)
+		for _, where := range tagRE.FindAllIndex(raw, -1) {
+			_, _ = expanded.Write(expand(&entries, name, raw[last:where[0]]))
+			_, _ = expanded.Write(raw[where[0]:where[1]])
+			last = where[1]
+		}
+		_, _ = expanded.Write(expand(&entries, name, raw[last:]))
+		e.document = expanded.Bytes()
+	}
+
+	for name, e := range entries {
+		f, err := os.Create(resultPath(e.path))
+		if err != nil {
+			log.Fatalln(err)
+		}
+
+		_, _ = f.Write(header)
+
+		title := e.metadata.Title
+		if title == "" {
+			title = name
+		}
+
+		_, _ = f.WriteString(fmt.Sprintf("<title>%s</title>\n", title))
+		_, _ = f.WriteString(fmt.Sprintf("<h1>%s</h1>\n", title))
+
+		sort.Strings(e.backlinks)
+
+		backlinks := []string{}
+		for _, name := range e.backlinks {
+			backlinks = append(backlinks, makeLink(&entries, name))
+		}
+
+		if len(backlinks) > 0 {
+			_, _ = f.WriteString(fmt.Sprintf("<p id=links>Links here: %s</p>\n",
+				strings.Join(backlinks, ", ")))
+		}
+
+		_, _ = f.Write(e.document)
+		_, _ = f.WriteString(fmt.Sprintf("<p id=footer>Last updated: %s"+
+			" &mdash; <a href='%s'>Source</p>\n",
+			e.metadata.LastUpdated, e.path))
+	}
+}
author	Přemysl Eric Janouch <p@janouch.name>	2020-08-15 03:08:02 +0200
committer	Přemysl Eric Janouch <p@janouch.name>	2020-08-15 04:26:50 +0200
commit	a049249d8163a3ea02fb1c91cceaf2d1c6700f30 (patch)
tree	fcca1e26e7bfc434770a13d545feac35095642bb
parent	d4eb9cde393066b97d91cab9f83b4430685178df (diff)
download	haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.tar.gz haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.tar.xz haven-a049249d8163a3ea02fb1c91cceaf2d1c6700f30.zip