From a049249d8163a3ea02fb1c91cceaf2d1c6700f30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?= Date: Sat, 15 Aug 2020 03:08:02 +0200 Subject: hswg: add a static website generator --- hswg/main.go | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 hswg/main.go diff --git a/hswg/main.go b/hswg/main.go new file mode 100644 index 0000000..595bb4a --- /dev/null +++ b/hswg/main.go @@ -0,0 +1,217 @@ +// Program hswg is a static website generator employing libasciidoc with added +// support for two-line/underlined titles, and postprocessing "wiki" InterLinks. +package main + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" + "unicode" + "unicode/utf8" + + "github.com/bytesparadise/libasciidoc" + "github.com/bytesparadise/libasciidoc/pkg/configuration" + "github.com/bytesparadise/libasciidoc/pkg/types" +) + +// isTitle returns the title level if the lines seem to form a title, +// zero otherwise. Input lines may inclide trailing newlines. +func isTitle(line1, line2 []byte) int { + // This is a very naïve method, we should target graphemes (thus at least + // NFC normalize the lines first) and account for wide characters. + diff := utf8.RuneCount(line1) - utf8.RuneCount(line2) + if len(line2) < 2 || diff < -1 || diff > 1 { + return 0 + } + + // "Don't be fooled by back-to-back delimited blocks." + // Still gets fooled by other things, though. + if bytes.IndexFunc(line1, func(r rune) bool { + return unicode.IsLetter(r) || unicode.IsNumber(r) + }) < 0 { + return 0 + } + + // The underline must be homogenous. + for _, r := range bytes.TrimRight(line2, "\r\n") { + if r != line2[0] { + return 0 + } + } + return 1 + strings.IndexByte("=-~^+", line2[0]) +} + +func writeLine(w *io.PipeWriter, cur, next []byte) []byte { + if level := isTitle(cur, next); level > 0 { + w.Write(append(bytes.Repeat([]byte{'='}, level), ' ')) + next = nil + } + w.Write(cur) + return next +} + +// ConvertTitles converts AsciiDoc two-line (underlined) titles to single-line. +func ConvertTitles(w *io.PipeWriter, input []byte) { + var last []byte + for _, cur := range bytes.SplitAfter(input, []byte{'\n'}) { + last = writeLine(w, last, cur) + } + writeLine(w, last, nil) +} + +// entry contains all context information about a single page. +type entry struct { + path string // path + mtime time.Time // modification time + metadata types.Metadata // metadata + document []byte // inner document with expanded LinkWords + backlinks []string // what documents link back here +} + +var extRE = regexp.MustCompile(`\.[^/.]*$`) + +func stripExtension(path string) string { + return extRE.ReplaceAllString(path, "") +} + +func resultPath(path string) string { + if m := extRE.FindStringIndex(path); m != nil { + return path[:m[0]] + ".html" + } + return path + ".html" +} + +func makeLink(m *map[string]*entry, name string) string { + e := (*m)[name] + return fmt.Sprintf("%s", resultPath(e.path), name) +} + +var linkWordRE = regexp.MustCompile(`\b\p{Lu}\p{L}*\b`) + +func expand(m *map[string]*entry, name string, chunk []byte) []byte { + return linkWordRE.ReplaceAllFunc(chunk, func(match []byte) []byte { + if link, ok := (*m)[string(match)]; ok { + link.backlinks = append(link.backlinks, name) + return []byte(makeLink(m, string(match))) + } + return match + }) +} + +func main() { + if len(os.Args) < 3 { + log.Fatalf("usage: %s TEMPLATE GLOB...\n", os.Args[0]) + } + + // Read the common page header. + header, err := ioutil.ReadFile(os.Args[1]) + if err != nil { + log.Fatalln(err) + } + + // Create a map from document names to their page entries. + entries := map[string]*entry{} + for _, glob := range os.Args[2:] { + matches, err := filepath.Glob(glob) + if err != nil { + log.Fatalf("%s: %s\n", glob, err) + } + for _, path := range matches { + name := stripExtension(filepath.Base(path)) + if conflict, ok := entries[name]; ok { + log.Fatalf("%s: conflicts with %s\n", name, conflict.path) + } + entries[name] = &entry{path: path} + } + } + + tagRE := regexp.MustCompile(`<[^<>]+>`) + for name, e := range entries { + f, err := os.Open(e.path) + if err != nil { + log.Fatalln(err) + } + + if i, err := f.Stat(); err != nil { + log.Fatalln(err) + } else { + e.mtime = i.ModTime() + } + + input, err := ioutil.ReadAll(f) + if err != nil { + log.Fatalln(err) + } + + pr, pw := io.Pipe() + go func() { + defer pw.Close() + ConvertTitles(pw, input) + }() + + config := configuration.NewConfiguration( + configuration.WithHeaderFooter(false), + configuration.WithFilename(e.path), + configuration.WithLastUpdated(e.mtime), + ) + + buf := bytes.NewBuffer(nil) + e.metadata, err = libasciidoc.ConvertToHTML(pr, buf, config) + if err != nil { + log.Fatalln(err) + } + + // Expand LinkWords anywhere between . + // We want something like the inverse of Regexp.ReplaceAllStringFunc. + raw, last, expanded := buf.Bytes(), 0, bytes.NewBuffer(nil) + for _, where := range tagRE.FindAllIndex(raw, -1) { + _, _ = expanded.Write(expand(&entries, name, raw[last:where[0]])) + _, _ = expanded.Write(raw[where[0]:where[1]]) + last = where[1] + } + _, _ = expanded.Write(expand(&entries, name, raw[last:])) + e.document = expanded.Bytes() + } + + for name, e := range entries { + f, err := os.Create(resultPath(e.path)) + if err != nil { + log.Fatalln(err) + } + + _, _ = f.Write(header) + + title := e.metadata.Title + if title == "" { + title = name + } + + _, _ = f.WriteString(fmt.Sprintf("%s\n", title)) + _, _ = f.WriteString(fmt.Sprintf("

%s

\n", title)) + + sort.Strings(e.backlinks) + + backlinks := []string{} + for _, name := range e.backlinks { + backlinks = append(backlinks, makeLink(&entries, name)) + } + + if len(backlinks) > 0 { + _, _ = f.WriteString(fmt.Sprintf("\n", + strings.Join(backlinks, ", "))) + } + + _, _ = f.Write(e.document) + _, _ = f.WriteString(fmt.Sprintf("\n", + e.metadata.LastUpdated, e.path)) + } +} -- cgit v1.2.3