From fb291b6deface105667604fdaa8f6d8fcf4936b2 Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch Date: Wed, 25 Dec 2024 22:18:30 +0100 Subject: Improve the terminal filter The new filter comes with these enhancements: - Processing is rune-wise rather than byte-wise; it assumes UTF-8 input and single-cell wide characters, but this condition should be /usually/ satisfied. - Unprocessed control characters are escaped, `cat -v` style. - A lot of escape sequences is at least recognised, if not processed. - Rudimentary preparation for efficient dynamic updates of task views, through Javascript. We make terminal resets and screen clearing commands flush all output and assume that the terminal has a new origin for any later positioning commands. This appears to work well enough with GRUB, at least. The filter is now exposed through a command line option. --- acid.go | 107 +++++++--------- terminal.go | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ terminal_test.go | 44 +++++++ 3 files changed, 456 insertions(+), 64 deletions(-) create mode 100644 terminal.go create mode 100644 terminal_test.go diff --git a/acid.go b/acid.go index 8ff132e..db59c64 100644 --- a/acid.go +++ b/acid.go @@ -22,7 +22,6 @@ import ( "os/exec" "os/signal" "path/filepath" - "slices" "sort" "strconv" "strings" @@ -308,16 +307,16 @@ func handleTask(w http.ResponseWriter, r *http.Request) { return } - rt.RunLog.mu.Lock() - defer rt.RunLog.mu.Unlock() - rt.TaskLog.mu.Lock() - defer rt.TaskLog.mu.Unlock() - rt.DeployLog.mu.Lock() - defer rt.DeployLog.mu.Unlock() + rt.RunLog.Lock() + defer rt.RunLog.Unlock() + rt.TaskLog.Lock() + defer rt.TaskLog.Unlock() + rt.DeployLog.Lock() + defer rt.DeployLog.Unlock() - task.RunLog = slices.Clone(rt.RunLog.b) - task.TaskLog = slices.Clone(rt.TaskLog.b) - task.DeployLog = slices.Clone(rt.DeployLog.b) + task.RunLog = rt.RunLog.Serialize(0) + task.TaskLog = rt.TaskLog.Serialize(0) + task.DeployLog = rt.DeployLog.Serialize(0) }() if err := templateTask.Execute(w, &task); err != nil { @@ -786,54 +785,6 @@ func notifierAwaken() { } // --- Executor ---------------------------------------------------------------- - -type terminalWriter struct { - b []byte - cur int - mu sync.Mutex - tee io.WriteCloser -} - -func (tw *terminalWriter) Write(p []byte) (written int, err error) { - tw.mu.Lock() - defer tw.mu.Unlock() - - if tw.tee != nil { - tw.tee.Write(p) - } - - // Extremely rudimentary emulation of a dumb terminal. - for _, b := range p { - // Enough is enough, writing too much is highly suspicious. - if len(tw.b) > 64<<20 { - return written, errors.New("too much terminal output") - } - - switch b { - case '\b': - if tw.cur > 0 && tw.b[tw.cur-1] != '\n' { - tw.cur-- - } - case '\r': - for tw.cur > 0 && tw.b[tw.cur-1] != '\n' { - tw.cur-- - } - case '\n': - tw.b = append(tw.b, b) - tw.cur = len(tw.b) - default: - tw.b = append(tw.b[:tw.cur], b) - tw.cur = len(tw.b) - } - - if err != nil { - break - } - written += 1 - } - return -} - // ~~~ Running task ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // RunningTask stores all data pertaining to a currently running task. @@ -928,8 +879,8 @@ func newRunningTask(task Task) (*RunningTask, error) { base := filepath.Join(executorTmpDir("/tmp"), fmt.Sprintf("acid-%d-%s-%s-%s-", task.ID, task.Owner, task.Repo, task.Runner)) - rt.RunLog.tee, _ = os.Create(base + "runlog") - rt.TaskLog.tee, _ = os.Create(base + "tasklog") + rt.RunLog.Tee, _ = os.Create(base + "runlog") + rt.TaskLog.Tee, _ = os.Create(base + "tasklog") // The deployment log should not be interesting. } return rt, nil @@ -937,7 +888,7 @@ func newRunningTask(task Task) (*RunningTask, error) { func (rt *RunningTask) close() { for _, tee := range []io.WriteCloser{ - rt.RunLog.tee, rt.TaskLog.tee, rt.DeployLog.tee} { + rt.RunLog.Tee, rt.TaskLog.Tee, rt.DeployLog.Tee} { if tee != nil { tee.Close() } @@ -962,9 +913,9 @@ func (rt *RunningTask) update() error { {&rt.TaskLog, &rt.DB.TaskLog}, {&rt.DeployLog, &rt.DB.DeployLog}, } { - i.tw.mu.Lock() - defer i.tw.mu.Unlock() - if *i.log = bytes.Clone(i.tw.b); *i.log == nil { + i.tw.Lock() + defer i.tw.Unlock() + if *i.log = i.tw.Serialize(0); *i.log == nil { *i.log = []byte{} } } @@ -1581,8 +1532,30 @@ func callRPC(args []string) error { return nil } +// filterTTY exposes the internal virtual terminal filter. +func filterTTY(path string) { + var r io.Reader = os.Stdin + if path != "-" { + if f, err := os.Open(path); err != nil { + log.Println(err) + } else { + r = f + defer f.Close() + } + } + + var tw terminalWriter + if _, err := io.Copy(&tw, r); err != nil { + log.Printf("%s: %s\n", path, err) + } + if _, err := os.Stdout.Write(tw.Serialize(0)); err != nil { + log.Printf("%s: %s\n", path, err) + } +} + func main() { version := flag.Bool("version", false, "show version and exit") + tty := flag.Bool("tty", false, "run the internal virtual terminal filter") flag.Usage = func() { f := flag.CommandLine.Output() @@ -1600,6 +1573,12 @@ func main() { fmt.Printf("%s %s\n", projectName, projectVersion) return } + if *tty { + for _, path := range flag.Args() { + filterTTY(path) + } + return + } if err := parseConfig(flag.Arg(0)); err != nil { log.Fatalln(err) diff --git a/terminal.go b/terminal.go new file mode 100644 index 0000000..4660c1b --- /dev/null +++ b/terminal.go @@ -0,0 +1,369 @@ +package main + +import ( + "bytes" + "io" + "log" + "os" + "strconv" + "strings" + "sync" + "unicode/utf8" +) + +type terminalLine struct { + // For simplicity, we assume that all runes take up one cell, + // including TAB and non-spacing ones. + // The next step would be grouping non-spacing characters, + // in particular Unicode modifier letters, with their base. + columns []rune + + // updateGroup is the topmost line that has changed since this line + // has appeared, for the purpose of update tracking. + updateGroup int +} + +// terminalWriter does a best-effort approximation of an infinite-size +// virtual terminal. +type terminalWriter struct { + sync.Mutex + Tee io.WriteCloser + lines []terminalLine + + // Zero-based coordinates within lines. + column, line int + + // lineTop is used as the base for positioning commands. + lineTop int + + written int + byteBuffer []byte + runeBuffer []rune +} + +func (tw *terminalWriter) log(format string, v ...interface{}) { + if os.Getenv("ACID_TERMINAL_DEBUG") != "" { + log.Printf("terminal: "+format+"\n", v...) + } +} + +func (tw *terminalWriter) Serialize(top int) []byte { + var b bytes.Buffer + for i := top; i < len(tw.lines); i++ { + b.WriteString(string(tw.lines[i].columns)) + b.WriteByte('\n') + } + return b.Bytes() +} + +func (tw *terminalWriter) Write(p []byte) (written int, err error) { + tw.Lock() + defer tw.Unlock() + + // TODO(p): Rather use io.MultiWriter? + // Though I'm not sure what to do about closing (FD leaks). + // Eventually, any handles would be garbage collected in any case. + if tw.Tee != nil { + tw.Tee.Write(p) + } + + // Enough is enough, writing too much is highly suspicious. + ok, remaining := true, 64<<20-tw.written + if remaining < 0 { + ok, p = false, nil + } else if remaining < len(p) { + ok, p = false, p[:remaining] + } + tw.written += len(p) + + // By now, more or less everything should run in UTF-8. + // + // This might have better performance with a ring buffer, + // so as to avoid reallocations. + b := append(tw.byteBuffer, p...) + if !ok { + b = append(b, "\nToo much terminal output\n"...) + } + for utf8.FullRune(b) { + r, len := utf8.DecodeRune(b) + b, tw.runeBuffer = b[len:], append(tw.runeBuffer, r) + } + tw.byteBuffer = b + for tw.processRunes() { + } + return len(p), nil +} + +func (tw *terminalWriter) processPrint(r rune) { + // Extend the buffer vertically. + for len(tw.lines) <= tw.line { + tw.lines = append(tw.lines, + terminalLine{updateGroup: len(tw.lines)}) + } + + // Refresh update trackers, if necessary. + if tw.lines[len(tw.lines)-1].updateGroup > tw.line { + for i := tw.line; i < len(tw.lines); i++ { + tw.lines[i].updateGroup = tw.line + } + } + + // Emulate `cat -v` for C0 characters. + seq := make([]rune, 0, 2) + if r < 32 && r != '\t' { + seq = append(seq, '^', 64+r) + } else { + seq = append(seq, r) + } + + // Extend the line horizontally and write the rune. + for _, r := range seq { + line := &tw.lines[tw.line] + for len(line.columns) <= tw.column { + line.columns = append(line.columns, ' ') + } + + line.columns[tw.column] = r + tw.column++ + } +} + +func (tw *terminalWriter) processFlush() { + tw.column = 0 + tw.line = len(tw.lines) + tw.lineTop = tw.line +} + +func (tw *terminalWriter) processParsedCSI( + private rune, param, intermediate []rune, final rune) bool { + var params []int + if len(param) > 0 { + for _, p := range strings.Split(string(param), ";") { + i, _ := strconv.Atoi(p) + params = append(params, i) + } + } + + if private == '?' && len(intermediate) == 0 && + (final == 'h' || final == 'l') { + for _, p := range params { + // 25 (DECTCEM): There is no cursor to show or hide. + // 7 (DECAWM): We cannot wrap, we're infinite. + if !(p == 25 || (p == 7 && final == 'l')) { + return false + } + } + return true + } + if private != 0 || len(intermediate) > 0 { + return false + } + + switch { + case final == 'C': // Cursor Forward + if len(params) == 0 { + tw.column++ + } else if len(params) >= 1 { + tw.column += params[0] + } + return true + case final == 'D': // Cursor Backward + if len(params) == 0 { + tw.column-- + } else if len(params) >= 1 { + tw.column -= params[0] + } + if tw.column < 0 { + tw.column = 0 + } + return true + case final == 'E': // Cursor Next Line + if len(params) == 0 { + tw.line++ + } else if len(params) >= 1 { + tw.line += params[0] + } + tw.column = 0 + return true + case final == 'F': // Cursor Preceding Line + if len(params) == 0 { + tw.line-- + } else if len(params) >= 1 { + tw.line -= params[0] + } + if tw.line < tw.lineTop { + tw.line = tw.lineTop + } + tw.column = 0 + return true + case final == 'H': // Cursor Position + if len(params) == 0 { + tw.line = tw.lineTop + tw.column = 0 + } else if len(params) >= 2 && params[0] != 0 && params[1] != 0 { + tw.line = tw.lineTop + params[0] - 1 + tw.column = params[1] - 1 + } else { + return false + } + return true + case final == 'J': // Erase in Display + if len(params) == 0 || params[0] == 0 || params[0] == 2 { + // We're not going to erase anything, thank you very much. + tw.processFlush() + } else { + return false + } + return true + case final == 'K': // Erase in Line + if tw.line >= len(tw.lines) { + return true + } + line := &tw.lines[tw.line] + if len(params) == 0 || params[0] == 0 { + if len(line.columns) > tw.column { + line.columns = line.columns[:tw.column] + } + } else if params[0] == 1 { + for i := 0; i < tw.column; i++ { + line.columns[i] = ' ' + } + } else if params[0] == 2 { + line.columns = nil + } else { + return false + } + return true + case final == 'm': + // Straight up ignoring all attributes, at least for now. + return true + } + return false +} + +func (tw *terminalWriter) processCSI(rb []rune) ([]rune, bool) { + if len(rb) < 3 { + return nil, true + } + + i, private, param, intermediate := 2, rune(0), []rune{}, []rune{} + if rb[i] >= 0x3C && rb[i] <= 0x3F { + private = rb[i] + i++ + } + for i < len(rb) && ((rb[i] >= '0' && rb[i] <= '9') || rb[i] == ';') { + param = append(param, rb[i]) + i++ + } + for i < len(rb) && rb[i] >= 0x20 && rb[i] <= 0x2F { + intermediate = append(intermediate, rb[i]) + i++ + } + if i == len(rb) { + return nil, true + } + if rb[i] < 0x40 || rb[i] > 0x7E { + return rb, false + } + if !tw.processParsedCSI(private, param, intermediate, rb[i]) { + tw.log("unhandled CSI %s", string(rb[2:i+1])) + return rb, false + } + return rb[i+1:], true +} + +func (tw *terminalWriter) processEscape(rb []rune) ([]rune, bool) { + if len(rb) < 2 { + return nil, true + } + + // Very roughly following https://vt100.net/emu/dec_ansi_parser + // but being a bit stricter. + switch r := rb[1]; { + case r == '[': + return tw.processCSI(rb) + case r == ']': + // TODO(p): Skip this properly, once we actually hit it. + tw.log("unhandled OSC") + return rb, false + case r == 'P': + // TODO(p): Skip this properly, once we actually hit it. + tw.log("unhandled DCS") + return rb, false + + // Only handling sequences we've seen bother us in real life. + case r == 'c': + // Full reset, use this to flush all output. + tw.processFlush() + return rb[2:], true + case r == 'M': + tw.line-- + return rb[2:], true + + case (r >= 0x30 && r <= 0x4F) || (r >= 0x51 && r <= 0x57) || + r == 0x59 || r == 0x5A || r == 0x5C || (r >= 0x60 && r <= 0x7E): + // → esc_dispatch + tw.log("unhandled ESC %c", r) + return rb, false + //return rb[2:], true + case r >= 0x20 && r <= 0x2F: + // → escape intermediate + i := 2 + for i < len(rb) && rb[i] >= 0x20 && rb[i] <= 0x2F { + i++ + } + if i == len(rb) { + return nil, true + } + if rb[i] < 0x30 || rb[i] > 0x7E { + return rb, false + } + // → esc_dispatch + tw.log("unhandled ESC %s", string(rb[1:i+1])) + return rb, false + //return rb[i+1:], true + default: + // Note that Debian 12 has been seen to produce ESC + // and such due to some very blind string processing. + return rb, false + } +} + +func (tw *terminalWriter) processRunes() bool { + rb := tw.runeBuffer + if len(rb) == 0 { + return false + } + + switch rb[0] { + case '\a': + // Ding dong! + case '\b': + if tw.column > 0 { + tw.column-- + } + case '\n', '\v': + tw.line++ + + // Forced ONLCR flag, because that's what most shell output expects. + fallthrough + case '\r': + tw.column = 0 + + case '\x1b': + var ok bool + if rb, ok = tw.processEscape(rb); rb == nil { + return false + } else if ok { + tw.runeBuffer = rb + return true + } + + // Unsuccessful parses get printed for later inspection. + fallthrough + default: + tw.processPrint(rb[0]) + } + tw.runeBuffer = rb[1:] + return true +} diff --git a/terminal_test.go b/terminal_test.go new file mode 100644 index 0000000..d3fcf45 --- /dev/null +++ b/terminal_test.go @@ -0,0 +1,44 @@ +package main + +import "testing" + +// TODO(p): Add a lot more test cases. +var tests = []struct { + push, want string +}{ + { + "\x1bc\x1b[?7l\x1b[2J\x1b[0mSeaBIOS\r", + "SeaBIOS\n", + }, +} + +func TestTerminal(t *testing.T) { + for _, test := range tests { + tw := terminalWriter{} + if _, err := tw.Write([]byte(test.push)); err != nil { + t.Errorf("%#v: %s", test.push, err) + continue + } + have := string(tw.Serialize(0)) + if have != test.want { + t.Errorf("%#v: %#v; want %#v", test.push, have, test.want) + } + } +} + +func TestTerminalExploded(t *testing.T) { +Loop: + for _, test := range tests { + tw := terminalWriter{} + for _, b := range []byte(test.push) { + if _, err := tw.Write([]byte{b}); err != nil { + t.Errorf("%#v: %s", test.push, err) + continue Loop + } + } + have := string(tw.Serialize(0)) + if have != test.want { + t.Errorf("%#v: %#v; want %#v", test.push, have, test.want) + } + } +} -- cgit v1.2.3-70-g09d2