aboutsummaryrefslogtreecommitdiff
path: root/tools/lxdrgen.awk
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lxdrgen.awk')
-rw-r--r--tools/lxdrgen.awk309
1 files changed, 309 insertions, 0 deletions
diff --git a/tools/lxdrgen.awk b/tools/lxdrgen.awk
new file mode 100644
index 0000000..2b4adb6
--- /dev/null
+++ b/tools/lxdrgen.awk
@@ -0,0 +1,309 @@
+# lxdrgen.awk: an XDR-derived code generator for network protocols.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# You may read RFC 4506 for context, however it is only a source of inspiration.
+# Grammar is easy to deduce from the parser.
+#
+# Native types: bool, u{8,16,32,64}, i{8,16,32,64}, string
+#
+# Don't define any new types, unless you hate yourself, then it's okay to do so.
+# Backends tend to be a pain in the arse, for different reasons.
+#
+# All numbers are encoded in big-endian byte order.
+# Booleans are one byte each.
+# Strings must be valid UTF-8, use u8<> to lift that restriction.
+# String and array lengths are encoded as u32.
+# Enumeration values automatically start at 1, and are encoded as i8.
+# Any struct or union field may be a variable-length array.
+#
+# Message framing is done externally, but is advised to also prefix u32 lengths,
+# unless this role is already filled by, e.g., WebSocket.
+#
+# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
+# -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
+
+# --- Utilities ----------------------------------------------------------------
+
+# Convert a CamelCase identifier to lower-case snake_case, inserting "_"
+# at each lowercase-to-uppercase boundary: "FooBar" -> "foo_bar".
+# The capital after a boundary is lowercased in place, so it may itself
+# begin the next boundary: "aBC" -> "a_b_c".
+function cameltosnake(s) {
+    # A successful match is always exactly two characters long:
+    # the lowercase one at RSTART, and the uppercase one right after it.
+    while (match(s, /[[:lower:]][[:upper:]]/)) {
+        s = substr(s, 1, RSTART) "_" tolower(substr(s, RSTART + 1, 1)) \
+            substr(s, RSTART + 2)
+    }
+    return tolower(s)
+}
+
+# Convert a snake_case identifier (of any letter case) to CamelCase:
+# "foo_bar" -> "FooBar", "HELLO_WORLD" -> "HelloWorld".
+function snaketocamel(s) {
+    # Normalize to a single leading capital, followed by all lower case.
+    s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
+
+    # Each match is an underscore plus one alphanumeric character:
+    # drop the former, capitalize the latter.
+    while (match(s, /_[[:alnum:]]/)) {
+        s = substr(s, 1, RSTART - 1) toupper(substr(s, RSTART + 1, 1)) \
+            substr(s, RSTART + 2)
+    }
+    return s
+}
+
+# Lowercase the first letter of s, but only when s begins with an uppercase
+# letter directly followed by a lowercase one: "FooBar" -> "fooBar".
+# Anything else is returned unchanged, which leaves leading acronyms
+# alone ("URL" -> "URL", "ABc" -> "ABc").
+#
+# The pattern is anchored: an unanchored one would be triggered by
+# an upper/lower pair anywhere in the string, and mangle e.g. "URLPath".
+# A plain regex test is used rather than match(), so that the caller's
+# RSTART and RLENGTH stay untouched.
+function decapitalize(s) {
+    if (s ~ /^[[:upper:]][[:lower:]]/)
+        return tolower(substr(s, 1, 1)) substr(s, 2)
+    return s
+}
+
+# Indent a block of text by one tab: a tab is put in front of the text,
+# and after each newline within it. Any tabs at the very end, such as
+# the one produced after a final newline, are stripped again.
+# An empty (false) value is returned as is.
+function indent(s) {
+    if (s) {
+        gsub(/\n/, "\n\t", s)
+        sub(/\t*$/, "", s)
+        s = "\t" s
+    }
+    return s
+}
+
+# Concatenate value onto the end of the string stored in a[key].
+# A key that does not exist yet starts out as the empty string.
+function append(a, key, value) {
+    a[key] = (a[key]) value
+}
+
+# --- Parsing ------------------------------------------------------------------
+
+# Report a fatal error at the current input position, and exit with status 1.
+# The diagnostic goes both to standard output, as a "//" comment line,
+# and to standard error.
+function fatal(message) {
+    message = FILENAME ":" FNR ": fatal error: " message
+    print "// " message
+    print message > "/dev/stderr"
+    exit 1
+}
+
+# Skip the rest of a block comment, whose opening "/*" has already been
+# removed from $0. Input lines are consumed until one contains "*/",
+# and $0 is left holding whatever follows that terminator.
+# Running out of input first is a fatal error.
+function skipcomment() {
+    while (!match($0, /[*]\//)) {
+        if (!(getline > 0)) {
+            fatal("unterminated block comment")
+            return
+        }
+    }
+    $0 = substr($0, RSTART + RLENGTH)
+}
+
+# Read the next token from the input into the global Token, and return it.
+# $0 serves as the buffer of not yet consumed text on the current line,
+# further lines are fetched with getline as needed.
+#
+# Whitespace, "//" line comments and "/* */" block comments are skipped.
+# Tokens are identifiers, decimal integers, or any other single character.
+# At the end of input, Token is set to the empty string.
+#
+# The buffer is tested with length() rather than for truthiness:
+# input records compare as numbers when they look like numbers,
+# so a remainder such as "-0" or ".0" would otherwise count as false,
+# and get silently thrown away instead of being tokenized.
+function nexttoken() {
+    do {
+        if (match($0, /^[[:space:]]+/)) {
+            $0 = substr($0, RLENGTH + 1)
+        } else if (match($0, /^\/\/.*/)) {
+            $0 = ""
+        } else if (match($0, /^\/[*]/)) {
+            $0 = substr($0, RLENGTH + 1)
+            skipcomment()
+        } else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
+            Token = substr($0, 1, RLENGTH)
+            $0 = substr($0, RLENGTH + 1)
+            return Token
+        } else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
+            # AWK implementations rarely support non-decimal notations
+            # in their implicit string-to-number conversions.
+            Token = substr($0, 1, RLENGTH)
+            $0 = substr($0, RLENGTH + 1)
+            return Token
+        } else if (length($0)) {
+            Token = substr($0, 1, 1)
+            $0 = substr($0, 2)
+            return Token
+        }
+    } while (length($0) || getline > 0)
+    Token = ""
+    return Token
+}
+
+# Assert that a parsing step has succeeded: a true v is passed through
+# to the caller, a false one aborts with a fatal error that shows
+# the current token and the unread remainder of the line.
+function expect(v) {
+    if (v)
+        return v
+
+    fatal("broken expectations at `" Token "' before `" $0 "'")
+    return v
+}
+
+# If the current token equals what, consume it and return 1.
+# Otherwise, leave the token alone and return 0.
+function accept(what) {
+    if (Token == what) {
+        nexttoken()
+        return 1
+    }
+    return 0
+}
+
+# If the current token starts with a letter, consume it and return its text.
+# Otherwise, leave the token alone and return 0.
+function identifier(    text) {
+    if (Token ~ /^[[:alpha:]]/) {
+        text = Token
+        nexttoken()
+        return text
+    }
+    return 0
+}
+
+# If the current token looks like a decimal integer (a zero, or an optional
+# minus sign followed by a non-zero digit), consume it and return its text.
+# Otherwise, leave the token alone and return 0.
+function number(    text) {
+    if (Token ~ /^(0|-?[1-9])/) {
+        text = Token
+        nexttoken()
+        return text
+    }
+    return 0
+}
+
+# Read a numeric value: either a literal number, or the name of
+# a previously defined constant, which gets replaced by its value.
+# Referring to an unknown constant is a fatal error, as is finding
+# neither an identifier nor a number.
+function readnumber(    name) {
+    name = identifier()
+    if (name) {
+        if (!(name in Consts))
+            fatal("unknown constant: " name)
+        return Consts[name]
+    }
+    return expect(number())
+}
+
+# Parse "const NAME = VALUE", if the current token is "const".
+# The constant is recorded in Consts, and passed on to the code generator.
+# Returns 1 when a definition was parsed, 0 when there is none here.
+# Redefining a constant is a fatal error.
+function defconst(    name, value) {
+    if (accept("const")) {
+        name = expect(identifier())
+        expect(accept("="))
+        value = readnumber()
+        if (name in Consts)
+            fatal("constant redefined: " name)
+
+        Consts[name] = value
+        codegen_constant(name, value)
+        return 1
+    }
+    return 0
+}
+
+# Read a type: either an inline enum/struct/union definition,
+# or the name of a type that is already present in Types.
+# Returns the type's name, or 0 when the current token cannot start a type.
+# Naming an unknown type is a fatal error.
+function readtype(    name) {
+    name = deftype()
+    if (!name) {
+        name = identifier()
+        if (name && !(name in Types))
+            fatal("unknown type: " name)
+    }
+    return name
+}
+
+# Parse the remainder of "enum NAME { IDENT [= NUMBER], ... }",
+# the "enum" keyword having been consumed already, and return NAME.
+#
+# Values continue from the preceding one, starting at 1, unless set
+# explicitly. They must be non-zero and fit in the range -128..127.
+# Each value must be followed by a comma, and may only appear once.
+#
+# EnumValues[NAME] collects the SUBSEP-prefixed list of value names,
+# EnumValues[NAME, IDENT] counts occurrences to detect duplicates.
+function defenum(    name, member, value, cg) {
+    # Make sure that cg is an array before handing it to the code generator.
+    delete cg[0]
+
+    name = expect(identifier())
+    expect(accept("{"))
+    while (!accept("}")) {
+        member = expect(identifier())
+        if (accept("="))
+            value = readnumber() + 0
+        else
+            value++
+
+        if (!value)
+            fatal("enumeration values cannot be zero")
+        if (value < -128 || value > 127)
+            fatal("enumeration value out of range")
+        expect(accept(","))
+
+        append(EnumValues, name, SUBSEP member)
+        if (EnumValues[name, member]++)
+            fatal("duplicate enum value: " member)
+        codegen_enum_value(name, member, value, cg)
+    }
+
+    Types[name] = "enum"
+    codegen_enum(name, cg)
+    return name
+}
+
+# Parse a single field declaration, terminated by a semicolon:
+# either "void", or "TYPE NAME" optionally followed by "<>".
+#
+# For non-void fields, out["type"], out["name"] and out["isarray"]
+# are filled in, and 1 is returned. For "void", out is left untouched,
+# and 0 is returned.
+function readfield(out, present) {
+    if (accept("void")) {
+        present = 0
+    } else {
+        present = 1
+        out["type"] = expect(readtype())
+        out["name"] = expect(identifier())
+        # TODO: Consider supporting XDR's VLA length limits here.
+        # TODO: Consider supporting XDR's fixed-length syntax for string limits.
+        out["isarray"] = accept("<") && expect(accept(">"))
+    }
+    expect(accept(";"))
+    return present
+}
+
+# Parse the remainder of "struct NAME { FIELD; ... }", the "struct" keyword
+# having been consumed already, and return NAME.
+# Every non-void field is passed on to the code generator as it is read.
+function defstruct(    name, field, cg) {
+    # Make sure that these are arrays before passing them on.
+    delete field[0]
+    delete cg[0]
+
+    name = expect(identifier())
+    expect(accept("{"))
+    while (!accept("}"))
+        if (readfield(field))
+            codegen_struct_field(field, cg)
+
+    Types[name] = "struct"
+    codegen_struct(name, cg)
+    return name
+}
+
+# Parse the remainder of a tagged union definition, the "union" keyword
+# having been consumed already, and return the union's name:
+#
+#   NAME switch (TAGTYPE TAGNAME) { case VALUE: FIELD; ... case VALUE: ... }
+#
+# TAGTYPE must be an enum type. Each case must name one of its values,
+# and no value may be used by more than one case. Fields may only follow
+# a case. Every case is handed to the code generator as a struct of its own,
+# with cg accumulating state for the union, and scg for the current case.
+function defunion( name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {
+ # Make sure that these are arrays before passing them on.
+ delete cg[0]
+ delete scg[0]
+ delete d[0]
+
+ name = expect(identifier())
+ expect(accept("switch"))
+ expect(accept("("))
+ tag["type"] = tagtype = expect(readtype())
+ tag["name"] = expect(identifier())
+ expect(accept(")"))
+
+ if (Types[tagtype] != "enum")
+ fatal("not an enum type: " tagtype)
+ codegen_union_tag(tag, cg)
+
+ # EnumValues[tagtype] is a list of SUBSEP-prefixed value names,
+ # so a[1] is an empty string, and the actual names start at a[2].
+ # unseen maps each of these strings to a count of one.
+ split(EnumValues[tagtype], a, SUBSEP)
+ for (i in a)
+ unseen[a[i]]++
+
+ expect(accept("{"))
+ while (!accept("}")) {
+ if (accept("case")) {
+ # A new case begins, finish the struct of the previous one, if any.
+ if (tagvalue)
+ codegen_union_struct(name, tagvalue, cg, scg)
+
+ tagvalue = expect(identifier())
+ expect(accept(":"))
+ # The count is zero both for unknown values, and for those
+ # that have already been claimed by a preceding case.
+ if (!unseen[tagvalue]--)
+ fatal("no such value or duplicate case: " tagtype "." tagvalue)
+ codegen_struct_tag(tag, scg)
+ } else if (tagvalue) {
+ if (readfield(d))
+ codegen_struct_field(d, scg)
+ } else {
+ fatal("union fields must fall under a case")
+ }
+ }
+ # Finish the struct of the last case, if there has been any.
+ if (tagvalue)
+ codegen_union_struct(name, tagvalue, cg, scg)
+
+ # Enum values that remain non-zero in unseen (those from a[2..]) were given
+ # no case above, and are simply not recognized/allowed.
+ Types[name] = "union"
+ codegen_union(name, cg)
+ return name
+}
+
+# Parse an enum, struct, or union definition, if the current token
+# is the respective keyword. Returns the name of the newly defined type,
+# or 0 when there is no type definition here.
+function deftype() {
+    return accept("enum") ? defenum() : \
+        (accept("struct") ? defstruct() : \
+        (accept("union") ? defunion() : 0))
+}
+
+# Main action: triggered by the first input record, it goes on to consume
+# all remaining input through nexttoken(), and so effectively runs once.
+{
+    # Derive snake_case identifier prefixes from the optional PrefixCamel.
+    # cameltosnake() returns lower case already.
+    if (PrefixCamel) {
+        PrefixLower = cameltosnake(PrefixCamel) "_"
+        PrefixUpper = toupper(PrefixLower)
+    }
+
+    # This is not in a BEGIN clause (even though it consumes all input),
+    # so that the code generator can insert the first FILENAME.
+    codegen_begin()
+
+    # The input is a sequence of constant and type definitions,
+    # each of them terminated by a semicolon.
+    nexttoken()
+    while (Token != "") {
+        expect(defconst() || deftype())
+        expect(accept(";"))
+    }
+}