From 035bfe5e81b80ef9df03414c7c567093ce26629a Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch
Date: Fri, 30 Sep 2022 03:09:04 +0200 Subject: Document the recently added scripts --- CMakeLists.txt | 11 ++++++ README.adoc | 36 ++++++++++++++++++ libertyxdr.adoc | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/lxdrgen.awk | 18 --------- 4 files changed, 155 insertions(+), 18 deletions(-) create mode 100644 libertyxdr.adoc diff --git a/CMakeLists.txt b/CMakeLists.txt index eb1d2d9..fec6cc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,17 @@ foreach (name ${tests}) add_test (NAME test-${name} COMMAND test-${name}) endforeach () +# --- Tools -------------------------------------------------------------------- + +# Test the AsciiDoc manual page generator for a successful parse +set (ASCIIMAN ${PROJECT_SOURCE_DIR}/tools/asciiman.awk) +add_custom_command (OUTPUT libertyxdr.7 + COMMAND env LC_ALL=C awk -f ${ASCIIMAN} + "${PROJECT_SOURCE_DIR}/libertyxdr.adoc" > libertyxdr.7 + DEPENDS libertyxdr.adoc ${ASCIIMAN} + COMMENT "Generating man page for libertyxdr" VERBATIM) +add_custom_target (docs ALL DEPENDS libertyxdr.7) + # Test CMake script parsing add_test (test-cmake-parser env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk diff --git a/README.adoc b/README.adoc index 93ee3b7..5a3fd0b 100644 --- a/README.adoc +++ b/README.adoc @@ -17,6 +17,42 @@ All development is done on Linux, but other POSIX-compatible operating systems should be supported as well. They have an extremely low priority, however, and I'm not testing them at all, with the exception of OpenBSD. +Tools +----- +This project also hosts a number of supporting scripts written in portable AWK: + +asciiman.awk:: + A fallback manual page generator for AsciiDoc documents, + motivated by the hugeness of AsciiDoc's and Asciidoctor's dependency trees. + It uses the _man_ macro package. + +cmake-parser.awk:: + Parses the CMake language to the extent that is necessary to reliably + extract project versions. 
Its greatest limitation is its inability + to expand variables, which would require a full interpreter. + +cmake-dump.awk:: + This can be used in conjunction with the previous script to dump CMake + scripts in a normalized format for further processing. + +lxdrgen.awk:: + Protocol code generator for a variant of XDR, + which is link:libertyxdr.adoc[documented separately]. + Successfully employed in https://git.janouch.name/p/xK[xK]. + +lxdrgen-c.awk:: + LibertyXDR backend that builds on top of the C pseudolibrary. + +lxdrgen-go.awk:: + LibertyXDR backend for Go, supporting _encoding/json_ interfaces. It also + produces optimized JSON marshallers (however, note that the _json.Marshaler_ + interface is bound to be underperforming, due to the amount of otherwise + avoidable memory allocations it necessitates). + +lxdrgen-mjs.awk:: + LibertyXDR backend for JavaScript, currently for decoding only. + It cuts a corner by not using BigInts, on par with `JSON.parse()`. + Contributing and Support ------------------------ Use https://git.janouch.name/p/liberty to report any bugs, request features, diff --git a/libertyxdr.adoc b/libertyxdr.adoc new file mode 100644 index 0000000..12b7e07 --- /dev/null +++ b/libertyxdr.adoc @@ -0,0 +1,108 @@ +libertyxdr(7) +============= +:doctype: manpage + +Name +---- +LibertyXDR - an XDR-derived IDL and data serialization format + +Description +----------- +*LibertyXDR* is an interface description language, as well as a data +serialization format, that has been largely derived from XDR, though notably +simplified. + +Conventions +~~~~~~~~~~~ +User-defined types should be named in *CamelCase*, field names in *snake_case*, +and constants in *SCREAMING_SNAKE_CASE*. Code generators will convert these to +whatever is appropriate in their target language. + +Primitive data types +~~~~~~~~~~~~~~~~~~~~ +Like in XDR, all data is serialized in the network byte order, i.e., big-endian. 
+ + * *void*: 0 bytes ++ +This is a dummy type that cannot be assigned a field name. + + * *bool*: 1 byte ++ +This is a boolean value: 0 means _false_, any other value means _true_. + + * *u8*, *u16*, *u32*, *u64*: 1, 2, 4, and 8 bytes respectively ++ +These are unsigned integers. + + * *i8*, *i16*, *i32*, *i64*: 1, 2, 4, and 8 bytes respectively ++ +These are signed integers in two's complement. + + * *string*: implicitly prefixed by its length as a *u32*, + then immediately followed by its contents, with no trailing NUL byte ++ +This is a valid UTF-8 string without a byte order mark. Note that strings are +always unbounded, unlike in XDR. + +Constants +~~~~~~~~~ +At the top level of a document, outside other definitions, you can define +typeless integer constants: + + const VERSION = 1; + +The value can be either a name of another previously defined constant, +or an immediate decimal value, which may not contain leading zeros. + +Enumerations +~~~~~~~~~~~~ +An *enum* is an *i8* with uniquely named values, in their own namespace. + +Values can be either specified explicitly, in the same way as with a constant, +or they can be left implicit, in which case names assume a value that is one +larger than their predecessor. Zero is reserved for internal use, thus +enumerations implicitly begin with a value of one. For example, these form +a sequence from one to three: + + enum Vehicle { CAR, LORRY = 2, PLANE, }; + +Structures +~~~~~~~~~~ +A *struct* is a sequence of fields, specified by their type, and their chosen +name. You can add a *<>* suffix to change a field to an array, in which case +it is implicitly preceded by a *u32* specifying its length in terms of its +elements. + +Unlike in XDR, there is no padding between subsequent fields, and type +definitions can be arbitrarily syntactically nested, as in C. + + struct StockReport { + u8 version; // Version of this report. + struct Item { + Vehicle kind; // The vehicle in question. 
+ i32 count; // How many vehicles of that kind there are. + } items<>; // Reported items. + }; + +Unions +~~~~~~ +A *union* is a kind of structure whose fields depend on the value of its first +and always-present field, which must be a tag *enum*: + + union VehicleDetails switch (Vehicle kind) { + case CAR: void; + case LORRY: i8 axles; + case PLANE: i8 engines; + }; + +All possible enumeration values must be named, and there is no *case* +fall-through. + +Framing +------- +Unless this role is already filled by, e.g., WebSocket, _LibertyXDR_ structures +should be prefixed by their byte length in the *u32* format, once serialized. + +See also +-------- +_XDR: External Data Representation Standard_, RFC 4506 diff --git a/tools/lxdrgen.awk b/tools/lxdrgen.awk index 2b4adb6..effbc52 100644 --- a/tools/lxdrgen.awk +++ b/tools/lxdrgen.awk @@ -3,24 +3,6 @@ # Copyright (c) 2022, Přemysl Eric Janouch
# SPDX-License-Identifier: 0BSD # -# You may read RFC 4506 for context, however it is only a source of inspiration. -# Grammar is easy to deduce from the parser. -# -# Native types: bool, u{8,16,32,64}, i{8,16,32,64}, string -# -# Don't define any new types, unless you hate yourself, then it's okay to do so. -# Backends tend to be a pain in the arse, for different reasons. -# -# All numbers are encoded in big-endian byte order. -# Booleans are one byte each. -# Strings must be valid UTF-8, use u8<> to lift that restriction. -# String and array lengths are encoded as u32. -# Enumeration values automatically start at 1, and are encoded as i8. -# Any struct or union field may be a variable-length array. -# -# Message framing is done externally, but is advised to also prefix u32 lengths, -# unless this role is already filled by, e.g., WebSocket. -# # Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \ # -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...} -- cgit v1.2.3-70-g09d2