diff options
| author | Přemysl Eric Janouch <p@janouch.name> | 2023-07-07 12:25:14 +0200 | 
|---|---|---|
| committer | Přemysl Eric Janouch <p@janouch.name> | 2023-07-07 16:43:52 +0200 | 
| commit | 2edc9c6fd10e34ca1da0d25d3ceb9b67a6b9c73c (patch) | |
| tree | 83e3f3c9cd4b4200946ec2428672cecfd6d3d311 /tools | |
| parent | f78f8a70f1753d687cae4a23659995b65e91fb3e (diff) | |
| download | liberty-2edc9c6fd10e34ca1da0d25d3ceb9b67a6b9c73c.tar.gz liberty-2edc9c6fd10e34ca1da0d25d3ceb9b67a6b9c73c.tar.xz liberty-2edc9c6fd10e34ca1da0d25d3ceb9b67a6b9c73c.zip  | |
Add a C++ backend for LibertyXDR
Also change the C backend so that it also de/serializes
unions without any other fields besides the tag.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/lxdrgen-c.awk | 6 | ||||
| -rw-r--r-- | tools/lxdrgen-cpp-posix.cpp | 67 | ||||
| -rw-r--r-- | tools/lxdrgen-cpp-win32.cpp | 47 | ||||
| -rw-r--r-- | tools/lxdrgen-cpp.awk | 350 | ||||
| -rw-r--r-- | tools/lxdrgen-go.awk | 2 | ||||
| -rw-r--r-- | tools/lxdrgen-mjs.awk | 2 | ||||
| -rw-r--r-- | tools/lxdrgen-swift.awk | 2 | ||||
| -rw-r--r-- | tools/lxdrgen.awk | 2 | 
8 files changed, 471 insertions, 7 deletions
diff --git a/tools/lxdrgen-c.awk b/tools/lxdrgen-c.awk index e898aae..0ee660c 100644 --- a/tools/lxdrgen-c.awk +++ b/tools/lxdrgen-c.awk @@ -222,7 +222,7 @@ function codegen_struct(name, cg,    ctype, funcname) {  		delete cg[i]  } -function codegen_union_tag(d, cg) { +function codegen_union_tag(name, d, cg) {  	cg["tagtype"] = d["type"]  	cg["tagname"] = d["name"]  	append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n") @@ -281,7 +281,7 @@ function codegen_union(name, cg, exhaustive,    f, ctype, funcname) {  		CodegenDispose[name] = "\t" funcname "(&%s);\n"  	} -	if (cg["serialize"]) { +	{  		funcname = PrefixLower cameltosnake(name) "_serialize"  		print ""  		print "static bool\n" \ @@ -299,7 +299,7 @@ function codegen_union(name, cg, exhaustive,    f, ctype, funcname) {  		CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \  			"\t\treturn false;\n"  	} -	if (cg["deserialize"]) { +	{  		funcname = PrefixLower cameltosnake(name) "_deserialize"  		print ""  		print "static bool\n" \ diff --git a/tools/lxdrgen-cpp-posix.cpp b/tools/lxdrgen-cpp-posix.cpp new file mode 100644 index 0000000..f3b3d2b --- /dev/null +++ b/tools/lxdrgen-cpp-posix.cpp @@ -0,0 +1,67 @@ +// lxdrgen-cpp-posix.cpp: POSIX support code for lxdrgen-cpp.awk. +// +// Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name> +// SPDX-License-Identifier: 0BSD +#include <iconv.h> + +#include <cstdint> +#include <string> + +// Various BSD derivatives may have a problem here. +// Linux defines __STDC_ISO_10646__, but also supports "WCHAR_T". 
+#ifdef APPLE +#define ICONV_WCHAR "UTF-32" +#else +#define ICONV_WCHAR "WCHAR_T" +#endif + +namespace LibertyXDR { + +bool utf8_to_wstring(const uint8_t *utf8, size_t length, std::wstring &wide) { +	iconv_t conv = iconv_open(ICONV_WCHAR, "UTF-8"); +	if (conv == (iconv_t) -1) +		return false; + +	wchar_t buffer[1024] = {}; +	char *start = (char *) buffer, *out = start, *in = (char *) utf8; +	size_t available = sizeof buffer; +	wide.clear(); +	while (iconv(conv, &in, &length, &out, &available) == (size_t) -1) { +		if (errno != E2BIG) { +			iconv_close(conv); +			return false; +		} + +		wide.append(buffer, (out - start) / sizeof *buffer); +		out = start; +		available = sizeof buffer; +	} +	wide.append(buffer, (out - start) / sizeof *buffer); +	iconv_close(conv); +	return true; +} + +bool wstring_to_utf8(const std::wstring &wide, std::string &utf8) { +	iconv_t conv = iconv_open("UTF-8", ICONV_WCHAR); +	if (conv == (iconv_t) -1) +		return false; + +	char buffer[1024] = {}, *out = buffer, *in = (char *) wide.data(); +	size_t available = sizeof buffer, length = wide.size() * sizeof wide[0]; +	utf8.clear(); +	while (iconv(conv, &in, &length, &out, &available) == (size_t) -1) { +		if (errno != E2BIG) { +			iconv_close(conv); +			return false; +		} + +		utf8.append(buffer, out - buffer); +		out = buffer; +		available = sizeof buffer; +	} +	utf8.append(buffer, out - buffer); +	iconv_close(conv); +	return true; +} + +} // namespace LibertyXDR diff --git a/tools/lxdrgen-cpp-win32.cpp b/tools/lxdrgen-cpp-win32.cpp new file mode 100644 index 0000000..778a988 --- /dev/null +++ b/tools/lxdrgen-cpp-win32.cpp @@ -0,0 +1,47 @@ +// lxdrgen-cpp-win32.cpp: Win32 support code for lxdrgen-cpp.awk. 
+// +// Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name> +// SPDX-License-Identifier: 0BSD +#include <windows.h> + +#include <climits> +#include <cstdint> +#include <string> + +namespace LibertyXDR { + +bool utf8_to_wstring(const uint8_t *utf8, size_t length, std::wstring &wide) { +	wide.clear(); +	if (!length) +		return true; +	if (length > INT_MAX) +		return false; + +	int size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, +		(LPCCH) utf8, length, nullptr, 0); +	if (size <= 0) +		return false; + +	wide.resize(size); +	return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, +		(LPCCH) utf8, length, wide.data(), size) != 0; // 0 signals failure +} + +bool wstring_to_utf8(const std::wstring &wide, std::string &utf8) { +	utf8.clear(); +	if (wide.empty()) +		return true; +	if (wide.size() > INT_MAX) +		return false; + +	int size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, +		(LPCWCH) wide.data(), wide.size(), nullptr, 0, NULL, NULL); +	if (size <= 0) +		return false; + +	utf8.resize(size); +	return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, +		(LPCWCH) wide.data(), wide.size(), utf8.data(), size, NULL, NULL) != 0; // 0 signals failure +} + +} // namespace LibertyXDR diff --git a/tools/lxdrgen-cpp.awk b/tools/lxdrgen-cpp.awk new file mode 100644 index 0000000..825bba7 --- /dev/null +++ b/tools/lxdrgen-cpp.awk @@ -0,0 +1,350 @@ +# lxdrgen-cpp.awk: C++ backend for lxdrgen.awk. +# +# This backend is intended for Windows, it just happens to have a fallback +# that will probably work on Unices, of which we make use in tests. 
+# +# Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name> +# SPDX-License-Identifier: 0BSD + +function define_internal(name, ctype) { +	Types[name] = "internal" +	CodegenCType[name] = ctype +	CodegenSerialize[name] = \ +		"\tw.append(%s);\n" +	CodegenDeserialize[name] = \ +		"\tif (!r.read(%s))\n" \ +		"\t\treturn false;\n" +} + +function define_int(shortname, ctype) { +	define_internal(shortname, ctype) +} + +function define_sint(size) { define_int("i" size, "int" size "_t") } +function define_uint(size) { define_int("u" size, "uint" size "_t") } + +function codegen_begin() { +	define_sint("8") +	define_sint("16") +	define_sint("32") +	define_sint("64") +	define_uint("8") +	define_uint("16") +	define_uint("32") +	define_uint("64") + +	define_internal("string", "std::wstring") +	define_internal("bool", "bool") + +	CodegenSerialize["string"] = \ +		"\tif (!w.append(%s))\n" \ +		"\t\treturn false;\n" + +	print "// Code generated from " FILENAME ". DO NOT EDIT." +	print "" +	print "#include <cstdint>" +	print "#include <memory>" +	print "#include <string>" +	print "#include <vector>" +	print "" +	print "namespace LibertyXDR {" +	print "" +	print "bool utf8_to_wstring(" +	print "\tconst uint8_t *utf8, size_t length, std::wstring &wide);" +	print "bool wstring_to_utf8(" +	print "\tconst std::wstring &wide, std::string &utf8);" +	print "" +	print "struct Reader {" +	print "\tconst uint8_t *data = {};" +	print "\tsize_t length = {};" +	print "" +	print "\ttemplate<typename T> bool read(T &number) {" +	print "\t\tif (length < sizeof number)" +	print "\t\t\treturn false;" +	print "" +	print "\t\tnumber = 0;" +	print "\t\tfor (size_t i = 0; i < sizeof number; i++) {" +	print "\t\t\tnumber = number << 8 | *data++;" +	print "\t\t\tlength--;" +	print "\t\t}" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool read(bool &boolean) {" +	print "\t\tuint8_t number = 0;" +	print "\t\tif (!read(number))" +	print "\t\t\treturn false;" +	print "" +	print 
"\t\tboolean = number != 0;" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool read(std::wstring &string) {" +	print "\t\tuint32_t size = 0;" +	print "\t\tif (!read(size) || size > length)" +	print "\t\t\treturn false;" +	print "\t\tif (!utf8_to_wstring(data, size, string))" +	print "\t\t\treturn false;" +	print "" +	print "\t\tdata += size;" +	print "\t\tlength -= size;" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool read(std::vector<uint8_t> &vector) {" +	print "\t\tuint32_t size = 0;" +	print "\t\tif (!read(size) || size > length)" +	print "\t\t\treturn false;" +	print "\t\tvector.assign(data, data + size);" +	print "" +	print "\t\tdata += size;" +	print "\t\tlength -= size;" +	print "\t\treturn true;" +	print "\t}" +	print "};" +	print "" +	print "struct Writer {" +	print "\tstd::vector<uint8_t> data;" +	print "" +	print "\ttemplate<typename T> bool append(T number) {" +	print "\t\tuint8_t buffer[sizeof number], *p = buffer + sizeof buffer;" +	print "\t\twhile (p != buffer) {" +	print "\t\t\t*--p = number;" +	print "\t\t\tnumber >>= 8;" +	print "\t\t}" +	print "\t\tdata.insert(data.end(), buffer, buffer + sizeof buffer);" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool append(int8_t number) {" +	print "\t\tdata.push_back(number);" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool append(uint8_t number) {" +	print "\t\tdata.push_back(number);" +	print "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool append(bool boolean) {" +	print "\t\treturn append(uint8_t(boolean));" +	print "\t}" +	print "" +	print "\tbool append(const std::wstring &string) {" +	print "\t\tif (string.size() > UINT32_MAX)" +	print "\t\t\treturn false;" +	print "" +	print "\t\tstd::string utf8;" +	print "\t\tif (!wstring_to_utf8(string, utf8))" +	print "\t\t\treturn false;" +	print "" +	print "\t\tappend<uint32_t>(utf8.size());" +	print "\t\tdata.insert(data.end(), utf8.begin(), utf8.end());" +	print 
"\t\treturn true;" +	print "\t}" +	print "};" +	print "" +	print "} // namespace LibertyXDR" +	print "namespace " PrefixCamel " {" +} + +END { +	print "" +	print "} // namespace " PrefixCamel +} + +function codegen_constant(name, value) { +	print "" +	print "enum { " name " = " value " };" +} + +function codegen_enum_value(name, subname, value, cg) { +	append(cg, "fields", "\t" subname " = " value ",\n") +} + +function codegen_enum(name, cg) { +	print "" +	print "enum struct " name " : int8_t {" +	print cg["fields"] "};" + +	# XXX: This should also check if it isn't out-of-range for any reason, +	# but our usage of sprintf() stands in the way a bit. +	CodegenSerialize[name] = \ +		"\tw.append(static_cast<int8_t>(%s));\n" +	CodegenDeserialize[name] = \ +		"\t{\n" \ +		"\t\tint8_t v = 0;\n" \ +		"\t\tif (!r.read(v) || !v)\n" \ +		"\t\t\treturn false;\n" \ +		"\t\t%s = static_cast<" name ">(v);\n" \ +		"\t}\n" + +	CodegenCType[name] = name +	for (i in cg) +		delete cg[i] +} + +# Some identifiers do not pose a problem in C, but do in our C++. +function codegen_struct_sanitize(name) { +	if (name ~ /^(serialize|deserialize)_*$/ || +		name ~ /^(catch|class|delete|except|finally|friend|new|operator)_*$/ || +		name ~ /^(private|protected|public|template|this|throw|try|virtual)_*$/) +		return name "_" +	return name +} + +function codegen_struct_tag(d, cg,    name, f) { +	name = codegen_struct_sanitize(d["name"]) +	f = "this->" name + +	append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f)) +	# Do not deserialize here, that would be out of order. 
+} + +function codegen_struct_field(d, cg,    name, f, serialize, deserialize) { +	name = codegen_struct_sanitize(d["name"]) +	f = "this->" name + +	serialize = CodegenSerialize[d["type"]] +	deserialize = CodegenDeserialize[d["type"]] +	if (!d["isarray"]) { +		append(cg, "fields", +			"\t" CodegenCType[d["type"]] " " name " = {};\n") +		append(cg, "serialize", sprintf(serialize, f)) +		append(cg, "deserialize", sprintf(deserialize, f)) +		return +	} + +	append(cg, "fields", +		"\tstd::vector<" CodegenCType[d["type"]] "> " name ";\n") + +	# XXX: We should probably pedantically check for overflows. +	append(cg, "serialize", +		sprintf(CodegenSerialize["u32"], "uint32_t(" f ".size())") \ +		"\tfor (const auto &it : " f ")\n" \ +		indent(sprintf(serialize, "it"))) + +	if (d["type"] == "u8") { +		append(cg, "deserialize", +			"\tif (!r.read(" f "))\n" \ +			"\t\treturn false;\n") +	} else if (deserialize) { +		append(cg, "deserialize", +			"\t{\n" \ +			"\t\tuint32_t size = 0;\n" \ +			indent(sprintf(CodegenDeserialize["u32"], "size")) \ +			"\t\t" f ".resize(size);\n" \ +			"\t}\n" \ +			"\tfor (auto &it : " f ")\n" \ +			indent(sprintf(deserialize, "it"))) +	} +} + +function codegen_struct(name, cg) { +	print "" +	print "struct " name " {" +	print cg["fields"] +	print "\tbool serialize(LibertyXDR::Writer &w) const {" +	print indent(cg["serialize"]) "\t\treturn true;" +	print "\t}" +	print "" +	print "\tbool deserialize([[maybe_unused]] LibertyXDR::Reader &r) {" +	print indent(cg["deserialize"]) "\t\treturn true;" +	print "\t}" +	print "};" + +	CodegenSerialize[name] = "\tif (!%s->serialize(w))\n" \ +		"\t\treturn false;\n" +	CodegenDeserialize[name] = "\tif (!%s->deserialize(r))\n" \ +		"\t\treturn false;\n" + +	CodegenCType[name] = name +	for (i in cg) +		delete cg[i] +} + +function codegen_union_tag(name, d, cg,    tagname) { +	cg["tagtype"] = d["type"] +	cg["tagname"] = tagname = codegen_struct_sanitize(d["name"]) + +	print "" +	print "struct " name " {" +	print 
"\t" CodegenCType[d["type"]] " " tagname " = {};" +	print "\tvirtual ~" name "() = 0;" +	print "\tvirtual bool serialize(LibertyXDR::Writer &w) const = 0;" +	print "\tvirtual bool deserialize(LibertyXDR::Reader &r) = 0;" +	print "};" +	print "" +	print name "::~" name "() {}" +} + +function codegen_union_struct(name, casename, cg, scg,     structname) { +	# And thus not all generated structs are present in Types. +	structname = name "_" snaketocamel(casename) + +	print "" +	print "struct " structname " : virtual public " name " {" +	print scg["fields"] +	print "\t" structname "() {" +	print "\t\tthis->" cg["tagname"] " = " \ +		CodegenCType[cg["tagtype"]] "::" casename ";" +	print "\t}" +	print "" +	print "\tvirtual bool serialize(LibertyXDR::Writer &w) const {" +	print indent(scg["serialize"]) "\t\treturn true;" +	print "\t}" +	print "" +	print "\tvirtual bool deserialize([[maybe_unused]] LibertyXDR::Reader &r) {" +	print indent(scg["deserialize"]) "\t\treturn true;" +	print "\t}" +	print "};" + +	append(cg, "deserialize", +		"\tcase " CodegenCType[cg["tagtype"]] "::" casename ":\n" \ +		"\t\treturn new " structname "();\n") + +	CodegenSerialize[structname] = "\tif (!%s->serialize(w))\n" \ +		"\t\treturn false;\n" +	CodegenDeserialize[structname] = "\tif (!%s->deserialize(r))\n" \ +		"\t\treturn false;\n" + +	CodegenCType[structname] = structname +	for (i in scg) +		delete scg[i] +} + +function codegen_union(name, cg, exhaustive,    ctype) { +	CodegenSerialize[name] = "\tif (!%s->serialize(w))\n" \ +		"\t\treturn false;\n" + +	ctype = "std::unique_ptr<" name ">" +	if (cg["deserialize"]) { +		print "" +		print "static " name " *read" name "(" \ +			CodegenCType[cg["tagtype"]] " " cg["tagname"] ") {" +		print "\tswitch (" cg["tagname"] ") {" +		print cg["deserialize"] "\tdefault:" +		print "\t\treturn nullptr;" +		print "\t}" +		print "}" +		print "" +		print "static " ctype " read" name "(LibertyXDR::Reader &r) {" +		print "\tint8_t v = 0;" +		print "\tif 
(!r.read(v) || !v)" +		print "\t\treturn nullptr;" +		print "" +		print "\t" ctype " result(read" name "(static_cast<" \ +			CodegenCType[cg["tagtype"]] ">(v)));" +		print "\tif (!result || !result->deserialize(r))" +		print "\t\treturn nullptr;" +		print "\treturn result;" +		print "}" + +		CodegenDeserialize[name] = "\tif (!(%s = read" name "(r)))\n" \ +			"\t\treturn false;\n" +	} + +	CodegenCType[name] = ctype +	for (i in cg) +		delete cg[i] +} diff --git a/tools/lxdrgen-go.awk b/tools/lxdrgen-go.awk index f129bc1..fb27c85 100644 --- a/tools/lxdrgen-go.awk +++ b/tools/lxdrgen-go.awk @@ -439,7 +439,7 @@ function codegen_struct(name, cg,    gotype) {  		delete cg[i]  } -function codegen_union_tag(d, cg) { +function codegen_union_tag(name, d, cg) {  	cg["tagtype"] = d["type"]  	cg["tagname"] = snaketocamel(d["name"])  	# The tag is implied from the type of struct stored in the interface. diff --git a/tools/lxdrgen-mjs.awk b/tools/lxdrgen-mjs.awk index 4eaac63..49c09ff 100644 --- a/tools/lxdrgen-mjs.awk +++ b/tools/lxdrgen-mjs.awk @@ -183,7 +183,7 @@ function codegen_struct(name, cg) {  		delete cg[i]  } -function codegen_union_tag(d, cg) { +function codegen_union_tag(name, d, cg) {  	cg["tagtype"] = d["type"]  	cg["tagname"] = decapitalize(snaketocamel(d["name"]))  } diff --git a/tools/lxdrgen-swift.awk b/tools/lxdrgen-swift.awk index 9ee30de..22de56f 100644 --- a/tools/lxdrgen-swift.awk +++ b/tools/lxdrgen-swift.awk @@ -215,7 +215,7 @@ function codegen_struct(name, cg,    swifttype) {  		delete cg[i]  } -function codegen_union_tag(d, cg) { +function codegen_union_tag(name, d, cg) {  	cg["tagtype"] = d["type"]  	cg["tagname"] = decapitalize(snaketocamel(d["name"]))  } diff --git a/tools/lxdrgen.awk b/tools/lxdrgen.awk index e5259b7..5a51d2d 100644 --- a/tools/lxdrgen.awk +++ b/tools/lxdrgen.awk @@ -232,7 +232,7 @@ function defunion(    name, tag, tagtype, tagvalue, cg, scg, d, a, i,  	if (Types[tagtype] != "enum")  		fatal("not an enum type: " tagtype) -	
codegen_union_tag(tag, cg) +	codegen_union_tag(name, tag, cg)  	split(EnumValues[tagtype], a, SUBSEP)  	for (i in a)  | 
