From 1478a9f83f2ccfcc58bb0bf0ce050bf4b40d1fb8 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch 
Date: Mon, 13 Dec 2021 18:56:33 +0100
Subject: Add a tool to extract information from WebP
---
 tools/.gitignore |   1 +
 tools/Makefile   |   2 +-
 tools/info.h     |  94 ++++++++++++++++++++++++++++++++++++++++
 tools/jpeginfo.c |  94 ----------------------------------------
 tools/pnginfo.c  |   2 +
 tools/webpinfo.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 227 insertions(+), 95 deletions(-)
 create mode 100644 tools/webpinfo.c
diff --git a/tools/.gitignore b/tools/.gitignore
index b4b1b2f..3569402 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -1,3 +1,4 @@
 /pnginfo
 /jpeginfo
 /tiffinfo
+/webpinfo
diff --git a/tools/Makefile b/tools/Makefile
index 9f34688..792d0b2 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -g -O2 -Wall -Wextra `pkg-config --cflags $(deps)`
 LDLIBS = -ljq `pkg-config --libs $(deps)`
 
 deps = libpng
-targets = pnginfo jpeginfo tiffinfo
+targets = pnginfo jpeginfo tiffinfo webpinfo
 
 all: $(targets)
 $(targets): info.h
diff --git a/tools/info.h b/tools/info.h
index f89b313..4acef3c 100644
--- a/tools/info.h
+++ b/tools/info.h
@@ -1275,3 +1275,97 @@ parse_psir(jv o, const uint8_t *p, size_t len)
 	}
 	return o;
 }
+
+// --- ICC profiles ------------------------------------------------------------
+// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
+// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
+
+static jv
+parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
+{
+	// Names the profile after the first 'mluc' record (v4 10.13):
+	// sets "ICC" in o to {name, version: 4}, or returns o with a warning added.
+	if (tag_length < 16)
+		return add_warning(o, "invalid ICC 'mluc' structure length");
+	uint32_t count = u32be(tag + 8);
+	if (count == 0)
+		return add_warning(o, "unnamed ICC profile");
+	// The first 12-byte record follows the 16-byte header, and must fit.
+	if (tag_length < 16 + 12)
+		return add_warning(o, "invalid ICC 'mluc' structure record");
+	// There is no particularly good reason for us to iterate, take the first.
+	const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
+	uint32_t len = u32be(&record[4]);
+	uint32_t off = u32be(&record[8]);
+	// Compare against the remainder, so that off + len cannot wrap around.
+	if (off > tag_length || len > tag_length - off)
+		return add_warning(o, "invalid ICC 'mluc' structure record");
+	// Blindly assume simple ASCII, ensure NUL-termination even when len < 2.
+	char name[len / 2 + 1], *p = name;
+	for (uint32_t i = 0; i < len / 2; i++)
+		*p++ = tag[off + i * 2 + 1];
+	*p = 0;
+	return jv_set(o, jv_string("ICC"), JV_OBJECT(jv_string("name"),
+		jv_string(name), jv_string("version"), jv_number(4)));
+}
+
+static jv
+parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
+	uint32_t tag_offset, uint32_t tag_length)
+{
+	// Names the profile after its description tag, located at tag_offset
+	// within profile: sets "ICC" in o, or returns o with a warning added.
+	// Compare against remainders, 32-bit sums of untrusted fields may wrap.
+	if (tag_offset > profile_len || tag_length > profile_len - tag_offset)
+		return add_warning(o, "unexpected end of ICC profile");
+	if (tag_length < 4)
+		return add_warning(o, "invalid ICC tag structure length");
+	// v2 6.5.17; an 'mluc' payload is delegated to parse_icc_mluc().
+	const uint8_t *tag = profile + tag_offset;
+	uint32_t sig = u32be(tag);
+	if (sig == 0x6D6C7563 /* mluc */)
+		return parse_icc_mluc(o, tag, tag_length);
+	if (sig != 0x64657363 /* desc */)
+		return add_warning(o, "invalid ICC 'desc' structure signature");
+	if (tag_length < 12)
+		return add_warning(o, "invalid ICC 'desc' structure length");
+	uint32_t count = u32be(tag + 8);
+	if (count > tag_length - 12)
+		return add_warning(o, "invalid ICC 'desc' structure length");
+
+	// Double-ensure a trailing NUL byte.
+	char name[count + 1];
+	memcpy(name, tag + 12, count);
+	name[count] = 0;
+	return jv_set(o, jv_string("ICC"), JV_OBJECT(jv_string("name"),
+		jv_string(name), jv_string("version"), jv_number(2)));
+}
+
+static jv
+parse_icc(jv o, const uint8_t *profile, size_t profile_len)
+{
+	// Looks up the 'desc' tag in the tag table of an ICC profile (v2 6, v4 7),
+	// which follows the 128-byte header, and hands it to parse_icc_desc().
+	if (profile_len < 132)
+		return add_warning(o, "ICC profile too short");
+	if (u32be(profile) != profile_len)
+		return add_warning(o, "ICC profile size mismatch");
+
+	// TODO(p): May decode more of the header fields, and validate them.
+	// Need to check both v2 and v4, this is all fairly annoying.
+	// Divide rather than multiply: count * 12 may wrap around in 32 bits.
+	uint32_t count = u32be(profile + 128);
+	if (count > (profile_len - 132) / 12)
+		return add_warning(o, "unexpected end of ICC profile");
+	for (uint32_t i = 0; i < count; i++) {
+		const uint8_t *entry = profile + 132 + (size_t) i * 12;
+		uint32_t sig = u32be(&entry[0]);
+		uint32_t off = u32be(&entry[4]);
+		uint32_t len = u32be(&entry[8]);
+		// v2 6.4.32, v4 9.2.41
+		if (sig == 0x64657363 /* desc */)
+			return parse_icc_desc(o, profile, profile_len, off, len);
+	}
+	// The description is required, so this should be unreachable.
+	return jv_set(o, jv_string("ICC"), jv_bool(true));
+}
diff --git a/tools/jpeginfo.c b/tools/jpeginfo.c
index d3d9b59..f1e614a 100644
--- a/tools/jpeginfo.c
+++ b/tools/jpeginfo.c
@@ -25,100 +25,6 @@
 #include 
 #include 
 
-// --- ICC profiles ------------------------------------------------------------
-// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
-// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
-
-static jv
-parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
-{
-	// v4 10.13
-	if (tag_length < 16)
-		return add_warning(o, "invalid ICC 'mluc' structure length");
-
-	uint32_t count = u32be(tag + 8);
-	if (count == 0)
-		return add_warning(o, "unnamed ICC profile");
-
-	// There is no particularly good reason for us to iterate, take the first.
-	const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
-	uint32_t len = u32be(&record[4]);
-	uint32_t off = u32be(&record[8]);
-
-	if (off + len > tag_length)
-		return add_warning(o, "invalid ICC 'mluc' structure record");
-
-	// Blindly assume simple ASCII, ensure NUL-termination.
-	char name[len], *p = name;
-	for (uint32_t i = 0; i < len / 2; i++)
-		*p++ = tag[off + i * 2 + 1];
-	*p++ = 0;
-	return jv_set(o, jv_string("ICC"),
-		JV_OBJECT(jv_string("name"), jv_string(name),
-			jv_string("version"), jv_number(4)));
-}
-
-static jv
-parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
-	uint32_t tag_offset, uint32_t tag_length)
-{
-	const uint8_t *tag = profile + tag_offset;
-	if (tag_offset + tag_length > profile_len)
-		return add_warning(o, "unexpected end of ICC profile");
-	if (tag_length < 4)
-		return add_warning(o, "invalid ICC tag structure length");
-
-	// v2 6.5.17
-	uint32_t sig = u32be(tag);
-	if (sig == 0x6D6C7563 /* mluc */)
-		return parse_icc_mluc(o, profile + tag_offset, tag_length);
-	if (sig != 0x64657363 /* desc */)
-		return add_warning(o, "invalid ICC 'desc' structure signature");
-	if (tag_length < 12)
-		return add_warning(o, "invalid ICC 'desc' structure length");
-
-	uint32_t count = u32be(tag + 8);
-	if (tag_length < 12 + count)
-		return add_warning(o, "invalid ICC 'desc' structure length");
-
-	// Double-ensure a trailing NUL byte.
-	char name[count + 1];
-	memcpy(name, tag + 12, count);
-	name[count] = 0;
-	return jv_set(o, jv_string("ICC"),
-		JV_OBJECT(jv_string("name"), jv_string(name),
-			jv_string("version"), jv_number(2)));
-}
-
-static jv
-parse_icc(jv o, const uint8_t *profile, size_t profile_len)
-{
-	// v2 6, v4 7
-	if (profile_len < 132)
-		return add_warning(o, "ICC profile too short");
-	if (u32be(profile) != profile_len)
-		return add_warning(o, "ICC profile size mismatch");
-
-	// TODO(p): May decode more of the header fields, and validate them.
-	// Need to check both v2 and v4, this is all fairly annoying.
-	uint32_t count = u32be(profile + 128);
-	if (132 + count * 12 > profile_len)
-		return add_warning(o, "unexpected end of ICC profile");
-
-	for (uint32_t i = 0; i < count; i++) {
-		const uint8_t *entry = profile + 132 + i * 12;
-		uint32_t sig = u32be(&entry[0]);
-		uint32_t off = u32be(&entry[4]);
-		uint32_t len = u32be(&entry[8]);
-
-		// v2 6.4.32, v4 9.2.41
-		if (sig == 0x64657363 /* desc */)
-			return parse_icc_desc(o, profile, profile_len, off, len);
-	}
-	// The description is required, so this should be unreachable.
-	return jv_set(o, jv_string("ICC"), jv_bool(true));
-}
-
 // --- Multi-Picture Format ----------------------------------------------------
 
 enum {
diff --git a/tools/pnginfo.c b/tools/pnginfo.c
index 03ab1d1..2429076 100644
--- a/tools/pnginfo.c
+++ b/tools/pnginfo.c
@@ -348,6 +348,8 @@ error:
 int
 main(int argc, char *argv[])
 {
+	(void) parse_icc;
+
 	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
 	// Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo
 	for (int i = 1; i < argc; i++) {
diff --git a/tools/webpinfo.c b/tools/webpinfo.c
new file mode 100644
index 0000000..f13ef28
--- /dev/null
+++ b/tools/webpinfo.c
@@ -0,0 +1,129 @@
+//
+// webpinfo.c: acquire information about WebP files in JSON format
+//
+// Copyright (c) 2021, Přemysl Eric Janouch 
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include "info.h"
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+// --- WebP --------------------------------------------------------------------
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt
+// https://datatracker.ietf.org/doc/html/rfc6386
+
+static jv
+parse_webp(jv o, const uint8_t *p, size_t len)
+{
+	// Lists the FourCCs of all RIFF chunks in the len bytes at p under "chunks"
+	// in o, handing EXIF and ICCP payloads over to parse_exif() and parse_icc().
+	// libwebp won't let us simply iterate over all chunks, so handroll it.
+	if (len < 12 || memcmp(p, "RIFF", 4) || memcmp(p + 8, "WEBP", 4))
+		return add_error(o, "not a WEBP file");
+	// TODO(p): This can still be parseable.
+	// TODO(p): Warn on trailing data.
+	// The size excludes the 8-byte header, and must fit in what we have read.
+	uint32_t size = unle.u32(p + 4);
+	if (size > len - 8)
+		return add_error(o, "truncated file");
+
+	const uint8_t *end = p + 8 + size;
+	p += 12;
+	jv chunks = jv_array();
+	while (p < end) {
+		if (end - p < 8) {
+			o = add_warning(o, "framing mismatch");
+			break;
+		}
+		// Odd payloads get a padding byte; compare sizes, pointers may wrap.
+		size_t remaining = end - p - 8;
+		uint32_t chunk_size = unle.u32(p + 4);
+		size_t chunk_advance = (size_t) chunk_size + (chunk_size & 1);
+		if (chunk_size > remaining || chunk_advance > remaining) {
+			o = add_warning(o, "runaway chunk payload");
+			break;
+		}
+
+		char fourcc[5] = "";
+		memcpy(fourcc, p, 4);
+		chunks = jv_array_append(chunks, jv_string(fourcc));
+		p += 8;
+		// TODO(p): Decode VP8 and VP8L chunk metadata.
+		if (!strcmp(fourcc, "EXIF"))
+			o = parse_exif(o, p, chunk_size);
+		if (!strcmp(fourcc, "ICCP"))
+			o = parse_icc(o, p, chunk_size);
+		p += chunk_advance;
+	}
+	return jv_set(o, jv_string("chunks"), chunks);
+}
+
+// --- I/O ---------------------------------------------------------------------
+
+static jv
+do_file(const char *filename, jv o)
+{
+	// Loads all of filename into memory and feeds it to parse_webp(); any
+	// I/O or allocation failure is reported through add_error() instead.
+	FILE *fp = fopen(filename, "rb");
+	if (!fp)
+		return add_error(o, strerror(errno));
+
+	const char *err = NULL;
+	uint8_t *data = NULL, buf[256 << 10];
+	size_t n, len = 0;
+	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
+		// Go through a temporary, so that data can be freed on failure.
+		uint8_t *grown = realloc(data, len + n);
+		if (!grown) {
+			err = strerror(ENOMEM);
+			break;
+		}
+		data = grown;
+		memcpy(data + len, buf, n);
+		len += n;
+	}
+	if (!err && ferror(fp))
+		err = strerror(errno);
+
+	o = err ? add_error(o, err) : parse_webp(o, data, len);
+	fclose(fp);
+	free(data);
+	return o;
+}
+
+int
+main(int argc, char *argv[])
+{
+	// parse_psir() from info.h is not needed here; referencing it presumably
+	// keeps the compiler from warning about an unused function.
+	(void) parse_psir;
+	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
+	// Usage: find . -iname '*.webp' -print0 | xargs -0 ./webpinfo
+	for (char **arg = argv + 1; arg < argv + argc; arg++) {
+		// Each file yields one JSON object on stdout, followed by a newline.
+		jv info = jv_object_set(
+			jv_object(), jv_string("filename"), jv_string(*arg));
+		info = do_file(*arg, info);
+		jv_dumpf(info, stdout, 0 /* Might consider JV_PRINT_SORTED. */);
+		fputc('\n', stdout);
+	}
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2