From 1478a9f83f2ccfcc58bb0bf0ce050bf4b40d1fb8 Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch
Date: Mon, 13 Dec 2021 18:56:33 +0100
Subject: Add a tool to extract information from WebP
---
tools/.gitignore | 1 +
tools/Makefile | 2 +-
tools/info.h | 94 ++++++++++++++++++++++++++++++++++++++++
tools/jpeginfo.c | 94 ----------------------------------------
tools/pnginfo.c | 2 +
tools/webpinfo.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 227 insertions(+), 95 deletions(-)
create mode 100644 tools/webpinfo.c
diff --git a/tools/.gitignore b/tools/.gitignore
index b4b1b2f..3569402 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -1,3 +1,4 @@
/pnginfo
/jpeginfo
/tiffinfo
+/webpinfo
diff --git a/tools/Makefile b/tools/Makefile
index 9f34688..792d0b2 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -g -O2 -Wall -Wextra `pkg-config --cflags $(deps)`
LDLIBS = -ljq `pkg-config --libs $(deps)`
deps = libpng
-targets = pnginfo jpeginfo tiffinfo
+targets = pnginfo jpeginfo tiffinfo webpinfo
all: $(targets)
$(targets): info.h
diff --git a/tools/info.h b/tools/info.h
index f89b313..4acef3c 100644
--- a/tools/info.h
+++ b/tools/info.h
@@ -1275,3 +1275,97 @@ parse_psir(jv o, const uint8_t *p, size_t len)
}
return o;
}
+
+// --- ICC profiles ------------------------------------------------------------
+// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
+// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
+
+// Extract a profile name from an ICC v4 'mluc' (multiLocalizedUnicodeType)
+// tag and set it under the "ICC" key of o, together with "version": 4.
+//
+// tag points at the beginning of the tag data (its type signature),
+// tag_length is the number of bytes that may be read from there.
+// Malformed structures are reported through add_warning() instead.
+static jv
+parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
+{
+	// v4 10.13: signature (4), reserved (4), record count (4), record size (4)
+	if (tag_length < 16)
+		return add_warning(o, "invalid ICC 'mluc' structure length");
+
+	uint32_t count = u32be(tag + 8);
+	if (count == 0)
+		return add_warning(o, "unnamed ICC profile");
+
+	// A record is language (2), country (2), length (4), offset (4);
+	// the first one has to fit in the tag before we may look at it.
+	if (tag_length < 16 + 12)
+		return add_warning(o, "invalid ICC 'mluc' structure length");
+
+	// There is no particularly good reason for us to iterate, take the first.
+	const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
+	uint32_t len = u32be(&record[4]);
+	uint32_t off = u32be(&record[8]);
+
+	// Compare without adding: off + len may wrap around in 32 bits.
+	if (off > tag_length || len > tag_length - off)
+		return add_warning(o, "invalid ICC 'mluc' structure record");
+
+	// Blindly assume simple ASCII, taking the second byte of each 2-byte unit,
+	// and ensure NUL-termination.  The buffer is sized for the characters
+	// plus the terminator, so it is never empty, not even for len == 0.
+	char name[len / 2 + 1], *p = name;
+	for (uint32_t i = 0; i < len / 2; i++)
+		*p++ = tag[off + i * 2 + 1];
+	*p = 0;
+	return jv_set(o, jv_string("ICC"),
+		JV_OBJECT(jv_string("name"), jv_string(name),
+			jv_string("version"), jv_number(4)));
+}
+
+// Decode the profile description tag located at tag_offset within profile,
+// and set the "ICC" key of o to an object with the profile's "name"
+// and the "version" of the structure it was read from.
+//
+// Both the v2 'desc' (textDescriptionType) and the v4 'mluc' encodings
+// are recognized, the latter being delegated to parse_icc_mluc().
+// Malformed structures are reported through add_warning() instead.
+static jv
+parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
+	uint32_t tag_offset, uint32_t tag_length)
+{
+	// Compare without adding: tag_offset + tag_length may wrap around
+	// in 32 bits and slip an out-of-range tag past the check.
+	if (tag_offset > profile_len || tag_length > profile_len - tag_offset)
+		return add_warning(o, "unexpected end of ICC profile");
+	if (tag_length < 4)
+		return add_warning(o, "invalid ICC tag structure length");
+
+	// Only form the pointer once the offset is known to be in range.
+	const uint8_t *tag = profile + tag_offset;
+
+	// v2 6.5.17
+	uint32_t sig = u32be(tag);
+	if (sig == 0x6D6C7563 /* mluc */)
+		return parse_icc_mluc(o, tag, tag_length);
+	if (sig != 0x64657363 /* desc */)
+		return add_warning(o, "invalid ICC 'desc' structure signature");
+	if (tag_length < 12)
+		return add_warning(o, "invalid ICC 'desc' structure length");
+
+	// Signature (4), reserved (4), ASCII byte count (4), ASCII string.
+	// Again avoid 12 + count, which wraps around for huge counts.
+	uint32_t count = u32be(tag + 8);
+	if (count > tag_length - 12)
+		return add_warning(o, "invalid ICC 'desc' structure length");
+
+	// Double-ensure a trailing NUL byte.
+	char name[count + 1];
+	memcpy(name, tag + 12, count);
+	name[count] = 0;
+	return jv_set(o, jv_string("ICC"),
+		JV_OBJECT(jv_string("name"), jv_string(name),
+			jv_string("version"), jv_number(2)));
+}
+
+// Look through the tag table of an ICC profile for its description ('desc')
+// tag and let parse_icc_desc() record what it finds under the "ICC" key of o.
+//
+// profile_len must equal the size field stored at the start of the profile.
+// Malformed profiles are reported through add_warning().  Should there be
+// no description tag at all, "ICC" is merely set to true.
+static jv
+parse_icc(jv o, const uint8_t *profile, size_t profile_len)
+{
+	// v2 6, v4 7: a 128-byte header is followed by a 4-byte tag count
+	if (profile_len < 132)
+		return add_warning(o, "ICC profile too short");
+	if (u32be(profile) != profile_len)
+		return add_warning(o, "ICC profile size mismatch");
+
+	// TODO(p): May decode more of the header fields, and validate them.
+	// Need to check both v2 and v4, this is all fairly annoying.
+	//
+	// Tag table entries are 12 bytes each.  Divide rather than multiply:
+	// 132 + count * 12 may wrap around in 32 bits for bogus counts.
+	uint32_t count = u32be(profile + 128);
+	if (count > (profile_len - 132) / 12)
+		return add_warning(o, "unexpected end of ICC profile");
+
+	for (size_t i = 0; i < count; i++) {
+		const uint8_t *entry = profile + 132 + i * 12;
+		uint32_t sig = u32be(&entry[0]);
+		uint32_t off = u32be(&entry[4]);
+		uint32_t len = u32be(&entry[8]);
+
+		// v2 6.4.32, v4 9.2.41
+		if (sig == 0x64657363 /* desc */)
+			return parse_icc_desc(o, profile, profile_len, off, len);
+	}
+	// The description is required, so this should be unreachable.
+	return jv_set(o, jv_string("ICC"), jv_bool(true));
+}
diff --git a/tools/jpeginfo.c b/tools/jpeginfo.c
index d3d9b59..f1e614a 100644
--- a/tools/jpeginfo.c
+++ b/tools/jpeginfo.c
@@ -25,100 +25,6 @@
#include
#include
-// --- ICC profiles ------------------------------------------------------------
-// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
-// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
-
-static jv
-parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
-{
- // v4 10.13
- if (tag_length < 16)
- return add_warning(o, "invalid ICC 'mluc' structure length");
-
- uint32_t count = u32be(tag + 8);
- if (count == 0)
- return add_warning(o, "unnamed ICC profile");
-
- // There is no particularly good reason for us to iterate, take the first.
- const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
- uint32_t len = u32be(&record[4]);
- uint32_t off = u32be(&record[8]);
-
- if (off + len > tag_length)
- return add_warning(o, "invalid ICC 'mluc' structure record");
-
- // Blindly assume simple ASCII, ensure NUL-termination.
- char name[len], *p = name;
- for (uint32_t i = 0; i < len / 2; i++)
- *p++ = tag[off + i * 2 + 1];
- *p++ = 0;
- return jv_set(o, jv_string("ICC"),
- JV_OBJECT(jv_string("name"), jv_string(name),
- jv_string("version"), jv_number(4)));
-}
-
-static jv
-parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
- uint32_t tag_offset, uint32_t tag_length)
-{
- const uint8_t *tag = profile + tag_offset;
- if (tag_offset + tag_length > profile_len)
- return add_warning(o, "unexpected end of ICC profile");
- if (tag_length < 4)
- return add_warning(o, "invalid ICC tag structure length");
-
- // v2 6.5.17
- uint32_t sig = u32be(tag);
- if (sig == 0x6D6C7563 /* mluc */)
- return parse_icc_mluc(o, profile + tag_offset, tag_length);
- if (sig != 0x64657363 /* desc */)
- return add_warning(o, "invalid ICC 'desc' structure signature");
- if (tag_length < 12)
- return add_warning(o, "invalid ICC 'desc' structure length");
-
- uint32_t count = u32be(tag + 8);
- if (tag_length < 12 + count)
- return add_warning(o, "invalid ICC 'desc' structure length");
-
- // Double-ensure a trailing NUL byte.
- char name[count + 1];
- memcpy(name, tag + 12, count);
- name[count] = 0;
- return jv_set(o, jv_string("ICC"),
- JV_OBJECT(jv_string("name"), jv_string(name),
- jv_string("version"), jv_number(2)));
-}
-
-static jv
-parse_icc(jv o, const uint8_t *profile, size_t profile_len)
-{
- // v2 6, v4 7
- if (profile_len < 132)
- return add_warning(o, "ICC profile too short");
- if (u32be(profile) != profile_len)
- return add_warning(o, "ICC profile size mismatch");
-
- // TODO(p): May decode more of the header fields, and validate them.
- // Need to check both v2 and v4, this is all fairly annoying.
- uint32_t count = u32be(profile + 128);
- if (132 + count * 12 > profile_len)
- return add_warning(o, "unexpected end of ICC profile");
-
- for (uint32_t i = 0; i < count; i++) {
- const uint8_t *entry = profile + 132 + i * 12;
- uint32_t sig = u32be(&entry[0]);
- uint32_t off = u32be(&entry[4]);
- uint32_t len = u32be(&entry[8]);
-
- // v2 6.4.32, v4 9.2.41
- if (sig == 0x64657363 /* desc */)
- return parse_icc_desc(o, profile, profile_len, off, len);
- }
- // The description is required, so this should be unreachable.
- return jv_set(o, jv_string("ICC"), jv_bool(true));
-}
-
// --- Multi-Picture Format ----------------------------------------------------
enum {
diff --git a/tools/pnginfo.c b/tools/pnginfo.c
index 03ab1d1..2429076 100644
--- a/tools/pnginfo.c
+++ b/tools/pnginfo.c
@@ -348,6 +348,8 @@ error:
int
main(int argc, char *argv[])
{
+ (void) parse_icc;
+
// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
// Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo
for (int i = 1; i < argc; i++) {
diff --git a/tools/webpinfo.c b/tools/webpinfo.c
new file mode 100644
index 0000000..f13ef28
--- /dev/null
+++ b/tools/webpinfo.c
@@ -0,0 +1,129 @@
+//
+// webpinfo.c: acquire information about WebP files in JSON format
+//
+// Copyright (c) 2021, Přemysl Eric Janouch
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include "info.h"
+
+#include <jv.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// --- WebP --------------------------------------------------------------------
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt
+// https://datatracker.ietf.org/doc/html/rfc6386
+
+// Walk the RIFF chunks of a WebP file held in memory at p[0..len),
+// collect their FourCCs in an array set under the "chunks" key of o,
+// and pass EXIF and ICCP payloads to parse_exif() and parse_icc().
+//
+// A missing RIFF/WEBP signature or a file shorter than its header claims
+// is reported through add_error(), broken chunk framing through add_warning().
+static jv
+parse_webp(jv o, const uint8_t *p, size_t len)
+{
+	// libwebp won't let us simply iterate over all chunks, so handroll it.
+	if (len < 12 || memcmp(p, "RIFF", 4) || memcmp(p + 8, "WEBP", 4))
+		return add_error(o, "not a WEBP file");
+
+	// The RIFF size counts everything past the first 8 bytes.  When it claims
+	// more than we have been given, the file is cut short, and walking it
+	// would read past the buffer.  len >= 12 here, so len - 8 can't underflow,
+	// and not adding to size means nothing can wrap around either.
+	// TODO(p): This can still be parseable.
+	// TODO(p): Warn on trailing data.
+	uint32_t size = unle.u32(p + 4);
+	if (size > len - 8)
+		return add_error(o, "truncated file");
+
+	const uint8_t *end = p + 8 + size;
+	p += 12;
+
+	jv chunks = jv_array();
+	while (p < end) {
+		// Each chunk starts with a FourCC and a 32-bit payload size.
+		size_t remaining = (size_t) (end - p);
+		if (remaining < 8) {
+			o = add_warning(o, "framing mismatch");
+			break;
+		}
+
+		// Odd-sized payloads are followed by one byte of padding.
+		// Check against the space that is left rather than advancing
+		// the pointer first, so that neither the rounding nor the pointer
+		// arithmetic can overflow on hostile sizes.
+		uint32_t chunk_size = unle.u32(p + 4);
+		size_t room = remaining - 8, padding = chunk_size & 1;
+		if (chunk_size > room || padding > room - chunk_size) {
+			o = add_warning(o, "runaway chunk payload");
+			break;
+		}
+
+		char fourcc[5] = "";
+		memcpy(fourcc, p, 4);
+		chunks = jv_array_append(chunks, jv_string(fourcc));
+		p += 8;
+
+		// TODO(p): Decode VP8 and VP8L chunk metadata.
+		if (!strcmp(fourcc, "EXIF"))
+			o = parse_exif(o, p, chunk_size);
+		if (!strcmp(fourcc, "ICCP"))
+			o = parse_icc(o, p, chunk_size);
+		p += chunk_size + padding;
+	}
+	return jv_set(o, jv_string("chunks"), chunks);
+}
+
+// --- I/O ---------------------------------------------------------------------
+
+// Read the whole of filename into memory and run parse_webp() over it.
+//
+// Returns o with the parser's findings, or with an error added through
+// add_error() when the file can't be opened, read, or held in memory.
+static jv
+do_file(const char *filename, jv o)
+{
+	const char *err = NULL;
+	FILE *fp = fopen(filename, "rb");
+	if (!fp) {
+		err = strerror(errno);
+		goto error;
+	}
+
+	uint8_t *data = NULL, buf[256 << 10];
+	size_t n, len = 0;
+	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
+		// Keep the old pointer around: on failure realloc() leaves
+		// the original block allocated, and it still needs to be freed.
+		uint8_t *grown = realloc(data, len + n);
+		if (!grown) {
+			err = "out of memory";
+			goto error_read;
+		}
+
+		data = grown;
+		memcpy(data + len, buf, n);
+		len += n;
+	}
+	if (ferror(fp)) {
+		err = strerror(errno);
+		goto error_read;
+	}
+
+	// An empty file leaves data at NULL with a zero length.
+	o = parse_webp(o, data, len);
+error_read:
+	fclose(fp);
+	free(data);
+error:
+	if (err)
+		o = add_error(o, err);
+	return o;
+}
+
+// Describe each file named on the command line with one JSON object,
+// written on a line of its own to standard output.
+int
+main(int argc, char *argv[])
+{
+	// parse_psir() from info.h is not used by this tool; presumably it is
+	// referenced here to avoid an unused static function warning.
+	(void) parse_psir;
+
+	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
+	// Usage: find . -iname '*.webp' -print0 | xargs -0 ./webpinfo
+	for (int i = 1; i < argc; i++) {
+		const char *filename = argv[i];
+
+		jv o = jv_object();
+		o = jv_object_set(o, jv_string("filename"), jv_string(filename));
+		o = do_file(filename, o);
+		jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. */);
+		fputc('\n', stdout);
+	}
+	return 0;
+}
--
cgit v1.2.3-70-g09d2