From a9b34ca3f27ed0b3c10effd3fb884936d9b02485 Mon Sep 17 00:00:00 2001 From: Přemysl Eric Janouch Date: Mon, 5 Jun 2023 17:52:58 +0200 Subject: Unite most info tools into just one binary Turn this into more of an fq alternative, when used with jq. Also don't say that TIFF files are Exif. --- tools/info.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 tools/info.c (limited to 'tools/info.c') diff --git a/tools/info.c b/tools/info.c new file mode 100644 index 0000000..03da252 --- /dev/null +++ b/tools/info.c @@ -0,0 +1,237 @@ +// +// info.c: acquire information about JPEG/TIFF/BMFF/WebP files in JSON format +// +// Copyright (c) 2021 - 2023, Přemysl Eric Janouch +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include "info.h" + +#include + +#include +#include +#include +#include + +// --- ISO/IEC base media file format ------------------------------------------ +// ISO/IEC 14496-12:2015(E), used to be publicly available, now there's only: +// https://mpeg.chiariglione.org/standards/mpeg-4/iso-base-media-file-format/text-isoiec-14496-12-5th-edition +// but people have managed to archive the final version as well: +// https://b.goeswhere.com/ISO_IEC_14496-12_2015.pdf +// +// ISO/IEC 23008-12:2017 Information technology - +// High efficiency coding and media delivery in heterogeneous environments - +// Part 12: Image File Format + Cor 1:2020 Technical Corrigendum 1 +// https://standards.iso.org/ittf/PubliclyAvailableStandards/ + +static jv +parse_bmff_box(jv o, const char *type, const uint8_t *data, size_t len) +{ + // TODO(p): Parse out "uuid"'s uint8_t[16] initial field, present as hex. + // TODO(p): Parse out "ftyp" contents: 14496-12:2015 4.3 + // TODO(p): Parse out other important boxes: 14496-12:2015 8+ + return add_to_subarray(o, "boxes", jv_string(type)); +} + +static bool +detect_bmff(const uint8_t *p, size_t len) +{ + // 4.2 Object Structure--this box need not be present, nor at the beginning + // TODO(p): What does `aligned(8)` mean? It's probably in bits. + return len >= 8 && !memcmp(p + 4, "ftyp", 4); +} + +static jv +parse_bmff(jv o, const uint8_t *p, size_t len) +{ + if (!detect_bmff(p, len)) + return add_error(o, "not BMFF at all or unsupported"); + + const uint8_t *end = p + len; + while (p < end) { + if (end - p < 8) { + o = add_warning(o, "box framing mismatch"); + break; + } + + char type[5] = ""; + memcpy(type, p + 4, 4); + + uint64_t box_size = u32be(p); + const uint8_t *data = p + 8; + if (box_size == 1) { + if (end - p < 16) { + o = add_warning(o, "unexpected EOF"); + break; + } + box_size = u64be(data); + data += 8; + } else if (!box_size) + box_size = end - p; + + if (box_size > (uint64_t) (end - p)) { + o = add_warning(o, "unexpected EOF"); + break; + } + + size_t data_len = box_size - (data - p); + o = parse_bmff_box(o, type, data, data_len); + p += box_size; + } + return o; +} + +// --- WebP -------------------------------------------------------------------- +// libwebp won't let us simply iterate over all chunks, so handroll it. +// +// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt +// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt +// https://datatracker.ietf.org/doc/html/rfc6386 +// +// Pretty versions, hopefully not outdated: +// https://developers.google.com/speed/webp/docs/riff_container +// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification + +static bool +detect_webp(const uint8_t *p, size_t len) +{ + return len >= 12 && !memcmp(p, "RIFF", 4) && !memcmp(p + 8, "WEBP", 4); +} + +static jv +parse_webp(jv o, const uint8_t *p, size_t len) +{ + if (!detect_webp(p, len)) + return add_error(o, "not a WEBP file"); + + // TODO(p): This can still be parseable. + // TODO(p): Warn on trailing data. + uint32_t size = u32le(p + 4); + if (8 + size < len) + return add_error(o, "truncated file"); + + const uint8_t *end = p + 8 + size; + p += 12; + + jv chunks = jv_array(); + while (p < end) { + if (end - p < 8) { + o = add_warning(o, "framing mismatch"); + printf("%ld", end - p); + break; + } + + uint32_t chunk_size = u32le(p + 4); + uint32_t chunk_advance = (chunk_size + 1) & ~1; + if (p + 8 + chunk_advance > end) { + o = add_warning(o, "runaway chunk payload"); + break; + } + + char fourcc[5] = ""; + memcpy(fourcc, p, 4); + chunks = jv_array_append(chunks, jv_string(fourcc)); + p += 8; + + // TODO(p): Decode VP8 and VP8L chunk metadata. + if (!strcmp(fourcc, "EXIF")) + o = parse_exif(o, p, chunk_size); + if (!strcmp(fourcc, "ICCP")) + o = parse_icc(o, p, chunk_size); + p += chunk_advance; + } + return jv_set(o, jv_string("chunks"), chunks); +} + +// --- I/O --------------------------------------------------------------------- + +static struct { + const char *name; + bool (*detect) (const uint8_t *, size_t); + jv (*parse) (jv, const uint8_t *, size_t); +} formats[] = { + {"JPEG", detect_jpeg, parse_jpeg}, + {"TIFF", detect_tiff, parse_tiff}, + {"BMFF", detect_bmff, parse_bmff}, + {"WebP", detect_webp, parse_webp}, +}; + +static jv +parse_any(jv o, const uint8_t *p, size_t len) +{ + // TODO(p): Also see if the file extension is appropriate. + for (size_t i = 0; i < sizeof formats / sizeof *formats; i++) { + if (!formats[i].detect(p, len)) + continue; + if (getenv("INFO_IDENTIFY")) + o = jv_set(o, jv_string("format"), jv_string(formats[i].name)); + return formats[i].parse(o, p, len); + } + return add_error(o, "unsupported file format"); +} + +static jv +do_file(const char *filename, jv o) +{ + const char *err = NULL; + FILE *fp = fopen(filename, "rb"); + if (!fp) { + err = strerror(errno); + goto error; + } + + uint8_t *data = NULL, buf[256 << 10]; + size_t n, len = 0; + while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { + data = realloc(data, len + n); + memcpy(data + len, buf, n); + len += n; + } + if (ferror(fp)) { + err = strerror(errno); + goto error_read; + } + +#if 0 + // Not sure if I want to ensure their existence... + o = jv_object_set(o, jv_string("info"), jv_array()); + o = jv_object_set(o, jv_string("warnings"), jv_array()); +#endif + + o = parse_any(o, data, len); +error_read: + fclose(fp); + free(data); +error: + if (err) + o = add_error(o, err); + return o; +} + +int +main(int argc, char *argv[]) +{ + // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. + // Usage: find . -print0 | xargs -0 ./info + for (int i = 1; i < argc; i++) { + const char *filename = argv[i]; + + jv o = jv_object(); + o = jv_object_set(o, jv_string("filename"), jv_string(filename)); + o = do_file(filename, o); + jv_dumpf(o, stdout, 0 /* JV_PRINT_SORTED would discard information. */); + fputc('\n', stdout); + } + return 0; +} -- cgit v1.2.3-54-g00ecf