diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/benchmark-io.c | 76 | ||||
-rw-r--r-- | tools/bmffinfo.c | 142 | ||||
-rw-r--r-- | tools/hotpixels.c | 210 | ||||
-rw-r--r-- | tools/info.c | 286 | ||||
-rw-r--r-- | tools/info.h | 1021 | ||||
-rw-r--r-- | tools/jpeginfo.c | 610 | ||||
-rw-r--r-- | tools/rawinfo.c | 175 | ||||
-rw-r--r-- | tools/tiffinfo.c | 79 | ||||
-rw-r--r-- | tools/webpinfo.c | 133 |
9 files changed, 1415 insertions, 1317 deletions
diff --git a/tools/benchmark-io.c b/tools/benchmark-io.c
new file mode 100644
index 0000000..3dadaae
--- /dev/null
+++ b/tools/benchmark-io.c
@@ -0,0 +1,76 @@
+//
+// benchmark-io.c: measure and compare image loading times
+//
+// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#include <gdk/gdk.h>
+#include <time.h>
+
+#include "fiv-io.h"
+
+// Returns a CLOCK_MONOTONIC reading in seconds, for measuring intervals.
+static double
+timestamp(void)
+{
+	struct timespec ts;
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec + ts.tv_nsec / 1.e9;
+}
+
+// Loads the file through fiv_io_open() and then through GdkPixbuf,
+// and prints one tab-separated line: our time and GdkPixbuf's time in seconds,
+// our time as a percentage of GdkPixbuf's, and the filename.
+// Both measurements include releasing the loaded image again.
+// Nothing is printed for files that fiv_io_open() cannot load.
+static void
+one_file(const char *filename)
+{
+	GFile *file = g_file_new_for_commandline_arg(filename);
+	double since_us = timestamp(), us = 0;
+	FivIoOpenContext ctx = {
+		.uri = g_file_get_uri(file),
+		.screen_dpi = 96,
+		// Only using this array as a redirect.
+		.warnings = g_ptr_array_new_with_free_func(g_free),
+	};
+
+	FivIoImage *loaded_by_us = fiv_io_open(&ctx, NULL);
+	g_clear_object(&file);
+	g_free((char *) ctx.uri);
+	g_ptr_array_free(ctx.warnings, TRUE);
+	if (!loaded_by_us)
+		return;
+
+	fiv_io_image_unref(loaded_by_us);
+	us = timestamp() - since_us;
+
+	double since_pixbuf = timestamp(), pixbuf = 0;
+	GdkPixbuf *gdk_pixbuf = gdk_pixbuf_new_from_file(filename, NULL);
+	if (gdk_pixbuf) {
+		cairo_surface_t *loaded_by_pixbuf =
+			gdk_cairo_surface_create_from_pixbuf(gdk_pixbuf, 1, NULL);
+		g_object_unref(gdk_pixbuf);
+		cairo_surface_destroy(loaded_by_pixbuf);
+		pixbuf = timestamp() - since_pixbuf;
+	}
+
+	// GdkPixbuf may fail on files that we can load, in which case its time
+	// stays at zero: there is no ratio to show, and dividing would give "inf".
+	if (pixbuf > 0)
+		printf("%.3f\t%.3f\t%.0f%%\t%s\n",
+			us, pixbuf, us / pixbuf * 100, filename);
+	else
+		printf("%.3f\t%.3f\t-\t%s\n", us, pixbuf, filename);
+}
+
+// Benchmarks each file named on the command line, one output line per file.
+int
+main(int argc, char *argv[])
+{
+	// Needed for gdk_cairo_surface_create_from_pixbuf().
+	gdk_init(&argc, &argv);
+
+	for (int i = 1; i < argc; i++)
+		one_file(argv[i]);
+	return 0;
+}
diff --git a/tools/bmffinfo.c b/tools/bmffinfo.c
deleted file mode 100644
index f0c6ff1..0000000
--- a/tools/bmffinfo.c
+++ /dev/null
@@ -1,142 +0,0 @@
-//
-// bmffinfo.c: acquire information about BMFF files in JSON format
-//
-// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
-//
-// Permission to use, copy, modify, and/or distribute this software for any
-// purpose with or without fee is hereby granted.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// - -#include "info.h" - -#include <jv.h> - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// --- ISO/IEC base media file format ------------------------------------------ -// ISO/IEC 14496-12:2015(E), used to be publicly available, now there's only: -// https://mpeg.chiariglione.org/standards/mpeg-4/iso-base-media-file-format/text-isoiec-14496-12-5th-edition -// but people have managed to archive the final version as well: -// https://b.goeswhere.com/ISO_IEC_14496-12_2015.pdf -// -// ISO/IEC 23008-12:2017 Information technology - -// High efficiency coding and media delivery in heterogeneous environments - -// Part 12: Image File Format + Cor 1:2020 Technical Corrigendum 1 -// https://standards.iso.org/ittf/PubliclyAvailableStandards/ - -static jv -parse_bmff_box(jv o, const char *type, const uint8_t *data, size_t len) -{ - // TODO(p): Parse out "uuid"'s uint8_t[16] initial field, present as hex. - // TODO(p): Parse out "ftyp" contents: 14496-12:2015 4.3 - // TODO(p): Parse out other important boxes: 14496-12:2015 8+ - return add_to_subarray(o, "boxes", jv_string(type)); -} - -static jv -parse_bmff(jv o, const uint8_t *p, size_t len) -{ - // 4.2 Object Structure--this box need not be present, nor at the beginning - // TODO(p): What does `aligned(8)` mean? It's probably in bits. 
- if (len < 8 || memcmp(p + 4, "ftyp", 4)) - return add_error(o, "not BMFF at all or unsupported"); - - const uint8_t *end = p + len; - while (p < end) { - if (end - p < 8) { - o = add_warning(o, "box framing mismatch"); - break; - } - - char type[5] = ""; - memcpy(type, p + 4, 4); - - uint64_t box_size = u32be(p); - const uint8_t *data = p + 8; - if (box_size == 1) { - if (end - p < 16) { - o = add_warning(o, "unexpected EOF"); - break; - } - box_size = u64be(data); - data += 8; - } else if (!box_size) - box_size = end - p; - - if (box_size > (uint64_t) (end - p)) { - o = add_warning(o, "unexpected EOF"); - break; - } - - size_t data_len = box_size - (data - p); - o = parse_bmff_box(o, type, data, data_len); - p += box_size; - } - return o; -} - -// --- I/O --------------------------------------------------------------------- - -static jv -do_file(const char *filename, jv o) -{ - const char *err = NULL; - FILE *fp = fopen(filename, "rb"); - if (!fp) { - err = strerror(errno); - goto error; - } - - uint8_t *data = NULL, buf[256 << 10]; - size_t n, len = 0; - while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { - data = realloc(data, len + n); - memcpy(data + len, buf, n); - len += n; - } - if (ferror(fp)) { - err = strerror(errno); - goto error_read; - } - - o = parse_bmff(o, data, len); -error_read: - fclose(fp); - free(data); -error: - if (err) - o = add_error(o, err); - return o; -} - -int -main(int argc, char *argv[]) -{ - (void) parse_icc; - (void) parse_exif; - (void) parse_psir; - - // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. - // Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo - for (int i = 1; i < argc; i++) { - const char *filename = argv[i]; - - jv o = jv_object(); - o = jv_object_set(o, jv_string("filename"), jv_string(filename)); - o = do_file(filename, o); - jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. 
*/); - fputc('\n', stdout); - } - return 0; -} diff --git a/tools/hotpixels.c b/tools/hotpixels.c new file mode 100644 index 0000000..ee1028c --- /dev/null +++ b/tools/hotpixels.c @@ -0,0 +1,210 @@ +// +// hotpixels.c: look for hot pixels in raw image files +// +// Usage: pass a bunch of raw photo images taken with the lens cap on at, +// e.g., ISO 8000-12800 @ 1/20-1/60, and store the resulting file as, +// e.g., Nikon D7500.badpixels, which can then be directly used by Rawtherapee. +// +// Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name> +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include <libraw.h> + +#if LIBRAW_VERSION < LIBRAW_MAKE_VERSION(0, 21, 0) +#error LibRaw 0.21.0 or newer is required. 
+#endif
+
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Resizes an array to n elements of m bytes each, as realloc() would,
+// but terminates the process on multiplication overflow or allocation failure.
+static void *
+xreallocarray(void *o, size_t n, size_t m)
+{
+	if (m && n > SIZE_MAX / m) {
+		fprintf(stderr, "xreallocarray: %s\n", strerror(ENOMEM));
+		exit(EXIT_FAILURE);
+	}
+	void *p = realloc(o, n * m);
+	// A null result is only a failure when a non-zero size was requested.
+	if (!p && n && m) {
+		fprintf(stderr, "xreallocarray: %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	return p;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// Pixel coordinates, relative to the top/left margins of the raw image.
+struct coord { ushort x, y; };
+
+static bool
+coord_equals(struct coord a, struct coord b)
+{
+	return a.x == b.x && a.y == b.y;
+}
+
+// A qsort() comparator ordering coordinates by row, then by column.
+static int
+coord_cmp(const void *a, const void *b)
+{
+	const struct coord *ca = (const struct coord *) a;
+	const struct coord *cb = (const struct coord *) b;
+	return ca->y != cb->y
+		? (int) ca->y - (int) cb->y
+		: (int) ca->x - (int) cb->x;
+}
+
+// A growable array of suspicious pixels, accumulated over all input files.
+struct candidates {
+	struct coord *xy;
+	size_t len;
+	size_t alloc;
+};
+
+// Appends a coordinate, growing the array in steps of 64 elements.
+static void
+candidates_add(struct candidates *c, ushort x, ushort y)
+{
+	if (c->len == c->alloc) {
+		c->alloc += 64;
+		c->xy = xreallocarray(c->xy, sizeof *c->xy, c->alloc);
+	}
+
+	c->xy[c->len++] = (struct coord) {x, y};
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// A stretch of zeroes that is assumed to mean start of outliers.
+#define SPAN 10
+
+// Returns the raw sample at the given offset from the top/left margins.
+static ushort
+raw_sample(const libraw_data_t *iprc, ushort xx, ushort yy)
+{
+	// Index in size_t: the int arithmetic that ushort operands promote to
+	// could overflow, and storing the sums back into ushort could wrap.
+	size_t y = (size_t) iprc->sizes.top_margin + yy;
+	size_t x = (size_t) iprc->sizes.left_margin + xx;
+	return iprc->rawdata.raw_image[y * iprc->sizes.raw_width + x];
+}
+
+// Unpacks a raw file held in memory, and adds the coordinates of all pixels
+// whose value lies above the main body of the histogram to the candidates.
+// Returns NULL on success, or an error message on failure.
+static const char *
+process_raw(struct candidates *c, const uint8_t *p, size_t len)
+{
+	libraw_data_t *iprc = libraw_init(LIBRAW_OPIONS_NO_DATAERR_CALLBACK);
+	if (!iprc)
+		return "failed to obtain a LibRaw handle";
+
+	int err = 0;
+	if ((err = libraw_open_buffer(iprc, p, len)) ||
+		(err = libraw_unpack(iprc))) {
+		libraw_close(iprc);
+		return libraw_strerror(err);
+	}
+	if (!iprc->rawdata.raw_image) {
+		libraw_close(iprc);
+		return "only Bayer raws are supported, not Foveon";
+	}
+
+	// Make a histogram. Samples span 0..USHRT_MAX inclusive,
+	// so one more bin than USHRT_MAX is necessary to hold a saturated value.
+	uint64_t bins[USHRT_MAX + 1] = {};
+	for (ushort yy = 0; yy < iprc->sizes.height; yy++) {
+		for (ushort xx = 0; xx < iprc->sizes.width; xx++)
+			bins[raw_sample(iprc, xx, yy)]++;
+	}
+
+	// Detecting outliers is not completely straight-forward,
+	// it may help to see the histogram.
+	if (getenv("HOTPIXELS_HISTOGRAM")) {
+		// The index must be wider than ushort to terminate at USHRT_MAX.
+		for (size_t i = 0; i <= USHRT_MAX; i++)
+			fprintf(stderr, "%u ", (unsigned) bins[i]);
+		fputc('\n', stderr);
+	}
+
+	// Go to the first non-zero pixel value.
+	size_t last = 0;
+	for (; last < USHRT_MAX; last++)
+		if (bins[last])
+			break;
+
+	// Find the last pixel value we assume to not be hot.
+	for (; last < USHRT_MAX - SPAN - 1; last++) {
+		uint64_t nonzero = 0;
+		for (int i = 1; i <= SPAN; i++)
+			nonzero += bins[last + i];
+		if (!nonzero)
+			break;
+	}
+
+	// Store coordinates for all pixels above that value.
+	for (ushort yy = 0; yy < iprc->sizes.height; yy++) {
+		for (ushort xx = 0; xx < iprc->sizes.width; xx++) {
+			if (raw_sample(iprc, xx, yy) > last)
+				candidates_add(c, xx, yy);
+		}
+	}
+
+	libraw_close(iprc);
+	return NULL;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// Reads the whole file into memory and passes it to process_raw().
+// Returns NULL on success, or an error message on failure.
+static const char *
+do_file(struct candidates *c, const char *filename)
+{
+	FILE *fp = fopen(filename, "rb");
+	if (!fp)
+		return strerror(errno);
+
+	uint8_t *data = NULL, buf[256 << 10];
+	size_t n, len = 0;
+	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
+		data = xreallocarray(data, len + n, 1);
+		memcpy(data + len, buf, n);
+		len += n;
+	}
+
+	const char *err = ferror(fp)
+		? strerror(errno)
+		: process_raw(c, data, len);
+
+	fclose(fp);
+	free(data);
+	return err;
+}
+
+// Prints "x y" for every pixel that was a candidate in all the given files.
+// Stops with a failure exit status at the first file that cannot be processed.
+int
+main(int argc, char *argv[])
+{
+	struct candidates c = {};
+	for (int i = 1; i < argc; i++) {
+		const char *filename = argv[i], *err = do_file(&c, filename);
+		if (err) {
+			fprintf(stderr, "%s: %s\n", filename, err);
+			return EXIT_FAILURE;
+		}
+	}
+
+	// The array is still a null pointer when nothing has been found,
+	// which qsort() is not required to accept, even for zero elements.
+	if (c.len)
+		qsort(c.xy, c.len, sizeof *c.xy, coord_cmp);
+
+	// If it is detected in all passed photos, it is probably indeed bad.
+	// Equal coordinates are adjacent after sorting, so count run lengths.
+	int count = 1;
+	for (size_t i = 1; i <= c.len; i++) {
+		if (i != c.len && coord_equals(c.xy[i - 1], c.xy[i])) {
+			count++;
+			continue;
+		}
+
+		if (count == argc - 1)
+			printf("%u %u\n", c.xy[i - 1].x, c.xy[i - 1].y);
+
+		count = 1;
+	}
+
+	free(c.xy);
+	return 0;
+}
diff --git a/tools/info.c b/tools/info.c
new file mode 100644
index 0000000..440939f
--- /dev/null
+++ b/tools/info.c
@@ -0,0 +1,286 @@
+//
+// info.c: acquire information about JPEG/TIFF/BMFF/WebP files in JSON format
+//
+// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include "info.h"
+
+#include <jv.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// --- ISO/IEC base media file format ------------------------------------------
+// ISO/IEC 14496-12:2015(E), used to be publicly available, now there's only:
+// https://mpeg.chiariglione.org/standards/mpeg-4/iso-base-media-file-format/text-isoiec-14496-12-5th-edition
+// but people have managed to archive the final version as well:
+// https://b.goeswhere.com/ISO_IEC_14496-12_2015.pdf
+//
+// ISO/IEC 23008-12:2017 Information technology -
+// High efficiency coding and media delivery in heterogeneous environments -
+// Part 12: Image File Format + Cor 1:2020 Technical Corrigendum 1
+// https://standards.iso.org/ittf/PubliclyAvailableStandards/
+
+// Records a single box: so far, just its type is appended to "boxes".
+static jv
+parse_bmff_box(jv o, const char *type, const uint8_t *data, size_t len)
+{
+	// The payload remains unused until any of the following gets implemented.
+	(void) data;
+	(void) len;
+
+	// TODO(p): Parse out "uuid"'s uint8_t[16] initial field, present as hex.
+	// TODO(p): Parse out "ftyp" contents: 14496-12:2015 4.3
+	// TODO(p): Parse out other important boxes: 14496-12:2015 8+
+	return add_to_subarray(o, "boxes", jv_string(type));
+}
+
+// Checks whether the data begins with an "ftyp" box.
+static bool
+detect_bmff(const uint8_t *p, size_t len)
+{
+	// 4.2 Object Structure--this box need not be present, nor at the beginning
+	// TODO(p): What does `aligned(8)` mean? It's probably in bits.
+	return len >= 8 && !memcmp(p + 4, "ftyp", 4);
+}
+
+// Walks over the sequence of top-level boxes, passing each of them
+// to parse_bmff_box(). Framing problems end the walk with a warning.
+static jv
+parse_bmff(jv o, const uint8_t *p, size_t len)
+{
+	if (!detect_bmff(p, len))
+		return add_error(o, "not BMFF at all or unsupported");
+
+	const uint8_t *end = p + len;
+	while (p < end) {
+		if (end - p < 8) {
+			o = add_warning(o, "box framing mismatch");
+			break;
+		}
+
+		char type[5] = "";
+		memcpy(type, p + 4, 4);
+
+		// A size of 1 announces a 64-bit size following the type,
+		// a size of 0 makes the box span the rest of the data.
+		uint64_t box_size = u32be(p);
+		const uint8_t *data = p + 8;
+		if (box_size == 1) {
+			if (end - p < 16) {
+				o = add_warning(o, "unexpected EOF");
+				break;
+			}
+			box_size = u64be(data);
+			data += 8;
+		} else if (!box_size)
+			box_size = end - p;
+
+		// The size includes the header, so anything shorter than that
+		// is invalid: the payload length below would underflow,
+		// and a 64-bit size of zero would never advance past this box.
+		if (box_size < (uint64_t) (data - p)) {
+			o = add_warning(o, "invalid box size");
+			break;
+		}
+		if (box_size > (uint64_t) (end - p)) {
+			o = add_warning(o, "unexpected EOF");
+			break;
+		}
+
+		size_t data_len = box_size - (data - p);
+		o = parse_bmff_box(o, type, data, data_len);
+		p += box_size;
+	}
+	return o;
+}
+
+// --- WebP --------------------------------------------------------------------
+// libwebp won't let us simply iterate over all chunks, so handroll it.
+//
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt
+// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt
+// https://datatracker.ietf.org/doc/html/rfc6386
+//
+// Pretty versions, hopefully not outdated:
+// https://developers.google.com/speed/webp/docs/riff_container
+// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification
+
+// Checks for the "RIFF" signature, and "WEBP" following the 32-bit size.
+static bool
+detect_webp(const uint8_t *p, size_t len)
+{
+	return len >= 12 && !memcmp(p, "RIFF", 4) && !memcmp(p + 8, "WEBP", 4);
+}
+
+// Extracts the 14-bit width and height from a "VP8 " chunk,
+// which needs to start with a key frame carrying the 9d 01 2a start code.
+static jv
+parse_webp_vp8(jv o, const uint8_t *p, size_t len)
+{
+	if (len < 10 || (p[0] & 1) != 0 /* key frame */ ||
+		p[3] != 0x9d || p[4] != 0x01 || p[5] != 0x2a) {
+		return add_warning(o, "invalid VP8 chunk");
+	}
+
+	o = jv_set(o, jv_string("width"), jv_number(u16le(p + 6) & 0x3fff));
+	o = jv_set(o, jv_string("height"), jv_number(u16le(p + 8) & 0x3fff));
+	return o;
+}
+
+// Extracts the dimensions, stored decremented by one in 14 bits each,
+// and the alpha flag from a "VP8L" chunk, identified by its 0x2f signature.
+static jv
+parse_webp_vp8l(jv o, const uint8_t *p, size_t len)
+{
+	if (len < 5 || p[0] != 0x2f)
+		return add_warning(o, "invalid VP8L chunk");
+
+	// Reading LSB-first from a little endian value means reading in order.
+	uint32_t header = u32le(p + 1);
+	o = jv_set(o, jv_string("width"), jv_number((header & 0x3fff) + 1));
+	header >>= 14;
+	o = jv_set(o, jv_string("height"), jv_number((header & 0x3fff) + 1));
+	header >>= 14;
+	o = jv_set(o, jv_string("alpha_is_used"), jv_bool(header & 1));
+	return o;
+}
+
+// Extracts the animation flag (bit 1 of the first byte) from a "VP8X" chunk.
+static jv
+parse_webp_vp8x(jv o, const uint8_t *p, size_t len)
+{
+	if (len < 10)
+		return add_warning(o, "invalid VP8X chunk");
+
+	// Most of the fields in this chunk are duplicate or inferrable.
+	// Probably not worth decoding or verifying.
+	// TODO(p): For animations, we need to use the width and height from here.
+	uint8_t flags = p[0];
+	o = jv_set(o, jv_string("animation"), jv_bool((flags >> 1) & 1));
+	return o;
+}
+
+// Iterates over all chunks in the RIFF container, stores their FourCCs
+// in order under "chunks", and decodes the chunk types that are understood.
+static jv
+parse_webp(jv o, const uint8_t *p, size_t len)
+{
+	if (!detect_webp(p, len))
+		return add_error(o, "not a WEBP file");
+
+	// The size field does not count the 8 bytes of the RIFF header itself.
+	// When the sum exceeds what has been read, chunks would be looked for
+	// beyond the end of the buffer. Add in 64 bits to prevent wrap-around.
+	// TODO(p): This can still be parseable.
+	// TODO(p): Warn on trailing data.
+	uint32_t size = u32le(p + 4);
+	if ((uint64_t) size + 8 > len)
+		return add_error(o, "truncated file");
+
+	const uint8_t *end = p + 8 + size;
+	p += 12;
+
+	jv chunks = jv_array();
+	while (p < end) {
+		if (end - p < 8) {
+			o = add_warning(o, "framing mismatch");
+			break;
+		}
+
+		// Payloads are padded to an even length. Compare lengths rather than
+		// pointers, and in 64 bits, so that huge sizes cannot wrap around.
+		uint32_t chunk_size = u32le(p + 4);
+		uint64_t chunk_advance = ((uint64_t) chunk_size + 1) & ~(uint64_t) 1;
+		if (chunk_advance > (uint64_t) (end - p - 8)) {
+			o = add_warning(o, "runaway chunk payload");
+			break;
+		}
+
+		char fourcc[5] = "";
+		memcpy(fourcc, p, 4);
+		chunks = jv_array_append(chunks, jv_string(fourcc));
+		p += 8;
+
+		// TODO(p): Decode more chunks.
+		if (!strcmp(fourcc, "VP8 "))
+			o = parse_webp_vp8(o, p, chunk_size);
+		if (!strcmp(fourcc, "VP8L"))
+			o = parse_webp_vp8l(o, p, chunk_size);
+		if (!strcmp(fourcc, "VP8X"))
+			o = parse_webp_vp8x(o, p, chunk_size);
+		if (!strcmp(fourcc, "EXIF"))
+			o = parse_exif(o, p, chunk_size);
+		if (!strcmp(fourcc, "ICCP"))
+			o = parse_icc(o, p, chunk_size);
+		p += chunk_advance;
+	}
+	return jv_set(o, jv_string("chunks"), chunks);
+}
+
+// --- I/O ---------------------------------------------------------------------
+
+// Supported formats, tried in this order by parse_any().
+static struct {
+	const char *name;
+	bool (*detect) (const uint8_t *, size_t);
+	jv (*parse) (jv, const uint8_t *, size_t);
+} formats[] = {
+	{"JPEG", detect_jpeg, parse_jpeg},
+	{"TIFF", detect_tiff, parse_tiff},
+	{"BMFF", detect_bmff, parse_bmff},
+	{"WebP", detect_webp, parse_webp},
+};
+
+// Runs the parser of the first format whose detector accepts the data.
+// With INFO_IDENTIFY in the environment, the format name is stored as well.
+static jv
+parse_any(jv o, const uint8_t *p, size_t len)
+{
+	// TODO(p): Also see if the file extension is appropriate.
+	for (size_t i = 0; i < sizeof formats / sizeof *formats; i++) {
+		if (!formats[i].detect(p, len))
+			continue;
+		if (getenv("INFO_IDENTIFY"))
+			o = jv_set(o, jv_string("format"), jv_string(formats[i].name));
+		return formats[i].parse(o, p, len);
+	}
+	return add_error(o, "unsupported file format");
+}
+
+// Reads the whole file into memory and parses it into the passed object.
+// I/O and allocation failures are reported through add_error().
+static jv
+do_file(const char *filename, jv o)
+{
+	const char *err = NULL;
+	FILE *fp = fopen(filename, "rb");
+	if (!fp) {
+		err = strerror(errno);
+		goto error;
+	}
+
+	uint8_t *data = NULL, buf[256 << 10];
+	size_t n, len = 0;
+	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
+		// Keep the old pointer, so that it can be freed on failure.
+		uint8_t *bigger = realloc(data, len + n);
+		if (!bigger) {
+			err = strerror(ENOMEM);
+			goto error_read;
+		}
+
+		data = bigger;
+		memcpy(data + len, buf, n);
+		len += n;
+	}
+	if (ferror(fp)) {
+		err = strerror(errno);
+		goto error_read;
+	}
+
+#if 0
+	// Not sure if I want to ensure their existence...
+	o = jv_object_set(o, jv_string("info"), jv_array());
+	o = jv_object_set(o, jv_string("warnings"), jv_array());
+#endif
+
+	o = parse_any(o, data, len);
+error_read:
+	fclose(fp);
+	free(data);
+error:
+	if (err)
+		o = add_error(o, err);
+	return o;
+}
+
+// Prints one JSON object per line, for each file named on the command line.
+int
+main(int argc, char *argv[])
+{
+	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
+	// Usage: find . -print0 | xargs -0 ./info
+	for (int i = 1; i < argc; i++) {
+		const char *filename = argv[i];
+
+		jv o = jv_object();
+		o = jv_object_set(o, jv_string("filename"), jv_string(filename));
+		o = do_file(filename, o);
+		jv_dumpf(o, stdout, 0 /* JV_PRINT_SORTED would discard information. */);
+		fputc('\n', stdout);
+	}
+	return 0;
+}
diff --git a/tools/info.h b/tools/info.h
index 816c9cf..b6c6391 100644
--- a/tools/info.h
+++ b/tools/info.h
@@ -1,7 +1,7 @@
 //
 // info.h: metadata extraction utilities
 //
-// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -17,350 +17,14 @@ #include <jv.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> -#include <stdbool.h> - -// --- Utilities --------------------------------------------------------------- - -static char * -binhex(const uint8_t *data, size_t len) -{ - static const char *alphabet = "0123456789abcdef"; - char *buf = calloc(1, len * 2 + 1), *p = buf; - for (size_t i = 0; i < len; i++) { - *p++ = alphabet[data[i] >> 4]; - *p++ = alphabet[data[i] & 0xF]; - } - return buf; -} - -static uint64_t -u64be(const uint8_t *p) -{ - return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 | - (uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 | - (uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7]; -} - -static uint32_t -u32be(const uint8_t *p) -{ - return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; -} - -static uint16_t -u16be(const uint8_t *p) -{ - return (uint16_t) p[0] << 8 | p[1]; -} - -static uint64_t -u64le(const uint8_t *p) -{ - return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 | - (uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 | - (uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; -} - -static uint32_t -u32le(const uint8_t *p) -{ - return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; -} - -static uint16_t -u16le(const uint8_t *p) -{ - return (uint16_t) p[1] << 8 | p[0]; -} - -// --- TIFF -------------------------------------------------------------------- -// libtiff is a mess, and the format is not particularly complicated. -// Exiv2 is senselessly copylefted, and cannot do much. -// libexif is only marginally better. -// ExifTool is too user-oriented. 
- -static struct un { - uint64_t (*u64) (const uint8_t *); - uint32_t (*u32) (const uint8_t *); - uint16_t (*u16) (const uint8_t *); -} unbe = {u64be, u32be, u16be}, unle = {u64le, u32le, u16le}; - -struct tiffer { - struct un *un; - const uint8_t *begin, *p, *end; - uint16_t remaining_fields; -}; -static bool -tiffer_u32(struct tiffer *self, uint32_t *u) -{ - if (self->p + 4 > self->end) - return false; - *u = self->un->u32(self->p); - self->p += 4; - return true; -} - -static bool -tiffer_u16(struct tiffer *self, uint16_t *u) -{ - if (self->p + 2 > self->end) - return false; - *u = self->un->u16(self->p); - self->p += 2; - return true; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static bool -tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len) -{ - self->un = NULL; - self->begin = self->p = tiff; - self->end = tiff + len; - self->remaining_fields = 0; - - const uint8_t - le[4] = {'I', 'I', 42, 0}, - be[4] = {'M', 'M', 0, 42}; - - if (tiff + 8 > self->end) - return false; - else if (!memcmp(tiff, le, sizeof le)) - self->un = &unle; - else if (!memcmp(tiff, be, sizeof be)) - self->un = &unbe; - else - return false; - - self->p = tiff + 4; - // The first IFD needs to be read by caller explicitly, - // even though it's required to be present by TIFF 6.0. - return true; -} - -/// Read the next IFD in a sequence. -static bool -tiffer_next_ifd(struct tiffer *self) -{ - // All fields from any previous IFD need to be read first. - if (self->remaining_fields) - return false; - - uint32_t ifd_offset = 0; - if (!tiffer_u32(self, &ifd_offset)) - return false; - - // There is nothing more to read, this chain has terminated. - if (!ifd_offset) - return false; - - // Note that TIFF 6.0 requires there to be at least one entry, - // but there is no need for us to check it. 
- self->p = self->begin + ifd_offset; - return tiffer_u16(self, &self->remaining_fields); -} - -/// Initialize a derived TIFF reader for a subIFD at the given location. -static bool -tiffer_subifd(struct tiffer *self, uint32_t offset, struct tiffer *subreader) -{ - *subreader = *self; - subreader->p = subreader->begin + offset; - return tiffer_u16(subreader, &subreader->remaining_fields); -} - -enum tiffer_type { - BYTE = 1, ASCII, SHORT, LONG, RATIONAL, - SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE, - IFD // This last type from TIFF Technical Note 1 isn't really used much. -}; - -static size_t -tiffer_value_size(enum tiffer_type type) -{ - switch (type) { - case BYTE: - case SBYTE: - case ASCII: - case UNDEFINED: - return 1; - case SHORT: - case SSHORT: - return 2; - case LONG: - case SLONG: - case FLOAT: - case IFD: - return 4; - case RATIONAL: - case SRATIONAL: - case DOUBLE: - return 8; - default: - return 0; - } -} - -/// A lean iterator for values within entries. -struct tiffer_entry { - uint16_t tag; - enum tiffer_type type; - // For {S,}BYTE, ASCII, UNDEFINED, use these fields directly. - const uint8_t *p; - uint32_t remaining_count; -}; - -static bool -tiffer_next_value(struct tiffer_entry *entry) -{ - if (!entry->remaining_count) - return false; - - entry->p += tiffer_value_size(entry->type); - entry->remaining_count--; - return true; -} - -static bool -tiffer_integer( - const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out) -{ - if (!entry->remaining_count) - return false; - - // Somewhat excessively lenient, intended for display. - // TIFF 6.0 only directly suggests that a reader is should accept - // any of BYTE/SHORT/LONG for unsigned integers. 
- switch (entry->type) { - case BYTE: - case ASCII: - case UNDEFINED: - *out = *entry->p; - return true; - case SBYTE: - *out = (int8_t) *entry->p; - return true; - case SHORT: - *out = self->un->u16(entry->p); - return true; - case SSHORT: - *out = (int16_t) self->un->u16(entry->p); - return true; - case LONG: - case IFD: - *out = self->un->u32(entry->p); - return true; - case SLONG: - *out = (int32_t) self->un->u32(entry->p); - return true; - default: - return false; - } -} - -static bool -tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry, - int64_t *numerator, int64_t *denominator) -{ - if (!entry->remaining_count) - return false; - - // Somewhat excessively lenient, intended for display. - switch (entry->type) { - case RATIONAL: - *numerator = self->un->u32(entry->p); - *denominator = self->un->u32(entry->p + 4); - return true; - case SRATIONAL: - *numerator = (int32_t) self->un->u32(entry->p); - *denominator = (int32_t) self->un->u32(entry->p + 4); - return true; - default: - if (tiffer_integer(self, entry, numerator)) { - *denominator = 1; - return true; - } - return false; - } -} - -static bool -tiffer_real( - const struct tiffer *self, const struct tiffer_entry *entry, double *out) -{ - if (!entry->remaining_count) - return false; - - // Somewhat excessively lenient, intended for display. - // Assuming the host architecture uses IEEE 754. 
- switch (entry->type) { - int64_t numerator, denominator; - case FLOAT: - *out = *(float *) entry->p; - return true; - case DOUBLE: - *out = *(double *) entry->p; - return true; - default: - if (tiffer_rational(self, entry, &numerator, &denominator)) { - *out = (double) numerator / denominator; - return true; - } - return false; - } -} - -static bool -tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry) -{ - if (!self->remaining_fields) - return false; - - uint16_t type = entry->type = 0xFFFF; - if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) || - !tiffer_u32(self, &entry->remaining_count)) - return false; - - // Short values may and will be inlined, rather than pointed to. - size_t values_size = tiffer_value_size(type) * entry->remaining_count; - uint32_t offset = 0; - if (values_size <= sizeof offset) { - entry->p = self->p; - self->p += sizeof offset; - } else if (tiffer_u32(self, &offset)) { - entry->p = self->begin + offset; - } else { - return false; - } - - // All entries are pre-checked not to overflow. - if (entry->p + values_size > self->end) - return false; - - // Setting it at the end may provide an indication while debugging. - entry->type = type; - self->remaining_fields--; - return true; -} - -// --- TIFF/Exif tags ---------------------------------------------------------- - -struct tiff_value { - const char *name; - uint16_t value; -}; - -struct tiff_entry { - const char *name; - uint16_t tag; - struct tiff_value *values; -}; +// --- TIFF/Exif --------------------------------------------------------------- #include "tiff-tables.h" +#include "tiffer.h" // TODO(p): Consider if these can't be inlined into `tiff_entries`. 
static struct { @@ -374,6 +38,27 @@ static struct { {} }; +// --- Utilities --------------------------------------------------------------- + +#define u64be tiffer_u64be +#define u32be tiffer_u32be +#define u16be tiffer_u16be +#define u64le tiffer_u64le +#define u32le tiffer_u32le +#define u16le tiffer_u16le + +static char * +binhex(const uint8_t *data, size_t len) +{ + static const char *alphabet = "0123456789abcdef"; + char *buf = calloc(1, len * 2 + 1), *p = buf; + for (size_t i = 0; i < len; i++) { + *p++ = alphabet[data[i] >> 4]; + *p++ = alphabet[data[i] & 0xF]; + } + return buf; +} + // --- Analysis ---------------------------------------------------------------- static jv @@ -399,29 +84,78 @@ add_error(jv o, const char *message) return jv_object_set(o, jv_string("error"), jv_string(message)); } +// Forward declaration. +static jv parse_jpeg(jv o, const uint8_t *p, size_t len); + // --- Exif -------------------------------------------------------------------- static jv parse_exif_ifd(struct tiffer *T, const struct tiff_entry *info); +static bool +parse_exif_subifds_entry(const struct tiffer *T, + const struct tiffer_entry *entry, struct tiffer *subT) +{ + int64_t offset = 0; + return tiffer_integer(T, entry, &offset) && + offset >= 0 && offset <= UINT32_MAX && tiffer_subifd(T, offset, subT); +} + static jv -parse_exif_subifds(struct tiffer *T, const struct tiffer_entry *entry, +parse_exif_subifds(const struct tiffer *T, struct tiffer_entry *entry, struct tiff_entry *info) { - int64_t offset = 0; struct tiffer subT = {}; - if (!tiffer_integer(T, entry, &offset) || - offset < 0 || offset > UINT32_MAX || !tiffer_subifd(T, offset, &subT)) + if (!parse_exif_subifds_entry(T, entry, &subT)) return jv_null(); - // The chain should correspond to the values in the entry - // (TIFF Technical Note 1), we are not going to verify it. - // Note that Nikon NEFs do not follow this rule. 
jv a = jv_array(); do a = jv_array_append(a, parse_exif_ifd(&subT, info)); while (tiffer_next_ifd(&subT)); + + // The chain should correspond to the values in the entry (see TIFF + // Technical Note 1: "the NextIFD value of Child #1 must point to Child #2, + // and so on"), but at least some Nikon NEFs do not follow this rule. + if (jv_array_length(jv_copy(a)) == 1) { + while (tiffer_next_value(entry) && + parse_exif_subifds_entry(T, entry, &subT)) + a = jv_array_append(a, parse_exif_ifd(&subT, info)); + } return a; } +// Implemented partially, out of curiosity--it is not particularly useful, +// because there is a ton more parsing to do here. +static bool +parse_exif_makernote(jv *v, const struct tiffer_entry *entry) +{ + if (!getenv("INFO_MAKERNOTE") || + entry->tag != Exif_MakerNote || entry->type != TIFFER_UNDEFINED) + return false; + + struct tiffer T = {}; + if (entry->remaining_count >= 16 && + !memcmp(entry->p, "Nikon\x00\x02", 7) && + tiffer_init(&T, entry->p + 10, entry->remaining_count - 10) && + tiffer_next_ifd(&T)) { + *v = parse_exif_ifd(&T, NULL); + return true; + } + if (entry->remaining_count >= 16 && + !memcmp(entry->p, "Apple iOS\x00\x00\x01MM", 14)) { + T.un = &tiffer_unbe; + T.begin = T.p = entry->p + 14; + T.end = entry->p + entry->remaining_count - 14; + T.remaining_fields = 0; + + struct tiffer subT = {}; + if (tiffer_subifd(&T, 0, &subT)) { + *v = parse_exif_ifd(&subT, NULL); + return true; + } + } + return false; +} + static jv parse_exif_ascii(struct tiffer_entry *entry) { @@ -472,11 +206,13 @@ parse_exif_extract_sole_array_element(jv a) } static jv -parse_exif_entry(jv o, struct tiffer *T, struct tiffer_entry *entry, +parse_exif_entry(jv o, const struct tiffer *T, struct tiffer_entry *entry, const struct tiff_entry *info) { + const struct tiff_entry *info_begin = info; + static struct tiff_entry empty[] = {{}}; if (!info) - info = (struct tiff_entry[]) {{}}; + info = empty; for (; info->name; info++) if (info->tag == entry->tag) @@ 
-491,13 +227,18 @@ parse_exif_entry(jv o, struct tiffer *T, struct tiffer_entry *entry, double real = 0; if (!entry->remaining_count) { v = jv_null(); - } else if (entry->type == IFD || subentries) { + } else if (entry->type == TIFFER_IFD || subentries) { v = parse_exif_subifds(T, entry, subentries); - } else if (entry->type == ASCII) { + } else if (entry->type == TIFFER_ASCII) { v = parse_exif_extract_sole_array_element(parse_exif_ascii(entry)); - } else if (entry->type == UNDEFINED && !info->values) { + } else if (info_begin == exif_entries && parse_exif_makernote(&v, entry)) { + // Already processed. + } else if (entry->type == TIFFER_UNDEFINED && !info->values) { // Several Exif entries of UNDEFINED type contain single-byte numbers. v = parse_exif_undefined(entry); + } else if (info_begin == tiff_entries && entry->tag == TIFF_XMP && + (entry->type == TIFFER_UNDEFINED || entry->type == TIFFER_BYTE)) { + v = jv_string_sized((const char *) entry->p, entry->remaining_count); } else if (tiffer_real(T, entry, &real)) { v = jv_array(); do v = jv_array_append(v, parse_exif_value(info->values, real)); @@ -513,10 +254,55 @@ parse_exif_entry(jv o, struct tiffer *T, struct tiffer_entry *entry, static jv parse_exif_ifd(struct tiffer *T, const struct tiff_entry *info) { + int64_t compression = 0, + jpeg = 0, jpeg_length = 0, strip_offsets = 0, strip_byte_counts = 0; + jv ifd = jv_object(); struct tiffer_entry entry = {}; - while (tiffer_next_entry(T, &entry)) + while (tiffer_next_entry(T, &entry)) { + switch (entry.tag) { + case TIFF_Compression: + tiffer_integer(T, &entry, &compression); + break; + case TIFF_JPEGInterchangeFormat: + tiffer_integer(T, &entry, &jpeg); + break; + case TIFF_JPEGInterchangeFormatLength: + tiffer_integer(T, &entry, &jpeg_length); + break; + case TIFF_StripOffsets: + tiffer_integer(T, &entry, &strip_offsets); + break; + case TIFF_StripByteCounts: + tiffer_integer(T, &entry, &strip_byte_counts); + break; + } + ifd = parse_exif_entry(ifd, T, &entry, 
info); + } + + // This is how Exif specifies it, which doesn't follow TIFF 6.0. + // Also support CR2 IFD1, which isn't tagged with compression at all. + if (info == tiff_entries && /* compression == TIFF_Compression_JPEG && */ + jpeg > 0 && jpeg_length > 0 && + jpeg + jpeg_length <= (T->end - T->begin)) { + ifd = jv_set(ifd, jv_string("JPEG image data"), + parse_jpeg( + jv_object(), T->begin + jpeg, jpeg_length)); + } + + // As specified by DRAFT TIFF Technical Note 2 + TIFFphotoshop.pdf. + // Theoretically, there may be more strips, but this is not expected. + // Also support CR2 IFD0, which is tagged with the "wrong" compression. + if (info == tiff_entries && + (compression == TIFF_Compression_JPEGDatastream || + compression == TIFF_Compression_JPEG) && + strip_offsets > 0 && strip_byte_counts > 0 && + strip_offsets + strip_byte_counts <= (T->end - T->begin)) { + ifd = jv_set(ifd, jv_string("JPEG image data"), + parse_jpeg( + jv_object(), T->begin + strip_offsets, strip_byte_counts)); + } return ifd; } @@ -531,6 +317,25 @@ parse_exif(jv o, const uint8_t *p, size_t len) return o; } +static bool +detect_tiff(const uint8_t *p, size_t len) +{ + return tiffer_init(&(struct tiffer) {}, p, len); +} + +// TODO(p): Photoshop data and ICC profiles also have their tag in TIFF, +// they're not currently processed. +static jv +parse_tiff(jv o, const uint8_t *p, size_t len) +{ + struct tiffer T = {}; + if (!tiffer_init(&T, p, len)) + return add_warning(o, "invalid TIFF"); + while (tiffer_next_ifd(&T)) + o = add_to_subarray(o, "TIFF", parse_exif_ifd(&T, tiff_entries)); + return o; +} + // --- Photoshop Image Resources ----------------------------------------------- // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 + 3.1.3 // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/ @@ -885,3 +690,513 @@ parse_icc(jv o, const uint8_t *profile, size_t profile_len) // The description is required, so this should be unreachable. 
return jv_set(o, jv_string("ICC"), jv_bool(true)); } + +// --- Multi-Picture Format ---------------------------------------------------- + +static uint32_t +parse_mpf_mpentry(jv *a, const uint8_t *p, const struct tiffer *T) +{ + uint32_t attrs = T->un->u32(p); + uint32_t offset = T->un->u32(p + 8); + + uint32_t type_number = attrs & 0xFFFFFF; + jv type = jv_number(type_number); + switch (type_number) { + break; case 0x030000: type = jv_string("Baseline MP Primary Image"); + break; case 0x010001: type = jv_string("Large Thumbnail - VGA"); + break; case 0x010002: type = jv_string("Large Thumbnail - Full HD"); + break; case 0x020001: type = jv_string("Multi-Frame Image Panorama"); + break; case 0x020002: type = jv_string("Multi-Frame Image Disparity"); + break; case 0x020003: type = jv_string("Multi-Frame Image Multi-Angle"); + break; case 0x000000: type = jv_string("Undefined"); + } + + uint32_t format_number = (attrs >> 24) & 0x7; + jv format = jv_number(format_number); + if (format_number == 0) + format = jv_string("JPEG"); + + *a = jv_array_append(*a, JV_OBJECT( + jv_string("Individual Image Attribute"), JV_OBJECT( + jv_string("Dependent Parent Image"), jv_bool((attrs >> 31) & 1), + jv_string("Dependent Child Image"), jv_bool((attrs >> 30) & 1), + jv_string("Representative Image"), jv_bool((attrs >> 29) & 1), + jv_string("Reserved"), jv_number((attrs >> 27) & 0x3), + jv_string("Image Data Format"), format, + jv_string("MP Type Code"), type + ), + jv_string("Individual Image Size"), + jv_number(T->un->u32(p + 4)), + jv_string("Individual Image Data Offset"), + jv_number(offset), + jv_string("Dependent Image 1 Entry Number"), + jv_number(T->un->u16(p + 12)), + jv_string("Dependent Image 2 Entry Number"), + jv_number(T->un->u16(p + 14)) + )); + + // Don't report non-JPEGs, even though they're unlikely. + return format_number == 0 ? 
offset : 0; +} + +static jv +parse_mpf_index_entry(jv o, uint32_t **offsets, const struct tiffer *T, + struct tiffer_entry *entry) +{ + // 5.2.3.3. MP Entry + if (entry->tag != MPF_MPEntry || entry->type != TIFFER_UNDEFINED || + entry->remaining_count % 16) { + return parse_exif_entry(o, T, entry, mpf_entries); + } + + uint32_t count = entry->remaining_count / 16; + jv a = jv_array_sized(count); + uint32_t *out = *offsets = calloc(sizeof *out, count + 1); + for (uint32_t i = 0; i < count; i++) { + // 5.2.3.3.3. Individual Image Data Offset + uint32_t offset = parse_mpf_mpentry(&a, entry->p + i * 16, T); + if (offset) + *out++ = offset; + } + return jv_set(o, jv_string("MP Entry"), a); +} + +static jv +parse_mpf_index_ifd(uint32_t **offsets, struct tiffer *T) +{ + jv ifd = jv_object(); + struct tiffer_entry entry = {}; + while (tiffer_next_entry(T, &entry)) + ifd = parse_mpf_index_entry(ifd, offsets, T, &entry); + return ifd; +} + +static jv +parse_mpf(jv o, const uint8_t ***individuals, const uint8_t *p, size_t len, + const uint8_t *end) +{ + struct tiffer T; + if (!tiffer_init(&T, p, len) || !tiffer_next_ifd(&T)) + return add_warning(o, "invalid MPF segment"); + + // First image: IFD0 is Index IFD, any IFD1 is Attribute IFD. + // Other images: IFD0 is Attribute IFD, there is no Index IFD. + uint32_t *offsets = NULL; + if (!*individuals) { + o = add_to_subarray(o, "MPF", parse_mpf_index_ifd(&offsets, &T)); + if (!tiffer_next_ifd(&T)) + goto out; + } + + // This isn't optimal, but it will do. 
+ o = add_to_subarray(o, "MPF", parse_exif_ifd(&T, mpf_entries)); + +out: + if (offsets) { + size_t count = 0; + for (const uint32_t *i = offsets; *i; i++) + count++; + + free(*individuals); + const uint8_t **out = *individuals = calloc(sizeof *out, count + 1); + for (const uint32_t *i = offsets; *i; i++) { + if (*i > end - p) + o = add_warning(o, "MPF offset points past available data"); + else + *out++ = p + *i; + } + + free(offsets); + } + return o; +} + +// --- JPEG -------------------------------------------------------------------- +// Because the JPEG file format is simple, just do it manually. +// See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf + +enum { + TEM = 0x01, + SOF0 = 0xC0, SOF1, SOF2, SOF3, + DHT = 0xC4, + SOF5, SOF6, SOF7, + JPG = 0xC8, + SOF9, SOF10, SOF11, + DAC = 0xCC, + SOF13, SOF14, SOF15, + + RST0 = 0xD0, RST1, RST2, RST3, RST4, RST5, RST6, RST7, + + SOI = 0xD8, + EOI = 0xD9, + SOS = 0xDA, + DQT = 0xDB, + DNL = 0xDC, + DRI = 0xDD, + DHP = 0xDE, + EXP = 0xDF, + + APP0 = 0xE0, APP1, APP2, APP3, APP4, APP5, APP6, APP7, + APP8, APP9, APP10, APP11, APP12, APP13, APP14, APP15, + + JPG0 = 0xF0, JPG1, JPG2, JPG3, JPG4, JPG5, JPG6, JPG7, + JPG8, JPG9, JPG10, JPG11, JPG12, JPG13, + + COM = 0xFE +}; + +// The rest is "RES (Reserved)", except for 0xFF (filler) and 0x00 (invalid). 
+static const char *marker_ids[0xFF] = { + [TEM] = "TEM", + [SOF0] = "SOF0", [SOF1] = "SOF1", [SOF2] = "SOF2", [SOF3] = "SOF3", + [DHT] = "DHT", [SOF5] = "SOF5", [SOF6] = "SOF6", [SOF7] = "SOF7", + [JPG] = "JPG", [SOF9] = "SOF9", [SOF10] = "SOF10", [SOF11] = "SOF11", + [DAC] = "DAC", [SOF13] = "SOF13", [SOF14] = "SOF14", [SOF15] = "SOF15", + [RST0] = "RST0", [RST1] = "RST1", [RST2] = "RST2", [RST3] = "RST3", + [RST4] = "RST4", [RST5] = "RST5", [RST6] = "RST6", [RST7] = "RST7", + [SOI] = "SOI", [EOI] = "EOI", [SOS] = "SOS", [DQT] = "DQT", + [DNL] = "DNL", [DRI] = "DRI", [DHP] = "DHP", [EXP] = "EXP", + [APP0] = "APP0", [APP1] = "APP1", [APP2] = "APP2", [APP3] = "APP3", + [APP4] = "APP4", [APP5] = "APP5", [APP6] = "APP6", [APP7] = "APP7", + [APP8] = "APP8", [APP9] = "APP9", [APP10] = "APP10", [APP11] = "APP11", + [APP12] = "APP12", [APP13] = "APP13", [APP14] = "APP14", [APP15] = "APP15", + [JPG0] = "JPG0", [JPG1] = "JPG1", [JPG2] = "JPG2", [JPG3] = "JPG3", + [JPG4] = "JPG4", [JPG5] = "JPG5", [JPG6] = "JPG6", [JPG7] = "JPG7", + [JPG8] = "JPG8", [JPG9] = "JPG9", [JPG10] = "JPG10", [JPG11] = "JPG11", + [JPG12] = "JPG12", [JPG13] = "JPG13", [COM] = "COM" +}; + +// The rest is "RES (Reserved)", except for 0xFF (filler) and 0x00 (invalid). 
+static const char *marker_descriptions[0xFF] = { + [TEM] = "For temporary private use in arithmetic coding", + [SOF0] = "Baseline DCT", + [SOF1] = "Extended sequential DCT", + [SOF2] = "Progressive DCT", + [SOF3] = "Lossless (sequential)", + [DHT] = "Define Huffman table(s)", + [SOF5] = "Differential sequential DCT", + [SOF6] = "Differential progressive DCT", + [SOF7] = "Differential lossless (sequential)", + [JPG] = "Reserved for JPEG extensions", + [SOF9] = "Extended sequential DCT", + [SOF10] = "Progressive DCT", + [SOF11] = "Lossless (sequential)", + [DAC] = "Define arithmetic coding conditioning(s)", + [SOF13] = "Differential sequential DCT", + [SOF14] = "Differential progressive DCT", + [SOF15] = "Differential lossless (sequential)", + [RST0] = "Restart with module 8 count 0", + [RST1] = "Restart with module 8 count 1", + [RST2] = "Restart with module 8 count 2", + [RST3] = "Restart with module 8 count 3", + [RST4] = "Restart with module 8 count 4", + [RST5] = "Restart with module 8 count 5", + [RST6] = "Restart with module 8 count 6", + [RST7] = "Restart with module 8 count 7", + [SOI] = "Start of image", + [EOI] = "End of image", + [SOS] = "Start of scan", + [DQT] = "Define quantization table(s)", + [DNL] = "Define number of lines", + [DRI] = "Define restart interval", + [DHP] = "Define hierarchical progression", + [EXP] = "Expand reference component(s)", + [APP0] = "Reserved for application segments, 0", + [APP1] = "Reserved for application segments, 1", + [APP2] = "Reserved for application segments, 2", + [APP3] = "Reserved for application segments, 3", + [APP4] = "Reserved for application segments, 4", + [APP5] = "Reserved for application segments, 5", + [APP6] = "Reserved for application segments, 6", + [APP7] = "Reserved for application segments, 7", + [APP8] = "Reserved for application segments, 8", + [APP9] = "Reserved for application segments, 9", + [APP10] = "Reserved for application segments, 10", + [APP11] = "Reserved for application segments, 
11", + [APP12] = "Reserved for application segments, 12", + [APP13] = "Reserved for application segments, 13", + [APP14] = "Reserved for application segments, 14", + [APP15] = "Reserved for application segments, 15", + [JPG0] = "Reserved for JPEG extensions, 0", + [JPG1] = "Reserved for JPEG extensions, 1", + [JPG2] = "Reserved for JPEG extensions, 2", + [JPG3] = "Reserved for JPEG extensions, 3", + [JPG4] = "Reserved for JPEG extensions, 4", + [JPG5] = "Reserved for JPEG extensions, 5", + [JPG6] = "Reserved for JPEG extensions, 6", + [JPG7] = "Reserved for JPEG extensions, 7", + [JPG8] = "Reserved for JPEG extensions, 8", + [JPG9] = "Reserved for JPEG extensions, 9", + [JPG10] = "Reserved for JPEG extensions, 10", + [JPG11] = "Reserved for JPEG extensions, 11", + [JPG12] = "Reserved for JPEG extensions, 12", + [JPG13] = "Reserved for JPEG extensions, 13", + [COM] = "Comment", +}; + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct data { + bool ended; + uint8_t *exif, *icc, *psir; + size_t exif_len, icc_len, psir_len; + int icc_sequence, icc_done; + const uint8_t **mpf_individuals, **mpf_next; +}; + +static void +parse_append(uint8_t **buffer, size_t *buffer_len, const uint8_t *p, size_t len) +{ + size_t buffer_longer = *buffer_len + len; + *buffer = realloc(*buffer, buffer_longer); + memcpy(*buffer + *buffer_len, p, len); + *buffer_len = buffer_longer; +} + +static const uint8_t * +parse_marker(uint8_t marker, const uint8_t *p, const uint8_t *end, + struct data *data, jv *o) +{ + // Suspected: MJPEG? Undetected format recursion, e.g., thumbnails? + // Found: Random metadata! Multi-Picture Format! + if ((data->ended = marker == EOI)) { + // TODO(p): Handle Exifs independently--flush the last one. 
+ if ((data->mpf_next || (data->mpf_next = data->mpf_individuals)) && + *data->mpf_next) + return *data->mpf_next++; + if (p != end) + *o = add_warning(*o, "trailing data"); + } + + // These markers stand alone, not starting a marker segment. + switch (marker) { + case RST0: + case RST1: + case RST2: + case RST3: + case RST4: + case RST5: + case RST6: + case RST7: + *o = add_warning(*o, "unexpected restart marker"); + // Fall-through + case SOI: + case EOI: + case TEM: + return p; + } + + uint16_t length = p[0] << 8 | p[1]; + const uint8_t *payload = p + 2; + if ((p += length) > end) { + *o = add_error(*o, "runaway marker segment"); + return NULL; + } + + switch (marker) { + case SOF0: + case SOF1: + case SOF2: + case SOF3: + case SOF5: + case SOF6: + case SOF7: + case SOF9: + case SOF10: + case SOF11: + case SOF13: + case SOF14: + case SOF15: + case DHP: // B.2.2 and B.3.2. + // As per B.2.5, Y can be zero, then there needs to be a DNL segment. + *o = add_to_subarray(*o, "info", JV_OBJECT( + jv_string("type"), jv_string(marker_descriptions[marker]), + jv_string("bits"), jv_number(payload[0]), + jv_string("height"), jv_number(payload[1] << 8 | payload[2]), + jv_string("width"), jv_number(payload[3] << 8 | payload[4]), + jv_string("components"), jv_number(payload[5]) + )); + return p; + } + + // See B.1.1.5, we can brute-force our way through the entropy-coded data. + if (marker == SOS) { + while (p + 2 <= end && (p[0] != 0xFF || p[1] < 0xC0 || p[1] > 0xFE || + (p[1] >= RST0 && p[1] <= RST7))) + p++; + return p; + } + + // "The interpretation is left to the application." 
+ if (marker == COM) { + int superascii = 0; + char *buf = calloc(3, p - payload), *bufp = buf; + for (const uint8_t *q = payload; q < p; q++) { + if (*q < 128) { + *bufp++ = *q; + } else { + superascii++; + *bufp++ = 0xC0 | (*q >> 6); + *bufp++ = 0x80 | (*q & 0x3F); + } + } + *bufp++ = 0; + *o = add_to_subarray(*o, "comments", jv_string(buf)); + free(buf); + + if (superascii) + *o = add_warning(*o, "super-ASCII comments"); + } + + // These mostly contain an ASCII string header, following JPEG FIF: + // + // "Application-specific APP0 marker segments are identified + // by a zero terminated string which identifies the application + // (not 'JFIF' or 'JFXX')." + if (marker >= APP0 && marker <= APP15) { + const uint8_t *nul = memchr(payload, 0, p - payload); + int unprintable = !nul; + if (nul) { + for (const uint8_t *q = payload; q < nul; q++) + unprintable += *q < 32 || *q >= 127; + } + *o = add_to_subarray(*o, "apps", + unprintable ? jv_null() : jv_string((const char *) payload)); + } + + // CIPA DC-007-2021 (Multi-Picture Format) 5.2 + // https://www.cipa.jp/e/std/std-sec.html + if (marker == APP2 && p - payload >= 8 && !memcmp(payload, "MPF\0", 4)) { + payload += 4; + *o = parse_mpf(*o, &data->mpf_individuals, payload, p - payload, end); + } + + // CIPA DC-006 (Stereo Still Image Format for Digital Cameras) + // TODO(p): Handle by properly skipping trailing data (use Stim offsets). + + // https://www.w3.org/Graphics/JPEG/jfif3.pdf + if (marker == APP0 && p - payload >= 14 && !memcmp(payload, "JFIF\0", 5)) { + payload += 5; + + jv units = jv_number(payload[2]); + switch (payload[2]) { + break; case 0: units = jv_null(); + break; case 1: units = jv_string("DPI"); + break; case 2: units = jv_string("dots per cm"); + } + + // The rest is picture data. 
+ *o = add_to_subarray(*o, "JFIF", JV_OBJECT( + jv_string("version"), jv_number(payload[0] * 100 + payload[1]), + jv_string("units"), units, + jv_string("density-x"), jv_number(payload[3] << 8 | payload[4]), + jv_string("density-y"), jv_number(payload[5] << 8 | payload[6]), + jv_string("thumbnail-w"), jv_number(payload[7]), + jv_string("thumbnail-h"), jv_number(payload[8]) + )); + } + if (marker == APP0 && p - payload >= 6 && !memcmp(payload, "JFXX\0", 5)) { + payload += 5; + + jv extension = jv_number(payload[0]); + switch (payload[0]) { + break; case 0x10: extension = jv_string("JPEG thumbnail"); + break; case 0x11: extension = jv_string("Paletted thumbnail"); + break; case 0x13: extension = jv_string("RGB thumbnail"); + } + + // The rest is picture data. + *o = add_to_subarray(*o, "JFXX", + JV_OBJECT(jv_string("extension"), extension)); + } + + // https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2 + // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 + if (marker == APP1 && p - payload >= 6 && !memcmp(payload, "Exif\0", 5)) { + payload += 6; + if (payload[-1] != 0) + *o = add_warning(*o, "weirdly padded Exif header"); + if (data->exif) + *o = add_warning(*o, "multiple Exif segments"); + parse_append(&data->exif, &data->exif_len, payload, p - payload); + } + + // https://www.color.org/specification/ICC1v43_2010-12.pdf B.4 + if (marker == APP2 && p - payload >= 14 && + !memcmp(payload, "ICC_PROFILE\0", 12) && !data->icc_done && + payload[12] == ++data->icc_sequence && payload[13] >= payload[12]) { + payload += 14; + parse_append(&data->icc, &data->icc_len, payload, p - payload); + data->icc_done = payload[-1] == data->icc_sequence; + } + + // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 + 3.1.3 + // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/ + if (marker == APP13 && p - payload >= 14 && + !memcmp(payload, "Photoshop 3.0\0", 14)) { + payload += 14; + parse_append(&data->psir, &data->psir_len, payload, p 
- payload); + } + + // TODO(p): Extract all XMP segments. + return p; +} + +static bool +detect_jpeg(const uint8_t *p, size_t len) +{ + return len >= 2 && p[0] == 0xff && p[1] == SOI; +} + +static jv +parse_jpeg(jv o, const uint8_t *p, size_t len) +{ + struct data data = {}; + const uint8_t *end = p + len; + jv markers = jv_array(); + while (p) { + // This is an expectable condition, use a simple warning. + if (p + 2 > end) { + if (!data.ended) + o = add_warning(o, "unexpected EOF"); + break; + } + if (*p++ != 0xFF || *p == 0) { + if (!data.ended) + o = add_error(o, "no marker found where one was expected"); + break; + } + + // Markers may be preceded by fill bytes. + if (*p == 0xFF) { + o = jv_object_set(o, jv_string("fillers"), jv_bool(true)); + continue; + } + + uint8_t marker = *p++; + markers = jv_array_append(markers, + jv_string(marker_ids[marker] ? marker_ids[marker] : "RES")); + p = parse_marker(marker, p, end, &data, &o); + } + + if (data.exif) { + // TODO(p): Probably extend it until the end of the JPEG, + // seeing as, e.g., thumbnail data can overflow into follow-up segments. + o = parse_exif(o, data.exif, data.exif_len); + free(data.exif); + } + if (data.icc) { + if (data.icc_done) + o = parse_icc(o, data.icc, data.icc_len); + else + o = add_warning(o, "bad ICC profile sequence"); + free(data.icc); + } + if (data.psir) { + o = parse_psir(o, data.psir, data.psir_len); + free(data.psir); + } + + free(data.mpf_individuals); + return jv_set(o, jv_string("markers"), markers); +} diff --git a/tools/jpeginfo.c b/tools/jpeginfo.c deleted file mode 100644 index 6a0994b..0000000 --- a/tools/jpeginfo.c +++ /dev/null @@ -1,610 +0,0 @@ -// -// jpeginfo.c: acquire information about JPEG files in JSON format -// -// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name> -// -// Permission to use, copy, modify, and/or distribute this software for any -// purpose with or without fee is hereby granted. 
-// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// - -#include "info.h" - -#include <jv.h> - -#include <errno.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// --- Multi-Picture Format ---------------------------------------------------- - -enum { - MPF_MPFVersion = 45056, - MPF_NumberOfImages = 45057, - MPF_MPEntry = 45058, - MPF_ImageUIDList = 45059, - MPF_TotalFrames = 45060, - - MPF_MPIndividualNum = 45313, - MPF_PanOrientation = 45569, - MPF_PanOverlap_H = 45570, - MPF_PanOverlap_V = 45571, - MPF_BaseViewpointNum = 45572, - MPF_ConvergenceAngle = 45573, - MPF_BaselineLength = 45574, - MPF_VerticalDivergence = 45575, - MPF_AxisDistance_X = 45576, - MPF_AxisDistance_Y = 45577, - MPF_AxisDistance_Z = 45578, - MPF_YawAngle = 45579, - MPF_PitchAngle = 45580, - MPF_RollAngle = 45581 -}; - -static struct tiff_entry mpf_entries[] = { - {"MP Format Version Number", MPF_MPFVersion, NULL}, - {"Number of Images", MPF_NumberOfImages, NULL}, - {"MP Entry", MPF_MPEntry, NULL}, - {"Individual Image Unique ID List", MPF_ImageUIDList, NULL}, - {"Total Number of Captured Frames", MPF_TotalFrames, NULL}, - - {"MP Individual Image Number", MPF_MPIndividualNum, NULL}, - {"Panorama Scanning Orientation", MPF_PanOrientation, NULL}, - {"Panorama Horizontal Overlap", MPF_PanOverlap_H, NULL}, - {"Panorama Vertical Overlap", MPF_PanOverlap_V, NULL}, - {"Base Viewpoint Number", MPF_BaseViewpointNum, NULL}, - {"Convergence Angle", MPF_ConvergenceAngle, NULL}, - {"Baseline Length", 
MPF_BaselineLength, NULL}, - {"Divergence Angle", MPF_VerticalDivergence, NULL}, - {"Horizontal Axis Distance", MPF_AxisDistance_X, NULL}, - {"Vertical Axis Distance", MPF_AxisDistance_Y, NULL}, - {"Collimation Axis Distance", MPF_AxisDistance_Z, NULL}, - {"Yaw Angle", MPF_YawAngle, NULL}, - {"Pitch Angle", MPF_PitchAngle, NULL}, - {"Roll Angle", MPF_RollAngle, NULL}, - {} -}; - -static uint32_t -parse_mpf_mpentry(jv *a, const uint8_t *p, struct tiffer *T) -{ - uint32_t attrs = T->un->u32(p); - uint32_t offset = T->un->u32(p + 8); - - uint32_t type_number = attrs & 0xFFFFFF; - jv type = jv_number(type_number); - switch (type_number) { - break; case 0x030000: type = jv_string("Baseline MP Primary Image"); - break; case 0x010001: type = jv_string("Large Thumbnail - VGA"); - break; case 0x010002: type = jv_string("Large Thumbnail - Full HD"); - break; case 0x020001: type = jv_string("Multi-Frame Image Panorama"); - break; case 0x020002: type = jv_string("Multi-Frame Image Disparity"); - break; case 0x020003: type = jv_string("Multi-Frame Image Multi-Angle"); - break; case 0x000000: type = jv_string("Undefined"); - } - - uint32_t format_number = (attrs >> 24) & 0x7; - jv format = jv_number(format_number); - if (format_number == 0) - format = jv_string("JPEG"); - - *a = jv_array_append(*a, JV_OBJECT( - jv_string("Individual Image Attribute"), JV_OBJECT( - jv_string("Dependent Parent Image"), jv_bool((attrs >> 31) & 1), - jv_string("Dependent Child Image"), jv_bool((attrs >> 30) & 1), - jv_string("Representative Image"), jv_bool((attrs >> 29) & 1), - jv_string("Reserved"), jv_number((attrs >> 27) & 0x3), - jv_string("Image Data Format"), format, - jv_string("MP Type Code"), type - ), - jv_string("Individual Image Size"), - jv_number(T->un->u32(p + 4)), - jv_string("Individual Image Data Offset"), - jv_number(offset), - jv_string("Dependent Image 1 Entry Number"), - jv_number(T->un->u16(p + 12)), - jv_string("Dependent Image 2 Entry Number"), - jv_number(T->un->u16(p + 
14)) - )); - - // Don't report non-JPEGs, even though they're unlikely. - return format_number == 0 ? offset : 0; -} - -static jv -parse_mpf_index_entry(jv o, const uint8_t ***offsets, struct tiffer *T, - struct tiffer_entry *entry) -{ - // 5.2.3.3. MP Entry - if (entry->tag != MPF_MPEntry || entry->type != UNDEFINED || - entry->remaining_count % 16) { - return parse_exif_entry(o, T, entry, mpf_entries); - } - - uint32_t count = entry->remaining_count / 16; - jv a = jv_array_sized(count); - const uint8_t **out = *offsets = calloc(sizeof *out, count + 1); - for (uint32_t i = 0; i < count; i++) { - uint32_t offset = parse_mpf_mpentry(&a, entry->p + i * 16, T); - if (offset) - *out++ = T->begin + offset; - } - return jv_set(o, jv_string("MP Entry"), a); -} - -static jv -parse_mpf_index_ifd(const uint8_t ***offsets, struct tiffer *T) -{ - jv ifd = jv_object(); - struct tiffer_entry entry = {}; - while (tiffer_next_entry(T, &entry)) - ifd = parse_mpf_index_entry(ifd, offsets, T, &entry); - return ifd; -} - -static jv -parse_mpf(jv o, const uint8_t ***offsets, const uint8_t *p, size_t len) -{ - struct tiffer T; - if (!tiffer_init(&T, p, len) || !tiffer_next_ifd(&T)) - return add_warning(o, "invalid MPF segment"); - - // First image: IFD0 is Index IFD, any IFD1 is Attribute IFD. - // Other images: IFD0 is Attribute IFD, there is no Index IFD. - if (!*offsets) { - o = add_to_subarray(o, "MPF", parse_mpf_index_ifd(offsets, &T)); - if (!tiffer_next_ifd(&T)) - return o; - } - - // This isn't optimal, but it will do. - return add_to_subarray(o, "MPF", parse_exif_ifd(&T, mpf_entries)); -} - -// --- JPEG -------------------------------------------------------------------- -// Because the JPEG file format is simple, just do it manually. 
-// See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf - -enum { - TEM = 0x01, - SOF0 = 0xC0, SOF1, SOF2, SOF3, - DHT = 0xC4, - SOF5, SOF6, SOF7, - JPG = 0xC8, - SOF9, SOF10, SOF11, - DAC = 0xCC, - SOF13, SOF14, SOF15, - - RST0 = 0xD0, RST1, RST2, RST3, RST4, RST5, RST6, RST7, - - SOI = 0xD8, - EOI = 0xD9, - SOS = 0xDA, - DQT = 0xDB, - DNL = 0xDC, - DRI = 0xDD, - DHP = 0xDE, - EXP = 0xDF, - - APP0 = 0xE0, APP1, APP2, APP3, APP4, APP5, APP6, APP7, - APP8, APP9, APP10, APP11, APP12, APP13, APP14, APP15, - - JPG0 = 0xF0, JPG1, JPG2, JPG3, JPG4, JPG5, JPG6, JPG7, - JPG8, JPG9, JPG10, JPG11, JPG12, JPG13, - - COM = 0xFE -}; - -// The rest is "RES (Reserved)", except for 0xFF (filler) and 0x00 (invalid). -static const char *marker_ids[0xFF] = { - [TEM] = "TEM", - [SOF0] = "SOF0", [SOF1] = "SOF1", [SOF2] = "SOF2", [SOF3] = "SOF3", - [DHT] = "DHT", [SOF5] = "SOF5", [SOF6] = "SOF6", [SOF7] = "SOF7", - [JPG] = "JPG", [SOF9] = "SOF9", [SOF10] = "SOF10", [SOF11] = "SOF11", - [DAC] = "DAC", [SOF13] = "SOF13", [SOF14] = "SOF14", [SOF15] = "SOF15", - [RST0] = "RST0", [RST1] = "RST1", [RST2] = "RST2", [RST3] = "RST3", - [RST4] = "RST4", [RST5] = "RST5", [RST6] = "RST6", [RST7] = "RST7", - [SOI] = "SOI", [EOI] = "EOI", [SOS] = "SOS", [DQT] = "DQT", - [DNL] = "DNL", [DRI] = "DRI", [DHP] = "DHP", [EXP] = "EXP", - [APP0] = "APP0", [APP1] = "APP1", [APP2] = "APP2", [APP3] = "APP3", - [APP4] = "APP4", [APP5] = "APP5", [APP6] = "APP6", [APP7] = "APP7", - [APP8] = "APP8", [APP9] = "APP9", [APP10] = "APP10", [APP11] = "APP11", - [APP12] = "APP12", [APP13] = "APP13", [APP14] = "APP14", [APP15] = "APP15", - [JPG0] = "JPG0", [JPG1] = "JPG1", [JPG2] = "JPG2", [JPG3] = "JPG3", - [JPG4] = "JPG4", [JPG5] = "JPG5", [JPG6] = "JPG6", [JPG7] = "JPG7", - [JPG8] = "JPG8", [JPG9] = "JPG9", [JPG10] = "JPG10", [JPG11] = "JPG11", - [JPG12] = "JPG12", [JPG13] = "JPG13", [COM] = "COM" -}; - -// The rest is "RES (Reserved)", except for 0xFF (filler) and 0x00 (invalid). 
-static const char *marker_descriptions[0xFF] = { - [TEM] = "For temporary private use in arithmetic coding", - [SOF0] = "Baseline DCT", - [SOF1] = "Extended sequential DCT", - [SOF2] = "Progressive DCT", - [SOF3] = "Lossless (sequential)", - [DHT] = "Define Huffman table(s)", - [SOF5] = "Differential sequential DCT", - [SOF6] = "Differential progressive DCT", - [SOF7] = "Differential lossless (sequential)", - [JPG] = "Reserved for JPEG extensions", - [SOF9] = "Extended sequential DCT", - [SOF10] = "Progressive DCT", - [SOF11] = "Lossless (sequential)", - [DAC] = "Define arithmetic coding conditioning(s)", - [SOF13] = "Differential sequential DCT", - [SOF14] = "Differential progressive DCT", - [SOF15] = "Differential lossless (sequential)", - [RST0] = "Restart with module 8 count 0", - [RST1] = "Restart with module 8 count 1", - [RST2] = "Restart with module 8 count 2", - [RST3] = "Restart with module 8 count 3", - [RST4] = "Restart with module 8 count 4", - [RST5] = "Restart with module 8 count 5", - [RST6] = "Restart with module 8 count 6", - [RST7] = "Restart with module 8 count 7", - [SOI] = "Start of image", - [EOI] = "End of image", - [SOS] = "Start of scan", - [DQT] = "Define quantization table(s)", - [DNL] = "Define number of lines", - [DRI] = "Define restart interval", - [DHP] = "Define hierarchical progression", - [EXP] = "Expand reference component(s)", - [APP0] = "Reserved for application segments, 0", - [APP1] = "Reserved for application segments, 1", - [APP2] = "Reserved for application segments, 2", - [APP3] = "Reserved for application segments, 3", - [APP4] = "Reserved for application segments, 4", - [APP5] = "Reserved for application segments, 5", - [APP6] = "Reserved for application segments, 6", - [APP7] = "Reserved for application segments, 7", - [APP8] = "Reserved for application segments, 8", - [APP9] = "Reserved for application segments, 9", - [APP10] = "Reserved for application segments, 10", - [APP11] = "Reserved for application segments, 
11", - [APP12] = "Reserved for application segments, 12", - [APP13] = "Reserved for application segments, 13", - [APP14] = "Reserved for application segments, 14", - [APP15] = "Reserved for application segments, 15", - [JPG0] = "Reserved for JPEG extensions, 0", - [JPG1] = "Reserved for JPEG extensions, 1", - [JPG2] = "Reserved for JPEG extensions, 2", - [JPG3] = "Reserved for JPEG extensions, 3", - [JPG4] = "Reserved for JPEG extensions, 4", - [JPG5] = "Reserved for JPEG extensions, 5", - [JPG6] = "Reserved for JPEG extensions, 6", - [JPG7] = "Reserved for JPEG extensions, 7", - [JPG8] = "Reserved for JPEG extensions, 8", - [JPG9] = "Reserved for JPEG extensions, 9", - [JPG10] = "Reserved for JPEG extensions, 10", - [JPG11] = "Reserved for JPEG extensions, 11", - [JPG12] = "Reserved for JPEG extensions, 12", - [JPG13] = "Reserved for JPEG extensions, 13", - [COM] = "Comment", -}; - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -struct data { - bool ended; - uint8_t *exif, *icc, *psir; - size_t exif_len, icc_len, psir_len; - int icc_sequence, icc_done; - const uint8_t **mpf_offsets, **mpf_next; -}; - -static void -parse_append(uint8_t **buffer, size_t *buffer_len, const uint8_t *p, size_t len) -{ - size_t buffer_longer = *buffer_len + len; - *buffer = realloc(*buffer, buffer_longer); - memcpy(*buffer + *buffer_len, p, len); - *buffer_len = buffer_longer; -} - -static const uint8_t * -parse_marker(uint8_t marker, const uint8_t *p, const uint8_t *end, - struct data *data, jv *o) -{ - // Suspected: MJPEG? Undetected format recursion, e.g., thumbnails? - // Found: Random metadata! Multi-Picture Format! - if ((data->ended = marker == EOI)) { - // TODO(p): Handle Exifs independently--flush the last one. 
- if ((data->mpf_next || (data->mpf_next = data->mpf_offsets)) && - *data->mpf_next) - return *data->mpf_next++; - if (p != end) - *o = add_warning(*o, "trailing data"); - } - - // These markers stand alone, not starting a marker segment. - switch (marker) { - case RST0: - case RST1: - case RST2: - case RST3: - case RST4: - case RST5: - case RST6: - case RST7: - *o = add_warning(*o, "unexpected restart marker"); - // Fall-through - case SOI: - case EOI: - case TEM: - return p; - } - - uint16_t length = p[0] << 8 | p[1]; - const uint8_t *payload = p + 2; - if ((p += length) > end) { - *o = add_error(*o, "runaway marker segment"); - return NULL; - } - - switch (marker) { - case SOF0: - case SOF1: - case SOF2: - case SOF3: - case SOF5: - case SOF6: - case SOF7: - case SOF9: - case SOF10: - case SOF11: - case SOF13: - case SOF14: - case SOF15: - case DHP: // B.2.2 and B.3.2. - // As per B.2.5, Y can be zero, then there needs to be a DNL segment. - *o = add_to_subarray(*o, "info", JV_OBJECT( - jv_string("type"), jv_string(marker_descriptions[marker]), - jv_string("bits"), jv_number(payload[0]), - jv_string("height"), jv_number(payload[1] << 8 | payload[2]), - jv_string("width"), jv_number(payload[3] << 8 | payload[4]), - jv_string("components"), jv_number(payload[5]) - )); - return p; - } - - // See B.1.1.5, we can brute-force our way through the entropy-coded data. - if (marker == SOS) { - while (p + 2 <= end && (p[0] != 0xFF || p[1] < 0xC0 || p[1] > 0xFE || - (p[1] >= RST0 && p[1] <= RST7))) - p++; - return p; - } - - // "The interpretation is left to the application." 
- if (marker == COM) { - int superascii = 0; - char *buf = calloc(3, p - payload), *bufp = buf; - for (const uint8_t *q = payload; q < p; q++) { - if (*q < 128) { - *bufp++ = *q; - } else { - superascii++; - *bufp++ = 0xC0 | (*q >> 6); - *bufp++ = 0x80 | (*q & 0x3F); - } - } - *bufp++ = 0; - *o = add_to_subarray(*o, "comments", jv_string(buf)); - free(buf); - - if (superascii) - *o = add_warning(*o, "super-ASCII comments"); - } - - // These mostly contain an ASCII string header, following JPEG FIF: - // - // "Application-specific APP0 marker segments are identified - // by a zero terminated string which identifies the application - // (not 'JFIF' or 'JFXX')." - if (marker >= APP0 && marker <= APP15) { - const uint8_t *nul = memchr(payload, 0, p - payload); - int unprintable = !nul; - if (nul) { - for (const uint8_t *q = payload; q < nul; q++) - unprintable += *q < 32 || *q >= 127; - } - *o = add_to_subarray(*o, "apps", - unprintable ? jv_null() : jv_string((const char *) payload)); - } - - // CIPA DC-007 (Multi-Picture Format) 5.2 - // http://fileformats.archiveteam.org/wiki/Multi-Picture_Format - if (marker == APP2 && p - payload >= 8 && !memcmp(payload, "MPF\0", 4)) { - payload += 4; - *o = parse_mpf(*o, &data->mpf_offsets, payload, p - payload); - } - - // CIPA DC-006 (Stereo Still Image Format for Digital Cameras) - // TODO(p): Handle by properly skipping trailing data (use Stim offsets). - - // https://www.w3.org/Graphics/JPEG/jfif3.pdf - if (marker == APP0 && p - payload >= 14 && !memcmp(payload, "JFIF\0", 5)) { - payload += 5; - - jv units = jv_number(payload[2]); - switch (payload[2]) { - break; case 0: units = jv_null(); - break; case 1: units = jv_string("DPI"); - break; case 2: units = jv_string("dots per cm"); - } - - // The rest is picture data. 
- *o = add_to_subarray(*o, "JFIF", JV_OBJECT( - jv_string("version"), jv_number(payload[0] * 100 + payload[1]), - jv_string("units"), units, - jv_string("density-x"), jv_number(payload[3] << 8 | payload[4]), - jv_string("density-y"), jv_number(payload[5] << 8 | payload[6]), - jv_string("thumbnail-w"), jv_number(payload[7]), - jv_string("thumbnail-h"), jv_number(payload[8]) - )); - } - if (marker == APP0 && p - payload >= 6 && !memcmp(payload, "JFXX\0", 5)) { - payload += 5; - - jv extension = jv_number(payload[0]); - switch (payload[0]) { - break; case 0x10: extension = jv_string("JPEG thumbnail"); - break; case 0x11: extension = jv_string("Paletted thumbnail"); - break; case 0x13: extension = jv_string("RGB thumbnail"); - } - - // The rest is picture data. - *o = add_to_subarray(*o, "JFXX", - JV_OBJECT(jv_string("extension"), extension)); - } - - // https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2 - // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 - if (marker == APP1 && p - payload >= 6 && !memcmp(payload, "Exif\0", 5)) { - payload += 6; - if (payload[-1] != 0) - *o = add_warning(*o, "weirdly padded Exif header"); - if (data->exif) - *o = add_warning(*o, "multiple Exif segments"); - parse_append(&data->exif, &data->exif_len, payload, p - payload); - } - - // https://www.color.org/specification/ICC1v43_2010-12.pdf B.4 - if (marker == APP2 && p - payload >= 14 && - !memcmp(payload, "ICC_PROFILE\0", 12) && !data->icc_done && - payload[12] == ++data->icc_sequence && payload[13] >= payload[12]) { - payload += 14; - parse_append(&data->icc, &data->icc_len, payload, p - payload); - data->icc_done = payload[-1] == data->icc_sequence; - } - - // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 + 3.1.3 - // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/ - if (marker == APP13 && p - payload >= 14 && - !memcmp(payload, "Photoshop 3.0\0", 14)) { - payload += 14; - parse_append(&data->psir, &data->psir_len, payload, p 
- payload); - } - - // TODO(p): Extract all XMP segments. - return p; -} - -static jv -parse_jpeg(jv o, const uint8_t *p, size_t len) -{ - struct data data = {}; - const uint8_t *end = p + len; - jv markers = jv_array(); - while (p) { - // This is an expectable condition, use a simple warning. - if (p + 2 > end) { - if (!data.ended) - o = add_warning(o, "unexpected EOF"); - break; - } - if (*p++ != 0xFF || *p == 0) { - if (!data.ended) - o = add_error(o, "no marker found where one was expected"); - break; - } - - // Markers may be preceded by fill bytes. - if (*p == 0xFF) { - o = jv_object_set(o, jv_string("fillers"), jv_bool(true)); - continue; - } - - uint8_t marker = *p++; - markers = jv_array_append(markers, - jv_string(marker_ids[marker] ? marker_ids[marker] : "RES")); - p = parse_marker(marker, p, end, &data, &o); - } - - if (data.exif) { - o = parse_exif(o, data.exif, data.exif_len); - free(data.exif); - } - if (data.icc) { - if (data.icc_done) - o = parse_icc(o, data.icc, data.icc_len); - else - o = add_warning(o, "bad ICC profile sequence"); - free(data.icc); - } - if (data.psir) { - o = parse_psir(o, data.psir, data.psir_len); - free(data.psir); - } - - free(data.mpf_offsets); - return jv_set(o, jv_string("markers"), markers); -} - -// --- I/O --------------------------------------------------------------------- - -static jv -do_file(const char *filename, jv o) -{ - const char *err = NULL; - FILE *fp = fopen(filename, "rb"); - if (!fp) { - err = strerror(errno); - goto error; - } - - uint8_t *data = NULL, buf[256 << 10]; - size_t n, len = 0; - while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { - data = realloc(data, len + n); - memcpy(data + len, buf, n); - len += n; - } - if (ferror(fp)) { - err = strerror(errno); - goto error_read; - } - -#if 0 - // Not sure if I want to ensure their existence... 
- o = jv_object_set(o, jv_string("info"), jv_array()); - o = jv_object_set(o, jv_string("warnings"), jv_array()); -#endif - - o = parse_jpeg(o, data, len); -error_read: - fclose(fp); - free(data); -error: - if (err) - o = add_error(o, err); - return o; -} - -int -main(int argc, char *argv[]) -{ - // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. - // Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo - for (int i = 1; i < argc; i++) { - const char *filename = argv[i]; - - jv o = jv_object(); - o = jv_object_set(o, jv_string("filename"), jv_string(filename)); - o = do_file(filename, o); - jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. */); - fputc('\n', stdout); - } - return 0; -} diff --git a/tools/rawinfo.c b/tools/rawinfo.c new file mode 100644 index 0000000..6409d33 --- /dev/null +++ b/tools/rawinfo.c @@ -0,0 +1,175 @@ +// +// rawinfo.c: acquire information about raw image files in JSON format +// +// Copyright (c) 2023, Přemysl Eric Janouch <p@janouch.name> +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include "info.h" + +#include <jv.h> +#include <libraw.h> + +#if LIBRAW_VERSION < LIBRAW_MAKE_VERSION(0, 21, 0) +#error LibRaw 0.21.0 or newer is required. 
+#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// --- Raw image files --------------------------------------------------------- +// This is in principle similar to LibRaw's `raw-identify -v`, +// but the output is machine-processable. + +static jv +parse_raw(jv o, const uint8_t *p, size_t len) +{ + libraw_data_t *iprc = libraw_init(LIBRAW_OPIONS_NO_DATAERR_CALLBACK); + if (!iprc) + return add_error(o, "failed to obtain a LibRaw handle"); + + int err = 0; + if ((err = libraw_open_buffer(iprc, p, len))) { + libraw_close(iprc); + return add_error(o, libraw_strerror(err)); + } + + // -> iprc->rawparams.shot_select + o = jv_set(o, jv_string("count"), jv_number(iprc->idata.raw_count)); + + o = jv_set(o, jv_string("width"), jv_number(iprc->sizes.width)); + o = jv_set(o, jv_string("height"), jv_number(iprc->sizes.height)); + o = jv_set(o, jv_string("flip"), jv_number(iprc->sizes.flip)); + o = jv_set(o, jv_string("pixel_aspect_ratio"), + jv_number(iprc->sizes.pixel_aspect)); + + if ((err = libraw_adjust_sizes_info_only(iprc))) { + o = add_warning(o, libraw_strerror(err)); + } else { + o = jv_set( + o, jv_string("output_width"), jv_number(iprc->sizes.iwidth)); + o = jv_set( + o, jv_string("output_height"), jv_number(iprc->sizes.iheight)); + } + + jv thumbnails = jv_array(); + for (int i = 0; i < iprc->thumbs_list.thumbcount; i++) { + libraw_thumbnail_item_t *item = iprc->thumbs_list.thumblist + i; + + const char *format = "?"; + switch (item->tformat) { + case LIBRAW_INTERNAL_THUMBNAIL_UNKNOWN: + format = "unknown"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_KODAK_THUMB: + format = "Kodak thumbnail"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_KODAK_YCBCR: + format = "Kodak YCbCr"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_KODAK_RGB: + format = "Kodak RGB"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_JPEG: + format = "JPEG"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_LAYER: + format = "layer"; + break; + case 
LIBRAW_INTERNAL_THUMBNAIL_ROLLEI: + format = "Rollei"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_PPM: + format = "PPM"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_PPM16: + format = "PPM16"; + break; + case LIBRAW_INTERNAL_THUMBNAIL_X3F: + format = "X3F"; + break; + } + + jv to = JV_OBJECT( + jv_string("width"), jv_number(item->twidth), + jv_string("height"), jv_number(item->theight), + jv_string("flip"), jv_number(item->tflip), + jv_string("format"), jv_string(format)); + + if (item->tformat == LIBRAW_INTERNAL_THUMBNAIL_JPEG && + item->toffset > 0 && + (size_t) item->toffset + item->tlength <= len) { + to = jv_set(to, jv_string("JPEG"), + parse_jpeg(jv_object(), p + item->toffset, item->tlength)); + } + + thumbnails = jv_array_append(thumbnails, to); + } + + libraw_close(iprc); + return jv_set(o, jv_string("thumbnails"), thumbnails); +} + +// --- I/O --------------------------------------------------------------------- + +static jv +do_file(const char *filename, jv o) +{ + const char *err = NULL; + FILE *fp = fopen(filename, "rb"); + if (!fp) { + err = strerror(errno); + goto error; + } + + uint8_t *data = NULL, buf[256 << 10]; + size_t n, len = 0; + while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { + data = realloc(data, len + n); + memcpy(data + len, buf, n); + len += n; + } + if (ferror(fp)) { + err = strerror(errno); + goto error_read; + } + + o = parse_raw(o, data, len); + +error_read: + fclose(fp); + free(data); +error: + if (err) + o = add_error(o, err); + return o; +} + +int +main(int argc, char *argv[]) +{ + // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. + // Usage: find . -print0 | xargs -0 ./rawinfo + for (int i = 1; i < argc; i++) { + const char *filename = argv[i]; + + jv o = jv_object(); + o = jv_object_set(o, jv_string("filename"), jv_string(filename)); + o = do_file(filename, o); + jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. 
*/); + fputc('\n', stdout); + } + return 0; +} diff --git a/tools/tiffinfo.c b/tools/tiffinfo.c deleted file mode 100644 index da629c6..0000000 --- a/tools/tiffinfo.c +++ /dev/null @@ -1,79 +0,0 @@ -// -// tiffinfo.c: acquire information about TIFF files in JSON format -// -// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name> -// -// Permission to use, copy, modify, and/or distribute this software for any -// purpose with or without fee is hereby granted. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -// - -#include "info.h" - -#include <jv.h> - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> - -// This is essentially the same as jpeginfo.c, but we only have an Exif segment. -// TODO(p): Photoshop data and ICC profiles also have their tag, -// they're not currently processed. 
- -static jv -do_file(const char *filename, jv o) -{ - const char *err = NULL; - FILE *fp = fopen(filename, "rb"); - if (!fp) { - err = strerror(errno); - goto error; - } - - uint8_t *data = NULL, buf[256 << 10]; - size_t n, len = 0; - while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { - data = realloc(data, len + n); - memcpy(data + len, buf, n); - len += n; - } - if (ferror(fp)) { - err = strerror(errno); - goto error_read; - } - - o = parse_exif(o, data, len); - -error_read: - fclose(fp); - free(data); -error: - if (err) - o = add_error(o, err); - return o; -} - -int -main(int argc, char *argv[]) -{ - // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. - // Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo - for (int i = 1; i < argc; i++) { - const char *filename = argv[i]; - - jv o = jv_object(); - o = jv_object_set(o, jv_string("filename"), jv_string(filename)); - o = do_file(filename, o); - jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. */); - fputc('\n', stdout); - } - return 0; -} diff --git a/tools/webpinfo.c b/tools/webpinfo.c deleted file mode 100644 index f3417f9..0000000 --- a/tools/webpinfo.c +++ /dev/null @@ -1,133 +0,0 @@ -// -// webpinfo.c: acquire information about WebP files in JSON format -// -// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name> -// -// Permission to use, copy, modify, and/or distribute this software for any -// purpose with or without fee is hereby granted. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-// - -#include "info.h" - -#include <jv.h> - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// --- WebP -------------------------------------------------------------------- -// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt -// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt -// https://datatracker.ietf.org/doc/html/rfc6386 -// -// Pretty versions, hopefully not outdated: -// https://developers.google.com/speed/webp/docs/riff_container -// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification - -static jv -parse_webp(jv o, const uint8_t *p, size_t len) -{ - // libwebp won't let us simply iterate over all chunks, so handroll it. - if (len < 12 || memcmp(p, "RIFF", 4) || memcmp(p + 8, "WEBP", 4)) - return add_error(o, "not a WEBP file"); - - // TODO(p): This can still be parseable. - // TODO(p): Warn on trailing data. - uint32_t size = u32le(p + 4); - if (8 + size < len) - return add_error(o, "truncated file"); - - const uint8_t *end = p + 8 + size; - p += 12; - - jv chunks = jv_array(); - while (p < end) { - if (end - p < 8) { - o = add_warning(o, "framing mismatch"); - printf("%ld", end - p); - break; - } - - uint32_t chunk_size = u32le(p + 4); - uint32_t chunk_advance = (chunk_size + 1) & ~1; - if (p + 8 + chunk_advance > end) { - o = add_warning(o, "runaway chunk payload"); - break; - } - - char fourcc[5] = ""; - memcpy(fourcc, p, 4); - chunks = jv_array_append(chunks, jv_string(fourcc)); - p += 8; - - // TODO(p): Decode VP8 and VP8L chunk metadata. 
- if (!strcmp(fourcc, "EXIF")) - o = parse_exif(o, p, chunk_size); - if (!strcmp(fourcc, "ICCP")) - o = parse_icc(o, p, chunk_size); - p += chunk_advance; - } - return jv_set(o, jv_string("chunks"), chunks); -} - -// --- I/O --------------------------------------------------------------------- - -static jv -do_file(const char *filename, jv o) -{ - const char *err = NULL; - FILE *fp = fopen(filename, "rb"); - if (!fp) { - err = strerror(errno); - goto error; - } - - uint8_t *data = NULL, buf[256 << 10]; - size_t n, len = 0; - while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) { - data = realloc(data, len + n); - memcpy(data + len, buf, n); - len += n; - } - if (ferror(fp)) { - err = strerror(errno); - goto error_read; - } - - o = parse_webp(o, data, len); -error_read: - fclose(fp); - free(data); -error: - if (err) - o = add_error(o, err); - return o; -} - -int -main(int argc, char *argv[]) -{ - (void) parse_psir; - - // XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes. - // Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo - for (int i = 1; i < argc; i++) { - const char *filename = argv[i]; - - jv o = jv_object(); - o = jv_object_set(o, jv_string("filename"), jv_string(filename)); - o = do_file(filename, o); - jv_dumpf(o, stdout, 0 /* Might consider JV_PRINT_SORTED. */); - fputc('\n', stdout); - } - return 0; -} |