aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2023-05-23 02:09:15 +0200
committerPřemysl Eric Janouch <p@janouch.name>2023-05-26 15:32:34 +0200
commitbb4b895cb5938712bd09fbd2b5f49bea811d7551 (patch)
tree586902190b9fe52b1c6cb14309b852267fc9ee1e
parent0f1c61ae3325dda14be8f98ee7047ac5eda02108 (diff)
downloadfiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.tar.gz
fiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.tar.xz
fiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.zip
Extract some full-size raw previews without LibRaw
Not all image/x-nikon-nef will work like this, so don't claim their MIME type.
-rw-r--r--fiv-io.c394
-rwxr-xr-xtiff-tables.awk20
-rw-r--r--tiffer.h340
-rw-r--r--tools/info.h363
4 files changed, 716 insertions, 401 deletions
diff --git a/fiv-io.c b/fiv-io.c
index c4c61f4..7659fa8 100644
--- a/fiv-io.c
+++ b/fiv-io.c
@@ -41,6 +41,10 @@
#include <lcms2.h>
#endif // HAVE_LCMS2
+#define TIFF_TABLES_CONSTANTS_ONLY
+#include "tiff-tables.h"
+#include "tiffer.h"
+
#ifdef HAVE_LIBRAW
#include <libraw.h>
#if LIBRAW_VERSION >= LIBRAW_MAKE_VERSION(0, 21, 0)
@@ -1141,32 +1145,28 @@ fail:
// --- JPEG --------------------------------------------------------------------
-static GBytes *
-parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len)
+/// In/out data of parse_jpeg_metadata(): the caller supplies the buffers,
+/// and a NULL "exif" or "icc" makes the parser skip that kind of segment.
+struct jpeg_metadata {
+ GByteArray *exif; ///< Exif buffer or NULL
+ GByteArray *icc; ///< ICC profile buffer or NULL
+ int width; ///< Image width
+ int height; ///< Image height
+};
+
+static void
+parse_jpeg_metadata(const char *data, size_t len, struct jpeg_metadata *meta)
{
// Because the JPEG file format is simple, just do it manually.
// See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
enum {
- APP0 = 0xE0,
- APP1,
- APP2,
- RST0 = 0xD0,
- RST1,
- RST2,
- RST3,
- RST4,
- RST5,
- RST6,
- RST7,
- SOI = 0xD8,
- EOI = 0xD9,
- SOS = 0xDA,
TEM = 0x01,
+ SOF0 = 0xC0, SOF1, SOF2, SOF3, DHT, SOF5, SOF6, SOF7,
+ JPG, SOF9, SOF10, SOF11, DAC, SOF13, SOF14, SOF15,
+ RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7,
+ SOI, EOI, SOS, DQT, DNL, DRI, DHP, EXP,
+ APP0, APP1, APP2, APP3, APP4, APP5, APP6, APP7,
};
- GByteArray *exif = g_byte_array_new(), *icc = g_byte_array_new();
int icc_sequence = 0, icc_done = FALSE;
-
const guint8 *p = (const guint8 *) data, *end = p + len;
while (p + 3 < end && *p++ == 0xFF && *p != SOS && *p != EOI) {
// The previous byte is a fill byte, restart.
@@ -1195,49 +1195,76 @@ parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len)
if (G_UNLIKELY((p += length) > end))
break;
+ switch (marker) {
+ case SOF0:
+ case SOF1:
+ case SOF2:
+ case SOF3:
+ case SOF5:
+ case SOF6:
+ case SOF7:
+ case SOF9:
+ case SOF10:
+ case SOF11:
+ case SOF13:
+ case SOF14:
+ case SOF15:
+ if (length >= 5) {
+ meta->width = (payload[3] << 8) + payload[4];
+ meta->height = (payload[1] << 8) + payload[2];
+ }
+ }
+
// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2
// Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3
// Not checking the padding byte is intentional.
- if (marker == APP1 && p - payload >= 6 &&
- !memcmp(payload, "Exif\0", 5) && !exif->len) {
+ // XXX: Thumbnails may in practice overflow into follow-up segments.
+ if (meta->exif && marker == APP1 && p - payload >= 6 &&
+ !memcmp(payload, "Exif\0", 5) && !meta->exif->len) {
payload += 6;
- g_byte_array_append(exif, payload, p - payload);
+ g_byte_array_append(meta->exif, payload, p - payload);
}
// https://www.color.org/specification/ICC1v43_2010-12.pdf B.4
- if (marker == APP2 && p - payload >= 14 &&
+ if (meta->icc && marker == APP2 && p - payload >= 14 &&
!memcmp(payload, "ICC_PROFILE\0", 12) && !icc_done &&
payload[12] == ++icc_sequence && payload[13] >= payload[12]) {
payload += 14;
- g_byte_array_append(icc, payload, p - payload);
+ g_byte_array_append(meta->icc, payload, p - payload);
icc_done = payload[-1] == icc_sequence;
}
// TODO(p): Extract the main XMP segment.
}
- if (exif->len)
+ if (meta->icc && !icc_done)
+ g_byte_array_set_size(meta->icc, 0);
+}
+
+static void
+load_jpeg_finalize(cairo_surface_t *surface, bool cmyk,
+ FivIoProfile destination, const char *data, size_t len)
+{
+ struct jpeg_metadata meta = {
+ .exif = g_byte_array_new(), .icc = g_byte_array_new()};
+
+ parse_jpeg_metadata(data, len, &meta);
+
+ if (meta.exif->len)
cairo_surface_set_user_data(surface, &fiv_io_key_exif,
- g_byte_array_free_to_bytes(exif),
+ g_byte_array_free_to_bytes(meta.exif),
(cairo_destroy_func_t) g_bytes_unref);
else
- g_byte_array_free(exif, TRUE);
+ g_byte_array_free(meta.exif, TRUE);
GBytes *icc_profile = NULL;
- if (icc_done)
+ if (meta.icc->len)
cairo_surface_set_user_data(surface, &fiv_io_key_icc,
- (icc_profile = g_byte_array_free_to_bytes(icc)),
+ (icc_profile = g_byte_array_free_to_bytes(meta.icc)),
(cairo_destroy_func_t) g_bytes_unref);
else
- g_byte_array_free(icc, TRUE);
- return icc_profile;
-}
+ g_byte_array_free(meta.icc, TRUE);
-static void
-load_jpeg_finalize(cairo_surface_t *surface, bool cmyk,
- FivIoProfile destination, const char *data, size_t len)
-{
- GBytes *icc_profile = parse_jpeg_metadata(surface, data, len);
FivIoProfile source = NULL;
if (icc_profile)
source = fiv_io_profile_new(
@@ -1700,6 +1727,269 @@ fail:
return result;
}
+// --- TIFF/EP + DNG -----------------------------------------------------------
+// In Nikon NEF files, which claim to be TIFF/EP-compatible, IFD0 is a tiny
+// uncompressed thumbnail with SubIFDs that, aside from raw sensor data,
+// typically contain a nearly full-size JPEG preview.
+//
+// LibRaw takes too long a time to render something that will never be as good
+// as the large preview, and libtiff can only read the horrible IFD0 thumbnail.
+// (TIFFSetSubDirectory() requires an ImageLength tag that's missing from JPEG
+// SubIFDs, and TIFFReadCustomDirectory() takes a privately defined struct that
+// may not be omitted.)
+//
+// While LibRaw since 0.21.0 provides an API that would allow us to extract
+// the JPEG, a little bit of custom processing won't hurt either.
+
+/// Look up the first entry with the given tag in the IFD that "self"
+/// is positioned at, leaving "self" itself where it was.
+/// On a miss, "entry" is reset to all zeroes, and false is returned.
+static bool
+tiffer_find(const struct tiffer *self, uint16_t tag, struct tiffer_entry *entry)
+{
+	// Tags must be stored in ascending order, which would permit
+	// a binary search:
+	//  - TIFF 6.0: Sort Order
+	//  - ISO/DIS 12234-2: 4.1.2, 5.1
+	//  - CIPA DC-007-2009 (Multi-Picture Format): 5.2.3., 5.2.4.
+	//  - CIPA DC-008-2019 (Exif 2.32): 4.6.2.
+	// A linear scan over a private copy of the reader is much simpler, though.
+	struct tiffer cursor = *self;
+	for (;;) {
+		if (!tiffer_next_entry(&cursor, entry))
+			break;
+		if (entry->tag == tag)
+			return true;
+	}
+
+	*entry = (struct tiffer_entry) {};
+	return false;
+}
+
+/// Find the given tag in the current IFD, and decode its first value
+/// as an integer. Returns false when the tag is missing or not integral.
+static bool
+tiffer_find_integer(const struct tiffer *self, uint16_t tag, int64_t *i)
+{
+	struct tiffer_entry found = {};
+	if (!tiffer_find(self, tag, &found))
+		return false;
+
+	return tiffer_integer(self, &found, i);
+}
+
+// Start iterating over the SubIFDs of the given IFD.
+// When there are none, the returned entry has a zero "remaining_count".
+static struct tiffer_entry
+tiff_ep_subifds_init(const struct tiffer *T)
+{
+	struct tiffer_entry subifds = {};
+
+	// The result may be ignored: tiffer_find() zeroes the entry on a miss.
+	(void) tiffer_find(T, TIFF_SubIFDs, &subifds);
+	return subifds;
+}
+
+/// Open the SubIFD that "subifds" currently points at as "subT",
+/// and move the iterator on to the next offset.
+/// Returns false once the offsets are used up, or on any error.
+static bool
+tiff_ep_subifds_next(
+	const struct tiffer *T, struct tiffer_entry *subifds, struct tiffer *subT)
+{
+	// XXX: Only an exhausted "remaining_count" is a regular end of iteration,
+	// the other failures are errors, and should perhaps be reported.
+	int64_t ifd_offset = 0;
+	if (!tiffer_integer(T, subifds, &ifd_offset))
+		return false;
+	if (ifd_offset < 0 || ifd_offset > UINT32_MAX)
+		return false;
+	if (!tiffer_subifd(T, ifd_offset, subT))
+		return false;
+
+	(void) tiffer_next_value(subifds);
+	return true;
+}
+
+// SubIFD offsets come from the file, and may form cycles or absurdly deep
+// chains. Real-world raws nest a level or two, so this is plenty.
+enum { TIFF_EP_FIND_MAIN_MAX_DEPTH = 8 };
+
+/// Depth-limited worker for tiff_ep_find_main();
+/// "depth" is the number of SubIFD levels already descended.
+static bool
+tiff_ep_find_main_at(
+	const struct tiffer *T, struct tiffer *outputT, int depth)
+{
+	// This is a mandatory field.
+	int64_t type = 0;
+	if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type))
+		return false;
+
+	// This is the main image.
+	// (See DNG rather than ISO/DIS 12234-2 for values.)
+	if (type == 0) {
+		*outputT = *T;
+		return true;
+	}
+
+	// Refuse to descend any further, rather than overflow the stack
+	// on a SubIFD that (indirectly) points back at itself.
+	if (depth >= TIFF_EP_FIND_MAIN_MAX_DEPTH)
+		return false;
+
+	struct tiffer_entry subifds = tiff_ep_subifds_init(T);
+	struct tiffer subT = {};
+	while (tiff_ep_subifds_next(T, &subifds, &subT))
+		if (tiff_ep_find_main_at(&subT, outputT, depth + 1))
+			return true;
+	return false;
+}
+
+/// Search the given IFD and its SubIFDs, depth first, for the first IFD
+/// with a NewSubfileType of zero (the main image), and copy a reader
+/// positioned at it to "outputT". Returns false when there is none.
+static bool
+tiff_ep_find_main(const struct tiffer *T, struct tiffer *outputT)
+{
+	return tiff_ep_find_main_at(T, outputT, 0);
+}
+
+/// The largest embedded JPEG found so far by tiff_ep_find_jpeg();
+/// "jpeg" points into the TIFF data rather than at a copy.
+struct tiff_ep_jpeg {
+ const uint8_t *jpeg; ///< JPEG data stream
+ size_t jpeg_length; ///< JPEG data stream length
+ int64_t pixels; ///< Number of pixels in the JPEG
+};
+
+/// Consider the JPEG stream referenced by the given IFD, if there is one,
+/// and store it in "out" when it has more pixels than what "out" holds.
+/// IFDs without a usable JPEG are silently skipped.
+static void
+tiff_ep_find_jpeg_evaluate(const struct tiffer *T, struct tiff_ep_jpeg *out)
+{
+	// This is a mandatory field.
+	int64_t compression = 0;
+	if (!tiffer_find_integer(T, TIFF_Compression, &compression))
+		return;
+
+	uint16_t tag_pointer = 0, tag_length = 0;
+	switch (compression) {
+	// This is how Exif specifies it, which doesn't follow TIFF 6.0.
+	case TIFF_Compression_JPEG:
+		tag_pointer = TIFF_JPEGInterchangeFormat;
+		tag_length = TIFF_JPEGInterchangeFormatLength;
+		break;
+	// Theoretically, there may be more strips, but this is not expected.
+	case TIFF_Compression_JPEGDatastream:
+		tag_pointer = TIFF_StripOffsets;
+		tag_length = TIFF_StripByteCounts;
+		break;
+	default:
+		return;
+	}
+
+	// The stream must be non-empty, and lie entirely within the file.
+	int64_t ipointer = 0, ilength = 0;
+	if (!tiffer_find_integer(T, tag_pointer, &ipointer) ||
+		!tiffer_find_integer(T, tag_length, &ilength) ||
+		ipointer <= 0 || ilength <= 0 ||
+		(uint64_t) ilength > SIZE_MAX ||
+		ipointer + ilength > (T->end - T->begin))
+		return;
+
+	// Note that to get the largest JPEG,
+	// we don't need to descend into Exif thumbnails.
+	// TODO(p): Consider DNG 1.2.0.0 PreviewColorSpace.
+	// But first, try to find some real-world files with it.
+	const uint8_t *jpeg = T->begin + ipointer;
+	size_t jpeg_length = ilength;
+
+	// With NULL buffers, only the dimensions get extracted.
+	struct jpeg_metadata meta = {};
+	parse_jpeg_metadata((const char *) jpeg, jpeg_length, &meta);
+
+	// JPEG dimensions are 16-bit in each direction, so their product
+	// may not fit in an int: widen before multiplying, not after.
+	int64_t pixels = (int64_t) meta.width * meta.height;
+	if (pixels > out->pixels) {
+		out->jpeg = jpeg;
+		out->jpeg_length = jpeg_length;
+		out->pixels = pixels;
+	}
+}
+
+// SubIFD offsets come from the file, and may form cycles or absurdly deep
+// chains. Real-world raws nest a level or two, so this is plenty.
+enum { TIFF_EP_FIND_JPEG_MAX_DEPTH = 8 };
+
+/// Depth-limited worker for tiff_ep_find_jpeg();
+/// "depth" is the number of SubIFD levels already descended.
+static bool
+tiff_ep_find_jpeg_at(
+	const struct tiffer *T, struct tiff_ep_jpeg *out, int depth)
+{
+	// This is a mandatory field.
+	int64_t type = 0;
+	if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type))
+		return false;
+
+	// This is a thumbnail of the main image.
+	// (See DNG rather than ISO/DIS 12234-2 for values.)
+	if (type == 1)
+		tiff_ep_find_jpeg_evaluate(T, out);
+
+	struct tiffer_entry subifds = tiff_ep_subifds_init(T);
+	struct tiffer subT = {};
+	while (tiff_ep_subifds_next(T, &subifds, &subT)) {
+		// Treat excessive nesting as a malformed file, rather than
+		// overflow the stack on a SubIFD that points back at itself.
+		if (depth >= TIFF_EP_FIND_JPEG_MAX_DEPTH ||
+			!tiff_ep_find_jpeg_at(&subT, out, depth + 1))
+			return false;
+	}
+	return true;
+}
+
+/// Walk the given IFD and all of its SubIFDs, and leave the largest JPEG
+/// found in reduced-resolution IFDs (NewSubfileType of 1) in "out".
+/// Returns false when any visited IFD lacks NewSubfileType,
+/// or when the SubIFDs are nested too deep.
+static bool
+tiff_ep_find_jpeg(const struct tiffer *T, struct tiff_ep_jpeg *out)
+{
+	return tiff_ep_find_jpeg_at(T, out, 0);
+}
+
+/// Load the embedded JPEG preview belonging to the top-level IFD that "T"
+/// is positioned at, provided that it nearly covers the main image.
+/// Returns NULL with "error" set otherwise.
+static cairo_surface_t *
+load_tiff_ep(
+	const struct tiffer *T, const FivIoOpenContext *ctx, GError **error)
+{
+	// ISO/DIS 12234-2 carelessly says the identifier should be in "IFD0",
+	// but it might have intended to say "all top-level IFDs".
+	// The DNG specification shares the same problem.
+	//
+	// In any case, chained TIFFs are relatively rare.
+	struct tiffer_entry id = {};
+	bool is_tiffep = false;
+	if (tiffer_find(T, TIFF_TIFF_EPStandardID, &id))
+		is_tiffep = id.type == BYTE && id.remaining_count == 4 &&
+			id.p[0] == 1 && !id.p[1] && !id.p[2] && !id.p[3];
+
+	// Apple ProRAW, e.g., does not claim TIFF/EP compatibility,
+	// but we should still be able to make sense of it.
+	bool is_supported_dng = false;
+	if (tiffer_find(T, TIFF_DNGBackwardVersion, &id))
+		is_supported_dng = id.type == BYTE && id.remaining_count == 4 &&
+			id.p[0] == 1 && id.p[1] <= 6 && !id.p[2] && !id.p[3];
+
+	if (!(is_tiffep || is_supported_dng)) {
+		set_error(error, "not a supported TIFF/EP or DNG image");
+		return NULL;
+	}
+
+	// The main image is only needed for its dimensions.
+	struct tiffer mainT = {};
+	if (!tiff_ep_find_main(T, &mainT)) {
+		set_error(error, "could not find a main image");
+		return NULL;
+	}
+
+	int64_t width = 0, height = 0;
+	bool have_size = tiffer_find_integer(&mainT, TIFF_ImageWidth, &width) &&
+		tiffer_find_integer(&mainT, TIFF_ImageLength, &height);
+	if (!have_size || width <= 0 || height <= 0) {
+		set_error(error, "missing or invalid main image dimensions");
+		return NULL;
+	}
+
+	struct tiff_ep_jpeg preview = {};
+	if (!tiff_ep_find_jpeg(T, &preview)) {
+		set_error(error, "error looking for a full-size JPEG preview");
+		return NULL;
+	}
+
+	// Nikon NEFs seem to generally have a preview above 99 percent,
+	// (though some of them may not even reach 50 percent).
+	// Be a bit more generous than that with our crop tolerance.
+	// TODO(p): Also take into account DNG DefaultCropSize, if present.
+	if (preview.pixels / ((double) width * height) < 0.95) {
+		set_error(error, "could not find a large enough JPEG preview");
+		return NULL;
+	}
+
+	cairo_surface_t *surface = open_libjpeg_turbo(
+		(const char *) preview.jpeg, preview.jpeg_length, ctx, error);
+	if (!surface)
+		return NULL;
+
+	// Note that Exif may override this later in fiv_io_open_from_data().
+	// TODO(p): Try to use the Orientation field nearest to the target IFD.
+	// IFD0 just happens to be fine for Nikon NEF.
+	int64_t orientation = 0;
+	if (tiffer_find_integer(T, TIFF_Orientation, &orientation)) {
+		if (orientation >= 1 && orientation <= 8)
+			cairo_surface_set_user_data(surface, &fiv_io_key_orientation,
+				(void *) (uintptr_t) orientation, NULL);
+	}
+	return surface;
+}
+
+/// Try to load JPEG previews from a TIFF/EP or DNG file held in memory,
+/// one page per top-level IFD (or just the first with "first_frame_only").
+/// Returns NULL on failure, in which case "error" is always set.
+static cairo_surface_t *
+open_tiff_ep(
+	const char *data, gsize len, const FivIoOpenContext *ctx, GError **error)
+{
+	// -Wunused-function, we might want to give this its own compile unit.
+	(void) tiffer_real;
+
+	struct tiffer T = {};
+	if (!tiffer_init(&T, (const uint8_t *) data, len)) {
+		set_error(error, "not a TIFF file");
+		return NULL;
+	}
+
+	cairo_surface_t *result = NULL, *result_tail = NULL;
+	while (tiffer_next_ifd(&T)) {
+		if (!try_append_page(
+				load_tiff_ep(&T, ctx, error), &result, &result_tail)) {
+			g_clear_pointer(&result, cairo_surface_destroy);
+			return NULL;
+		}
+		if (ctx->first_frame_only)
+			break;
+
+		// TODO(p): Try to adjust tiffer so that this isn't necessary.
+		struct tiffer_entry dummy = {};
+		while (tiffer_next_entry(&T, &dummy))
+			;
+	}
+
+	// The first IFD offset may be zero or point outside the file, and then
+	// the loop above never runs. The caller dereferences the error
+	// whenever we return NULL, so make sure that there is one.
+	if (!result && (!error || !*error))
+		set_error(error, "no readable TIFF image file directory");
+	return result;
+}
+
// --- Optional dependencies ---------------------------------------------------
#ifdef HAVE_LIBRAW // ---------------------------------------------------------
@@ -2590,30 +2880,6 @@ open_libtiff(
if (!tiff)
goto fail;
- // In Nikon NEF files, IFD0 is a tiny uncompressed thumbnail with SubIFDs--
- // two of them JPEGs, the remaining one is raw. libtiff cannot read either
- // of those better versions.
- //
- // TODO(p): If NewSubfileType is ReducedImage, and it has SubIFDs compressed
- // as old JPEG (6), decode JPEGInterchangeFormat/JPEGInterchangeFormatLength
- // with libjpeg-turbo and insert them as the starting pages.
- //
- // This is not possible with libtiff directly, because TIFFSetSubDirectory()
- // requires an ImageLength tag that's missing, and TIFFReadCustomDirectory()
- // takes a privately defined struct that cannot be omitted.
- //
- // TODO(p): Samsung Android DNGs also claim to be TIFF/EP, but use a smaller
- // uncompressed YCbCr image. Apple ProRAW uses the new JPEG Compression (7),
- // with a weird Orientation. It also uses that value for its raw data.
- uint32_t subtype = 0;
- uint16_t subifd_count = 0;
- const uint64_t *subifd_offsets = NULL;
- if (TIFFGetField(tiff, TIFFTAG_SUBFILETYPE, &subtype) &&
- (subtype & FILETYPE_REDUCEDIMAGE) &&
- TIFFGetField(tiff, TIFFTAG_SUBIFD, &subifd_count, &subifd_offsets) &&
- subifd_count > 0 && subifd_offsets) {
- }
-
do {
// We inform about unsupported directories, but do not fail on them.
GError *err = NULL;
@@ -2824,6 +3090,14 @@ fiv_io_open_from_data(
surface = open_libwebp(data, len, ctx, error);
break;
default:
+ // Try to extract full-size previews from TIFF/EP-compatible raws.
+ if ((surface = open_tiff_ep(data, len, ctx, error)))
+ break;
+ if (error) {
+ g_debug("%s", (*error)->message);
+ g_clear_error(error);
+ }
+
#ifdef HAVE_LIBRAW // ---------------------------------------------------------
if ((surface = open_libraw(data, len, ctx, error)))
break;
diff --git a/tiff-tables.awk b/tiff-tables.awk
index 2d93c36..29b462b 100755
--- a/tiff-tables.awk
+++ b/tiff-tables.awk
@@ -2,6 +2,22 @@
BEGIN {
FS = ", *"
print "// Generated by tiff-tables.awk. DO NOT MODIFY."
+ # The struct definitions are wrapped in a preprocessor guard, so that
+ # includers defining TIFF_TABLES_CONSTANTS_ONLY receive none of them.
+ print ""
+ print "#ifndef TIFF_TABLES_CONSTANTS_ONLY"
+ print "#include <stddef.h>"
+ print "#include <stdint.h>"
+ print ""
+ print "struct tiff_value {"
+ print "\tconst char *name;"
+ print "\tuint16_t value;"
+ print "};"
+ print ""
+ print "struct tiff_entry {"
+ print "\tconst char *name;"
+ print "\tuint16_t tag;"
+ print "\tstruct tiff_value *values;"
+ print "};"
+ print "#endif"
}
{
@@ -55,8 +71,10 @@ function flushvalues() {
function flushsection() {
if (section) {
flushvalues()
- print "};\n\n" allvalues "static struct tiff_entry " \
+ print "};\n\n" allvalues "#ifndef TIFF_TABLES_CONSTANTS_ONLY"
+ print "static struct tiff_entry " \
sectionsnakecase "_entries[] = {" fields "\n\t{}\n};"
+ print "#endif"
}
}
diff --git a/tiffer.h b/tiffer.h
new file mode 100644
index 0000000..b4cbc5d
--- /dev/null
+++ b/tiffer.h
@@ -0,0 +1,340 @@
+//
+// tiffer.h: TIFF reading utilities
+//
+// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+
+// --- Utilities ---------------------------------------------------------------
+
+/// Decode a big-endian 64-bit unsigned integer from the 8 bytes at "p".
+static uint64_t
+tiffer_u64be(const uint8_t *p)
+{
+	uint64_t value = 0;
+	for (int i = 0; i < 8; i++)
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode a big-endian 32-bit unsigned integer from the 4 bytes at "p".
+static uint32_t
+tiffer_u32be(const uint8_t *p)
+{
+	uint32_t value = 0;
+	for (int i = 0; i < 4; i++)
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode a big-endian 16-bit unsigned integer from the 2 bytes at "p".
+static uint16_t
+tiffer_u16be(const uint8_t *p)
+{
+	const uint16_t high = p[0], low = p[1];
+	return high << 8 | low;
+}
+
+/// Decode a little-endian 64-bit unsigned integer from the 8 bytes at "p".
+static uint64_t
+tiffer_u64le(const uint8_t *p)
+{
+	uint64_t value = 0;
+	for (int i = 8; i-- > 0; )
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode a little-endian 32-bit unsigned integer from the 4 bytes at "p".
+static uint32_t
+tiffer_u32le(const uint8_t *p)
+{
+	uint32_t value = 0;
+	for (int i = 4; i-- > 0; )
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode a little-endian 16-bit unsigned integer from the 2 bytes at "p".
+static uint16_t
+tiffer_u16le(const uint8_t *p)
+{
+	const uint16_t low = p[0], high = p[1];
+	return high << 8 | low;
+}
+
+// --- TIFF --------------------------------------------------------------------
+// libtiff is a mess, and the format is not particularly complicated.
+// Exiv2 is senselessly copylefted, and cannot do much.
+// libexif is only marginally better.
+// ExifTool is too user-oriented.
+
+/// A set of integer decoders for one particular byte order.
+struct un {
+ uint64_t (*u64) (const uint8_t *);
+ uint32_t (*u32) (const uint8_t *);
+ uint16_t (*u16) (const uint8_t *);
+};
+
+// tiffer_init() picks the big-endian table for "MM" files,
+// and the little-endian one for "II" files.
+static struct un tiffer_unbe = {tiffer_u64be, tiffer_u32be, tiffer_u16be};
+static struct un tiffer_unle = {tiffer_u64le, tiffer_u32le, tiffer_u16le};
+
+/// Reader state: a cursor over a TIFF file held in memory.
+struct tiffer {
+ struct un *un; ///< Decoders matching the file's byte order
+ const uint8_t *begin, *p, *end; ///< File start, cursor, one past the end
+ uint16_t remaining_fields; ///< Unread entries of the current IFD
+};
+
+/// Read a 32-bit integer at the cursor, and move past it.
+/// Returns false, touching nothing, when it would fall outside the file.
+static bool
+tiffer_u32(struct tiffer *self, uint32_t *u)
+{
+	const uint8_t *cursor = self->p;
+	if (cursor < self->begin || cursor + 4 > self->end)
+		return false;
+
+	self->p = cursor + 4;
+	*u = self->un->u32(cursor);
+	return true;
+}
+
+/// Read a 16-bit integer at the cursor, and move past it.
+/// Returns false, touching nothing, when it would fall outside the file.
+static bool
+tiffer_u16(struct tiffer *self, uint16_t *u)
+{
+	const uint8_t *cursor = self->p;
+	if (cursor < self->begin || cursor + 2 > self->end)
+		return false;
+
+	self->p = cursor + 2;
+	*u = self->un->u16(cursor);
+	return true;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// Set up a reader over "len" bytes of TIFF data, and detect its byte order.
+/// Returns false when the data is too short, or lacks a TIFF signature.
+static bool
+tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len)
+{
+	static const uint8_t
+		magic_le[4] = {'I', 'I', 42, 0},
+		magic_be[4] = {'M', 'M', 0, 42};
+
+	*self = (struct tiffer) {.begin = tiff, .p = tiff, .end = tiff + len};
+
+	// The header is the signature, plus the offset of the first IFD.
+	if (len < 8)
+		return false;
+
+	if (!memcmp(tiff, magic_le, sizeof magic_le))
+		self->un = &tiffer_unle;
+	else if (!memcmp(tiff, magic_be, sizeof magic_be))
+		self->un = &tiffer_unbe;
+	else
+		return false;
+
+	// Stop in front of the first IFD offset: the caller needs to read
+	// the first IFD explicitly, even though TIFF 6.0 requires it to be present.
+	self->p = tiff + 4;
+	return true;
+}
+
+/// Follow the IFD offset at the cursor, and enter the IFD it points to.
+/// Returns false at the end of the chain, on unread entries in the current
+/// IFD, and when reads fall outside the file.
+static bool
+tiffer_next_ifd(struct tiffer *self)
+{
+	// The offset of the next IFD only follows the last entry.
+	if (self->remaining_fields != 0)
+		return false;
+
+	// A zero offset terminates the chain.
+	uint32_t ifd_offset = 0;
+	if (!tiffer_u32(self, &ifd_offset) || ifd_offset == 0)
+		return false;
+
+	// TIFF 6.0 requires at least one entry per IFD,
+	// but nothing here depends on that, so it isn't checked.
+	self->p = self->begin + ifd_offset;
+	return tiffer_u16(self, &self->remaining_fields);
+}
+
+/// Make "subreader" a copy of this reader that is positioned at the first
+/// entry of the IFD at "offset". Returns false when the entry count of
+/// that IFD cannot be read.
+static bool
+tiffer_subifd(
+	const struct tiffer *self, uint32_t offset, struct tiffer *subreader)
+{
+	struct tiffer derived = *self;
+	derived.p = derived.begin + offset;
+
+	bool ok = tiffer_u16(&derived, &derived.remaining_fields);
+	*subreader = derived;
+	return ok;
+}
+
+/// Field types, numbered the way they are stored in IFD entries.
+enum tiffer_type {
+	BYTE = 1,
+	ASCII = 2,
+	SHORT = 3,
+	LONG = 4,
+	RATIONAL = 5,
+	SBYTE = 6,
+	UNDEFINED = 7,
+	SSHORT = 8,
+	SLONG = 9,
+	SRATIONAL = 10,
+	FLOAT = 11,
+	DOUBLE = 12,
+	// This last type from TIFF Technical Note 1 isn't really used much.
+	IFD = 13
+};
+
+/// Return the size in bytes of one value of the given type,
+/// or zero when the type is not a known one.
+static size_t
+tiffer_value_size(enum tiffer_type type)
+{
+	// Indexed by type, any gaps are implicitly zero.
+	static const uint8_t sizes[] = {
+		[BYTE] = 1, [SBYTE] = 1, [ASCII] = 1, [UNDEFINED] = 1,
+		[SHORT] = 2, [SSHORT] = 2,
+		[LONG] = 4, [SLONG] = 4, [FLOAT] = 4, [IFD] = 4,
+		[RATIONAL] = 8, [SRATIONAL] = 8, [DOUBLE] = 8,
+	};
+
+	// Types come straight from files, and may hold anything.
+	size_t index = (size_t) type;
+	if (index >= sizeof sizes / sizeof sizes[0])
+		return 0;
+	return sizes[index];
+}
+
+/// A lean iterator for values within entries.
+/// It is filled in by tiffer_next_entry(), and advanced by tiffer_next_value().
+struct tiffer_entry {
+ uint16_t tag; ///< Tag of the entry
+ enum tiffer_type type; ///< Type of each of its values
+ // For {S,}BYTE, ASCII, UNDEFINED, use these fields directly.
+ const uint8_t *p; ///< The current value
+ uint32_t remaining_count; ///< Values left, including the current one
+};
+
+/// Step over the current value of the entry.
+/// Returns false, changing nothing, when there are no values left.
+static bool
+tiffer_next_value(struct tiffer_entry *entry)
+{
+	if (entry->remaining_count == 0)
+		return false;
+
+	entry->remaining_count--;
+	entry->p += tiffer_value_size(entry->type);
+	return true;
+}
+
+/// Decode the current value of the entry as an integer.
+/// Returns false, leaving "out" alone, when no values remain,
+/// or when the entry is not of an integral or byte-sized type.
+static bool
+tiffer_integer(
+	const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out)
+{
+	if (entry->remaining_count == 0)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	// TIFF 6.0 only directly suggests that a reader should accept
+	// any of BYTE/SHORT/LONG for unsigned integers.
+	const uint8_t *raw = entry->p;
+	int64_t value = 0;
+	switch (entry->type) {
+	case BYTE:
+	case ASCII:
+	case UNDEFINED:
+		value = raw[0];
+		break;
+	case SBYTE:
+		value = (int8_t) raw[0];
+		break;
+	case SHORT:
+		value = self->un->u16(raw);
+		break;
+	case SSHORT:
+		value = (int16_t) self->un->u16(raw);
+		break;
+	case LONG:
+	case IFD:
+		value = self->un->u32(raw);
+		break;
+	case SLONG:
+		value = (int32_t) self->un->u32(raw);
+		break;
+	default:
+		return false;
+	}
+
+	*out = value;
+	return true;
+}
+
+/// Decode the current value of the entry as a fraction.
+/// Integers are accepted as well, and receive a denominator of one.
+/// Returns false when no values remain, or the type is unsuitable.
+static bool
+tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry,
+	int64_t *numerator, int64_t *denominator)
+{
+	if (entry->remaining_count == 0)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	if (entry->type == RATIONAL) {
+		*numerator = self->un->u32(entry->p);
+		*denominator = self->un->u32(entry->p + 4);
+		return true;
+	}
+	if (entry->type == SRATIONAL) {
+		*numerator = (int32_t) self->un->u32(entry->p);
+		*denominator = (int32_t) self->un->u32(entry->p + 4);
+		return true;
+	}
+
+	if (!tiffer_integer(self, entry, numerator))
+		return false;
+
+	*denominator = 1;
+	return true;
+}
+
+/// Decode the current value of the entry as a real number.
+/// Fractions and integers are accepted as well, and get converted.
+/// Returns false when no values remain, or the type is unsuitable.
+static bool
+tiffer_real(
+	const struct tiffer *self, const struct tiffer_entry *entry, double *out)
+{
+	if (!entry->remaining_count)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	// Assuming the host architecture uses IEEE 754,
+	// with the same byte order for integers and floating point numbers.
+	//
+	// Values are stored in the byte order of the file, and at arbitrary
+	// alignment, so they cannot be read through a cast pointer: decode
+	// the bits as an integer first, then reinterpret them.
+	switch (entry->type) {
+	case FLOAT: {
+		uint32_t bits = self->un->u32(entry->p);
+		float value = 0;
+		memcpy(&value, &bits, sizeof value);
+		*out = value;
+		return true;
+	}
+	case DOUBLE: {
+		uint64_t bits = self->un->u64(entry->p);
+		double value = 0;
+		memcpy(&value, &bits, sizeof value);
+		*out = value;
+		return true;
+	}
+	default: {
+		int64_t numerator = 0, denominator = 0;
+		if (tiffer_rational(self, entry, &numerator, &denominator)) {
+			*out = (double) numerator / denominator;
+			return true;
+		}
+		return false;
+	}
+	}
+}
+
+/// Read the next entry of the current IFD into "entry".
+/// On success, all values of the entry are known to lie within the file.
+/// Returns false when the IFD has no more entries, or the entry is broken;
+/// a broken entry is left with a type of 0xFFFF.
+static bool
+tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
+{
+	if (!self->remaining_fields)
+		return false;
+
+	uint16_t type = entry->type = 0xFFFF;
+	if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) ||
+		!tiffer_u32(self, &entry->remaining_count))
+		return false;
+
+	// The count comes from the file: multiply in 64 bits,
+	// so that the product cannot wrap around where size_t is 32-bit.
+	uint64_t values_size =
+		(uint64_t) tiffer_value_size(type) * entry->remaining_count;
+
+	// The 4-byte value/offset field must be present in either case.
+	const uint8_t *field = self->p;
+	uint32_t offset = 0;
+	if (!tiffer_u32(self, &offset))
+		return false;
+
+	// Short values may and will be inlined, rather than pointed to.
+	if (values_size <= sizeof offset) {
+		entry->p = field;
+	} else {
+		// Compare offsets rather than pointers, so as to never form
+		// a pointer outside the file.
+		uint64_t file_size = (uint64_t) (self->end - self->begin);
+		if (offset > file_size || values_size > file_size - offset)
+			return false;
+		entry->p = self->begin + offset;
+	}
+
+	// Setting it at the end may provide an indication while debugging.
+	entry->type = type;
+	self->remaining_fields--;
+	return true;
+}
diff --git a/tools/info.h b/tools/info.h
index 28cfb36..8dcd3d2 100644
--- a/tools/info.h
+++ b/tools/info.h
@@ -21,348 +21,10 @@
#include <stdlib.h>
#include <string.h>
-// --- Utilities ---------------------------------------------------------------
-
-static char *
-binhex(const uint8_t *data, size_t len)
-{
- static const char *alphabet = "0123456789abcdef";
- char *buf = calloc(1, len * 2 + 1), *p = buf;
- for (size_t i = 0; i < len; i++) {
- *p++ = alphabet[data[i] >> 4];
- *p++ = alphabet[data[i] & 0xF];
- }
- return buf;
-}
-
-static uint64_t
-u64be(const uint8_t *p)
-{
- return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 |
- (uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 |
- (uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
-}
-
-static uint32_t
-u32be(const uint8_t *p)
-{
- return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
-}
-
-static uint16_t
-u16be(const uint8_t *p)
-{
- return (uint16_t) p[0] << 8 | p[1];
-}
-
-static uint64_t
-u64le(const uint8_t *p)
-{
- return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 |
- (uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 |
- (uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
-}
-
-static uint32_t
-u32le(const uint8_t *p)
-{
- return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
-}
-
-static uint16_t
-u16le(const uint8_t *p)
-{
- return (uint16_t) p[1] << 8 | p[0];
-}
-
-// --- TIFF --------------------------------------------------------------------
-// libtiff is a mess, and the format is not particularly complicated.
-// Exiv2 is senselessly copylefted, and cannot do much.
-// libexif is only marginally better.
-// ExifTool is too user-oriented.
-
-static struct un {
- uint64_t (*u64) (const uint8_t *);
- uint32_t (*u32) (const uint8_t *);
- uint16_t (*u16) (const uint8_t *);
-} unbe = {u64be, u32be, u16be}, unle = {u64le, u32le, u16le};
-
-struct tiffer {
- struct un *un;
- const uint8_t *begin, *p, *end;
- uint16_t remaining_fields;
-};
-
-static bool
-tiffer_u32(struct tiffer *self, uint32_t *u)
-{
- if (self->p < self->begin || self->p + 4 > self->end)
- return false;
-
- *u = self->un->u32(self->p);
- self->p += 4;
- return true;
-}
-
-static bool
-tiffer_u16(struct tiffer *self, uint16_t *u)
-{
- if (self->p < self->begin || self->p + 2 > self->end)
- return false;
-
- *u = self->un->u16(self->p);
- self->p += 2;
- return true;
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-static bool
-tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len)
-{
- self->un = NULL;
- self->begin = self->p = tiff;
- self->end = tiff + len;
- self->remaining_fields = 0;
-
- const uint8_t
- le[4] = {'I', 'I', 42, 0},
- be[4] = {'M', 'M', 0, 42};
-
- if (tiff + 8 > self->end)
- return false;
- else if (!memcmp(tiff, le, sizeof le))
- self->un = &unle;
- else if (!memcmp(tiff, be, sizeof be))
- self->un = &unbe;
- else
- return false;
-
- self->p = tiff + 4;
- // The first IFD needs to be read by caller explicitly,
- // even though it's required to be present by TIFF 6.0.
- return true;
-}
-
-/// Read the next IFD in a sequence.
-static bool
-tiffer_next_ifd(struct tiffer *self)
-{
- // All fields from any previous IFD need to be read first.
- if (self->remaining_fields)
- return false;
-
- uint32_t ifd_offset = 0;
- if (!tiffer_u32(self, &ifd_offset))
- return false;
-
- // There is nothing more to read, this chain has terminated.
- if (!ifd_offset)
- return false;
-
- // Note that TIFF 6.0 requires there to be at least one entry,
- // but there is no need for us to check it.
- self->p = self->begin + ifd_offset;
- return tiffer_u16(self, &self->remaining_fields);
-}
-
-/// Initialize a derived TIFF reader for a subIFD at the given location.
-static bool
-tiffer_subifd(struct tiffer *self, uint32_t offset, struct tiffer *subreader)
-{
- *subreader = *self;
- subreader->p = subreader->begin + offset;
- return tiffer_u16(subreader, &subreader->remaining_fields);
-}
-
-enum tiffer_type {
- BYTE = 1, ASCII, SHORT, LONG, RATIONAL,
- SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE,
- IFD // This last type from TIFF Technical Note 1 isn't really used much.
-};
-
-static size_t
-tiffer_value_size(enum tiffer_type type)
-{
- switch (type) {
- case BYTE:
- case SBYTE:
- case ASCII:
- case UNDEFINED:
- return 1;
- case SHORT:
- case SSHORT:
- return 2;
- case LONG:
- case SLONG:
- case FLOAT:
- case IFD:
- return 4;
- case RATIONAL:
- case SRATIONAL:
- case DOUBLE:
- return 8;
- default:
- return 0;
- }
-}
-
-/// A lean iterator for values within entries.
-struct tiffer_entry {
- uint16_t tag;
- enum tiffer_type type;
- // For {S,}BYTE, ASCII, UNDEFINED, use these fields directly.
- const uint8_t *p;
- uint32_t remaining_count;
-};
-
-static bool
-tiffer_next_value(struct tiffer_entry *entry)
-{
- if (!entry->remaining_count)
- return false;
-
- entry->p += tiffer_value_size(entry->type);
- entry->remaining_count--;
- return true;
-}
-
-static bool
-tiffer_integer(
- const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out)
-{
- if (!entry->remaining_count)
- return false;
-
- // Somewhat excessively lenient, intended for display.
- // TIFF 6.0 only directly suggests that a reader is should accept
- // any of BYTE/SHORT/LONG for unsigned integers.
- switch (entry->type) {
- case BYTE:
- case ASCII:
- case UNDEFINED:
- *out = *entry->p;
- return true;
- case SBYTE:
- *out = (int8_t) *entry->p;
- return true;
- case SHORT:
- *out = self->un->u16(entry->p);
- return true;
- case SSHORT:
- *out = (int16_t) self->un->u16(entry->p);
- return true;
- case LONG:
- case IFD:
- *out = self->un->u32(entry->p);
- return true;
- case SLONG:
- *out = (int32_t) self->un->u32(entry->p);
- return true;
- default:
- return false;
- }
-}
-
-static bool
-tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry,
- int64_t *numerator, int64_t *denominator)
-{
- if (!entry->remaining_count)
- return false;
-
- // Somewhat excessively lenient, intended for display.
- switch (entry->type) {
- case RATIONAL:
- *numerator = self->un->u32(entry->p);
- *denominator = self->un->u32(entry->p + 4);
- return true;
- case SRATIONAL:
- *numerator = (int32_t) self->un->u32(entry->p);
- *denominator = (int32_t) self->un->u32(entry->p + 4);
- return true;
- default:
- if (tiffer_integer(self, entry, numerator)) {
- *denominator = 1;
- return true;
- }
- return false;
- }
-}
-
-static bool
-tiffer_real(
- const struct tiffer *self, const struct tiffer_entry *entry, double *out)
-{
- if (!entry->remaining_count)
- return false;
-
- // Somewhat excessively lenient, intended for display.
- // Assuming the host architecture uses IEEE 754.
- switch (entry->type) {
- int64_t numerator, denominator;
- case FLOAT:
- *out = *(float *) entry->p;
- return true;
- case DOUBLE:
- *out = *(double *) entry->p;
- return true;
- default:
- if (tiffer_rational(self, entry, &numerator, &denominator)) {
- *out = (double) numerator / denominator;
- return true;
- }
- return false;
- }
-}
-
-static bool
-tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
-{
- if (!self->remaining_fields)
- return false;
-
- uint16_t type = entry->type = 0xFFFF;
- if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) ||
- !tiffer_u32(self, &entry->remaining_count))
- return false;
-
- // Short values may and will be inlined, rather than pointed to.
- size_t values_size = tiffer_value_size(type) * entry->remaining_count;
- uint32_t offset = 0;
- if (values_size <= sizeof offset) {
- entry->p = self->p;
- self->p += sizeof offset;
- } else if (tiffer_u32(self, &offset)) {
- entry->p = self->begin + offset;
- } else {
- return false;
- }
-
- // All entries are pre-checked not to overflow.
- if (entry->p + values_size > self->end)
- return false;
-
- // Setting it at the end may provide an indication while debugging.
- entry->type = type;
- self->remaining_fields--;
- return true;
-}
-
-// --- TIFF/Exif tags ----------------------------------------------------------
-
-struct tiff_value {
- const char *name;
- uint16_t value;
-};
-
-struct tiff_entry {
- const char *name;
- uint16_t tag;
- struct tiff_value *values;
-};
+// --- TIFF/Exif ---------------------------------------------------------------
#include "tiff-tables.h"
+#include "tiffer.h"
// TODO(p): Consider if these can't be inlined into `tiff_entries`.
static struct {
@@ -376,6 +38,27 @@ static struct {
{}
};
+// --- Utilities ---------------------------------------------------------------
+
+// Unprefixed aliases for the byte order helpers defined in tiffer.h.
+// NOTE(review): presumably for the benefit of existing users of the old
+// names among the tools; confirm before removing.
+#define u64be tiffer_u64be
+#define u32be tiffer_u32be
+#define u16be tiffer_u16be
+#define u64le tiffer_u64le
+#define u32le tiffer_u32le
+#define u16le tiffer_u16le
+
+/// Render "len" bytes of "data" as a lowercase hexadecimal string.
+/// The result is to be released with free().
+/// Returns NULL when the string cannot be allocated.
+static char *
+binhex(const uint8_t *data, size_t len)
+{
+	static const char alphabet[] = "0123456789abcdef";
+
+	// Two digits per byte, plus the terminator, must fit in a size_t.
+	if (len > (SIZE_MAX - 1) / 2)
+		return NULL;
+
+	char *buf = calloc(1, len * 2 + 1);
+	if (!buf)
+		return NULL;
+
+	char *p = buf;
+	for (size_t i = 0; i < len; i++) {
+		*p++ = alphabet[data[i] >> 4];
+		*p++ = alphabet[data[i] & 0xF];
+	}
+	return buf;
+}
+
// --- Analysis ----------------------------------------------------------------
static jv