diff options
| author | Přemysl Eric Janouch <p@janouch.name> | 2023-05-23 02:09:15 +0200 | 
|---|---|---|
| committer | Přemysl Eric Janouch <p@janouch.name> | 2023-05-26 15:32:34 +0200 | 
| commit | bb4b895cb5938712bd09fbd2b5f49bea811d7551 (patch) | |
| tree | 586902190b9fe52b1c6cb14309b852267fc9ee1e | |
| parent | 0f1c61ae3325dda14be8f98ee7047ac5eda02108 (diff) | |
| download | fiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.tar.gz fiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.tar.xz fiv-bb4b895cb5938712bd09fbd2b5f49bea811d7551.zip  | |
Extract some full-size raw previews without LibRaw
Not all image/x-nikon-nef will work like this,
so don't claim their MIME type.
| -rw-r--r-- | fiv-io.c | 394 | ||||
| -rwxr-xr-x | tiff-tables.awk | 20 | ||||
| -rw-r--r-- | tiffer.h | 340 | ||||
| -rw-r--r-- | tools/info.h | 363 | 
4 files changed, 716 insertions, 401 deletions
@@ -41,6 +41,10 @@  #include <lcms2.h>  #endif  // HAVE_LCMS2 +#define TIFF_TABLES_CONSTANTS_ONLY +#include "tiff-tables.h" +#include "tiffer.h" +  #ifdef HAVE_LIBRAW  #include <libraw.h>  #if LIBRAW_VERSION >= LIBRAW_MAKE_VERSION(0, 21, 0) @@ -1141,32 +1145,28 @@ fail:  // --- JPEG -------------------------------------------------------------------- -static GBytes * -parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len) +struct jpeg_metadata { +	GByteArray *exif;                   ///< Exif buffer or NULL +	GByteArray *icc;                    ///< ICC profile buffer or NULL +	int width;                          ///< Image width +	int height;                         ///< Image height +}; + +static void +parse_jpeg_metadata(const char *data, size_t len, struct jpeg_metadata *meta)  {  	// Because the JPEG file format is simple, just do it manually.  	// See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf  	enum { -		APP0 = 0xE0, -		APP1, -		APP2, -		RST0 = 0xD0, -		RST1, -		RST2, -		RST3, -		RST4, -		RST5, -		RST6, -		RST7, -		SOI = 0xD8, -		EOI = 0xD9, -		SOS = 0xDA,  		TEM = 0x01, +		SOF0 = 0xC0, SOF1, SOF2, SOF3, DHT, SOF5, SOF6, SOF7, +		JPG, SOF9, SOF10, SOF11, DAC, SOF13, SOF14, SOF15, +		RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7, +		SOI, EOI, SOS, DQT, DNL, DRI, DHP, EXP, +		APP0, APP1, APP2, APP3, APP4, APP5, APP6, APP7,  	}; -	GByteArray *exif = g_byte_array_new(), *icc = g_byte_array_new();  	int icc_sequence = 0, icc_done = FALSE; -  	const guint8 *p = (const guint8 *) data, *end = p + len;  	while (p + 3 < end && *p++ == 0xFF && *p != SOS && *p != EOI) {  		// The previous byte is a fill byte, restart. @@ -1195,49 +1195,76 @@ parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len)  		if (G_UNLIKELY((p += length) > end))  			break; +		switch (marker) { +		case SOF0: +		case SOF1: +		case SOF2: +		case SOF3: +		case SOF5: +		case SOF6: +		case SOF7: +		case SOF9: +		case SOF10: +		case SOF11: +		case SOF13: +		case SOF14: +		case SOF15: +			if (length >= 5) { +				meta->width = (payload[3] << 8) + payload[4]; +				meta->height = (payload[1] << 8) + payload[2]; +			} +		} +  		// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2  		// Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3  		// Not checking the padding byte is intentional. -		if (marker == APP1 && p - payload >= 6 && -			!memcmp(payload, "Exif\0", 5) && !exif->len) { +		// XXX: Thumbnails may in practice overflow into follow-up segments. +		if (meta->exif && marker == APP1 && p - payload >= 6 && +			!memcmp(payload, "Exif\0", 5) && !meta->exif->len) {  			payload += 6; -			g_byte_array_append(exif, payload, p - payload); +			g_byte_array_append(meta->exif, payload, p - payload);  		}  		// https://www.color.org/specification/ICC1v43_2010-12.pdf B.4 -		if (marker == APP2 && p - payload >= 14 && +		if (meta->icc && marker == APP2 && p - payload >= 14 &&  			!memcmp(payload, "ICC_PROFILE\0", 12) && !icc_done &&  			payload[12] == ++icc_sequence && payload[13] >= payload[12]) {  			payload += 14; -			g_byte_array_append(icc, payload, p - payload); +			g_byte_array_append(meta->icc, payload, p - payload);  			icc_done = payload[-1] == icc_sequence;  		}  		// TODO(p): Extract the main XMP segment.  	} -	if (exif->len) +	if (meta->icc && !icc_done) +		g_byte_array_set_size(meta->icc, 0); +} + +static void +load_jpeg_finalize(cairo_surface_t *surface, bool cmyk, +	FivIoProfile destination, const char *data, size_t len) +{ +	struct jpeg_metadata meta = { +		.exif = g_byte_array_new(), .icc = g_byte_array_new()}; + +	parse_jpeg_metadata(data, len, &meta); + +	if (meta.exif->len)  		cairo_surface_set_user_data(surface, &fiv_io_key_exif, -			g_byte_array_free_to_bytes(exif), +			g_byte_array_free_to_bytes(meta.exif),  			(cairo_destroy_func_t) g_bytes_unref);  	else -		g_byte_array_free(exif, TRUE); +		g_byte_array_free(meta.exif, TRUE);  	GBytes *icc_profile = NULL; -	if (icc_done) +	if (meta.icc->len)  		cairo_surface_set_user_data(surface, &fiv_io_key_icc, -			(icc_profile = g_byte_array_free_to_bytes(icc)), +			(icc_profile = g_byte_array_free_to_bytes(meta.icc)),  			(cairo_destroy_func_t) g_bytes_unref);  	else -		g_byte_array_free(icc, TRUE); -	return icc_profile; -} +		g_byte_array_free(meta.icc, TRUE); -static void -load_jpeg_finalize(cairo_surface_t *surface, bool cmyk, -	FivIoProfile destination, const char *data, size_t len) -{ -	GBytes *icc_profile = parse_jpeg_metadata(surface, data, len);  	FivIoProfile source = NULL;  	if (icc_profile)  		source = fiv_io_profile_new( @@ -1700,6 +1727,269 @@ fail:  	return result;  } +// --- TIFF/EP + DNG ----------------------------------------------------------- +// In Nikon NEF files, which claim to be TIFF/EP-compatible, IFD0 is a tiny +// uncompressed thumbnail with SubIFDs that, aside from raw sensor data, +// typically contain a nearly full-size JPEG preview. +// +// LibRaw takes too long a time to render something that will never be as good +// as the large preview, and libtiff can only read the horrible IFD0 thumbnail. +// (TIFFSetSubDirectory() requires an ImageLength tag that's missing from JPEG +// SubIFDs, and TIFFReadCustomDirectory() takes a privately defined struct that +// may not be omitted.) +// +// While LibRaw since 0.21.0 provides an API that would allow us to extract +// the JPEG, a little bit of custom processing won't hurt either. + +static bool +tiffer_find(const struct tiffer *self, uint16_t tag, struct tiffer_entry *entry) +{ +	// Note that we could employ binary search, because tags must be ordered: +	//  - TIFF 6.0: Sort Order +	//  - ISO/DIS 12234-2: 4.1.2, 5.1 +	//  - CIPA DC-007-2009 (Multi-Picture Format): 5.2.3., 5.2.4. +	//  - CIPA DC-008-2019 (Exif 2.32): 4.6.2. +	// However, it doesn't seem to warrant the ugly code. +	struct tiffer T = *self; +	while (tiffer_next_entry(&T, entry)) { +		if (entry->tag == tag) +			return true; +	} +	*entry = (struct tiffer_entry) {}; +	return false; +} + +static bool +tiffer_find_integer(const struct tiffer *self, uint16_t tag, int64_t *i) +{ +	struct tiffer_entry entry = {}; +	return tiffer_find(self, tag, &entry) && tiffer_integer(self, &entry, i); +} + +// In case of failure, an entry with a zero "remaining_count" is returned. +static struct tiffer_entry +tiff_ep_subifds_init(const struct tiffer *T) +{ +	struct tiffer_entry entry = {}; +	(void) tiffer_find(T, TIFF_SubIFDs, &entry); +	return entry; +} + +static bool +tiff_ep_subifds_next( +	const struct tiffer *T, struct tiffer_entry *subifds, struct tiffer *subT) +{ +	// XXX: Except for a zero "remaining_count", all conditions are errors, +	// and should perhaps be reported. +	int64_t offset = 0; +	if (!tiffer_integer(T, subifds, &offset) || +		offset < 0 || offset > UINT32_MAX || !tiffer_subifd(T, offset, subT)) +		return false; + +	(void) tiffer_next_value(subifds); +	return true; +} + +static bool +tiff_ep_find_main(const struct tiffer *T, struct tiffer *outputT) +{ +	// This is a mandatory field. +	int64_t type = 0; +	if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type)) +		return false; + +	// This is the main image. +	// (See DNG rather than ISO/DIS 12234-2 for values.) +	if (type == 0) { +		*outputT = *T; +		return true; +	} + +	struct tiffer_entry subifds = tiff_ep_subifds_init(T); +	struct tiffer subT = {}; +	while (tiff_ep_subifds_next(T, &subifds, &subT)) +		if (tiff_ep_find_main(&subT, outputT)) +			return true; +	return false; +} + +struct tiff_ep_jpeg { +	const uint8_t *jpeg;                ///< JPEG data stream +	size_t jpeg_length;                 ///< JPEG data stream length +	int64_t pixels;                     ///< Number of pixels in the JPEG +}; + +static void +tiff_ep_find_jpeg_evaluate(const struct tiffer *T, struct tiff_ep_jpeg *out) +{ +	// This is a mandatory field. +	int64_t compression = 0; +	if (!tiffer_find_integer(T, TIFF_Compression, &compression)) +		return; + +	uint16_t tag_pointer = 0, tag_length = 0; +	switch (compression) { +		// This is how Exif specifies it, which doesn't follow TIFF 6.0. +	case TIFF_Compression_JPEG: +		tag_pointer = TIFF_JPEGInterchangeFormat; +		tag_length = TIFF_JPEGInterchangeFormatLength; +		break; +		// Theoretically, there may be more strips, but this is not expected. +	case TIFF_Compression_JPEGDatastream: +		tag_pointer = TIFF_StripOffsets; +		tag_length = TIFF_StripByteCounts; +		break; +	default: +		return; +	} + +	int64_t ipointer = 0, ilength = 0; +	if (!tiffer_find_integer(T, tag_pointer, &ipointer) || +		!tiffer_find_integer(T, tag_length, &ilength) || +		ipointer <= 0 || ilength <= 0 || +		(uint64_t) ilength > SIZE_MAX || +		ipointer + ilength > (T->end - T->begin)) +		return; + +	// Note that to get the largest JPEG, +	// we don't need to descend into Exif thumbnails. +	// TODO(p): Consider DNG 1.2.0.0 PreviewColorSpace. +	// But first, try to find some real-world files with it. +	const uint8_t *jpeg = T->begin + ipointer; +	size_t jpeg_length = ilength; + +	struct jpeg_metadata meta = {}; +	parse_jpeg_metadata((const char *) jpeg, jpeg_length, &meta); +	int64_t pixels = meta.width * meta.height; +	if (pixels > out->pixels) { +		out->jpeg = jpeg; +		out->jpeg_length = jpeg_length; +		out->pixels = pixels; +	} +} + +static bool +tiff_ep_find_jpeg(const struct tiffer *T, struct tiff_ep_jpeg *out) +{ +	// This is a mandatory field. +	int64_t type = 0; +	if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type)) +		return false; + +	// This is a thumbnail of the main image. +	// (See DNG rather than ISO/DIS 12234-2 for values.) +	if (type == 1) +		tiff_ep_find_jpeg_evaluate(T, out); + +	struct tiffer_entry subifds = tiff_ep_subifds_init(T); +	struct tiffer subT = {}; +	while (tiff_ep_subifds_next(T, &subifds, &subT)) +		if (!tiff_ep_find_jpeg(&subT, out)) +			return false; +	return true; +} + +static cairo_surface_t * +load_tiff_ep( +	const struct tiffer *T, const FivIoOpenContext *ctx, GError **error) +{ +	// ISO/DIS 12234-2 is a fuck-up that says this should be in "IFD0", +	// but it might have intended to say "all top-level IFDs". +	// The DNG specification shares the same problem. +	// +	// In any case, chained TIFFs are relatively rare. +	struct tiffer_entry entry = {}; +	bool is_tiffep = tiffer_find(T, TIFF_TIFF_EPStandardID, &entry) && +		entry.type == BYTE && entry.remaining_count == 4 && +		entry.p[0] == 1 && !entry.p[1] && !entry.p[2] && !entry.p[3]; + +	// Apple ProRAW, e.g., does not claim TIFF/EP compatibility, +	// but we should still be able to make sense of it. +	bool is_supported_dng = tiffer_find(T, TIFF_DNGBackwardVersion, &entry) && +		entry.type == BYTE && entry.remaining_count == 4 && +		entry.p[0] == 1 && entry.p[1] <= 6 && !entry.p[2] && !entry.p[3]; +	if (!is_tiffep && !is_supported_dng) { +		set_error(error, "not a supported TIFF/EP or DNG image"); +		return NULL; +	} + +	struct tiffer fullT = {}; +	if (!tiff_ep_find_main(T, &fullT)) { +		set_error(error, "could not find a main image"); +		return NULL; +	} + +	int64_t width = 0, height = 0; +	if (!tiffer_find_integer(&fullT, TIFF_ImageWidth, &width) || +		!tiffer_find_integer(&fullT, TIFF_ImageLength, &height) || +		width <= 0 || height <= 0) { +		set_error(error, "missing or invalid main image dimensions"); +		return NULL; +	} + +	struct tiff_ep_jpeg out = {}; +	if (!tiff_ep_find_jpeg(T, &out)) { +		set_error(error, "error looking for a full-size JPEG preview"); +		return NULL; +	} + +	// Nikon NEFs seem to generally have a preview above 99 percent, +	// (though some of them may not even reach 50 percent). +	// Be a bit more generous than that with our crop tolerance. +	// TODO(p): Also take into account DNG DefaultCropSize, if present. +	if (out.pixels / ((double) width * height) < 0.95) { +		set_error(error, "could not find a large enough JPEG preview"); +		return NULL; +	} + +	cairo_surface_t *surface = open_libjpeg_turbo( +		(const char *) out.jpeg, out.jpeg_length, ctx, error); +	if (!surface) +		return NULL; + +	// Note that Exif may override this later in fiv_io_open_from_data(). +	// TODO(p): Try to use the Orientation field nearest to the target IFD. +	// IFD0 just happens to be fine for Nikon NEF. +	int64_t orientation = 0; +	if (tiffer_find_integer(T, TIFF_Orientation, &orientation) && +		orientation >= 1 && orientation <= 8) { +		cairo_surface_set_user_data(surface, &fiv_io_key_orientation, +			(void *) (uintptr_t) orientation, NULL); +	} +	return surface; +} + +static cairo_surface_t * +open_tiff_ep( +	const char *data, gsize len, const FivIoOpenContext *ctx, GError **error) +{ +	// -Wunused-function, we might want to give this its own compile unit. +	(void) tiffer_real; + +	struct tiffer T = {}; +	if (!tiffer_init(&T, (const uint8_t *) data, len)) { +		set_error(error, "not a TIFF file"); +		return NULL; +	} + +	cairo_surface_t *result = NULL, *result_tail = NULL; +	while (tiffer_next_ifd(&T)) { +		if (!try_append_page( +				load_tiff_ep(&T, ctx, error), &result, &result_tail)) { +			g_clear_pointer(&result, cairo_surface_destroy); +			return NULL; +		} +		if (ctx->first_frame_only) +			break; + +		// TODO(p): Try to adjust tiffer so that this isn't necessary. +		struct tiffer_entry dummy = {}; +		while (tiffer_next_entry(&T, &dummy)) +			; +	} +	return result; +} +  // --- Optional dependencies ---------------------------------------------------  #ifdef HAVE_LIBRAW  // --------------------------------------------------------- @@ -2590,30 +2880,6 @@ open_libtiff(  	if (!tiff)  		goto fail; -	// In Nikon NEF files, IFD0 is a tiny uncompressed thumbnail with SubIFDs-- -	// two of them JPEGs, the remaining one is raw. libtiff cannot read either -	// of those better versions. -	// -	// TODO(p): If NewSubfileType is ReducedImage, and it has SubIFDs compressed -	// as old JPEG (6), decode JPEGInterchangeFormat/JPEGInterchangeFormatLength -	// with libjpeg-turbo and insert them as the starting pages. -	// -	// This is not possible with libtiff directly, because TIFFSetSubDirectory() -	// requires an ImageLength tag that's missing, and TIFFReadCustomDirectory() -	// takes a privately defined struct that cannot be omitted. -	// -	// TODO(p): Samsung Android DNGs also claim to be TIFF/EP, but use a smaller -	// uncompressed YCbCr image. Apple ProRAW uses the new JPEG Compression (7), -	// with a weird Orientation. It also uses that value for its raw data. -	uint32_t subtype = 0; -	uint16_t subifd_count = 0; -	const uint64_t *subifd_offsets = NULL; -	if (TIFFGetField(tiff, TIFFTAG_SUBFILETYPE, &subtype) && -		(subtype & FILETYPE_REDUCEDIMAGE) && -		TIFFGetField(tiff, TIFFTAG_SUBIFD, &subifd_count, &subifd_offsets) && -		subifd_count > 0 && subifd_offsets) { -	} -  	do {  		// We inform about unsupported directories, but do not fail on them.  		GError *err = NULL; @@ -2824,6 +3090,14 @@ fiv_io_open_from_data(  		surface = open_libwebp(data, len, ctx, error);  		break;  	default: +		// Try to extract full-size previews from TIFF/EP-compatible raws. +		if ((surface = open_tiff_ep(data, len, ctx, error))) +			break; +		if (error) { +			g_debug("%s", (*error)->message); +			g_clear_error(error); +		} +  #ifdef HAVE_LIBRAW  // ---------------------------------------------------------  		if ((surface = open_libraw(data, len, ctx, error)))  			break; diff --git a/tiff-tables.awk b/tiff-tables.awk index 2d93c36..29b462b 100755 --- a/tiff-tables.awk +++ b/tiff-tables.awk @@ -2,6 +2,22 @@  BEGIN {  	FS = ", *"  	print "// Generated by tiff-tables.awk. DO NOT MODIFY." +	print "" +	print "#ifndef TIFF_TABLES_CONSTANTS_ONLY" +	print "#include <stddef.h>" +	print "#include <stdint.h>" +	print "" +	print "struct tiff_value {" +	print "\tconst char *name;" +	print "\tuint16_t value;" +	print "};" +	print "" +	print "struct tiff_entry {" +	print "\tconst char *name;" +	print "\tuint16_t tag;" +	print "\tstruct tiff_value *values;" +	print "};" +	print "#endif"  }  { @@ -55,8 +71,10 @@ function flushvalues() {  function flushsection() {  	if (section) {  		flushvalues() -		print "};\n\n" allvalues "static struct tiff_entry " \ +		print "};\n\n" allvalues "#ifndef TIFF_TABLES_CONSTANTS_ONLY" +		print "static struct tiff_entry " \  			  sectionsnakecase "_entries[] = {" fields "\n\t{}\n};" +		print "#endif"  	}  } diff --git a/tiffer.h b/tiffer.h new file mode 100644 index 0000000..b4cbc5d --- /dev/null +++ b/tiffer.h @@ -0,0 +1,340 @@ +// +// tiffer.h: TIFF reading utilities +// +// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name> +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include <stdint.h> +#include <string.h> +#include <stdbool.h> + +// --- Utilities --------------------------------------------------------------- + +static uint64_t +tiffer_u64be(const uint8_t *p) +{ +	return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 | +		(uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 | +		(uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7]; +} + +static uint32_t +tiffer_u32be(const uint8_t *p) +{ +	return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static uint16_t +tiffer_u16be(const uint8_t *p) +{ +	return (uint16_t) p[0] << 8 | p[1]; +} + +static uint64_t +tiffer_u64le(const uint8_t *p) +{ +	return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 | +		(uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 | +		(uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; +} + +static uint32_t +tiffer_u32le(const uint8_t *p) +{ +	return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; +} + +static uint16_t +tiffer_u16le(const uint8_t *p) +{ +	return (uint16_t) p[1] << 8 | p[0]; +} + +// --- TIFF -------------------------------------------------------------------- +// libtiff is a mess, and the format is not particularly complicated. +// Exiv2 is senselessly copylefted, and cannot do much. +// libexif is only marginally better. +// ExifTool is too user-oriented. + +struct un { +	uint64_t (*u64) (const uint8_t *); +	uint32_t (*u32) (const uint8_t *); +	uint16_t (*u16) (const uint8_t *); +}; + +static struct un tiffer_unbe = {tiffer_u64be, tiffer_u32be, tiffer_u16be}; +static struct un tiffer_unle = {tiffer_u64le, tiffer_u32le, tiffer_u16le}; + +struct tiffer { +	struct un *un; +	const uint8_t *begin, *p, *end; +	uint16_t remaining_fields; +}; + +static bool +tiffer_u32(struct tiffer *self, uint32_t *u) +{ +	if (self->p < self->begin || self->p + 4 > self->end) +		return false; + +	*u = self->un->u32(self->p); +	self->p += 4; +	return true; +} + +static bool +tiffer_u16(struct tiffer *self, uint16_t *u) +{ +	if (self->p < self->begin || self->p + 2 > self->end) +		return false; + +	*u = self->un->u16(self->p); +	self->p += 2; +	return true; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static bool +tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len) +{ +	self->un = NULL; +	self->begin = self->p = tiff; +	self->end = tiff + len; +	self->remaining_fields = 0; + +	const uint8_t +		le[4] = {'I', 'I', 42, 0}, +		be[4] = {'M', 'M', 0, 42}; + +	if (tiff + 8 > self->end) +		return false; +	else if (!memcmp(tiff, le, sizeof le)) +		self->un = &tiffer_unle; +	else if (!memcmp(tiff, be, sizeof be)) +		self->un = &tiffer_unbe; +	else +		return false; + +	self->p = tiff + 4; +	// The first IFD needs to be read by caller explicitly, +	// even though it's required to be present by TIFF 6.0. +	return true; +} + +/// Read the next IFD in a sequence. +static bool +tiffer_next_ifd(struct tiffer *self) +{ +	// All fields from any previous IFD need to be read first. +	if (self->remaining_fields) +		return false; + +	uint32_t ifd_offset = 0; +	if (!tiffer_u32(self, &ifd_offset)) +		return false; + +	// There is nothing more to read, this chain has terminated. +	if (!ifd_offset) +		return false; + +	// Note that TIFF 6.0 requires there to be at least one entry, +	// but there is no need for us to check it. +	self->p = self->begin + ifd_offset; +	return tiffer_u16(self, &self->remaining_fields); +} + +/// Initialize a derived TIFF reader for a subIFD at the given location. +static bool +tiffer_subifd( +	const struct tiffer *self, uint32_t offset, struct tiffer *subreader) +{ +	*subreader = *self; +	subreader->p = subreader->begin + offset; +	return tiffer_u16(subreader, &subreader->remaining_fields); +} + +enum tiffer_type { +	BYTE = 1, ASCII, SHORT, LONG, RATIONAL, +	SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE, +	IFD  // This last type from TIFF Technical Note 1 isn't really used much. +}; + +static size_t +tiffer_value_size(enum tiffer_type type) +{ +	switch (type) { +	case BYTE: +	case SBYTE: +	case ASCII: +	case UNDEFINED: +		return 1; +	case SHORT: +	case SSHORT: +		return 2; +	case LONG: +	case SLONG: +	case FLOAT: +	case IFD: +		return 4; +	case RATIONAL: +	case SRATIONAL: +	case DOUBLE: +		return 8; +	default: +		return 0; +	} +} + +/// A lean iterator for values within entries. +struct tiffer_entry { +	uint16_t tag; +	enum tiffer_type type; +	// For {S,}BYTE, ASCII, UNDEFINED, use these fields directly. +	const uint8_t *p; +	uint32_t remaining_count; +}; + +static bool +tiffer_next_value(struct tiffer_entry *entry) +{ +	if (!entry->remaining_count) +		return false; + +	entry->p += tiffer_value_size(entry->type); +	entry->remaining_count--; +	return true; +} + +static bool +tiffer_integer( +	const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out) +{ +	if (!entry->remaining_count) +		return false; + +	// Somewhat excessively lenient, intended for display. +	// TIFF 6.0 only directly suggests that a reader is should accept +	// any of BYTE/SHORT/LONG for unsigned integers. +	switch (entry->type) { +	case BYTE: +	case ASCII: +	case UNDEFINED: +		*out = *entry->p; +		return true; +	case SBYTE: +		*out = (int8_t) *entry->p; +		return true; +	case SHORT: +		*out = self->un->u16(entry->p); +		return true; +	case SSHORT: +		*out = (int16_t) self->un->u16(entry->p); +		return true; +	case LONG: +	case IFD: +		*out = self->un->u32(entry->p); +		return true; +	case SLONG: +		*out = (int32_t) self->un->u32(entry->p); +		return true; +	default: +		return false; +	} +} + +static bool +tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry, +	int64_t *numerator, int64_t *denominator) +{ +	if (!entry->remaining_count) +		return false; + +	// Somewhat excessively lenient, intended for display. +	switch (entry->type) { +	case RATIONAL: +		*numerator = self->un->u32(entry->p); +		*denominator = self->un->u32(entry->p + 4); +		return true; +	case SRATIONAL: +		*numerator = (int32_t) self->un->u32(entry->p); +		*denominator = (int32_t) self->un->u32(entry->p + 4); +		return true; +	default: +		if (tiffer_integer(self, entry, numerator)) { +			*denominator = 1; +			return true; +		} +		return false; +	} +} + +static bool +tiffer_real( +	const struct tiffer *self, const struct tiffer_entry *entry, double *out) +{ +	if (!entry->remaining_count) +		return false; + +	// Somewhat excessively lenient, intended for display. +	// Assuming the host architecture uses IEEE 754. +	switch (entry->type) { +		int64_t numerator, denominator; +	case FLOAT: +		*out = *(float *) entry->p; +		return true; +	case DOUBLE: +		*out = *(double *) entry->p; +		return true; +	default: +		if (tiffer_rational(self, entry, &numerator, &denominator)) { +			*out = (double) numerator / denominator; +			return true; +		} +		return false; +	} +} + +static bool +tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry) +{ +	if (!self->remaining_fields) +		return false; + +	uint16_t type = entry->type = 0xFFFF; +	if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) || +		!tiffer_u32(self, &entry->remaining_count)) +		return false; + +	// Short values may and will be inlined, rather than pointed to. +	size_t values_size = tiffer_value_size(type) * entry->remaining_count; +	uint32_t offset = 0; +	if (values_size <= sizeof offset) { +		entry->p = self->p; +		self->p += sizeof offset; +	} else if (tiffer_u32(self, &offset)) { +		entry->p = self->begin + offset; +	} else { +		return false; +	} + +	// All entries are pre-checked not to overflow. +	if (entry->p + values_size > self->end) +		return false; + +	// Setting it at the end may provide an indication while debugging. +	entry->type = type; +	self->remaining_fields--; +	return true; +} diff --git a/tools/info.h b/tools/info.h index 28cfb36..8dcd3d2 100644 --- a/tools/info.h +++ b/tools/info.h @@ -21,348 +21,10 @@  #include <stdlib.h>  #include <string.h> -// --- Utilities --------------------------------------------------------------- - -static char * -binhex(const uint8_t *data, size_t len) -{ -	static const char *alphabet = "0123456789abcdef"; -	char *buf = calloc(1, len * 2 + 1), *p = buf; -	for (size_t i = 0; i < len; i++) { -		*p++ = alphabet[data[i] >> 4]; -		*p++ = alphabet[data[i] & 0xF]; -	} -	return buf; -} - -static uint64_t -u64be(const uint8_t *p) -{ -	return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 | -		(uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 | -		(uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7]; -} - -static uint32_t -u32be(const uint8_t *p) -{ -	return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; -} - -static uint16_t -u16be(const uint8_t *p) -{ -	return (uint16_t) p[0] << 8 | p[1]; -} - -static uint64_t -u64le(const uint8_t *p) -{ -	return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 | -		(uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 | -		(uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; -} - -static uint32_t -u32le(const uint8_t *p) -{ -	return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0]; -} - -static uint16_t -u16le(const uint8_t *p) -{ -	return (uint16_t) p[1] << 8 | p[0]; -} - -// --- TIFF -------------------------------------------------------------------- -// libtiff is a mess, and the format is not particularly complicated. -// Exiv2 is senselessly copylefted, and cannot do much. -// libexif is only marginally better. -// ExifTool is too user-oriented. - -static struct un { -	uint64_t (*u64) (const uint8_t *); -	uint32_t (*u32) (const uint8_t *); -	uint16_t (*u16) (const uint8_t *); -} unbe = {u64be, u32be, u16be}, unle = {u64le, u32le, u16le}; - -struct tiffer { -	struct un *un; -	const uint8_t *begin, *p, *end; -	uint16_t remaining_fields; -}; - -static bool -tiffer_u32(struct tiffer *self, uint32_t *u) -{ -	if (self->p < self->begin || self->p + 4 > self->end) -		return false; - -	*u = self->un->u32(self->p); -	self->p += 4; -	return true; -} - -static bool -tiffer_u16(struct tiffer *self, uint16_t *u) -{ -	if (self->p < self->begin || self->p + 2 > self->end) -		return false; - -	*u = self->un->u16(self->p); -	self->p += 2; -	return true; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static bool -tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len) -{ -	self->un = NULL; -	self->begin = self->p = tiff; -	self->end = tiff + len; -	self->remaining_fields = 0; - -	const uint8_t -		le[4] = {'I', 'I', 42, 0}, -		be[4] = {'M', 'M', 0, 42}; - -	if (tiff + 8 > self->end) -		return false; -	else if (!memcmp(tiff, le, sizeof le)) -		self->un = &unle; -	else if (!memcmp(tiff, be, sizeof be)) -		self->un = &unbe; -	else -		return false; - -	self->p = tiff + 4; -	// The first IFD needs to be read by caller explicitly, -	// even though it's required to be present by TIFF 6.0. -	return true; -} - -/// Read the next IFD in a sequence. -static bool -tiffer_next_ifd(struct tiffer *self) -{ -	// All fields from any previous IFD need to be read first. -	if (self->remaining_fields) -		return false; - -	uint32_t ifd_offset = 0; -	if (!tiffer_u32(self, &ifd_offset)) -		return false; - -	// There is nothing more to read, this chain has terminated. -	if (!ifd_offset) -		return false; - -	// Note that TIFF 6.0 requires there to be at least one entry, -	// but there is no need for us to check it. -	self->p = self->begin + ifd_offset; -	return tiffer_u16(self, &self->remaining_fields); -} - -/// Initialize a derived TIFF reader for a subIFD at the given location. -static bool -tiffer_subifd(struct tiffer *self, uint32_t offset, struct tiffer *subreader) -{ -	*subreader = *self; -	subreader->p = subreader->begin + offset; -	return tiffer_u16(subreader, &subreader->remaining_fields); -} - -enum tiffer_type { -	BYTE = 1, ASCII, SHORT, LONG, RATIONAL, -	SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE, -	IFD  // This last type from TIFF Technical Note 1 isn't really used much. -}; - -static size_t -tiffer_value_size(enum tiffer_type type) -{ -	switch (type) { -	case BYTE: -	case SBYTE: -	case ASCII: -	case UNDEFINED: -		return 1; -	case SHORT: -	case SSHORT: -		return 2; -	case LONG: -	case SLONG: -	case FLOAT: -	case IFD: -		return 4; -	case RATIONAL: -	case SRATIONAL: -	case DOUBLE: -		return 8; -	default: -		return 0; -	} -} - -/// A lean iterator for values within entries. -struct tiffer_entry { -	uint16_t tag; -	enum tiffer_type type; -	// For {S,}BYTE, ASCII, UNDEFINED, use these fields directly. -	const uint8_t *p; -	uint32_t remaining_count; -}; - -static bool -tiffer_next_value(struct tiffer_entry *entry) -{ -	if (!entry->remaining_count) -		return false; - -	entry->p += tiffer_value_size(entry->type); -	entry->remaining_count--; -	return true; -} - -static bool -tiffer_integer( -	const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out) -{ -	if (!entry->remaining_count) -		return false; - -	// Somewhat excessively lenient, intended for display. -	// TIFF 6.0 only directly suggests that a reader is should accept -	// any of BYTE/SHORT/LONG for unsigned integers. -	switch (entry->type) { -	case BYTE: -	case ASCII: -	case UNDEFINED: -		*out = *entry->p; -		return true; -	case SBYTE: -		*out = (int8_t) *entry->p; -		return true; -	case SHORT: -		*out = self->un->u16(entry->p); -		return true; -	case SSHORT: -		*out = (int16_t) self->un->u16(entry->p); -		return true; -	case LONG: -	case IFD: -		*out = self->un->u32(entry->p); -		return true; -	case SLONG: -		*out = (int32_t) self->un->u32(entry->p); -		return true; -	default: -		return false; -	} -} - -static bool -tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry, -	int64_t *numerator, int64_t *denominator) -{ -	if (!entry->remaining_count) -		return false; - -	// Somewhat excessively lenient, intended for display. -	switch (entry->type) { -	case RATIONAL: -		*numerator = self->un->u32(entry->p); -		*denominator = self->un->u32(entry->p + 4); -		return true; -	case SRATIONAL: -		*numerator = (int32_t) self->un->u32(entry->p); -		*denominator = (int32_t) self->un->u32(entry->p + 4); -		return true; -	default: -		if (tiffer_integer(self, entry, numerator)) { -			*denominator = 1; -			return true; -		} -		return false; -	} -} - -static bool -tiffer_real( -	const struct tiffer *self, const struct tiffer_entry *entry, double *out) -{ -	if (!entry->remaining_count) -		return false; - -	// Somewhat excessively lenient, intended for display. -	// Assuming the host architecture uses IEEE 754. -	switch (entry->type) { -		int64_t numerator, denominator; -	case FLOAT: -		*out = *(float *) entry->p; -		return true; -	case DOUBLE: -		*out = *(double *) entry->p; -		return true; -	default: -		if (tiffer_rational(self, entry, &numerator, &denominator)) { -			*out = (double) numerator / denominator; -			return true; -		} -		return false; -	} -} - -static bool -tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry) -{ -	if (!self->remaining_fields) -		return false; - -	uint16_t type = entry->type = 0xFFFF; -	if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) || -		!tiffer_u32(self, &entry->remaining_count)) -		return false; - -	// Short values may and will be inlined, rather than pointed to. -	size_t values_size = tiffer_value_size(type) * entry->remaining_count; -	uint32_t offset = 0; -	if (values_size <= sizeof offset) { -		entry->p = self->p; -		self->p += sizeof offset; -	} else if (tiffer_u32(self, &offset)) { -		entry->p = self->begin + offset; -	} else { -		return false; -	} - -	// All entries are pre-checked not to overflow. -	if (entry->p + values_size > self->end) -		return false; - -	// Setting it at the end may provide an indication while debugging. -	entry->type = type; -	self->remaining_fields--; -	return true; -} - -// --- TIFF/Exif tags ---------------------------------------------------------- - -struct tiff_value { -	const char *name; -	uint16_t value; -}; - -struct tiff_entry { -	const char *name; -	uint16_t tag; -	struct tiff_value *values; -}; +// --- TIFF/Exif ---------------------------------------------------------------  #include "tiff-tables.h" +#include "tiffer.h"  // TODO(p): Consider if these can't be inlined into `tiff_entries`.  static struct { @@ -376,6 +38,27 @@ static struct {  	{}  }; +// --- Utilities --------------------------------------------------------------- + +#define u64be tiffer_u64be +#define u32be tiffer_u32be +#define u16be tiffer_u16be +#define u64le tiffer_u64le +#define u32le tiffer_u32le +#define u16le tiffer_u16le + +static char * +binhex(const uint8_t *data, size_t len) +{ +	static const char *alphabet = "0123456789abcdef"; +	char *buf = calloc(1, len * 2 + 1), *p = buf; +	for (size_t i = 0; i < len; i++) { +		*p++ = alphabet[data[i] >> 4]; +		*p++ = alphabet[data[i] & 0xF]; +	} +	return buf; +} +  // --- Analysis ----------------------------------------------------------------  static jv  | 
